### A Revision to pandas DataFrames

In [1]:
import pandas as pd
import numpy as np

In [2]:
array_a = np.array([[3, 2, 1], [6, 3, 2]])
array_a

array([[3, 2, 1],
       [6, 3, 2]])

In [3]:
pd.DataFrame(array_a)

Unnamed: 0,0,1,2
0,3,2,1
1,6,3,2


In [4]:
type(pd.DataFrame(array_a))

pandas.core.frame.DataFrame

In [5]:
df = pd.DataFrame(array_a, columns = ['Column 1', 'Column 2', 'Column 3'])
df

Unnamed: 0,Column 1,Column 2,Column 3
0,3,2,1
1,6,3,2


In [6]:
df = pd.DataFrame(array_a, columns = ['Column 1', 'Column 2', 'Column 3'], index = ['Row 1', 'Row 2'])
df

Unnamed: 0,Column 1,Column 2,Column 3
Row 1,3,2,1
Row 2,6,3,2


In [7]:
# Can we change to "StringID"?
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [8]:
type(lending_co_data)

pandas.core.frame.DataFrame

### Common Attributes for Working with DataFrames

In [9]:
# Can we change to "StringID"?
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [10]:
lending_co_data.index

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
           dtype='int64', name='LoanID', length=1043)

In [11]:
type(lending_co_data.index)

pandas.core.indexes.numeric.Int64Index

In [12]:
lending_co_data.columns

Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')

In [13]:
type(lending_co_data.columns)

pandas.core.indexes.base.Index

In [14]:
lending_co_data.axes

[Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
             ...
             1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
            dtype='int64', name='LoanID', length=1043),
 Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
        'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
        'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
       dtype='object')]

In [15]:
lending_co_data.dtypes

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object

In [16]:
lending_co_data.values

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [17]:
type(lending_co_data.values)

numpy.ndarray

In [18]:
lending_co_data.to_numpy()

AttributeError: 'DataFrame' object has no attribute 'to_numpy'

In [None]:
type(lending_co_data.to_numpy())

In [19]:
lending_co_data.shape

(1043, 14)

In [20]:
len(lending_co_data.columns)

14

In [21]:
location_data = pd.read_csv('Lending-company.csv', usecols = ['Location'], squeeze = True)
location_data

0         Location 3
1         Location 6
2         Location 8
3        Location 26
4        Location 34
5        Location 34
6        Location 25
7        Location 46
8       Location 156
9        Location 21
10       Location 13
11       Location 25
12      Location 579
13      Location 602
14       Location 10
15       Location 44
16       Location 25
17       Location 44
18       Location 26
19       Location 30
20       Location 48
21      Location 196
22       Location 64
23       Location 91
24       Location 62
25       Location 75
26       Location 26
27       Location 42
28      Location 233
29       Location 95
            ...     
1013     Location 44
1014     Location 46
1015     Location 69
1016     Location 44
1017     Location 23
1018      Location 8
1019     Location 57
1020    Location 576
1021      Location 6
1022     Location 25
1023     Location 22
1024     Location 47
1025     Location 84
1026    Location 139
1027    Location 113
1028    Location 105
1029      Loc

In [22]:
type(location_data)

pandas.core.series.Series

In [23]:
location_data.shape

(1043,)

### Data Selection in pandas DataFrames

In [27]:
import pandas as pd

In [28]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [29]:
lending_co_data.Product

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
LoanID_6       Product A
LoanID_7       Product A
LoanID_8       Product D
LoanID_9       Product A
LoanID_10      Product C
LoanID_11      Product B
LoanID_12      Product D
LoanID_13      Product B
LoanID_14      Product A
LoanID_15      Product A
LoanID_16      Product B
LoanID_17      Product A
LoanID_18      Product B
LoanID_19      Product B
LoanID_20      Product E
LoanID_21      Product B
LoanID_22      Product A
LoanID_23      Product A
LoanID_24      Product C
LoanID_25      Product D
LoanID_26      Product A
LoanID_27      Product A
LoanID_28      Product F
LoanID_29      Product D
LoanID_30      Product B
                 ...    
LoanID_1014    Product A
LoanID_1015    Product B
LoanID_1016    Product A
LoanID_1017    Product E
LoanID_1018    Product E
LoanID_1019    Product B
LoanID_1020    Product A
LoanID_1021    Product A
LoanID_1022    P

In [30]:
lending_co_data.Location

StringID
LoanID_1         Location 3
LoanID_2         Location 6
LoanID_3         Location 8
LoanID_4        Location 26
LoanID_5        Location 34
LoanID_6        Location 34
LoanID_7        Location 25
LoanID_8        Location 46
LoanID_9       Location 156
LoanID_10       Location 21
LoanID_11       Location 13
LoanID_12       Location 25
LoanID_13      Location 579
LoanID_14      Location 602
LoanID_15       Location 10
LoanID_16       Location 44
LoanID_17       Location 25
LoanID_18       Location 44
LoanID_19       Location 26
LoanID_20       Location 30
LoanID_21       Location 48
LoanID_22      Location 196
LoanID_23       Location 64
LoanID_24       Location 91
LoanID_25       Location 62
LoanID_26       Location 75
LoanID_27       Location 26
LoanID_28       Location 42
LoanID_29      Location 233
LoanID_30       Location 95
                   ...     
LoanID_1014     Location 44
LoanID_1015     Location 46
LoanID_1016     Location 69
LoanID_1017     Location 44
LoanID_1018

In [31]:
lending_co_data['Product']

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
LoanID_6       Product A
LoanID_7       Product A
LoanID_8       Product D
LoanID_9       Product A
LoanID_10      Product C
LoanID_11      Product B
LoanID_12      Product D
LoanID_13      Product B
LoanID_14      Product A
LoanID_15      Product A
LoanID_16      Product B
LoanID_17      Product A
LoanID_18      Product B
LoanID_19      Product B
LoanID_20      Product E
LoanID_21      Product B
LoanID_22      Product A
LoanID_23      Product A
LoanID_24      Product C
LoanID_25      Product D
LoanID_26      Product A
LoanID_27      Product A
LoanID_28      Product F
LoanID_29      Product D
LoanID_30      Product B
                 ...    
LoanID_1014    Product A
LoanID_1015    Product B
LoanID_1016    Product A
LoanID_1017    Product E
LoanID_1018    Product E
LoanID_1019    Product B
LoanID_1020    Product A
LoanID_1021    Product A
LoanID_1022    P

In [32]:
lending_co_data['Location']

StringID
LoanID_1         Location 3
LoanID_2         Location 6
LoanID_3         Location 8
LoanID_4        Location 26
LoanID_5        Location 34
LoanID_6        Location 34
LoanID_7        Location 25
LoanID_8        Location 46
LoanID_9       Location 156
LoanID_10       Location 21
LoanID_11       Location 13
LoanID_12       Location 25
LoanID_13      Location 579
LoanID_14      Location 602
LoanID_15       Location 10
LoanID_16       Location 44
LoanID_17       Location 25
LoanID_18       Location 44
LoanID_19       Location 26
LoanID_20       Location 30
LoanID_21       Location 48
LoanID_22      Location 196
LoanID_23       Location 64
LoanID_24       Location 91
LoanID_25       Location 62
LoanID_26       Location 75
LoanID_27       Location 26
LoanID_28       Location 42
LoanID_29      Location 233
LoanID_30       Location 95
                   ...     
LoanID_1014     Location 44
LoanID_1015     Location 46
LoanID_1016     Location 69
LoanID_1017     Location 44
LoanID_1018

In [33]:
lending_co_data['location']

KeyError: 'location'

In [34]:
type(lending_co_data['Location'])

pandas.core.series.Series

In [35]:
lending_co_data[['Location']]

Unnamed: 0_level_0,Location
StringID,Unnamed: 1_level_1
LoanID_1,Location 3
LoanID_2,Location 6
LoanID_3,Location 8
LoanID_4,Location 26
LoanID_5,Location 34
LoanID_6,Location 34
LoanID_7,Location 25
LoanID_8,Location 46
LoanID_9,Location 156
LoanID_10,Location 21


In [36]:
type(lending_co_data[['Location']])

pandas.core.frame.DataFrame

In [37]:
lending_co_data[['Location', 'Product']].head()

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B


In [38]:
prod_loc = ['Location', 'Product']
lending_co_data[prod_loc].head()

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B


In [39]:
lending_co_data['Product', 'Location']

KeyError: ('Product', 'Location')

### Data Selection - Indexing Data with .iloc[]

In [40]:
import pandas as pd

In [41]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [44]:
lending_co_data[1]

KeyError: 1

In [45]:
lending_co_data[0,1]

KeyError: (0, 1)

In [46]:
lending_co_data['Product']

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
LoanID_6       Product A
LoanID_7       Product A
LoanID_8       Product D
LoanID_9       Product A
LoanID_10      Product C
LoanID_11      Product B
LoanID_12      Product D
LoanID_13      Product B
LoanID_14      Product A
LoanID_15      Product A
LoanID_16      Product B
LoanID_17      Product A
LoanID_18      Product B
LoanID_19      Product B
LoanID_20      Product E
LoanID_21      Product B
LoanID_22      Product A
LoanID_23      Product A
LoanID_24      Product C
LoanID_25      Product D
LoanID_26      Product A
LoanID_27      Product A
LoanID_28      Product F
LoanID_29      Product D
LoanID_30      Product B
                 ...    
LoanID_1014    Product A
LoanID_1015    Product B
LoanID_1016    Product A
LoanID_1017    Product E
LoanID_1018    Product E
LoanID_1019    Product B
LoanID_1020    Product A
LoanID_1021    Product A
LoanID_1022    P

In [47]:
lending_co_data.iloc[1]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [48]:
lending_co_data.iloc[1, 3]

'Location 6'

In [49]:
lending_co_data.iloc[1,:]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [50]:
lending_co_data.iloc[:, 3]

StringID
LoanID_1         Location 3
LoanID_2         Location 6
LoanID_3         Location 8
LoanID_4        Location 26
LoanID_5        Location 34
LoanID_6        Location 34
LoanID_7        Location 25
LoanID_8        Location 46
LoanID_9       Location 156
LoanID_10       Location 21
LoanID_11       Location 13
LoanID_12       Location 25
LoanID_13      Location 579
LoanID_14      Location 602
LoanID_15       Location 10
LoanID_16       Location 44
LoanID_17       Location 25
LoanID_18       Location 44
LoanID_19       Location 26
LoanID_20       Location 30
LoanID_21       Location 48
LoanID_22      Location 196
LoanID_23       Location 64
LoanID_24       Location 91
LoanID_25       Location 62
LoanID_26       Location 75
LoanID_27       Location 26
LoanID_28       Location 42
LoanID_29      Location 233
LoanID_30       Location 95
                   ...     
LoanID_1014     Location 44
LoanID_1015     Location 46
LoanID_1016     Location 69
LoanID_1017     Location 44
LoanID_1018

In [51]:
type(lending_co_data.iloc[1, 3])

str

In [52]:
type(lending_co_data.iloc[1, :])

pandas.core.series.Series

In [53]:
type(lending_co_data.iloc[:, 3])

pandas.core.series.Series

In [55]:
lending_co_data.iloc[[1, 3], :]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active


In [56]:
lending_co_data.iloc[:, [3, 1]]

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B
LoanID_6,Location 34,Product A
LoanID_7,Location 25,Product A
LoanID_8,Location 46,Product D
LoanID_9,Location 156,Product A
LoanID_10,Location 21,Product C


### Data Selection - Indexing Data with .loc[]

In [57]:
import pandas as pd

In [58]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active
LoanID_7,7,Product A,Male,Location 25,,21250.0,04/07/2020,2200,55,365,1951,3176,18701,Active
LoanID_8,8,Product D,Male,Location 46,Region 5,17600.0,24/04/2018,2200,45,365,4071,4056,16351,Active
LoanID_9,9,Product A,Male,Location 156,Region 6,23250.0,03/09/2019,5000,55,365,5850,7375,21250,
LoanID_10,10,Product C,Male,Location 21,Region 9,21250.0,25/07/2020,2200,55,365,2051,3176,18351,Active


In [59]:
lending_co_data.loc['LoanID_3']

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice             16600
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [60]:
lending_co_data.loc['LoanID_3', :]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice             16600
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [61]:
lending_co_data.loc['LoanID_3', 'Region']

'Region 3'

In [62]:
lending_co_data['Location']

StringID
LoanID_1         Location 3
LoanID_2         Location 6
LoanID_3         Location 8
LoanID_4        Location 26
LoanID_5        Location 34
LoanID_6        Location 34
LoanID_7        Location 25
LoanID_8        Location 46
LoanID_9       Location 156
LoanID_10       Location 21
LoanID_11       Location 13
LoanID_12       Location 25
LoanID_13      Location 579
LoanID_14      Location 602
LoanID_15       Location 10
LoanID_16       Location 44
LoanID_17       Location 25
LoanID_18       Location 44
LoanID_19       Location 26
LoanID_20       Location 30
LoanID_21       Location 48
LoanID_22      Location 196
LoanID_23       Location 64
LoanID_24       Location 91
LoanID_25       Location 62
LoanID_26       Location 75
LoanID_27       Location 26
LoanID_28       Location 42
LoanID_29      Location 233
LoanID_30       Location 95
                   ...     
LoanID_1014     Location 44
LoanID_1015     Location 46
LoanID_1016     Location 69
LoanID_1017     Location 44
LoanID_1018

In [63]:
lending_co_data.loc['Location']

KeyError: 'the label [Location] is not in the [index]'

In [64]:
lending_co_data.loc[:, 'Location']

StringID
LoanID_1         Location 3
LoanID_2         Location 6
LoanID_3         Location 8
LoanID_4        Location 26
LoanID_5        Location 34
LoanID_6        Location 34
LoanID_7        Location 25
LoanID_8        Location 46
LoanID_9       Location 156
LoanID_10       Location 21
LoanID_11       Location 13
LoanID_12       Location 25
LoanID_13      Location 579
LoanID_14      Location 602
LoanID_15       Location 10
LoanID_16       Location 44
LoanID_17       Location 25
LoanID_18       Location 44
LoanID_19       Location 26
LoanID_20       Location 30
LoanID_21       Location 48
LoanID_22      Location 196
LoanID_23       Location 64
LoanID_24       Location 91
LoanID_25       Location 62
LoanID_26       Location 75
LoanID_27       Location 26
LoanID_28       Location 42
LoanID_29      Location 233
LoanID_30       Location 95
                   ...     
LoanID_1014     Location 44
LoanID_1015     Location 46
LoanID_1016     Location 69
LoanID_1017     Location 44
LoanID_1018

In [65]:
lending_co_data.loc[:, 'Locations']

KeyError: 'the label [Locations] is not in the [columns]'

### A Few Comments on Using .loc[] and .iloc[]

In [66]:
import pandas as pd

In [67]:
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()
lending_co_data

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active
6,LoanID_6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active
7,LoanID_7,Product A,Male,Location 25,,21250.0,04/07/2020,2200,55,365,1951,3176,18701,Active
8,LoanID_8,Product D,Male,Location 46,Region 5,17600.0,24/04/2018,2200,45,365,4071,4056,16351,Active
9,LoanID_9,Product A,Male,Location 156,Region 6,23250.0,03/09/2019,5000,55,365,5850,7375,21250,
10,LoanID_10,Product C,Male,Location 21,Region 9,21250.0,25/07/2020,2200,55,365,2051,3176,18351,Active


In [68]:
lending_co_data.shape

(1043, 14)

In [69]:
lending_co_data.iloc[1043, :]

IndexError: single positional indexer is out-of-bounds

In [70]:
lending_co_data.iloc[10000, :]

IndexError: single positional indexer is out-of-bounds

In [71]:
lending_co_data.iloc[:, 14]

IndexError: single positional indexer is out-of-bounds

In [72]:
lending_co_data.iloc[:, 13]

LoanID
1                 Active
2                 Active
3                    NaN
4                 Active
5                 Active
6                 Active
7                 Active
8                 Active
9                    NaN
10                Active
11                Active
12                Active
13                Active
14                Active
15                Active
16                Active
17                Active
18                Active
19                Active
20                Active
21                Active
22                Active
23                Active
24                Active
25                Active
26                Active
27                Active
28                Active
29                Active
30                Active
              ...       
1014    Finished Payment
1015    Finished Payment
1016    Finished Payment
1017    Finished Payment
1018    Finished Payment
1019    Finished Payment
1020    Finished Payment
1021    Finished Payment
1022    Finished P

In [73]:
lending_co_data.iloc[:, -1]

LoanID
1                 Active
2                 Active
3                    NaN
4                 Active
5                 Active
6                 Active
7                 Active
8                 Active
9                    NaN
10                Active
11                Active
12                Active
13                Active
14                Active
15                Active
16                Active
17                Active
18                Active
19                Active
20                Active
21                Active
22                Active
23                Active
24                Active
25                Active
26                Active
27                Active
28                Active
29                Active
30                Active
              ...       
1014    Finished Payment
1015    Finished Payment
1016    Finished Payment
1017    Finished Payment
1018    Finished Payment
1019    Finished Payment
1020    Finished Payment
1021    Finished Payment
1022    Finished P

In [74]:
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [75]:
# incorrect (single indexer required)
lending_co_data['TotalPrice'].iloc[0, :]

IndexingError: Too many indexers

In [76]:
lending_co_data['TotalPrice'].iloc[0]

17600.0

In [77]:
# avoid for index columns composed of labels/integers
lending_co_data['TotalPrice'][0]

KeyError: 0

In [78]:
# AVOID
lending_co_data['TotalPrice'][1]

17600.0

In [79]:
lending_co_data['TotalPrice'].loc[1]

17600.0

In [80]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [81]:
lending_co_data['TotalPrice'].iloc[0]

17600.0

In [82]:
lending_co_data['TotalPrice'].loc['LoanID_1']

17600.0

In [83]:
# AVOID
lending_co_data['TotalPrice'][0]

17600.0

In [84]:
# AVOID
lending_co_data['TotalPrice'][1]

nan

In [85]:
# AVOID
lending_co_data['TotalPrice']['LoanID_1']

17600.0

In [86]:
# AVOID
lending_co_data.iloc[0][5]

17600.0

In [87]:
lending_co_data.iloc[0, 5]

17600.0

In [88]:
# AVOID
lending_co_data.iloc[[0, 5]]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [89]:
lending_co_data.iloc[[0, 5], :]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [90]:
# AVOID
lending_co_data['TotalPrice']['LoanID_1']

17600.0

In [91]:
lending_co_data.loc['LoanID_1', 'TotalPrice']

17600.0

In [92]:
# AVOID
lending_co_data.loc[['LoanID_1', 'LoanID_6']]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [93]:
lending_co_data.loc[['LoanID_1', 'LoanID_6'], :]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [94]:
# AVOID
lending_co_data.TotalPrice['LoanID_1']

17600.0

In [95]:
# AVOID
lending_co_data['TotalPrice'].iloc[[0, 5]]

StringID
LoanID_1    17600.0
LoanID_6        NaN
Name: TotalPrice, dtype: float64

In [96]:
lending_co_data.loc[:, 'TotalPrice'].iloc[[0,5]]

StringID
LoanID_1    17600.0
LoanID_6        NaN
Name: TotalPrice, dtype: float64