## A Revision to Pandas DataFrame

In [1]:
# DataFrame - a tabular structure that contains multiple observations for a given set of variables
# It is a kind of 2-D array: a DataFrame organizes the data into labeled rows and columns

In [2]:
import pandas as pd
import numpy as np

In [3]:
array_a = np.array([[3,2,1],[6,3,2]])
array_a

array([[3, 2, 1],
       [6, 3, 2]])

In [4]:
pd.DataFrame(array_a)

Unnamed: 0,0,1,2
0,3,2,1
1,6,3,2


In [5]:
type(pd.DataFrame(array_a))

pandas.core.frame.DataFrame

In [6]:
df = pd.DataFrame(array_a, columns = ['Column 1', 'Column 2', 'Column 3'])
df

Unnamed: 0,Column 1,Column 2,Column 3
0,3,2,1
1,6,3,2


In [7]:
df = pd.DataFrame(array_a, columns = ['Column 1', 'Column 2', 'Column 3'], index = ['Row 1', 'Row 2'])
df

Unnamed: 0,Column 1,Column 2,Column 3
Row 1,3,2,1
Row 2,6,3,2


In [8]:
# index_col picks the CSV column used as the row index; 'StringID' works here as well
# (the .iloc[] / .loc[] sections below reload the file that way).
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()  # work on a copy so the frame as loaded stays available in `data`
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [9]:
type(lending_co_data)

pandas.core.frame.DataFrame

## Common Attributes for Working with DataFrames

In [10]:
lending_co_data.index

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
           dtype='int64', name='LoanID', length=1043)

In [11]:
type(lending_co_data.index)

pandas.core.indexes.numeric.Int64Index

In [12]:
lending_co_data.columns

Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')

In [13]:
type(lending_co_data.columns)

pandas.core.indexes.base.Index

In [14]:
lending_co_data.axes

[Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
             ...
             1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
            dtype='int64', name='LoanID', length=1043),
 Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
        'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
        'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
       dtype='object')]

In [15]:
lending_co_data.dtypes

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object

In [16]:
lending_co_data.values

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [17]:
type(lending_co_data.values)

numpy.ndarray

In [18]:
lending_co_data.to_numpy()

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [19]:
type(lending_co_data.to_numpy())

numpy.ndarray

In [20]:
lending_co_data.shape

(1043, 14)

In [21]:
len(lending_co_data.columns)

14

## Data Selection in Pandas DataFrames

In [22]:
# Data selection (or subset selection) in a pandas DataFrame means extracting elements, rows, columns, or subsets from such an object.
# Data selection allows us to work on just a portion of a dataset.

# One way to select data is INDEXING.
# Indexing - using one or both types of index a DataFrame has -
# the row index and the column index - to access or select specific parts of the data

In [23]:
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [24]:
lending_co_data.Product

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [25]:
lending_co_data.Location

LoanID
1        Location 3
2        Location 6
3        Location 8
4       Location 26
5       Location 34
           ...     
1039    Location 73
1040    Location 82
1041    Location 11
1042    Location 26
1043    Location 94
Name: Location, Length: 1043, dtype: object

In [26]:
# Good practice: prefer bracket notation over attribute access (lending_co_data.Product).
# It also works when a column name contains white space or other characters that are not
# valid in a Python identifier, where attribute access would fail.
lending_co_data['Product']

LoanID
1       Product B
2       Product D
3       Product B
4       Product A
5       Product B
          ...    
1039    Product B
1040    Product A
1041    Product A
1042    Product B
1043    Product A
Name: Product, Length: 1043, dtype: object

In [27]:
type(lending_co_data['Location'])

pandas.core.series.Series

In [28]:
lending_co_data[['Location']]

Unnamed: 0_level_0,Location
LoanID,Unnamed: 1_level_1
1,Location 3
2,Location 6
3,Location 8
4,Location 26
5,Location 34
...,...
1039,Location 73
1040,Location 82
1041,Location 11
1042,Location 26


In [29]:
type(lending_co_data[['Location']])

pandas.core.frame.DataFrame

In [30]:
lending_co_data[['Location', 'Product']].head()

Unnamed: 0_level_0,Location,Product
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Location 3,Product B
2,Location 6,Product D
3,Location 8,Product B
4,Location 26,Product A
5,Location 34,Product B


In [31]:
prod_loc = ['Location', 'Product']
lending_co_data[prod_loc].head()

Unnamed: 0_level_0,Location,Product
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Location 3,Product B
2,Location 6,Product D
3,Location 8,Product B
4,Location 26,Product A
5,Location 34,Product B


In [32]:
# This raises a KeyError: 'Product', 'Location' without the inner list brackets is a tuple,
# which pandas looks up as ONE column label - and no such column exists.
# To select several columns, pass a list: lending_co_data[['Product', 'Location']]
lending_co_data['Product', 'Location']

KeyError: ('Product', 'Location')

## Data Selection - Indexing with .iloc[]

In [33]:
# .iloc[] is an indexer (accessor) for purely integer-location based indexing, i.e. selection by position
# Strictly implicit, integer-location (position) based indexing
# .iloc[] has been programmed to deliver the desired portion of the dataset whether we provide one or two location specifiers

In [34]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [35]:
# Plain [] with a scalar looks for a COLUMN labeled 1, not for a row position - hence the KeyError
lending_co_data[1]

KeyError: 1

In [36]:
lending_co_data.iloc[1] # Returns the 2nd row of the dataset (position 1); syntax: .iloc[row(s), column(s)]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [37]:
lending_co_data.iloc[1,3] # Column specifier: 3 refers to the 4th column, so this is the value in the 2nd row, 4th column

'Location 6'

In [38]:
lending_co_data.iloc[1,:]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [39]:
lending_co_data.iloc[2,:]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [40]:
lending_co_data.iloc[:,3]

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

In [41]:
type(lending_co_data.iloc[1,3])

str

In [42]:
type(lending_co_data.iloc[1,:])

pandas.core.series.Series

In [43]:
type(lending_co_data.iloc[:,3])

pandas.core.series.Series

In [44]:
lending_co_data.iloc[[1, 3], :]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active


In [45]:
type(lending_co_data.iloc[[1, 3], :])

pandas.core.frame.DataFrame

In [46]:
lending_co_data.iloc[:,[1, 3]] # All rows, 2nd and 4th columns

Unnamed: 0_level_0,Product,Location
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Product B,Location 3
LoanID_2,Product D,Location 6
LoanID_3,Product B,Location 8
LoanID_4,Product A,Location 26
LoanID_5,Product B,Location 34
...,...,...
LoanID_1039,Product B,Location 73
LoanID_1040,Product A,Location 82
LoanID_1041,Product A,Location 11
LoanID_1042,Product B,Location 26


## Data selection - Indexing with .loc[]

In [47]:
# .loc[] indexer = .loc[] accessor
# Sub-select information from a DataFrame by referring to its index labels
# .loc[] has been designed to let you take advantage of the explicit index and column labels of your data table

In [48]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [49]:
lending_co_data.loc['LoanID_3']

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [50]:
lending_co_data.loc['LoanID_3', :]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [51]:
lending_co_data.loc['LoanID_3', 'Region']

'Region 3'

In [52]:
lending_co_data.loc[:,'Location']

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

## A Few Comments on Using .loc[] and .iloc[]

In [53]:
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [54]:
lending_co_data.shape

(1043, 14)

In [55]:
# Raises an IndexError: positions are counted from 0, so with 1043 rows the valid row positions are 0 to 1042
lending_co_data.iloc[1043, :]

IndexError: single positional indexer is out-of-bounds

In [56]:
lending_co_data.iloc[:, 13]

LoanID
1                 Active
2                 Active
3                    NaN
4                 Active
5                 Active
              ...       
1039    Finished Payment
1040    Finished Payment
1041    Finished Payment
1042    Finished Payment
1043    Finished Payment
Name: LoanStatus, Length: 1043, dtype: object

In [57]:
# Works: a selected column is a one-dimensional Series, so .iloc[] takes a single
# positional indexer here (position 0 = first row)
lending_co_data['TotalPrice'].iloc[0]

17600.0

In [58]:
# Avoid plain [] on a Series whose index consists of integers: the 0 is treated as a LABEL,
# and the LoanID index starts at 1, so this raises a KeyError
lending_co_data['TotalPrice'][0]

KeyError: 0

In [59]:
lending_co_data['TotalPrice'].loc[1]

17600.0

In [60]:
lending_co_data['TotalPrice'].iloc[0]

17600.0

In [61]:
data = pd.read_csv('Lending-company.csv', index_col = 'StringID')
lending_co_data = data.copy()
lending_co_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [62]:
lending_co_data['TotalPrice'].loc['LoanID_1']

17600.0

In [63]:
# AVOID: the index holds strings here, so the integer 0 silently falls back to a position;
# whether [] means label or position then depends on the index type (compare the KeyError
# with the integer LoanID index above). Use .iloc[0] instead.
lending_co_data['TotalPrice'][0]

17600.0

In [64]:
# AVOID: chained indexing (column first, then row label); prefer a single .loc[row, column] call
lending_co_data['TotalPrice']['LoanID_1']

17600.0

In [65]:
# AVOID: two separate indexing steps (row first, then position 5 within that row);
# a single .iloc[row, column] call states both specifiers at once
lending_co_data.iloc[0][5]

17600.0

In [66]:
lending_co_data.iloc[0, 5] # Better way of using iloc

17600.0

In [67]:
# AVOID: without a column specifier it is less obvious that [0, 5] are two ROW positions
lending_co_data.iloc[[0, 5]]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [68]:
lending_co_data.iloc[[0, 5], :] # Clear way to deliver the output

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [69]:
# AVOID: chained indexing; the single call .loc['LoanID_1', 'TotalPrice'] is clearer
lending_co_data['TotalPrice']['LoanID_1']

17600.0

In [70]:
lending_co_data.loc['LoanID_1', 'TotalPrice']

17600.0

In [71]:
# AVOID: only row labels are given; spell out the column specifier as well to make the intent explicit
lending_co_data.loc[['LoanID_1', 'LoanID_6']]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [72]:
lending_co_data.loc[['LoanID_1', 'LoanID_6'], :]  # Clear way to deliver the output

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active


In [73]:
# AVOID: attribute access to the column combined with chained indexing on the result
lending_co_data.TotalPrice['LoanID_1']

17600.0

In [74]:
# AVOID: plain [] followed by .iloc[]; select the column explicitly with .loc[:, 'TotalPrice'] first
lending_co_data['TotalPrice'].iloc[[0, 5]]

StringID
LoanID_1    17600.0
LoanID_6        NaN
Name: TotalPrice, dtype: float64

In [75]:
lending_co_data.loc[:, 'TotalPrice'].iloc[[0,5]]

StringID
LoanID_1    17600.0
LoanID_6        NaN
Name: TotalPrice, dtype: float64