## Pandas

Deal with missing data/ Import & Export / Combine data
<hr>
To preserve data consistency, a Series object (and each column of a DataFrame) must store values of a single data type.

In [1]:
import pandas as pd
import numpy as np

## Pandas series 

All data in a Series belongs to a single data type.

In [2]:
products = ['A', 'B', 'C', 'D']

In [3]:
type(products)

list

In [4]:
products_categories = pd.Series(products)

In [5]:
print(products_categories)

0    A
1    B
2    C
3    D
dtype: object


In [6]:
type(products_categories)

pandas.core.series.Series

In [7]:
import numpy as np  # NOTE(review): redundant - numpy is already imported as np in the first cell


In [8]:
# Five evenly spaced integers: 10, 20, 30, 40, 50 (stop value 60 is exclusive).
array_a = np.arange(10, 60, 10)
array_a

array([10, 20, 30, 40, 50])

In [9]:
series_a = pd.Series(array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int32

## Working with attributes in Python

In [10]:
series = pd.Series([1,2,3,4])

In [11]:
series.dtype

dtype('int64')

In [12]:
series.size

4

In [13]:
products_categories.dtype

dtype('O')

In [14]:
products_categories.name

In [15]:
products_categories.name = "Product categories"

In [16]:
products_categories.name

'Product categories'

## Using an index in Python: index allows fast retrieval

In [17]:
price_per_product = {'Product A': 22250, 'Product B': 16600, 'Product C': 15600}
price_per_product

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [18]:
# Building a Series from a dict: the dict keys become the Series index
# and the dict values become the Series values.
price_per_category = pd.Series(price_per_product)
price_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [19]:
# access series index
price_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [20]:
type(price_per_category.index)

pandas.core.indexes.base.Index

## Label-based and position-based indexing

In [21]:
series_a = pd.Series([10,20,30,40,50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [22]:
# RangeIndex - position-based indexing
series_a.index

RangeIndex(start=0, stop=5, step=1)

In [23]:
type(series_a.index)

pandas.core.indexes.range.RangeIndex

In [24]:
list(series_a.index)

[0, 1, 2, 3, 4]

In [25]:
# Label-based indexing
price_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

## Indexing

In [26]:
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [27]:
# [] looks up the *label* 0; with the default RangeIndex the labels are 0..4,
# so label 0 coincides with position 0.
series_a[0]

10

In [28]:
price_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [29]:
# label-based indexing
price_per_category['Product A']

22250

In [30]:
# position-based: select by integer position explicitly with .iloc.
# Plain [] with an integer on a label-indexed Series only works through a
# positional fallback that is deprecated since pandas 2.1.
price_per_category.iloc[0]

22250

In [31]:
# explicit index
series_b = pd.Series([10,20,30,40,50], index=[1, 2, 3, 4, 5])

In [32]:
 series_b[1]

10

In [33]:
series_c = pd.Series([10,20,30,40,50], index=["1", "2", "3", "4", "5"])

In [34]:
# The integer 1 is not one of the string labels ("1".."5"), so select the
# second element by position explicitly with .iloc; the implicit positional
# fallback of [] is deprecated since pandas 2.1.
series_c.iloc[1]

20

In [35]:
series_c["1"]

10

## Using methods in Python

In [36]:
# Deposit amounts labelled by the start date (kept as plain strings).
start_date_deposit = pd.Series(
    [2000, 2000, 2000, 4000],
    index=['7/4/2014', '1/2/2015', '12/8/2012', '3/2/2011'],
)



In [37]:
start_date_deposit 

7/4/2014     2000
1/2/2015     2000
12/8/2012    2000
3/2/2011     4000
dtype: int64

In [38]:
start_date_deposit.sum()

10000

In [39]:
start_date_deposit.min()

2000

In [40]:
start_date_deposit.idxmax()

'3/2/2011'

In [41]:
start_date_deposit.head()

7/4/2014     2000
1/2/2015     2000
12/8/2012    2000
3/2/2011     4000
dtype: int64

In [42]:
start_date_deposit.tail()

7/4/2014     2000
1/2/2015     2000
12/8/2012    2000
3/2/2011     4000
dtype: int64

## Parameters and Arguments


In [43]:
start_date_deposit.head(3)

7/4/2014     2000
1/2/2015     2000
12/8/2012    2000
dtype: int64

In [44]:
start_date_deposit.head(n=2)

7/4/2014    2000
1/2/2015    2000
dtype: int64

##  Pandas documentation

https://pandas.pydata.org/docs/

In [45]:
start_date_deposit.head()

7/4/2014     2000
1/2/2015     2000
12/8/2012    2000
3/2/2011     4000
dtype: int64

## Create dataframe from scratch

In [46]:
# Dictionary of lists -> DataFrame: each key is a column name and
# each list holds that column's values.
data = dict(
    ProductName=['Product A', 'Product B', 'Product C'],
    Price=[22250, 16600, 12500],
)
df = pd.DataFrame(data)

In [47]:
df

Unnamed: 0,ProductName,Price
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [48]:
# Create a dataframe from a dictionary of lists + specify the index
# (`data` is the dictionary defined in the previous cell).
df = pd.DataFrame(data, index=['A', 'B', 'C'])
df


Unnamed: 0,ProductName,Price
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [49]:
# List of dictionaries -> DataFrame: every dictionary is one row,
# and its keys supply the column names.
data = [
    {'ProductName': name, 'Price': price}
    for name, price in [('ProductA', 22250), ('ProductB', 16600), ('ProductC', 12500)]
]
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,Price
0,ProductA,22250
1,ProductB,16600
2,ProductC,12500


In [50]:
# a dictionary of Series
data = {'ProductName': pd.Series(['Product A', 'Product B', 'Product C']),'Price': pd.Series([22250, 16600, 12500])}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,Price
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [51]:
# Dictionary of Series that share the labels A/B/C -> DataFrame;
# the Series are aligned on their common index.
products = pd.Series({'A': 'Product A', 'B': 'Product B', 'C': 'Product C'})
prices = pd.Series({'A': 22250, 'B': 16600, 'C': 12500})
data = dict(ProductName=products, Price=prices)
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,Price
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [52]:
# Create a dataframe from a list of lists: each inner list is one row;
# column names and row labels are passed explicitly.
df = pd.DataFrame(data=[['ProductA', 22250], ['ProductB', 16600], ['ProductC', 12500]], columns=['ProductName', 'Price'], index=['A', 'B', 'C'])

In [53]:
df


Unnamed: 0,ProductName,Price
A,ProductA,22250
B,ProductB,16600
C,ProductC,12500


In [54]:
df.shape

(3, 2)

## Data Cleaning and Preprocessing: pd.Series

#### .unique() & .nunique()

In [55]:
# Load the single-column CSV and squeeze the one-column DataFrame into a Series.
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and removed
# in pandas 2.0; DataFrame.squeeze("columns") is the supported replacement.
data = pd.read_csv('Location.csv').squeeze("columns")
# Work on a copy so the freshly loaded data stays untouched.
location_data = data.copy()
location_data.head()



  data = pd.read_csv('Location.csv', squeeze=True)


0     Location 3
1     Location 6
2     Location 8
3    Location 26
4    Location 34
Name: Location, dtype: object

In [56]:
type(location_data)

pandas.core.series.Series

In [57]:
location_data.describe()

count            1043
unique            296
top       Location 25
freq               31
Name: Location, dtype: object

In [58]:
len(location_data)

1043

In [59]:
location_data.nunique()

296

In [60]:
location_data.unique()

array(['Location 3', 'Location 6', 'Location 8', 'Location 26',
       'Location 34', 'Location 25', 'Location 46', 'Location 156',
       'Location 21', 'Location 13', 'Location 579', 'Location 602',
       'Location 10', 'Location 44', 'Location 30', 'Location 48',
       'Location 196', 'Location 64', 'Location 91', 'Location 62',
       'Location 75', 'Location 42', 'Location 233', 'Location 95',
       'Location 78', 'Location 61', 'Location 87', 'Location 19',
       'Location 115', 'Location 350', 'Location 377', 'Location 17',
       'Location 113', 'Location 81', 'Location 58', 'Location 212',
       'Location 53', 'Location 337', 'Location 41', 'Location 632',
       'Location 73', 'Location 214', 'Location 218', 'Location 38',
       'Location 172', 'Location 197', 'Location 101', 'Location 185',
       'Location 129', 'Location 235', 'Location 142', 'Location 50',
       'Location 76', 'Location 11', 'Location 33', 'Location 22',
       'Location 145', 'Location 203', 'Loca

### Converting Series into Arrays


In [61]:
s = pd.Series(data={'ProductA': 22250, 'ProductB': 12345, 'ProductC': 56789})

In [62]:
# Avoid .values; prefer .array or .to_numpy() instead.
s.values

array([22250, 12345, 56789], dtype=int64)

In [63]:
s.array

<PandasArray>
[22250, 12345, 56789]
Length: 3, dtype: int64

In [64]:
# built on top of numpy array
type(s.array)

pandas.core.arrays.numpy_.PandasArray

In [65]:
# convert to numpy
s.to_numpy()

array([22250, 12345, 56789], dtype=int64)

In [66]:
test_array = s[['ProductA', 'ProductB']].to_numpy(dtype='float')

In [67]:
test_array

array([22250., 12345.])

In [68]:
type(test_array[0])

numpy.float64

In [69]:
type(s.array[0])

numpy.int64

### .sort_values()

In [70]:
numbers = pd.Series([123, 56, 45, 46, 10])
numbers.sort_values()

4     10
2     45
3     46
1     56
0    123
dtype: int64

In [71]:
numbers.sort_values(ascending=False)

0    123
1     56
3     46
2     45
4     10
dtype: int64

### Attribute and method chaining

In [72]:
location_data.index

RangeIndex(start=0, stop=1043, step=1)

In [73]:
location_data.index.name

In [74]:
location_data.index.name = 'Index'

In [75]:
location_data

Index
0        Location 3
1        Location 6
2        Location 8
3       Location 26
4       Location 34
           ...     
1038    Location 73
1039    Location 82
1040    Location 11
1041    Location 26
1042    Location 94
Name: Location, Length: 1043, dtype: object

In [76]:
location_data.sort_values()

Index
637     Location 1
884     Location 1
465     Location 1
716    Location 10
623    Location 10
          ...     
482    Location 97
128    Location 97
669    Location 97
757    Location 98
372    Location 99
Name: Location, Length: 1043, dtype: object

In [77]:
location_data.sort_values().head()

Index
637     Location 1
884     Location 1
465     Location 1
716    Location 10
623    Location 10
Name: Location, dtype: object

In [78]:
location_data.index.to_numpy()

array([   0,    1,    2, ..., 1040, 1041, 1042], dtype=int64)

### .sort_index()

In [79]:
location_data_sv = location_data.sort_values(ascending=False)

In [80]:
location_data_sv.head()

Index
372    Location 99
757    Location 98
128    Location 97
482    Location 97
271    Location 97
Name: Location, dtype: object

In [81]:
# NOTE(review): repeats the identical assignment made two cells earlier.
location_data_sv = location_data.sort_values(ascending=False)

In [82]:
location_data_sv.index

Int64Index([ 372,  757,  128,  482,  271,  669,  612,   29,  518,  598,
            ...
             904,  912, 1010,   14,  716,  202,  298,  637,  884,  465],
           dtype='int64', name='Index', length=1043)

In [83]:
location_data_sv.index.array

<PandasArray>
[ 372,  757,  128,  482,  271,  669,  612,   29,  518,  598,
 ...
  904,  912, 1010,   14,  716,  202,  298,  637,  884,  465]
Length: 1043, dtype: int64

In [84]:
location_data_sv.index.sort_values()

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042],
           dtype='int64', name='Index', length=1043)

In [85]:
location_data_sv

Index
372    Location 99
757    Location 98
128    Location 97
482    Location 97
271    Location 97
          ...     
202    Location 10
298    Location 10
637     Location 1
884     Location 1
465     Location 1
Name: Location, Length: 1043, dtype: object

In [86]:
location_data_sv.sort_index()

Index
0        Location 3
1        Location 6
2        Location 8
3       Location 26
4       Location 34
           ...     
1038    Location 73
1039    Location 82
1040    Location 11
1041    Location 26
1042    Location 94
Name: Location, Length: 1043, dtype: object

### A revision to DataFrame

In [87]:
# 2x3 grid holding the integers 1..6, with explicit row and column labels.
df = pd.DataFrame(
    np.arange(1, 7).reshape(2, 3),
    index=['Row 1', 'Row 2'],
    columns=['Column 1', 'Column 2', 'Column 3'],
)

In [88]:
df

Unnamed: 0,Column 1,Column 2,Column 3
Row 1,1,2,3
Row 2,4,5,6


In [89]:
data = pd.read_csv('Lending-company.csv', index_col='LoanID')
lending_company_data = data.copy()
lending_company_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [90]:
lending_company_data.index

Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
            ...
            1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
           dtype='int64', name='LoanID', length=1043)

In [91]:
type(lending_company_data.index)

pandas.core.indexes.numeric.Int64Index

In [92]:
lending_company_data.columns

Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')

In [93]:
type(lending_company_data.columns)

pandas.core.indexes.base.Index

In [94]:
lending_company_data.axes


[Int64Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
             ...
             1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
            dtype='int64', name='LoanID', length=1043),
 Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
        'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
        'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
       dtype='object')]

In [95]:
lending_company_data.dtypes

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object

In [96]:
lending_company_data.values

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [97]:
lending_company_data.to_numpy()

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [98]:
lending_company_data.shape

(1043, 14)

In [99]:
len(lending_company_data.columns)

14

### Data selection

In [100]:
data = pd.read_csv('Lending-company.csv', index_col='StringID')
lending_company_data = data.copy()
lending_company_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [101]:
lending_company_data.LoanID

StringID
LoanID_1          1
LoanID_2          2
LoanID_3          3
LoanID_4          4
LoanID_5          5
               ... 
LoanID_1039    1039
LoanID_1040    1040
LoanID_1041    1041
LoanID_1042    1042
LoanID_1043    1043
Name: LoanID, Length: 1043, dtype: int64

In [102]:
lending_company_data['LoanID']

StringID
LoanID_1          1
LoanID_2          2
LoanID_3          3
LoanID_4          4
LoanID_5          5
               ... 
LoanID_1039    1039
LoanID_1040    1040
LoanID_1041    1041
LoanID_1042    1042
LoanID_1043    1043
Name: LoanID, Length: 1043, dtype: int64

In [103]:
type(lending_company_data['LoanID'])

pandas.core.series.Series

In [104]:
lending_company_data[['LoanID']]

Unnamed: 0_level_0,LoanID
StringID,Unnamed: 1_level_1
LoanID_1,1
LoanID_2,2
LoanID_3,3
LoanID_4,4
LoanID_5,5
...,...
LoanID_1039,1039
LoanID_1040,1040
LoanID_1041,1041
LoanID_1042,1042


In [105]:
type(lending_company_data[['LoanID']])

pandas.core.frame.DataFrame

In [106]:
lending_company_data[['Location', 'Product']]

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B
...,...,...
LoanID_1039,Location 73,Product B
LoanID_1040,Location 82,Product A
LoanID_1041,Location 11,Product A
LoanID_1042,Location 26,Product B


### iloc

In [107]:
data = pd.read_csv('Lending-company.csv', index_col='StringID')
lending_company_data = data.copy()
lending_company_data.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [108]:
# DataFrame[...] with a scalar selects a *column* by label, not a row by position.
# There is no column labelled 1, so this raises KeyError; catch and display it so
# the notebook still runs top to bottom (use .iloc for positional row access).
try:
    lending_company_data[1]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 1

In [109]:
lending_company_data['Product']

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
                 ...    
LoanID_1039    Product B
LoanID_1040    Product A
LoanID_1041    Product A
LoanID_1042    Product B
LoanID_1043    Product A
Name: Product, Length: 1043, dtype: object

In [111]:
lending_company_data.iloc[1]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [112]:
lending_company_data.iloc[1,2]

'Female'

In [114]:
lending_company_data.iloc[1, :]

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [115]:
# return an entire column
lending_company_data.iloc[:, 3]

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

In [116]:
lending_company_data.iloc[:, 1:3]

Unnamed: 0_level_0,Product,CustomerGender
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Product B,Female
LoanID_2,Product D,Female
LoanID_3,Product B,Male
LoanID_4,Product A,Male
LoanID_5,Product B,Female
...,...,...
LoanID_1039,Product B,Male
LoanID_1040,Product A,Male
LoanID_1041,Product A,NotSpecified
LoanID_1042,Product B,Female


### .loc

In [117]:
lending_company_data.loc['LoanID_3', :]

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [120]:
lending_company_data.loc['LoanID_3', 'Region']

'Region 3'

In [121]:
lending_company_data.loc[:, 'Location']

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object