#### Imports

In [1]:
#Numpy
import numpy as np
#Pandas
import pandas as pd
# Import train_test_split
from sklearn.model_selection import train_test_split
# Import MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
# Import LogisticRegression
from sklearn.linear_model import LogisticRegression
# Import confusion_matrix
from sklearn.metrics import confusion_matrix
# Import GridSearchCV
from sklearn.model_selection import GridSearchCV

#### Getting The Data

In [2]:
# Relative path to the raw data file. A forward slash is used because it is
# accepted on Windows, Linux and macOS alike, and it avoids the invalid "\c"
# escape sequence that a backslash produces inside a normal string literal.
data_path = 'datasets/cc_approvals.data'
# header=None: the first line of the file is treated as data and pandas
# assigns integer column labels instead of reading names from the file.
cc_approvals = pd.read_csv(data_path, header=None)

#### Inspecting The Data

### Take a Quick Look at The Data Structure

In [3]:
# Preview the first five rows of the raw frame (5 is also the default for head()).
cc_approvals.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1,f,g,202,0,+
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,+
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280,824,+
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120,0,+


The output is a bit confusing because the feature names have been anonymized to protect privacy. The probable features in a typical credit card application are <code>Gender</code>, <code>Age</code>, <code>Debt</code>, <code>Married</code>, <code>BankCustomer</code>, <code>EducationLevel</code>, <code>Ethnicity</code>, <code>YearsEmployed</code>, <code>PriorDefault</code>, <code>Employed</code>, <code>CreditScore</code>, <code>DriversLicense</code>, <code>Citizen</code>, <code>ZipCode</code>, <code>Income</code> and finally <code>ApprovalStatus</code> (the target value). For more info, check <a href="http://rstudio-pubs-static.s3.amazonaws.com/73039_9946de135c0a49daa7a0a9eda4a67a72.html">this blog</a>.

For clarity, let's rename the features.

In [10]:
# Readable names for the anonymized columns, listed in column order.
# NOTE(review): 'Employd' looks like a typo for 'Employed'. It is kept as
# spelled because the column name is part of the resulting frame's schema;
# fixing it means updating every later reference to this column as well.
feature_new = [
    'Gender', 'Age', 'Debt', 'Married',
    'BankCustomer', 'EducationLevel', 'Ethnicity', 'YearsEmployed',
    'PriorDefault', 'Employd', 'CreditScore', 'DriversLicense',
    'Citizen', 'ZipCode', 'Income', 'ApprovalStatus',
]
# Current integer column labels: 0, 1, ..., one per entry in feature_new.
feature_old = list(range(len(feature_new)))

# Lookup table from each integer label to its readable name.
dict_names = {old: new for old, new in zip(feature_old, feature_new)}

# rename() returns a new frame, so cc_approvals keeps its integer labels.
cc_apps = cc_approvals.rename(columns=dict_names)
cc_apps.head()

Unnamed: 0,Gender,Age,Debt,Married,BankCustomer,EducationLevel,Ethnicity,YearsEmployed,PriorDefault,Employd,CreditScore,DriversLicense,Citizen,ZipCode,Income,ApprovalStatus
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1,f,g,202,0,+
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,+
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280,824,+
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120,0,+


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) of cc_apps.
# With default arguments describe() only reports numeric columns, which is why
# the printed table lists just Debt, YearsEmployed, CreditScore and Income.
cc_apps_description = cc_apps.describe()
# The result is kept in a variable and printed as plain text.
print(cc_apps_description)

             Debt  YearsEmployed  CreditScore         Income
count  690.000000     690.000000    690.00000     690.000000
mean     4.758725       2.223406      2.40000    1017.385507
std      4.978163       3.346513      4.86294    5210.102598
min      0.000000       0.000000      0.00000       0.000000
25%      1.000000       0.165000      0.00000       0.000000
50%      2.750000       1.000000      0.00000       5.000000
75%      7.207500       2.625000      3.00000     395.500000
max     28.000000      28.500000     67.00000  100000.000000


In [14]:
# Print the column names, non-null counts, dtypes and memory usage of cc_apps.
# NOTE(review): Age and ZipCode are reported as object although they look
# numeric, and every column shows 690 non-null values - presumably missing
# entries are encoded as placeholder strings rather than NaN; TODO confirm.
cc_apps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Gender          690 non-null    object 
 1   Age             690 non-null    object 
 2   Debt            690 non-null    float64
 3   Married         690 non-null    object 
 4   BankCustomer    690 non-null    object 
 5   EducationLevel  690 non-null    object 
 6   Ethnicity       690 non-null    object 
 7   YearsEmployed   690 non-null    float64
 8   PriorDefault    690 non-null    object 
 9   Employd         690 non-null    object 
 10  CreditScore     690 non-null    int64  
 11  DriversLicense  690 non-null    object 
 12  Citizen         690 non-null    object 
 13  ZipCode         690 non-null    object 
 14  Income          690 non-null    int64  
 15  ApprovalStatus  690 non-null    object 
dtypes: float64(2), int64(2), object(12)
memory usage: 86.4+ KB
