### Data Ingestion

In [1]:
import pandas as pd
df = pd.read_csv("data/UCI_Credit_Card.csv")

In [2]:
df.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


### Dropping ID Column

In [3]:
df = df.drop(labels='ID', axis=1)

In [4]:
df.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,20000.0,2,2,1,24,2,2,-1,-1,-2,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,120000.0,2,2,2,26,-1,2,0,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,90000.0,2,2,2,34,0,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,50000.0,2,2,1,37,0,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,50000.0,1,2,1,57,-1,0,-1,0,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


### Divided into Dependent and Independent Feature

In [5]:
X = df.drop(labels=['default.payment.next.month'],axis=1)
y = df[['default.payment.next.month']]

In [6]:
X.head(5)

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,20000.0,2,2,1,24,2,2,-1,-1,-2,...,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0
1,120000.0,2,2,2,26,-1,2,0,0,0,...,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0
2,90000.0,2,2,2,34,0,0,0,0,0,...,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0
3,50000.0,2,2,1,37,0,0,0,0,0,...,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0
4,50000.0,1,2,1,57,-1,0,-1,0,0,...,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0


In [7]:
y.head()

Unnamed: 0,default.payment.next.month
0,1
1,1
2,0
3,0
4,0


### EDA

In [8]:
from sklearn.impute import SimpleImputer ## HAndling Missing Values
from sklearn.preprocessing import StandardScaler # HAndling Feature Scaling
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [9]:
numerical_columns = X.columns
numerical_columns

Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6'],
      dtype='object')

In [10]:
num_pipeline=Pipeline(
    steps=[
    ('imputer',SimpleImputer(strategy='median')),
    ('scaler',StandardScaler())])

In [11]:
preprocessor=ColumnTransformer([
('num_pipeline',num_pipeline,numerical_columns)
])


### Train Test Split

In [12]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=30)

In [13]:
X_train = pd.DataFrame(preprocessor.fit_transform(X_train),
                       columns=preprocessor.get_feature_names_out())

In [14]:
X_test = pd.DataFrame(preprocessor.fit_transform(X_test),
                       columns=preprocessor.get_feature_names_out())

In [15]:
X_train.head()

Unnamed: 0,num_pipeline__LIMIT_BAL,num_pipeline__SEX,num_pipeline__EDUCATION,num_pipeline__MARRIAGE,num_pipeline__AGE,num_pipeline__PAY_0,num_pipeline__PAY_2,num_pipeline__PAY_3,num_pipeline__PAY_4,num_pipeline__PAY_5,...,num_pipeline__BILL_AMT3,num_pipeline__BILL_AMT4,num_pipeline__BILL_AMT5,num_pipeline__BILL_AMT6,num_pipeline__PAY_AMT1,num_pipeline__PAY_AMT2,num_pipeline__PAY_AMT3,num_pipeline__PAY_AMT4,num_pipeline__PAY_AMT5,num_pipeline__PAY_AMT6
0,-1.139003,-1.231086,0.189892,0.855242,-1.031921,-0.872883,-0.720412,-0.694606,-0.665419,-1.532938,...,-0.667972,-0.683707,-0.66967,-0.629882,-0.259967,-0.21968,-0.284034,-0.262225,-0.16992,-0.289958
1,-0.13854,-1.231086,-1.076056,-1.060909,0.599775,0.901613,-1.5581,-0.694606,-0.665419,-0.647261,...,-0.651531,-0.586762,-0.662746,-0.652089,-0.33337,-0.166585,0.008592,-0.313948,-0.305063,-0.289958
2,0.631048,0.812291,-1.076056,0.855242,-0.705581,1.788862,1.792653,1.821359,1.910872,2.009769,...,1.124524,1.227215,1.416612,1.498716,0.010071,-0.060316,-0.284034,0.080871,-0.057618,-0.021965
3,1.477594,-1.231086,-1.076056,-1.060909,0.382216,0.901613,-1.5581,-1.533261,-1.524183,-0.647261,...,-0.674425,-0.671125,-0.651354,-0.53399,-0.33337,-0.237431,-0.284034,-0.268127,0.140338,-0.289958
4,0.861924,-1.231086,0.189892,-1.060909,0.926114,0.014365,0.117276,0.144049,0.193345,0.238416,...,1.080023,1.177725,1.212598,1.283075,0.121951,-0.059174,-0.055687,-0.028477,-0.038076,-0.067667


In [16]:
X_test.head()

Unnamed: 0,num_pipeline__LIMIT_BAL,num_pipeline__SEX,num_pipeline__EDUCATION,num_pipeline__MARRIAGE,num_pipeline__AGE,num_pipeline__PAY_0,num_pipeline__PAY_2,num_pipeline__PAY_3,num_pipeline__PAY_4,num_pipeline__PAY_5,...,num_pipeline__BILL_AMT3,num_pipeline__BILL_AMT4,num_pipeline__BILL_AMT5,num_pipeline__BILL_AMT6,num_pipeline__PAY_AMT1,num_pipeline__PAY_AMT2,num_pipeline__PAY_AMT3,num_pipeline__PAY_AMT4,num_pipeline__PAY_AMT5,num_pipeline__PAY_AMT6
0,-1.131446,-1.241923,0.176373,-1.048914,0.163489,0.016029,0.098944,0.126935,0.178178,1.977229,...,-0.525733,-0.500082,-0.483642,-0.524558,-0.217457,-0.256498,-0.28787,-0.297307,-0.308731,-0.243889
1,0.338197,0.805203,0.176373,0.866328,1.025977,0.912045,-1.56088,-1.529936,-1.516963,-1.523496,...,-0.686493,-0.67571,-0.663804,-0.654214,-0.367058,-0.348952,-0.337831,-0.297307,-0.339256,-0.301939
2,1.885189,-1.241923,-1.087447,0.866328,-0.91462,0.016029,-0.730968,-0.7015,0.178178,-0.648315,...,-0.456194,-0.044943,-0.371186,-0.08157,0.703711,0.641909,2.315544,0.762993,2.111135,-0.280518
3,0.338197,0.805203,-1.087447,0.866328,-0.698998,-1.776005,-1.56088,-1.529936,-1.516963,-1.523496,...,-0.686538,-0.675757,-0.653168,-0.623284,-0.367058,-0.348952,-0.337831,-0.258786,-0.207288,-0.220031
4,0.028798,0.805203,-1.087447,0.866328,0.379111,0.016029,0.098944,0.126935,0.178178,0.226866,...,-0.368323,-0.312823,-0.416165,-0.595246,-0.033583,-0.040772,0.093673,-0.051493,-0.088879,0.134653


### Data Visualization

In [17]:
import seaborn as sns

### Model Training

In [18]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

### Function for Model Evaluation 


In [2]:
import numpy as np
def evaluate_model(true, predicted):
    
    acc_score = accuracy_score(true, predicted)
    f1 = f1_score(true, predicted)

    return acc_score, f1

### Train Multiple Model

In [21]:
models={
    'SVC':SVC(),
    'RF_cls' : RandomForestClassifier(),
    'GB_cls' : GradientBoostingClassifier(),
    'DT_cls' : DecisionTreeClassifier(),
    'KNN_cls': KNeighborsClassifier() 
}

trained_model_list=[]
model_list=[]
f1_list = []
acc_score =[]

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    #Make Predictions
    y_pred=model.predict(X_test)

    acc_score, f1 =evaluate_model(y_test,y_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model Training Performance')
    print("Sccuracy Score",acc_score)
    print("f1",f1)

    acc_score.append(acc_score)
    f1_list.append(f1)
    
    print('='*35)
    print('\n')


  y = column_or_1d(y, warn=True)


NameError: name 'evaluate_model' is not defined