In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw table; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer="credit_score.csv")

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# Remove six columns that none of the later cells read, then preview the result.
df = df.drop(
    ['ID', 'Customer_ID', 'Name', 'SSN', 'Credit_History_Age', 'Type_of_Loan'],
    axis=1,
)
df.head()

In [None]:
# Display the frame's dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Print column names, non-null counts and dtypes to spot columns that still need cleaning.
df.info()

In [None]:
# Summary statistics; with mixed dtypes pandas summarises only the numeric columns by default.
df.describe()

In [None]:
# Number of missing values in each column before any cleaning.
df.isna().sum()

In [None]:
# Strip stray underscores from the Age strings, then cast the column to int.
df['Age'] = df['Age'].str.replace("_", "").astype(int)

In [None]:
# The six-underscore placeholder marks a missing occupation; turn it into NaN.
df['Occupation'] = df['Occupation'].replace({"______": np.nan})

In [None]:
# Strip stray underscores from Annual_Income, then cast the column to float.
df['Annual_Income'] = df['Annual_Income'].str.replace("_", "").astype(float)

In [None]:
# Strip stray underscores from Num_of_Loan, then cast the column to int.
df['Num_of_Loan'] = df['Num_of_Loan'].str.replace("_", "").astype(int)

In [None]:
# Strip stray underscores, then cast to float (float, unlike int, tolerates missing values).
df['Num_of_Delayed_Payment'] = (
    df['Num_of_Delayed_Payment'].str.replace("_", "").astype(float)
)

In [None]:
# Encode the target labels as integers: Poor -> 0, Standard -> 1, Good -> 2.
df['Credit_Score'] = df['Credit_Score'].replace({'Poor': 0, 'Standard': 1, 'Good': 2})

In [None]:
# Strip stray underscores from Monthly_Balance, then cast the column to float.
df['Monthly_Balance'] = df['Monthly_Balance'].str.replace("_", "").astype(float)

In [None]:
# Treat the placeholder token '!@#%8' as a missing value. The replacement must be
# the np.nan object, not the string 'np.nan': only a real NaN is picked up by the
# forward/backward fill below, and a literal string would be label-encoded later
# as if it were a genuine payment-behaviour category.
df['Payment_Behaviour'] = df['Payment_Behaviour'].replace('!@#%8', np.nan)

In [None]:
# Strip stray underscores from Amount_invested_monthly, then cast the column to float.
df['Amount_invested_monthly'] = (
    df['Amount_invested_monthly'].str.replace("_", "").astype(float)
)

In [None]:
# Fold the 'NM' label into 'NO' so the column is left with two categories.
df['Payment_of_Min_Amount'] = df['Payment_of_Min_Amount'].replace({'NM': 'NO'})

In [None]:
# Binary-encode the flag: YES -> 1, NO -> 0.
# NOTE(review): matching is case-sensitive; assumes the CSV stores upper-case
# 'YES'/'NO' — confirm, otherwise other spellings stay as strings.
df['Payment_of_Min_Amount'] = df['Payment_of_Min_Amount'].replace({'YES': 1, 'NO': 0})

In [None]:
# Strip stray underscores from Outstanding_Debt, then cast the column to float.
df['Outstanding_Debt'] = df['Outstanding_Debt'].str.replace("_", "").astype(float)

In [None]:
# A lone underscore marks a missing credit mix; convert it to NaN first ...
df['Credit_Mix'] = df['Credit_Mix'].replace({"_": np.nan})
# ... then encode the remaining labels: Bad -> 0, Standard -> 1, Good -> 2.
df['Credit_Mix'] = df['Credit_Mix'].replace({'Standard': 1, 'Good': 2, 'Bad': 0})

In [None]:
# Cells that are exactly "_" become NaN (whole-value match, not a substring strip),
# after which the column is cast to float.
df['Changed_Credit_Limit'] = (
    df['Changed_Credit_Limit'].replace({"_": np.nan}).astype(float)
)

In [None]:
# Forward-fill: each missing value takes the last valid value above it in the same
# column. DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated
# in recent pandas releases; the result is the same.
df = df.ffill()

In [None]:
# Backward-fill whatever is still missing (e.g. NaNs in the leading rows, which a
# forward fill cannot reach). DataFrame.bfill() replaces the deprecated
# fillna(method='bfill') spelling; the result is the same.
df = df.bfill()

In [None]:
# Re-check the per-column missing-value counts after the fill steps.
df.isna().sum()

In [None]:
# Box plot of Age to look for outliers.
sns.boxplot(df.Age)

In [None]:
# IQR-based outlier screen for Age. The surviving rows go to `data`; `df` itself
# is left untouched by this cell.
col_names = ['Age']
q1 = df['Age'].quantile(0.25)
q2 = df['Age'].quantile(0.75)  # despite the name, this is the 75th percentile
iqr = q2 - q1
# Keep rows whose Age lies within [q1 - 1.5*iqr, q2 + 1.5*iqr], both ends inclusive.
data = df[df['Age'].between(q1 - 1.5 * iqr, q2 + 1.5 * iqr)]
# NOTE(review): col_names is not read by any visible cell, and the modelling cells
# below keep using df rather than data — confirm whether the filter should apply.
sns.boxplot(data['Age'])

In [None]:
from sklearn.preprocessing import LabelEncoder

# A single encoder object is refitted for each column in turn, so once the loop
# ends it only remembers the classes of the last column ('Payment_Behaviour').
le = LabelEncoder()
for cat_col in ('Month', 'Occupation', 'Payment_Behaviour'):
    df[cat_col] = le.fit_transform(df[cat_col])

# Confirm the resulting dtypes.
df.info()

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Candidate predictors: every non-object column except the target.
col_list = [
    name for name in df.columns
    if df[name].dtype != 'object' and name != 'Credit_Score'
]
X = df[col_list]

# One variance inflation factor per selected predictor, addressed by column position.
vif_data = pd.DataFrame({
    'feature': X.columns,
    'VIF': [variance_inflation_factor(X.values, idx) for idx in range(len(X.columns))],
})
vif_data

In [None]:
# Separate the target column from the predictors.
y = df['Credit_Score']
x = df.drop('Credit_Score', axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline model: logistic regression with library defaults (fit() returns the estimator).
lr = LogisticRegression().fit(x_train, y_train)
# NOTE(review): y_pred is reassigned by the decision-tree cell before any metric
# is computed on these predictions — confirm whether this model should be scored.
y_pred = lr.predict(x_test)
y_pred

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so a fresh Restart & Run All reproduces the same model; the value
# matches the seed already used for the train/test split. The grid search further
# down clones this estimator, so it inherits the seed as well.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)
# Predictions on the held-out rows; scored by the metric cells that follow.
y_pred = dt.predict(x_test)
y_pred

In [None]:
# Import only the metrics this notebook calls instead of `import *`, so readers can
# see where each name comes from and the namespace is not flooded.
from sklearn.metrics import accuracy_score, confusion_matrix

# Rows are true classes, columns are predicted classes.
confusion_matrix(y_test, y_pred)

In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.model_selection import GridSearchCV
# Hyper-parameter grid for the decision tree search.
# - 'criterion' lists both split criteria in ONE list; the original spelling
#   `['entropy'],['gini']` left a dict item without a key and did not parse.
# - 'auto' is omitted from 'max_features': for decision trees it was a deprecated
#   alias of 'sqrt' and is rejected by scikit-learn 1.3+.
parameters = {
    'max_features': ['log2', 'sqrt'],
    'criterion': ['entropy', 'gini'],
    'max_depth': [2, 3, 5, 10, 50],
    'min_samples_split': [2, 3, 50, 100],
    'min_samples_leaf': [1, 5, 8, 10],
}

In [None]:
# Exhaustive search over `parameters` with GridSearchCV's default settings.
grid_obj = GridSearchCV(dt, parameters)
grid_obj = grid_obj.fit(x_train, y_train)
# With the default refit=True, best_estimator_ has already been refitted on the
# whole training set using the best parameter combination, so it is used directly;
# a second fit() would only repeat that work.
dt = grid_obj.best_estimator_
y_pred = dt.predict(x_test)
# Test-set accuracy of the tuned tree, as a percentage with two decimals.
acc_dt = round(accuracy_score(y_test, y_pred) * 100, 2)
acc_dt

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so a fresh Restart & Run All reproduces the same model; the value
# matches the seed already used for the train/test split.
rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)
# Predictions on the held-out rows; scored by the accuracy cell that follows.
y_pred = rf.predict(x_test)
y_pred

In [None]:
# Accuracy of the most recent predictions (the random forest's) on the held-out rows.
accuracy_score(y_true=y_test, y_pred=y_pred)