## CREDIT SCORING MODEL

### 1.Import Libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix,classification_report
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

### 2.Load Data

In [None]:
df=pd.read_csv("new_dataset.csv")
print("Orginal Dataset:")
display(df.head())

### 3.Data Cleaning

In [None]:
df.shape

In [64]:
df['CreditWorthiness']=df['Balance'].apply(lambda X:1 if X>1000 else 0)

In [65]:
df=df.drop(["Risk_Level"],axis=1,errors="ignore")

In [None]:
print(df)

In [None]:
df['CreditWorthiness'].value_counts()

In [68]:
X=df.drop('CreditWorthiness',axis=1)
Y=df['CreditWorthiness']

In [69]:
for col in X.columns:
  if X[col].dtype=='object':
    le=LabelEncoder()
    X[col]=le.fit_transform(X[col])

In [70]:
smote=SMOTE(random_state=42)
X_resampled,Y_resampled=smote.fit_resample(X,Y)

In [71]:
print("Before Smote:",Y.value_counts().to_dict())

Before Smote: {0: 321, 1: 47}


In [72]:
print("After smote:",Y_resampled.value_counts().to_dict())

After smote: {0: 321, 1: 321}


### 4.Train_Test Split

In [73]:
X_train,X_test,Y_train,Y_test=train_test_split(X_resampled,Y_resampled,test_size=0.3,random_state=42,stratify=Y_resampled)

### 5.Model Training

In [None]:
model=LogisticRegression(max_iter=2000,solver='saga')
model.fit(X_train,Y_train)

### 6.Prediction & Evaluation

In [75]:
Y_pred=model.predict(X_test)

In [76]:
print("\n CONFUSION MATRIX:")
print(confusion_matrix(Y_test,Y_pred))


 CONFUSION MATRIX:
[[83 14]
 [ 0 96]]


In [77]:
print("MODEL EVALUATION REPORT:")
print("Accuracy:",
      round(accuracy_score(Y_test,Y_pred),2))
print("Precision:",
      round(precision_score(Y_test,Y_pred),2))
print("Recall:",
      round(recall_score(Y_test,Y_pred),2))
print("F1 Score:",
      round(f1_score(Y_test,Y_pred),2))

MODEL EVALUATION REPORT:
Accuracy: 0.93
Precision: 0.87
Recall: 1.0
F1 Score: 0.93


In [78]:
print("CLASSIFICATION REPORT:")
print(classification_report(Y_test,Y_pred))

CLASSIFICATION REPORT:
              precision    recall  f1-score   support

           0       1.00      0.86      0.92        97
           1       0.87      1.00      0.93        96

    accuracy                           0.93       193
   macro avg       0.94      0.93      0.93       193
weighted avg       0.94      0.93      0.93       193



### 7.Final Output

In [79]:
scaler=StandardScaler()

In [80]:
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

In [81]:
model.fit(X_train_scaled,Y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'saga'
,max_iter,2000


In [82]:
df_encoded=df.copy()

In [83]:
X_full=df_encoded.drop('CreditWorthiness',axis=1)
Y_full=df_encoded['CreditWorthiness']

In [84]:
X_full_scaled=scaler.transform(X_full)

In [86]:
df['Predicted CreditWorthiness']=model.predict(X_full_scaled)

In [None]:
print(df.head(10))

In [88]:
df

Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Student,Married,Balance,CreditWorthiness,Predicted CreditWorthiness
0,14.891,3606,283,2,0.146667,6,0,1,333,0,0
1,106.025,6645,483,3,0.786667,10,1,1,903,0,1
2,104.593,7075,514,4,0.640000,6,0,0,580,0,0
3,55.882,4897,357,2,0.600000,11,0,1,331,0,0
4,80.180,8047,569,4,0.720000,5,0,0,1151,1,1
...,...,...,...,...,...,...,...,...,...,...,...
363,12.096,4100,307,3,0.120000,8,0,1,560,0,0
364,13.364,3838,296,5,0.560000,12,0,0,480,0,0
365,57.872,4171,321,5,0.586667,7,0,1,138,0,0
366,37.728,2525,192,1,0.280000,8,0,1,0,0,0
