In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold

In [11]:
# Location of the cleaned credit CSV. The CREDIT_DATA_PATH environment
# variable overrides it so the notebook can run on machines other than the
# author's; when it is unset the original absolute path is used unchanged.
DATA_PATH = os.environ.get(
    "CREDIT_DATA_PATH",
    "C:/AI workforce/aug 29 task/cleaned_credit_data.csv",
)
df = pd.read_csv(DATA_PATH)

In [12]:
# Separate the label column from the remaining (feature) columns.
target_col = 'default.payment.next.month'
X = df.drop(columns=[target_col])
y = df[target_col]


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for final evaluation. The split is stratified on
# the label and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Baseline AdaBoost classifier: only the seed is set, every other
# hyperparameter is left at the library default.
ada = AdaBoostClassifier(random_state=42)


In [15]:
# Metrics recorded on every fold.
scoring = ['accuracy', 'precision', 'recall', 'f1']
# Shuffled, stratified 10-fold splitter with a fixed seed.
cv = StratifiedKFold(shuffle=True, n_splits=10, random_state=42)

In [16]:
# Cross-validate the baseline model on the training split only; training-fold
# scores are kept as well so they can be compared with the validation scores.
cv_results = cross_validate(
    estimator=ada,
    X=X_train,
    y=y_train,
    scoring=scoring,
    cv=cv,
    return_train_score=True,
)

In [17]:
# Accuracy measured on the training portion of each CV fold.
cv_results['train_accuracy']

array([0.81949074, 0.81962963, 0.81819444, 0.81912037, 0.82018519,
       0.81875   , 0.82023148, 0.81949074, 0.82009259, 0.81884259])

In [18]:
 # Accuracy measured on the held-out portion of each CV fold.
 # NOTE(review): this cell starts with a stray leading space; IPython accepts
 # it, but it would be an IndentationError if exported to a plain .py script.
 cv_results['test_accuracy']

array([0.81875   , 0.82      , 0.82625   , 0.82666667, 0.81625   ,
       0.82      , 0.81333333, 0.82125   , 0.81541667, 0.81625   ])

In [19]:
# Precision measured on the held-out portion of each CV fold.
cv_results['test_precision']

array([0.66666667, 0.70040486, 0.73553719, 0.71062271, 0.66917293,
       0.69879518, 0.66938776, 0.69465649, 0.68032787, 0.68145161])

In [20]:
# Recall measured on the held-out portion of each CV fold.
cv_results['test_recall']

array([0.35849057, 0.32580038, 0.33521657, 0.3653484 , 0.33521657,
       0.32768362, 0.30885122, 0.34274953, 0.3126177 , 0.31826742])

In [21]:
# F1 score measured on the held-out portion of each CV fold.
cv_results['test_f1']

array([0.46625767, 0.44473008, 0.46054334, 0.48258706, 0.44667503,
       0.44615385, 0.42268041, 0.45901639, 0.4283871 , 0.4338896 ])

HYPERPARAMETER TUNING:

In [33]:
from sklearn.model_selection import GridSearchCV

# Deliberately small 2 x 2 search space: a narrow range of ensemble sizes and
# only two learning rates.
param_grid = dict(
    n_estimators=[50, 100],
    learning_rate=[0.1, 1.0],
)

In [34]:
# Exhaustive search over param_grid, ranking candidates by 5-fold accuracy.
# NOTE(review): the rendered classification report shows class 1 as the
# minority (1327 of 6000 test rows), so accuracy may not be the most
# informative selection metric. Confirm this choice is intended.
grid = GridSearchCV(
    estimator=AdaBoostClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

In [35]:
# Run the grid search on the training split only; the test split stays unseen.
grid.fit(X_train, y_train)

0,1,2
,estimator,AdaBoostClass...ndom_state=42)
,param_grid,"{'learning_rate': [0.1, 1.0], 'n_estimators': [50, 100]}"
,scoring,'accuracy'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,estimator,
,n_estimators,100
,learning_rate,1.0
,algorithm,'deprecated'
,random_state,42


In [36]:
# Hyperparameter combination with the best mean cross-validated score.
grid.best_params_

{'learning_rate': 1.0, 'n_estimators': 100}

In [37]:
# Mean cross-validated accuracy achieved by the best combination.
grid.best_score_

np.float64(0.8188749999999999)

In [40]:
# Metric helpers used by the evaluation cells that follow.
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, classification_report
# Estimator selected by the grid search (the search was built with the
# default refit behaviour, so this model is already fitted).
best_model = grid.best_estimator_
best_model

0,1,2
,estimator,
,n_estimators,100
,learning_rate,1.0
,algorithm,'deprecated'
,random_state,42


In [41]:
# Hard class predictions for the held-out test split.
y_pred = best_model.predict(X_test)
# Per-class probabilities; column index 1 is kept as the score for ROC AUC.
class_probs = best_model.predict_proba(X_test)
y_prob = class_probs[:, 1]


In [42]:
# Fraction of test rows whose predicted label matches the true label.
acc = accuracy_score(y_test, y_pred)
acc


0.8171666666666667

In [43]:
# ROC AUC on the test split, computed from the probability scores rather than
# the hard predictions.
roc = roc_auc_score(y_test, y_prob)
roc

0.7685902161094431

In [44]:
# Confusion matrix on the test split (rows: true label, columns: predicted).
cm = confusion_matrix(y_test, y_pred)
cm

array([[4461,  212],
       [ 885,  442]])

In [45]:
# classification_report returns one multi-line string. Print it so the table
# renders with real line breaks; a bare expression shows the escaped '\n' repr.
print(classification_report(y_test, y_pred))

'              precision    recall  f1-score   support\n\n           0       0.83      0.95      0.89      4673\n           1       0.68      0.33      0.45      1327\n\n    accuracy                           0.82      6000\n   macro avg       0.76      0.64      0.67      6000\nweighted avg       0.80      0.82      0.79      6000\n'