In [None]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Load the sample and separate the features from the `loan_status` target.
df = pd.read_csv("D:\\stratified_sample_50000.csv")
df.head()

x = df.drop("loan_status", axis=1)
y = df["loan_status"]

# Split BEFORE oversampling so the held-out set contains only real rows.
# Resampling first would place synthetic minority samples (interpolated from
# rows that end up in the training set) into the test set and inflate scores.
# `stratify=y` keeps the class ratio identical in both partitions.
X_train_raw, x_test, y_train_raw, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123456, stratify=y
)

# Oversample the minority class of the training partition only, up to a
# minority/majority ratio of 0.3. A fixed seed makes the synthetic rows
# reproducible across kernel restarts.
smote = SMOTE(sampling_strategy=0.3, random_state=123456)
x_res, y_res = smote.fit_resample(X_train_raw, y_train_raw)

# Downstream cells train on X_train / y_train, which are the resampled data.
# NOTE(review): cross-validating on already-resampled data still lets
# synthetic rows share neighbours across folds; putting SMOTE inside an
# imblearn Pipeline would resample per fold instead.
X_train, y_train = x_res, y_res

x_res.shape, y_res.shape

In [None]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import numpy as np

# Define the base classifiers that are wrapped into pipelines later on.
# Every estimator with a stochastic component gets the same fixed seed so a
# fresh "Restart & Run All" reproduces the same fitted models.
RANDOM_STATE = 24

knn = KNeighborsClassifier()
lr = LogisticRegression(max_iter=500, random_state=RANDOM_STATE)
rf = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
cb = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    verbose=0,  # silence per-iteration training output
    random_seed=RANDOM_STATE,
)
dt = DecisionTreeClassifier(random_state=RANDOM_STATE)
br = BernoulliNB()
xgc = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=RANDOM_STATE,
)

In [None]:
# Wrap each estimator in its own one-step pipeline. The step is always named
# "classifier", so grid keys of the form `classifier__<param>` address the
# estimator's hyper-parameters regardless of which model is inside.
pipelines = {
    label: Pipeline(steps=[("classifier", estimator)])
    for label, estimator in {
        'knn': knn,
        'lr': lr,
        'rf': rf,
        'cb': cb,
        'dt': dt,
        'br': br,
        'xgc': xgc,
    }.items()
}

# Hyper-parameter grids, keyed by the same short model names used for the
# pipelines. Keys follow the `<step>__<param>` convention with the step name
# "classifier". A value may be a single dict or a list of dicts (sub-grids),
# both of which GridSearchCV accepts as `param_grid`.
_lr_solvers = ["lbfgs", "newton-cg", "newton-cholesky", "sag"]

params = {
    'lr': [
        # Regularised models: search the inverse regularisation strength C.
        {
            'classifier__penalty': ["l2"],
            'classifier__C': np.linspace(0.001, 10, 20),
            'classifier__solver': _lr_solvers,
        },
        # Unpenalised models: C has no effect without a penalty, so it is
        # left out instead of refitting the same model once per C value.
        {
            'classifier__penalty': [None],
            'classifier__solver': _lr_solvers,
        },
    ],
    'rf': {
        'classifier__max_depth': [None, 3, 5],
        'classifier__min_samples_split': [2, 5, 10],
        'classifier__min_samples_leaf': [1, 5, 10],
    },
    'dt': {
        'classifier__min_samples_split': [2, 5, 7, 10, 13],
        'classifier__min_samples_leaf': [1, 5, 7, 10, 13],
        'classifier__max_depth': [2, 5, 7, 10, 13, None],
    },
    'br': {
        # A tiny positive value stands in for "no smoothing": alpha == 0 can
        # produce log(0) in the feature log-probabilities and therefore a
        # non-finite log-loss score.
        'classifier__alpha': [1e-10, 0.1, 0.5, 1.5, 2, 2.4, 4],
    },
    # Add more parameter grids for other classifiers if needed
}

In [None]:

# One shared, seeded splitter so every model is tuned on identical folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Fitted searches keyed by model name. Keeping them means each model's
# best_estimator_ and cv_results_ stay available after the loop instead of
# being overwritten by the next iteration.
grid_searches = {}

# Tune every pipeline that has a parameter grid; pipelines without an entry
# in `params` are skipped.
for model_name, pipeline in pipelines.items():
    if model_name not in params:
        continue

    gcv = GridSearchCV(
        pipeline,
        param_grid=params[model_name],
        cv=kfold,
        scoring='neg_log_loss',  # higher (closer to 0) is better
        n_jobs=-1,  # evaluate candidates in parallel on all available cores
    )
    gcv.fit(X_train, y_train)  # X_train and y_train should be your training data
    grid_searches[model_name] = gcv

    print(f"Best parameters for {model_name}: {gcv.best_params_}")
    print(f"Best score for {model_name}: {gcv.best_score_}")
        