<a href="https://colab.research.google.com/github/josh130588/MLAIMAR2024/blob/main/Assignment03_HyperparameterTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Data Loading and Initial Exploration
# Import necessary libraries

import pandas as pd

# Load the dataset from a CSV file located next to the notebook.
file_path = 'credit_card_default.csv'
df = pd.read_csv(file_path)

# Show the first few rows of the dataframe.
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; here more code follows, so print it explicitly.
print(df.head())

# Step 2: Data Preprocessing

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Encode categorical variables (if any)
# Assuming all columns are numerical

# Split the dataset into features and target variable
target_col = 'default payment next month'
X = df.drop(columns=[target_col])
y = df[target_col]

# Split the dataset into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handle missing feature values (if any).
# The fill values are computed from the training split only and then reused
# for the test split, so no test-set statistics leak into training.
# numeric_only=True keeps this from raising if a non-numeric column is present.
# The target is deliberately not imputed: a mean-filled label would not be a
# valid class value.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Standardize the features: fit the scaler on the training split only and
# apply the same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Model Training and Hyperparameter Tuning
# k-Nearest Neighbors (kNN), tuned with an exhaustive GridSearchCV.

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Base estimator; only n_neighbors is searched below.
knn = KNeighborsClassifier()

# Candidate neighbourhood sizes: 3, 5, 7, 9, 11.
param_grid = {'n_neighbors': list(range(3, 12, 2))}

# Evaluate every candidate with 5-fold cross-validation, scored by accuracy,
# on the training split.
grid_search_knn = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search_knn.fit(X_train, y_train)

# Keep the winning setting and its cross-validation score for reporting.
best_params_knn, best_score_knn = (
    grid_search_knn.best_params_,
    grid_search_knn.best_score_,
)

print("Best Parameters for kNN:", best_params_knn)
print("Best Score for kNN:", best_score_knn)

# Support Vector Machine (SVM), tuned with RandomizedSearchCV.

from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

# Define the model
svm = SVC()

# Define the parameter distribution (3 values of C x 3 values of gamma)
param_dist = {'C': [0.1, 1, 10], 'gamma': [1, 0.1, 0.01]}

# The search space is a finite grid of 9 combinations. Requesting more
# iterations than the grid holds makes scikit-learn emit a UserWarning and
# fall back to the grid size, so cap n_iter at the number of combinations.
n_candidates = len(param_dist['C']) * len(param_dist['gamma'])
n_iter_svm = min(10, n_candidates)

# Perform Randomized Search: 5-fold cross-validation scored by accuracy,
# using all available cores and a fixed seed for the sampling order.
random_search_svm = RandomizedSearchCV(
    svm,
    param_distributions=param_dist,
    n_iter=n_iter_svm,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
random_search_svm.fit(X_train, y_train)

# Best parameters and score
best_params_svm = random_search_svm.best_params_
best_score_svm = random_search_svm.best_score_

print("Best Parameters for SVM:", best_params_svm)
print("Best Score for SVM:", best_score_svm)

# Gradient Boosting, tuned with an exhaustive GridSearchCV.

from sklearn.ensemble import GradientBoostingClassifier

# Define the model. A fixed random_state makes the fitted trees (and hence
# the reported scores) reproducible from run to run.
gb = GradientBoostingClassifier(random_state=42)

# Define the parameter grid (3 x 3 x 3 = 27 combinations)
param_grid = {'n_estimators': [50, 100, 150], 'learning_rate': [0.1, 0.01, 0.001], 'max_depth': [3, 4, 5]}

# Perform Grid Search: 5-fold cross-validation scored by accuracy.
# n_jobs=-1 runs the 27 x 5 fits in parallel, matching the SVM search;
# it does not change the results.
grid_search_gb = GridSearchCV(gb, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_gb.fit(X_train, y_train)

# Best parameters and score
best_params_gb = grid_search_gb.best_params_
best_score_gb = grid_search_gb.best_score_

print("Best Parameters for Gradient Boosting:", best_params_gb)
print("Best Score for Gradient Boosting:", best_score_gb)

# AdaBoost, tuned with an exhaustive GridSearchCV.

from sklearn.ensemble import AdaBoostClassifier

# Define the model. A fixed random_state makes the boosted estimators (and
# hence the reported scores) reproducible from run to run.
ada = AdaBoostClassifier(random_state=42)

# Define the parameter grid (3 x 3 = 9 combinations)
param_grid = {'n_estimators': [50, 100, 150], 'learning_rate': [0.1, 0.01, 0.001]}

# Perform Grid Search: 5-fold cross-validation scored by accuracy.
# n_jobs=-1 runs the fits in parallel, matching the SVM search; it does not
# change the results.
grid_search_ada = GridSearchCV(ada, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_ada.fit(X_train, y_train)

# Best parameters and score
best_params_ada = grid_search_ada.best_params_
best_score_ada = grid_search_ada.best_score_

print("Best Parameters for AdaBoosting:", best_params_ada)
print("Best Score for AdaBoosting:", best_score_ada)

# XGBoost, tuned with an exhaustive GridSearchCV.

import xgboost as xgb

# Base estimator with library defaults; the grid below overrides three of them.
xgb_model = xgb.XGBClassifier()

# Search space: number of trees, shrinkage, and tree depth.
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 4, 5],
}

# Evaluate every combination with 5-fold cross-validation, scored by accuracy,
# on the training split.
grid_search_xgb = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search_xgb.fit(X_train, y_train)

# Keep the winning combination and its cross-validation score for reporting.
best_params_xgb, best_score_xgb = (
    grid_search_xgb.best_params_,
    grid_search_xgb.best_score_,
)

print("Best Parameters for XGBoost:", best_params_xgb)
print("Best Score for XGBoost:", best_score_xgb)



Best Parameters for kNN: {'n_neighbors': 11}
Best Score for kNN: 0.8068333333333333




Best Parameters for SVM: {'gamma': 0.01, 'C': 10}
Best Score for SVM: 0.819375
Best Parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
Best Score for Gradient Boosting: 0.8218333333333334
Best Parameters for AdaBoosting: {'learning_rate': 0.01, 'n_estimators': 50}
Best Score for AdaBoosting: 0.8196666666666668
Best Parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
Best Score for XGBoost: 0.8217916666666667
