Kruti Bagwe Roll no.05 TE INFT-A Batch-1

# **Experiment 6 - Classification**

In [None]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the data
loan_data = pd.read_csv('loan_data.csv')

# Column roles used throughout the experiment.
categorical_columns = ['person_gender', 'person_education', 'person_home_ownership']
numeric_columns = ['loan_amnt', 'loan_int_rate', 'loan_percent_income', 'credit_score']
target_column = 'loan_status'

# Selecting relevant columns. The explicit .copy() makes loan_data an
# independent frame, so the column assignments below modify it directly
# instead of something derived from the full CSV frame (this is what
# triggers SettingWithCopyWarning otherwise).
loan_data = loan_data[categorical_columns + numeric_columns + [target_column]].copy()

# Handling missing values by filling with the column mean for numeric columns.
# The filled column is assigned back on purpose: calling
# fillna(..., inplace=True) on loan_data[column] works on a temporary Series
# and is not guaranteed to update loan_data (it has no effect at all when
# pandas Copy-on-Write is active).
# NOTE: the means are computed over all rows, including the rows that later
# end up in the test split.
for column in numeric_columns:
    loan_data[column] = loan_data[column].fillna(loan_data[column].mean())

# Label encoding for the categorical columns and the target. One encoder is
# kept per column so every mapping can be inspected or inverted later; a
# single shared encoder would only remember the last column it was fitted on.
# astype(str) turns missing values into the string 'nan', which is encoded as
# its own class. After the loop `label_encoder` is the encoder fitted on
# loan_status.
label_encoders = {}
for column in categorical_columns + [target_column]:
    label_encoder = LabelEncoder()
    loan_data[column] = label_encoder.fit_transform(loan_data[column].astype(str))
    label_encoders[column] = label_encoder

# Define the features (X) and target (y)
X = loan_data.drop(target_column, axis=1)
y = loan_data[target_column]

# Split the data into training and testing sets (80% train, 20% test).
# The split happens BEFORE scaling so that the held-out rows cannot influence
# the scaling statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features with the mean/variance of the training split only,
# then apply that same transformation to the test split and to the full
# feature matrix. GaussianNB models each feature with its own Gaussian, so
# scaling mainly interacts with var_smoothing rather than being essential.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)

# 1. Gaussian Naive Bayes Model with GridSearchCV for hyperparameter tuning
param_grid = {'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6]}  # Hyperparameter tuning for GaussianNB
grid_search = GridSearchCV(GaussianNB(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Best Naive Bayes Model (refit on the whole training split by GridSearchCV)
best_naive_bayes = grid_search.best_estimator_
y_pred_nb = best_naive_bayes.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

print("OUTPUT: ")
print("\nNaive Bayes Accuracy: ", accuracy_nb)

# 2. Cross-Validation for better estimation.
# cross_val_score clones and refits the estimator on each of the 5 folds of
# the full dataset.
# NOTE: X_scaled was transformed with statistics of the training split, so
# the folds are not scaled independently of each other.
cv_scores_nb = cross_val_score(best_naive_bayes, X_scaled, y, cv=5)

print("Naive Bayes Cross-Validation Accuracy: ", cv_scores_nb.mean(), "±", cv_scores_nb.std())

OUTPUT: 

Naive Bayes Accuracy:  0.8258888888888889
