In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_curve, roc_auc_score

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

import matplotlib.pyplot as plt
import seaborn as sns

# functions
def normalization(x_df):
    """Rescale every column of ``x_df`` with a freshly fitted ``MinMaxScaler``.

    The scaler is created with its default settings and fitted on ``x_df``
    itself, so the scaling statistics come from every row that is passed in.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Feature columns to rescale.

    Returns
    -------
    pandas.DataFrame
        The scaled values, carrying the same column labels and the same row
        index as ``x_df``.
    """
    normal_scalar = MinMaxScaler()

    array = normal_scalar.fit_transform(x_df)

    # The transformed result is rebuilt into a DataFrame here; pass the
    # original index along so rows stay aligned with any target Series that
    # shares x_df's index (without it pandas falls back to 0..n-1).
    x_normal_df = pd.DataFrame(array, columns=x_df.columns, index=x_df.index)
    return x_normal_df
        
    
def standardization(x_df):
    """Standardize every column of ``x_df`` with a freshly fitted ``StandardScaler``.

    The scaler is created with its default settings and fitted on ``x_df``
    itself, so the scaling statistics come from every row that is passed in.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Feature columns to standardize.

    Returns
    -------
    pandas.DataFrame
        The standardized values, carrying the same column labels and the same
        row index as ``x_df``.
    """
    std_scalar = StandardScaler()

    array = std_scalar.fit_transform(x_df)

    # Keep the caller's row index instead of letting pandas renumber the rows
    # 0..n-1, so the result still lines up with a target indexed like x_df.
    x_std_df = pd.DataFrame(array, columns=x_df.columns, index=x_df.index)
    return x_std_df

def train_test_split_fun(df_x, y, model_name, test_size=0.25, random_state=12):
    """Split features and target into stratified train/test parts.

    Also records ``model_name`` in the module-level ``model_details`` list so
    the final comparison table can label this run.

    Parameters
    ----------
    df_x : pandas.DataFrame
        Feature matrix.
    y : pandas.Series
        Target labels; also used as the stratification key.
    model_name : str
        Label appended to ``model_details``.
    test_size : float, optional
        Fraction of rows held out for testing (default 0.25, the value that
        was previously hard-coded).
    random_state : int, optional
        Seed for the shuffle (default 12, the value that was previously
        hard-coded).

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        df_x,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )

    # Side effect: model_details is a module-level list shared by all runs.
    model_details.append(model_name)
    return x_train, x_test, y_train, y_test

def model_training(x_train, y_train):
    """Fit a ``KNeighborsClassifier`` with default settings and return it.

    Parameters
    ----------
    x_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Returns
    -------
    KNeighborsClassifier
        The fitted classifier.
    """
    # fit() hands back the estimator itself, so construct, fit and return
    # can be chained into a single expression.
    return KNeighborsClassifier().fit(x_train, y_train)

def model_evalution_testing(knn_clf, x_test, y_test):
    """Print test-set metrics for ``knn_clf`` and record its test accuracy.

    Prints the confusion matrix, the accuracy score and the classification
    report, and appends the accuracy to the module-level ``Testing_accuracy``
    list.

    Parameters
    ----------
    knn_clf : fitted classifier
        Model exposing ``predict``.
    x_test : array-like
        Held-out features.
    y_test : array-like
        True labels for ``x_test``.
    """
    # Testing Data Evaluation
    y_pred = knn_clf.predict(x_test)

    # scikit-learn metrics take (y_true, y_pred). With the arguments reversed
    # the confusion matrix is transposed and the report swaps precision and
    # recall, so the true labels must come first.
    cnf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:\n",cnf_matrix)

    print("*"*84)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy Score", accuracy)

    # Keep the test accuracy for the final comparison table.
    Testing_accuracy.append(accuracy)
    print("*"*84)

    clf_report = classification_report(y_test, y_pred)
    print("Classification report:\n",clf_report)
    
def model_evalution_training(knn_clf, x_train, y_train):
    """Print training-set metrics for ``knn_clf`` and record its training accuracy.

    Prints the confusion matrix, the accuracy score and the classification
    report, and appends the accuracy to the module-level ``Training_accuracy``
    list.

    Parameters
    ----------
    knn_clf : fitted classifier
        Model exposing ``predict``.
    x_train : array-like
        Features the model was trained on.
    y_train : array-like
        True labels for ``x_train``.
    """
    # Training Data Evaluation
    y_pred_train = knn_clf.predict(x_train)

    # scikit-learn metrics take (y_true, y_pred). With the arguments reversed
    # the confusion matrix is transposed and the report swaps precision and
    # recall, so the true labels must come first.
    cnf_matrix = confusion_matrix(y_train, y_pred_train)
    print("Confusion Matrix:\n",cnf_matrix)

    print("*"*84)

    accuracy = accuracy_score(y_train, y_pred_train)
    print("Accuracy Score", accuracy)

    # Keep the training accuracy for the final comparison table.
    Training_accuracy.append(accuracy)

    print("*"*84)

    clf_report = classification_report(y_train, y_pred_train)
    print("Classification report:\n",clf_report)

def get_best_estimator(x_train, y_train):
    """Grid-search KNN hyperparameters and return the best fitted estimator.

    Runs a 5-fold ``GridSearchCV`` over ``n_neighbors`` in 3..29 and the
    distance power parameter ``p`` in {1, 2}.

    Side effects: appends ``"Hyperparameter_Tunning"`` to the module-level
    ``model_details`` list and the winning parameter dict to
    ``best_params_list``.

    Parameters
    ----------
    x_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Returns
    -------
    KNeighborsClassifier
        ``best_estimator_`` of the completed search.
    """
    # Estimator
    knn_clf = KNeighborsClassifier()

    # param_grid; np.arange(3, 30) stops at 29 (the upper bound is exclusive)
    hyperparameters = {"n_neighbors" : np.arange(3,30),
                      "p": [1,2]}

    gscv_knn_clf = GridSearchCV(knn_clf, hyperparameters, cv = 5)
    gscv_knn_clf.fit(x_train, y_train)

    # Label for the comparison table row produced by the tuned model.
    model_details.append("Hyperparameter_Tunning")

    best_params_list.append(gscv_knn_clf.best_params_)

    return gscv_knn_clf.best_estimator_


def get_hyp_testing_accuracy(best_params, x_test, y_test):
    """Print test-set metrics for the tuned model and record its test accuracy.

    Prints the confusion matrix, the accuracy score and the classification
    report, and appends the accuracy to the module-level ``Testing_accuracy``
    list.

    Parameters
    ----------
    best_params : fitted classifier
        Despite the name, this is used as an estimator (``predict`` is called
        on it), not as a parameter dict; the script passes the return value of
        ``get_best_estimator``.
    x_test : array-like
        Held-out features.
    y_test : array-like
        True labels for ``x_test``.
    """
    # Testing Data Evaluation

    # The argument is the best model from hyperparameter tuning.
    knn_clf = best_params

    y_pred = knn_clf.predict(x_test)

    # scikit-learn metrics take (y_true, y_pred). With the arguments reversed
    # the confusion matrix is transposed and the report swaps precision and
    # recall, so the true labels must come first.
    cnf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:\n",cnf_matrix)

    print("*"*84)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy Score", accuracy)

    # Keep the test accuracy for the final comparison table.
    Testing_accuracy.append(accuracy)
    print("*"*84)

    clf_report = classification_report(y_test, y_pred)
    print("Classification report:\n",clf_report)

    
def get_hyp_training_accuracy(best_params, x_train, y_train):
    """Print training-set metrics for the tuned model and record its training accuracy.

    Prints the confusion matrix, the accuracy score and the classification
    report, and appends the accuracy to the module-level ``Training_accuracy``
    list.

    Parameters
    ----------
    best_params : fitted classifier
        Despite the name, this is used as an estimator (``predict`` is called
        on it), not as a parameter dict; the script passes the return value of
        ``get_best_estimator``.
    x_train : array-like
        Features the model was trained on.
    y_train : array-like
        True labels for ``x_train``.
    """
    knn_clf = best_params

    # Training Data Evaluation
    y_pred_train = knn_clf.predict(x_train)

    # scikit-learn metrics take (y_true, y_pred). With the arguments reversed
    # the confusion matrix is transposed and the report swaps precision and
    # recall, so the true labels must come first.
    cnf_matrix = confusion_matrix(y_train, y_pred_train)
    print("Confusion Matrix:\n",cnf_matrix)

    print("*"*84)
    accuracy = accuracy_score(y_train, y_pred_train)
    print("Accuracy Score", accuracy)

    # Keep the training accuracy for the final comparison table.
    Training_accuracy.append(accuracy)
    print("*"*84)

    clf_report = classification_report(y_train, y_pred_train)
    print("Classification report:\n",clf_report)



# Result accumulators filled by the helper functions above; each run adds two
# entries per list (baseline model, then the grid-searched model), except
# best_params_list which gets one entry per run.
model_details = []
Testing_accuracy = []
Training_accuracy = []
best_params_list = []

df = pd.read_csv("diabetes.csv")

x_df = df.drop("Outcome", axis=1)
y = df['Outcome']

# One entry per experiment: (banner to print, label for the comparison table,
# feature transform or None to use the raw features). The same pipeline runs
# for each entry, so the steps live in a single loop instead of three copies.
# NOTE(review): banner/label text is kept exactly as it was (including the
# "Satndardized" spelling) so printed output and table labels do not change.
experiments = [
    ("****************************** Regular Model ***************************",
     "Regular Model", None),
    ("****************************** Normalized Model ***************************",
     "Normalized Model", normalization),
    ("****************************** Satndardized Model ***************************",
     "Satndardized Model", standardization),
]

for banner, model_name, scale in experiments:
    print(banner)

    # Scale into a separate name so `df` keeps referring to the raw data.
    features = x_df if scale is None else scale(x_df)

    # Train test split
    x_train, x_test, y_train, y_test = train_test_split_fun(features, y, model_name)

    # Model training
    knn_clf = model_training(x_train, y_train)

    # Model Evaluation testing
    model_evalution_testing(knn_clf, x_test, y_test)

    # Model Evaluation training
    model_evalution_training(knn_clf, x_train, y_train)

    # Hyperparameter tuning; the returned object is the best fitted estimator
    best_params = get_best_estimator(x_train, y_train)

    # Hyperparameter Testing Accuracy
    get_hyp_testing_accuracy(best_params, x_test, y_test)

    # Hyperparameter Training Accuracy
    get_hyp_training_accuracy(best_params, x_train, y_train)

# Rows alternate: baseline model for a dataset variant, then its tuned model.
comparison_df = pd.DataFrame({"Model_details":model_details, "Training_accuracy":Training_accuracy, "Testing_accuracy":Testing_accuracy})
comparison_df

****************************** Regular Model ***************************


NameError: name 'model_name' is not defined