In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, accuracy_score, roc_auc_score, roc_curve, classification_report
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

In [2]:
from warnings import filterwarnings
# Suppress every warning category for the rest of the session. This keeps the
# cell output short, but it also hides deprecation and convergence warnings
# emitted by the libraries used in later cells.
filterwarnings('ignore')

In [5]:
# Load the dataset from the notebook's working directory. A forward slash is
# used because "\d" is an invalid string escape and a backslash separator is
# not portable outside Windows.
df = pd.read_csv("./diabetes.csv")

In [101]:
def comp_cls(df, y, alg):
    """Fit one classifier on a hold-out split and report its test accuracy.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and the target column.
    y : str
        Name of the target column; every other column is used as a feature.
    alg : type
        Classifier class (not an instance). It is instantiated with its default
        arguments, except ``MLPClassifier``, which is built with
        ``activation="logistic"`` and ``solver="lbfgs"`` on standardised features.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``"Models"`` (the class name) and ``"Accuracy"``
        (test accuracy as a percentage).

    The accuracy (as a fraction) is also printed.
    """
    target = df[y]
    # Drop the column named by `y` rather than a hard-coded name, so the
    # function works for any target column.
    features = df.drop(columns=[y])

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=42
    )

    if alg is MLPClassifier:
        # Fit the scaler on the training split only and reuse those statistics
        # for the test split; re-fitting on the test data would scale the two
        # splits inconsistently.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        model = MLPClassifier(activation="logistic", solver="lbfgs").fit(X_train, y_train)
    else:
        model = alg().fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    model_name = alg.__name__

    print("Accuracy of", model_name, ":", accuracy)

    # Build the result row directly: DataFrame.append was removed in pandas 2.0,
    # and the frame was previously discarded instead of being returned.
    return pd.DataFrame([[model_name, accuracy * 100]], columns=["Models", "Accuracy"])

In [102]:
# Classifier classes (not instances) to compare; each is instantiated by comp_cls.
models = [
    LogisticRegression,
    GradientBoostingClassifier,
    MLPClassifier,
    DecisionTreeClassifier,
    KNeighborsClassifier,
    SVC,
    RandomForestClassifier,
    XGBClassifier,
]

In [103]:
# Evaluate each candidate classifier on `df` with "Outcome" as the target;
# comp_cls prints one accuracy line per model.
for model in models:
    comp_cls(df, "Outcome", model)


Accuracy of LogisticRegression : 0.7291666666666666
Accuracy of GradientBoostingClassifier : 0.75
Accuracy of MLPClassifier : 0.671875
Accuracy of DecisionTreeClassifier : 0.6927083333333334
Accuracy of KNeighborsClassifier : 0.65625
Accuracy of SVC : 0.7291666666666666
Accuracy of RandomForestClassifier : 0.7447916666666666
Accuracy of XGBClassifier : 0.75
