In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier, ExtraTreesClassifier
from sklearn.linear_model import Perceptron, LogisticRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the diabetes CSV from the Kaggle input directory and preview the
# first rows.
csv_path = "/kaggle/input/diabetes-data-set/diabetes.csv"
df = pd.read_csv(csv_path)
df.head(5)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Number of missing entries in each column (`isna` is the same check as
# `isnull`).
df.isna().sum()

In [None]:
# Names of the feature columns that are handed to the scaler.
numeric_col = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
]

In [None]:
# Split BEFORE scaling. The scaler must learn its min/max from the training
# rows only; fitting it on the full frame lets statistics of the held-out
# rows leak into the features the models are trained on, which makes the
# test scores optimistic.
X = df.drop("Outcome", axis=1)
y = df["Outcome"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on copies so the column assignments below write to independent
# frames; `df` and `X` keep the raw, unscaled values.
X_train = X_train.copy()
X_test = X_test.copy()

scaler = MinMaxScaler()
X_train[numeric_col] = scaler.fit_transform(X_train[numeric_col])
# Reuse the training min/max here; test values may therefore fall slightly
# outside the [0, 1] range.
X_test[numeric_col] = scaler.transform(X_test[numeric_col])

# Preview the scaled training features.
X_train.head()

In [None]:
# Candidate classifiers with default hyper-parameters. Estimators that draw
# random numbers while fitting get a fixed seed so the report below is the
# same on every run.
models = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(random_state=42),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    SVC(),
    LinearSVC(random_state=42),
    AdaBoostClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    Perceptron(),
    ExtraTreesClassifier(random_state=42),
    BaggingClassifier(random_state=42),
    LogisticRegression(),
    GradientBoostingClassifier(random_state=42)
]

# Maps each model's class name to its accuracy on the held-out test split.
accuracies = {}
for model in models:
    model_name = model.__class__.__name__
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # Built-in round() works for both NumPy scalars and plain Python floats;
    # the `.round` method only exists on NumPy scalars, so calling it breaks
    # when accuracy_score hands back a plain float.
    acc = round(float(accuracy_score(y_test, predictions)), 2)
    cm = confusion_matrix(y_test, predictions)
    cr = classification_report(y_test, predictions)
    print(f"Model: {model_name}")
    print(f"Accuracy: {acc}")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(cr)
    print("=" * 50)
    accuracies[model_name] = acc
# Summary of all test accuracies
print("Accuracies:", accuracies)

In [None]:
# Training-set metrics for the models fitted in the previous cell. The
# already-fitted estimators in `models` are reused instead of fitting a fresh
# set, so the train and test accuracies describe the very same fitted models
# and can be compared to spot over-fitting.
accuracies1 = {}
for model in models:
    model_name = model.__class__.__name__
    predictions = model.predict(X_train)
    # Built-in round() works for both NumPy scalars and plain Python floats;
    # the `.round` method only exists on NumPy scalars.
    acc = round(float(accuracy_score(y_train, predictions)), 2)
    cm = confusion_matrix(y_train, predictions)
    cr = classification_report(y_train, predictions)
    print(f"Model: {model_name}")
    print(f"Accuracy: {acc}")
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(cr)
    print("=" * 50)
    accuracies1[model_name] = acc
# Summary of all training accuracies
print("Accuracies:", accuracies1)

In [None]:
# Table of training-set accuracies, built from `accuracies1` rather than
# typed in by hand so it always matches the run that produced it.
data = {
    'Model': list(accuracies1.keys()),
    'Accuracy': list(accuracies1.values())
}

# Stored under its own name so the dataset frame `df` is not overwritten.
train_accuracy_df = pd.DataFrame(data)

train_accuracy_df

In [None]:
# Bar chart of the test-set accuracy recorded for each model.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(list(accuracies.keys()), list(accuracies.values()), color='blue')

ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy Comparison of Different Models')
ax.set_ylim(0, 1)
# Tilt the model names so the long class names do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Render the figure
plt.show()

In [None]:
# Highest test accuracy across all models. It is stored in `best_accuracy`
# rather than in a variable called `max`: rebinding `max` hides the built-in
# function, so running the cell a second time would fail with
# "TypeError: 'float' object is not callable".
best_accuracy = max(accuracies.values())

# Every model that ties for the top score, with the matching score repeated
# once per model.
bestModels = [name for name, accuracy in accuracies.items() if accuracy == best_accuracy]
bestAccuracy = [best_accuracy] * len(bestModels)

print("Best models:", bestModels)
print("Best Accuracies:", bestAccuracy)
