<a href="https://colab.research.google.com/github/kdemertzis/EKPA/blob/main/Diabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
data = pd.read_csv('/content/diabetes.csv', delimiter=',')

In [2]:
data = data.sample(frac=1).reset_index(drop=True)
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,11,136,84,35,130,28.3,0.26,42,1
1,1,122,90,51,220,49.7,0.325,31,1
2,1,153,82,42,485,40.6,0.687,23,0
3,1,147,94,41,0,49.3,0.358,27,1
4,3,193,70,31,0,34.9,0.241,25,1


In [3]:
X = data.drop(['Pregnancies', 'Outcome'], axis=1)

In [4]:
X.head()

Unnamed: 0,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,136,84,35,130,28.3,0.26,42
1,122,90,51,220,49.7,0.325,31
2,153,82,42,485,40.6,0.687,23
3,147,94,41,0,49.3,0.358,27
4,193,70,31,0,34.9,0.241,25


In [5]:
#y=np.ravel(data['Label'])
y=np.ravel(data['Outcome'])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state=43)

In [7]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Define classifiers
classifiers = [
    LogisticRegression(),
    SVC(),
    RandomForestClassifier(),
    MLPClassifier(),
    KNeighborsClassifier(),
    DecisionTreeClassifier()
]

In [9]:
# Evaluate and compare classifiers
results = {'Classifier': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': []}

for clf in classifiers:
    clf_name = clf.__class__.__name__

    # Train the classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict(X_test)

    # Calculate evaluation metrics
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # Display results
    print(f"\n{clf_name} Metrics:")
    print(f"Accuracy: {acc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print("Confusion Matrix:")
    print(cm)

    # Store results for comparison
    results['Classifier'].append(clf_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(precision)
    results['Recall'].append(recall)
    results['F1 Score'].append(f1)


LogisticRegression Metrics:
Accuracy: 0.7662337662337663
Precision: 0.7615584415584417
Recall: 0.7662337662337663
F1 Score: 0.7542122258831028
Confusion Matrix:
[[136  15]
 [ 39  41]]

SVC Metrics:
Accuracy: 0.7402597402597403
Precision: 0.7316319319508476
Recall: 0.7402597402597403
F1 Score: 0.7283741486640039
Confusion Matrix:
[[132  19]
 [ 41  39]]

RandomForestClassifier Metrics:
Accuracy: 0.7748917748917749
Precision: 0.769565787747606
Recall: 0.7748917748917749
F1 Score: 0.769085489390674
Confusion Matrix:
[[132  19]
 [ 33  47]]

MLPClassifier Metrics:
Accuracy: 0.7575757575757576
Precision: 0.7511478420569329
Recall: 0.7575757575757576
F1 Score: 0.7513228347284182
Confusion Matrix:
[[130  21]
 [ 35  45]]

KNeighborsClassifier Metrics:
Accuracy: 0.70995670995671
Precision: 0.7000208626714651
Recall: 0.70995670995671
F1 Score: 0.7018166937670909
Confusion Matrix:
[[125  26]
 [ 41  39]]

DecisionTreeClassifier Metrics:
Accuracy: 0.7359307359307359
Precision: 0.7420914813156193
Rec



In [10]:
# Display comparison results
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='Accuracy', ascending=False)
print("\nComparison of Classifiers:")
print(results_df)


Comparison of Classifiers:
               Classifier  Accuracy  Precision    Recall  F1 Score
2  RandomForestClassifier  0.774892   0.769566  0.774892  0.769085
0      LogisticRegression  0.766234   0.761558  0.766234  0.754212
3           MLPClassifier  0.757576   0.751148  0.757576  0.751323
1                     SVC  0.740260   0.731632  0.740260  0.728374
5  DecisionTreeClassifier  0.735931   0.742091  0.735931  0.738332
4    KNeighborsClassifier  0.709957   0.700021  0.709957  0.701817
