In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Import Data

In [3]:
data = pd.read_csv("../../datasets/diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Pre-Process Date

In [5]:
x = data.drop("Outcome",axis=1)
y= data["Outcome"]
x = np.array(x)
y = np.array(y)

scalar = StandardScaler()
x = scalar.fit_transform(x)
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

### PCA (Reduce Data Dimensions)
with this model you can reduce dimension of your inputs to make models faster & reduce overFitting

In [6]:
from sklearn.decomposition import PCA

pca = PCA(n_components=3) # Reduce dimensions to 3
pca.fit(x_train)
x_new_train = pca.transform(x_train)
x_new_test = pca.transform(x_test)
print(x_train.shape, x_new_train.shape)

(614, 8) (614, 3)


### Evaluate Model Function

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score

def evaluate_model(model):
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    confusion_matrix_train = confusion_matrix(y_train, y_train_pred)
    accuracy_train = accuracy_score(y_train, y_train_pred)
    precision_train = precision_score(y_train, y_train_pred)
    recall_train = recall_score(y_train, y_train_pred)

    accuracy_test = accuracy_score(y_test, y_test_pred)

    print("confusion_matrix_train :", confusion_matrix_train)
    print("accuracy_train  :", accuracy_train)
    print("accuracy_test   :", accuracy_test)
    print("precision_train :", precision_train)
    print("recall_train    :", recall_train)

### Naive Bayes Model

In [8]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(x_train, y_train)
evaluate_model(model)

confusion_matrix_train : [[335  63]
 [ 85 131]]
accuracy_train  : 0.758957654723127
accuracy_test   : 0.7857142857142857
precision_train : 0.6752577319587629
recall_train    : 0.6064814814814815


### KNN Model

In [9]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=4)
model.fit(x_train, y_train)
evaluate_model(model)

confusion_matrix_train : [[377  21]
 [ 90 126]]
accuracy_train  : 0.8192182410423453
accuracy_test   : 0.7337662337662337
precision_train : 0.8571428571428571
recall_train    : 0.5833333333333334


### Decision Tree Model

In [10]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(max_depth=8, min_samples_split=4, min_samples_leaf=2)
model.fit(x_train, y_train)
evaluate_model(model)

confusion_matrix_train : [[376  22]
 [ 33 183]]
accuracy_train  : 0.9104234527687296
accuracy_test   : 0.6363636363636364
precision_train : 0.8926829268292683
recall_train    : 0.8472222222222222


### Random Forest Model

In [11]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=30)
model.fit(x_train, y_train)
evaluate_model(model)

confusion_matrix_train : [[398   0]
 [  1 215]]
accuracy_train  : 0.998371335504886
accuracy_test   : 0.7142857142857143
precision_train : 1.0
recall_train    : 0.9953703703703703


### SVM Model

In [12]:
from sklearn import svm

svm.SVC(kernel="poly")
model.fit(x_train, y_train)
evaluate_model(model)

confusion_matrix_train : [[398   0]
 [  0 216]]
accuracy_train  : 1.0
accuracy_test   : 0.7532467532467533
precision_train : 1.0
recall_train    : 1.0


### Logistic Regression Model

In [13]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_train,y_train)
evaluate_model(model)

confusion_matrix_train : [[351  47]
 [ 97 119]]
accuracy_train  : 0.7654723127035831
accuracy_test   : 0.7727272727272727
precision_train : 0.7168674698795181
recall_train    : 0.5509259259259259


### ANN (Artificial Neural Network)

In [14]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(hidden_layer_sizes=[8], max_iter=200)
model.fit(x_train,y_train)
evaluate_model(model)

confusion_matrix_train : [[346  52]
 [ 82 134]]
accuracy_train  : 0.7817589576547231
accuracy_test   : 0.7792207792207793
precision_train : 0.7204301075268817
recall_train    : 0.6203703703703703


