<a href="https://colab.research.google.com/github/jtao22/PythonAI/blob/main/Diabetes/pimadiabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
#Import
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential as seq
from keras.layers import Dense, Flatten, Conv2D, Dropout
from keras.layers import MaxPooling2D as mp2d 
from tensorflow.keras import layers
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split as tts
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder as LE
from sklearn.preprocessing import MinMaxScaler as MMS
from sklearn.metrics import accuracy_score as acc
from sklearn.linear_model import LogisticRegression as LR
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.svm import SVC

In [None]:
# Colab-only helper: opens the browser file picker so a local file can be
# copied into the runtime (presumably diabetes.csv, which the next cell reads).
from google.colab import files 
# upload() hands back the uploaded files' contents; binding the result keeps
# those raw contents from being echoed as this cell's output.
uploaded = files.upload()

In [47]:
# Read the dataset from the working directory into a DataFrame.
data = pd.read_csv('diabetes.csv')
# Only the last expression of a cell is rendered automatically, so the preview
# has to be shown explicitly or it is silently discarded.
display(data.head(5))
# Column dtypes (rendered as the cell's output value).
data.dtypes

Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
Outcome                       int64
dtype: object

In [48]:
# Pull the label column out as its own Series ...
Y = data.loc[:, 'Outcome']
# ... and keep only the remaining columns as the feature table.
data = data.drop('Outcome', axis=1)

In [49]:
#scale features
# Min-max scale every feature column into [0, 1]: (x - min) / (max - min).
# NOTE(review): min and max are taken over all rows, and the train/test split
# happens in a later cell, so test rows influence how training rows are scaled.
for col in data:
    column = data[col]
    lowest = np.min(column)
    spread = np.max(column) - lowest
    if spread == 0:
        # A constant column has zero range; dividing by it would fill the
        # column with NaN, so map it to 0 instead.
        data[col] = 0.0
    else:
        data[col] = (column - lowest) / spread

In [50]:
#split
# Hold out 15% of the rows for testing; the fixed random_state keeps the
# partition the same on every run. Labels are passed as a plain NumPy array.
trainX, testX, trainY, testY = tts(
    data,
    Y.to_numpy(),
    random_state=42,
    test_size=0.15,
)

In [51]:
#model
def model(trainX, trainY):
  """Fit five scikit-learn classifiers on the given data and print their scores.

  The classifiers are, in order: logistic regression, decision tree,
  random forest, k-nearest neighbours and a support vector classifier.

  Every printed score is computed on the same (trainX, trainY) that was used
  for fitting, even though only the first label says "Training".

  Args:
    trainX: feature table passed to each estimator's fit/score.
    trainY: labels passed to each estimator's fit/score.

  Returns:
    A 5-tuple of the fitted estimators, in the order listed above.
  """
  # (report label, unfitted estimator) pairs, kept in return order
  candidates = [
    ('Logistic Regression Training Accuracy: ', LR(random_state = 42)),
    ('Decision Tree Classifier Accuracy: ', DTC(criterion='entropy', random_state = 42)),
    ('Random Forest Classifier Accuracy: ', RFC(n_estimators = 10, criterion = 'entropy', random_state = 42)),
    ('K-Neighbors Classifier Accuracy: ', KNC(n_neighbors=8)),
    ('Support Vector Classifier Accuracy: ', SVC()),
  ]
  # fit everything first, then report, so all output appears after training
  for _, estimator in candidates:
    estimator.fit(trainX, trainY)
  for label, estimator in candidates:
    print(label, estimator.score(trainX, trainY) * 100, '%')
  return tuple(estimator for _, estimator in candidates)

In [52]:
# Train the scikit-learn classifiers; `models` holds the fitted estimators.
models = model(trainX=trainX, trainY=trainY)

Logistic Regression Training Accuracy:  76.22699386503068 %
Decision Tree Classifier Accuracy:  100.0 %
Random Forest Classifier Accuracy:  98.46625766871165 %
K-Neighbors Classifier Accuracy:  78.52760736196319 %
Support Vector Classifier Accuracy:  80.06134969325154 %


In [53]:
#create dense neural network
# Seed TensorFlow before any layer is created so weight initialisation is
# repeatable between runs (this may not make every backend bit-for-bit
# deterministic).
tf.random.set_seed(42)

# Take the input width from the training data instead of hard-coding it.
n_features = trainX.shape[1]

arch = seq()
arch.add(Dense(units = 9, input_shape= (n_features,), activation = 'relu')) #first hidden layer; input_shape declares the input width
arch.add(Dense(units = 16, activation = 'relu')) #second hidden layer
arch.add(Dense(units = 2, activation = 'softmax')) #output layer: one score per class
# The sparse loss is paired with the two-unit softmax; it presumably relies on
# the labels being integer class ids (Outcome is int64 in the loaded data).
arch.compile(optimizer= 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [54]:
# Train the network; 15% of the training rows are set aside for validation
# and one summary line is logged per epoch (verbose=2).
arch.fit(
    trainX,
    trainY,
    epochs=10,
    batch_size=8,
    validation_split=0.15,
    shuffle=True,
    verbose=2,
)


Epoch 1/10
70/70 - 1s - loss: 0.6833 - accuracy: 0.6390 - val_loss: 0.6762 - val_accuracy: 0.6327
Epoch 2/10
70/70 - 0s - loss: 0.6642 - accuracy: 0.6534 - val_loss: 0.6601 - val_accuracy: 0.6327
Epoch 3/10
70/70 - 0s - loss: 0.6492 - accuracy: 0.6534 - val_loss: 0.6517 - val_accuracy: 0.6327
Epoch 4/10
70/70 - 0s - loss: 0.6406 - accuracy: 0.6534 - val_loss: 0.6442 - val_accuracy: 0.6327
Epoch 5/10
70/70 - 0s - loss: 0.6330 - accuracy: 0.6534 - val_loss: 0.6358 - val_accuracy: 0.6327
Epoch 6/10
70/70 - 0s - loss: 0.6241 - accuracy: 0.6534 - val_loss: 0.6265 - val_accuracy: 0.6327
Epoch 7/10
70/70 - 0s - loss: 0.6142 - accuracy: 0.6534 - val_loss: 0.6151 - val_accuracy: 0.6327
Epoch 8/10
70/70 - 0s - loss: 0.6031 - accuracy: 0.6534 - val_loss: 0.6041 - val_accuracy: 0.6327
Epoch 9/10
70/70 - 0s - loss: 0.5911 - accuracy: 0.6534 - val_loss: 0.5917 - val_accuracy: 0.6327
Epoch 10/10
70/70 - 0s - loss: 0.5800 - accuracy: 0.6552 - val_loss: 0.5799 - val_accuracy: 0.6327


<tensorflow.python.keras.callbacks.History at 0x7f1dfec5fb10>

In [58]:
#predict 
# Per-class scores from the network for every test row.
predictions = arch.predict(testX, batch_size= 2, verbose=0)
# argmax over the class axis turns the softmax scores into hard labels
rounded = np.argmax(predictions, axis = 1)

# Score the network with the same metric helper used for the other models,
# rather than a hand-rolled counting loop.
print('Dense Neural Network Accuracy: ', acc(testY, rounded)*100, '%')
print()
names = ['Logistic Regression', 'Decision Tree Classifier', 'Random Forest Classifier', 'K-Neighbors Classifier', 'Support Vector Classifier']
# names must line up one-to-one with the tuple of fitted models
assert len(names) == len(models), 'names and models are out of sync'
for name, fitted in zip(names, models):
  accuracy = acc(testY, fitted.predict(testX))
  print(name, 'Accuracy: ', accuracy * 100, '%')
  print()

Dense Neural Network Accuracy:  65.51724137931035 %

Logistic Regression Accuracy:  76.72413793103449 %

Decision Tree Classifier Accuracy:  77.58620689655173 %

Random Forest Classifier Accuracy:  73.27586206896551 %

K-Neighbors Classifier Accuracy:  69.82758620689656 %

Support Vector Classifier Accuracy:  75.0 %

