# Artificial Neural Network

- Pima Indians Diabetes Database
- https://www.kaggle.com/raffaelesantagati/prima-indians-diabetes-database

# Load data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render all figures at 120 dpi.
plt.rcParams['figure.dpi'] = 120

In [None]:
# The CSV is read with header=None, so the column labels are supplied here.
colnames = [
    'preg', 'glucose', 'blood_p', 'skin_thick',
    'insulin', 'bmi', 'diabetes_func', 'age',
    'Y',
]
dataset = pd.read_csv('pima-indians-diabetes.csv', header=None, names=colnames)

In [None]:
# Show the first rows of the loaded frame as a quick sanity check.
dataset.head()

# Explore data

In [None]:
# Print the summary-statistics table produced by DataFrame.describe().
print(dataset.describe())

In [None]:
# Class distribution: number of rows for each value of the label column 'Y'.
# dataset.groupby('Y').size() gives the same counts, ordered by class label
# instead of by frequency.
dataset['Y'].value_counts()

In [None]:
# Box-and-whisker plot per column; axes are not shared between subplots.
dataset.plot(
    kind='box',
    subplots=True,
    layout=(5, 2),
    figsize=(7, 15),
    sharex=False,
    sharey=False,
)
plt.show()

In [None]:
# Histograms of the dataset's columns, drawn on a 7x8 inch figure.
dataset.hist(figsize=(7,8))
plt.show()

# Prepare data

## Load data

In [None]:
# Convert the DataFrame into a plain NumPy array (rows x columns).
array = dataset.to_numpy()

In [None]:
# Features are the first eight columns; the label is the column at index 8.
X, y = array[:, :8], array[:, 8]

## Scaling data

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the full feature matrix, then rescale that matrix.
scaler = MinMaxScaler().fit(X)
X_scl = scaler.transform(X)

In [None]:
# Box plots of the scaled features, labelled with the feature column names.
pd.DataFrame(X_scl, columns=colnames[:-1]).plot(
    kind='box',
    subplots=True,
    layout=(5, 2),
    figsize=(7, 10),
    sharex=False,
    sharey=False,
)
plt.show()

## Split data into test and train

In [None]:
from sklearn.model_selection import train_test_split

# Split the *scaled* features (X_scl), not the raw X: new samples are passed
# through `scaler` before prediction, so the network has to be trained and
# evaluated on inputs in that same scaled range.
# 20% of the rows are held out for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X_scl, y, test_size=0.20, random_state=1)

# Shapes of the resulting splits, as a sanity check.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

# Prepare model

In [None]:
from tensorflow.keras.backend import clear_session
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Reset Keras' global state before building a fresh model.
clear_session()

# Fully connected binary classifier: 8 inputs -> 12 (relu) -> 8 (relu) -> 1 (sigmoid).
model = Sequential([
    Dense(12, input_dim=8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Write the architecture diagram to model_plot.png, with shapes and layer names.
plot_model(
    model,
    to_file='model_plot.png',
    show_layer_names=True,
    show_shapes=True,
)

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model

In [None]:
# Train on the training split for 200 epochs in mini-batches of 10 samples.
model.fit(x=X_train, y=y_train, batch_size=10, epochs=200)

# Validate the model (using test data)

In [None]:
# Network outputs for the held-out test samples.
y_pred = model.predict(X_test)

# Scatter the raw outputs against their sample index.
x_pl = np.arange(len(y_pred))
plt.scatter(x_pl, y_pred)
plt.show()

In [None]:
# Threshold the outputs at 0.5 to get 0/1 labels, flattened to one dimension.
predictions = np.where(y_pred > 0.5, 1, 0).reshape(-1)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose thresholded prediction matches the true label.
print(accuracy_score(y_true=y_test, y_pred=predictions))

# Prediction

## Retrain the model with all data

In [None]:
# Continue training the model on the full dataset. The scaled features
# (X_scl) are used rather than the raw X because the new sample below is
# passed through `scaler` before prediction, so training and inference
# inputs must be in the same scaled range.
model.fit(X_scl, y, epochs=200, batch_size=10)

## Unseen X

In [None]:
# One unseen sample, with values in the same order as the feature columns.
X_new = [7, 150, 50, 15, 200, 33.11, 0.777, 43]

# Turn the flat list into a single-row frame and label it with the feature
# names (every column name except the trailing label 'Y').
df_new = pd.DataFrame(X_new).transpose()
df_new.columns = colnames[0:-1]
df_new

## Make prediction

In [None]:
# Scale the new sample with the already-fitted scaler, run it through the
# model, and threshold the output at 0.5 to get a 0/1 class label.
X_new = df_new.values
X_new_scl = scaler.transform(X_new)
y_pred = model.predict(X_new_scl)
pred = np.where(y_pred > 0.5, 1, 0)
print(pred.reshape(-1))