# Regression using neural networks
* Find the `pima-diabetes.csv` file in the `./data/` folder.
* Can we predict `Age` knowing all other variables?

In [0]:
# Upload the data file(s) into the Colab runtime via google.colab's helper.
# The return value is stored in `uploaded`, which is not read again in the
# cells visible here.
# NOTE(review): `google.colab` is presumably only importable inside Colab;
# confirm how this cell is handled when the notebook runs elsewhere.
from google.colab import files
uploaded = files.upload()

In [0]:
import pandas as pd

# Read the CSV into a DataFrame and show its first five rows as a quick
# sanity check of the column layout.
data = pd.read_csv('pima-diabetes.csv', sep=',')
data.head(n=5)

In [0]:
import numpy as np

# Load the CSV as a NumPy array, dropping the header row.
# `skip_header` is a count of lines to skip, so pass the integer 1 explicitly
# instead of relying on True being coerced to 1.
dataset = np.genfromtxt('pima-diabetes.csv', delimiter=",", skip_header=1)

In [0]:
# Render every float in printed arrays with one decimal place; the space in
# the format spec reserves a column for the sign.
np.set_printoptions(
    formatter={'float': lambda value: '{: 0.1f}'.format(value)}
)

In [0]:
# Print the array's shape framed by blank lines, then its first five rows.
print('', dataset.shape, '', sep='\n')
print(dataset[:5])

In [0]:
# Inputs (X): every column except index 7. Integer-array indexing returns a
# copy, so X can later be modified without touching `dataset`.
X = dataset[:, np.r_[0:7, 8]]
# Output (Y): column 7, the age.
Y = dataset[:, 7]

In [0]:
# Standardise each input column in place: subtract the column mean, then
# divide by the standard deviation of the centred data.
mean = np.mean(X, axis=0)
np.subtract(X, mean, out=X)
std = np.std(X, axis=0)
np.divide(X, std, out=X)

# Scale the target by its maximum. Ymax is kept so that predictions can be
# converted back to the original scale afterwards.
Ymax = np.max(Y)
Y = np.true_divide(Y, Ymax)

In [0]:
# Shape of the standardised inputs followed by their first five rows.
print(X.shape, X[:5], sep='\n')

In [0]:
# First five target values after division by Ymax.
print(Y[:5])

## Build a neural network

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Fully connected regression network: two ReLU hidden layers (12 and 8 units)
# followed by a single linear output unit.
# The input width is declared with an explicit Input layer and taken from X,
# rather than passing a hard-coded `input_dim` to the first Dense layer; this
# keeps the model in step with the feature matrix and follows the form that
# the Keras documentation recommends for Sequential models.
model = Sequential()
model.add(Input(shape=(X.shape[1],)))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='linear'))

In [0]:
# Minimise mean squared error with RMSprop. MAE is tracked as the additional
# metric: listing 'mse' again would only repeat the loss value in the training
# log, whereas MAE is the quantity this notebook reports at the end.
# Y has been divided by Ymax, so the logged values are in scaled units.
# NOTE(review): the model is fitted on all of X with no held-out data, so the
# error figures computed further down are training errors.
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
model.fit(X, Y, epochs=1024, verbose=1)

## Assess the predictions

In [0]:
# Compare targets and predictions (both still divided by Ymax).
# The same number of entries is printed for each, and both as flat arrays,
# so the two lines can be read element by element.
np.set_printoptions(precision=2)
print('True Data:')
print(Y[:10])
prediction = model.predict(X)
print('Prediction:')
print(prediction[:10].flatten())

In [0]:
import matplotlib.pyplot as plt

# Predicted vs. true age in scaled units, with both axes fixed to [0, 1].
plt.scatter(Y, prediction)
plt.xlabel('True Age')
plt.ylabel('Predicted Age')
plt.xlim((0, 1))
plt.ylim((0, 1))
plt.show()

## Restore predictions (un-normalize)

In [0]:
# Multiply by Ymax to undo the target scaling, giving flat arrays on the
# original age scale.
Yactual = Y.reshape(-1) * Ymax
Pactual = prediction.reshape(-1) * Ymax

# Same scatter as before, now with both axes running from 0 to Ymax.
plt.scatter(Yactual, Pactual)
plt.xlabel('True Age')
plt.ylabel('Predicted Age')
plt.xlim((0, Ymax))
plt.ylim((0, Ymax))
plt.show()

## Quantitative evaluation of the model using MAE

In [0]:
# Element-wise absolute error between prediction and true value.
diff = np.abs(Pactual - Yactual)

In [0]:
# First ten true values, predictions and absolute errors, one array per line.
print(Yactual[:10], Pactual[:10], diff[:10], sep='\n')

In [0]:
# Mean absolute error (MAE) over all rows.
np.mean(diff)

In [0]:
# Standard deviation of the absolute errors.
np.std(diff)

In [0]:
# Largest absolute error.
np.max(diff)

In [0]:
# Smallest absolute error.
np.min(diff)