# Neural Networks with Keras
This notebook requires the `keras` and `tensorflow` packages. Install them with either conda or pip:

    # Try conda first
    conda install keras
    conda install tensorflow

    # or try pip
    pip install keras
    pip install tensorflow

In [None]:
from __future__ import print_function
import random
import numpy as np
import pandas as pd
from math import sin

from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD   # Stochastic Gradient Descent

from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error
import sklearn.model_selection as cv

%matplotlib inline
import matplotlib.pyplot as plt

## Neural Network Regression
### Polynomial Regression
Let's train a neural network on a few different shapes. First we start with a polynomial (a cubic).

In [None]:
# Create some data: a cubic with uniform noise in [0, 1) added to every sample
def f(x):
    """Return x^3 - 5x + 12 plus one fresh draw of uniform [0, 1) noise."""
    cubic = x ** 3 - 5 * x + 12
    return cubic + random.random()

X = np.linspace(-1, 1, 1000).reshape(-1, 1)
# Call f once per row so that each sample gets its own noise draw.
y = np.array([f(row) for row in X])
print(X.shape, y.shape)

In [None]:
# Scatter plot of the noisy cubic samples
fig, ax = plt.subplots(figsize = (5, 5))
ax.scatter(X, y, s = 4)
plt.show()

In [None]:
# Define a Feed Forward NN: 1 input -> 5 tanh hidden units -> 1 linear output.
# The layer width is passed positionally because its keyword was renamed
# between Keras releases (`output_dim` in Keras 1, `units` from Keras 2 on).
model = Sequential()
model.add(Dense(5, input_dim = 1))
model.add(Activation("tanh"))
model.add(Dense(1, input_dim = 5))
model.add(Activation("linear"))

# First argument of SGD is the learning rate; it is positional for the same
# reason (keyword `lr` was later renamed `learning_rate`).
model.compile(loss = "mse", optimizer = SGD(0.01))

In [None]:
%%time
# Train the model
# NOTE(review): Keras takes `validation_split` from the end of the arrays
# before shuffling; X is an ordered linspace, so the held-out 10% is the
# right-hand edge of the domain - confirm this is intended.
print("Training...")
loss = model.fit(X, y,
                 epochs = 500,   # `nb_epoch` in Keras 1, renamed `epochs` in Keras 2
                 validation_split = 0.1,
                 batch_size = 128,
                 verbose = False)
print("Done")
print("MSE at last epoch: %.4f" % loss.history["loss"][-1])   # training MSE recorded for the final epoch

In [None]:
# Plot the predictions: fitted curve (red) over the samples, annotated with the MSE on all of X
predictions = model.predict(X)
fit_mse = mean_squared_error(predictions, y)

fig, ax = plt.subplots(figsize = (5, 5))
ax.scatter(X, y, s = 4)
ax.plot(X, predictions, color = "r", linewidth = 2)
ax.text(-1, 8, "MSE: %.4f" % fit_mse)
plt.show()

### Sine Regression

In [None]:
# Sine data: 500 evenly spaced samples over one full period, as a column vector
n_points = 500
X = np.linspace(0, 2 * np.pi, n_points)[:, np.newaxis]
y = np.sin(X)

print(X.shape, y.shape)

In [None]:
# Scatter plot of the sine samples
fig, ax = plt.subplots(figsize = (10, 5))
ax.scatter(X, y, s = 4)
plt.show()

In [None]:
# Create the model: 1 input -> 5 tanh hidden units -> 1 linear output.
# The layer width is passed positionally because its keyword was renamed
# between Keras releases (`output_dim` in Keras 1, `units` from Keras 2 on).
model = Sequential()
model.add(Dense(5, input_dim = 1))
model.add(Activation("tanh"))
model.add(Dense(1, input_dim = 5))
model.add(Activation("linear"))

# First argument of SGD is the learning rate; it is positional for the same
# reason (keyword `lr` was later renamed `learning_rate`).
model.compile(loss = "mse", optimizer = SGD(0.1))

### Train the Model

In [None]:
%%time
# Fit the sine model for 150 epochs, holding out 10% of the samples for validation.
print("Training...")
loss = model.fit(X, y,
                 epochs = 150,   # `nb_epoch` in Keras 1, renamed `epochs` in Keras 2
                 validation_split = 0.1,
                 batch_size = 128,
                 verbose = False)
print("Complete")
print('MSE at last epoch: %.4f' % loss.history["loss"][-1])   # training MSE recorded for the final epoch

In [None]:
# Plot the predictions: fitted curve (red) over the samples, annotated with the MSE on all of X
predictions = model.predict(X)
fit_mse = mean_squared_error(predictions, y)

fig, ax = plt.subplots(figsize = (10, 5))
ax.scatter(X, y, s = 4)
ax.plot(X, predictions, color = "r", linewidth = 2)
ax.text(0, -1, "MSE: %.4f" % fit_mse)
plt.show()

In [None]:
# Plot the error over time (training loss recorded by fit, one value per epoch)
epoch_losses = loss.history["loss"]

fig, ax = plt.subplots(figsize = (10, 5))
ax.plot(range(len(epoch_losses)), epoch_losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_title("MSE by Epoch")
plt.show()

### Train longer
If we train for more epochs, we can get a better regression.

In [None]:
%%time
# Same sine task with twice as many samples and a much longer training run.
X = np.linspace(0, 2 * np.pi, 1000).reshape(-1, 1)
y = np.sin(X)
print(X.shape, y.shape)

# 1 input -> 5 tanh hidden units -> 1 linear output.
# The layer width is passed positionally because its keyword was renamed
# between Keras releases (`output_dim` in Keras 1, `units` from Keras 2 on).
model = Sequential()
model.add(Dense(5, input_dim = 1))
model.add(Activation("tanh"))
model.add(Dense(1, input_dim = 5))
model.add(Activation("linear"))

# First argument of SGD is the learning rate; it is positional for the same
# reason (keyword `lr` was later renamed `learning_rate`).
model.compile(loss = "mse", optimizer = SGD(0.05))

# NOTE(review): Keras takes `validation_split` from the end of the arrays
# before shuffling; X is an ordered linspace, so the held-out 10% is the
# right-hand edge of the domain - confirm this is intended.
print("Training...")
loss = model.fit(X, y,
                 epochs = 15000,   # `nb_epoch` in Keras 1, renamed `epochs` in Keras 2
                 validation_split = 0.1,
                 batch_size = 128,
                 verbose = False)
print("Complete")
print("MSE at last epoch: %.4f" % loss.history["loss"][-1])   # training MSE recorded for the final epoch
predictions = model.predict(X)

In [None]:
# Plot the fitted curve (red) over the samples, annotated with the MSE on all of X
fit_mse = mean_squared_error(predictions, y)

fig, ax = plt.subplots(figsize = (10, 5))
ax.scatter(X, y, s = 4)
ax.plot(X, predictions, color = "r", linewidth = 2)
ax.text(0, -1, "MSE: %.4f" % fit_mse)
plt.show()

We can take a closer look at the error per training epoch.

In [None]:
# Plot the error over time (training loss recorded by fit, one value per epoch)
epoch_losses = loss.history["loss"]

fig, ax = plt.subplots(figsize = (10, 5))
ax.plot(range(len(epoch_losses)), epoch_losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_title("MSE by Epoch")
plt.show()

### Exercise: Perform regression on the following data
Hints:
- Try adding a hidden layer
- Try lowering the learning rate and using more epochs

In [None]:
def f(x):
    """Return x^2 * sin(x^2); works elementwise on NumPy arrays."""
    squared = x ** 2
    return squared * np.sin(squared)

# Sine data: 1000 evenly spaced samples on [2, pi], as a column vector
X = np.linspace(2, np.pi, 1000).reshape(-1, 1)
y = np.array([f(row) for row in X])
print(X.shape, y.shape)

In [None]:
# Scatter plot of the exercise data
fig, ax = plt.subplots(figsize = (10, 5))
ax.scatter(X, y, s = 1)
plt.show()

In [None]:
%%time
# <Code Here>
# model

# lr: learning rate

# fit

# predictions

In [None]:
# <Code Here>
# Plot

## Classification
We will start with the Iris data set (of course).

In [None]:
import sklearn.datasets as datasets

# Load the Iris feature matrix and its integer class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Break each output into indicator cols (one column per class)
y_cat = pd.get_dummies(y).values
print(X.shape, y_cat.shape)

In [None]:
# Define a model
model = Sequential()

# input_dim = number of neurons in previous layer.
# First positional argument = number of neurons in current layer. It is passed
# positionally because its keyword was renamed between Keras releases
# (`output_dim` in Keras 1, `units` from Keras 2 on).

# First layer - input_dim = k features.
model.add(Dense(4, input_dim = 4))
model.add(Activation("tanh"))

# Output layer - number of neurons = # of output per point (in y).
# Use "softmax" for class probability. "linear" for regression
model.add(Dense(3, input_dim = 4))
model.add(Activation("softmax"))

# Uses Mean Squared Error and Stochastic Gradient Descent. The first argument
# of SGD is the learning rate (keyword `lr` was later renamed `learning_rate`).
model.compile(loss = "mse", optimizer = SGD(0.01))

In [None]:
%%time
# Train the model on the indicator-encoded labels, holding out 10% for validation.
print("Training...")
loss = model.fit(X, y_cat,
                 validation_split = 0.1,
                 epochs = 5000,   # `nb_epoch` in Keras 1, renamed `epochs` in Keras 2
                 batch_size = 16,
                 verbose = False)
print("Complete")
print("MSE at last epoch: %.4f" % loss.history["loss"][-1])   # training MSE recorded for the final epoch

In [None]:
def print_confusion_matrix(y, preds):
    """Print the confusion matrix of labels `y` against predictions `preds`.

    The matrix comes from `confusion_matrix(y, preds)`; each row is printed on
    its own line with every count right-aligned in a 5-character field.
    """
    print("Confusion Matrix")
    for row in confusion_matrix(y, preds):
        print("".join("%5d" % count for count in row))

In [None]:
# Model evaluation (on the same data the model was fitted to)
pred_y = model.predict(X, verbose = False)   # softmax outputs, one column per class
# `Sequential.predict_classes` was removed from newer Keras releases; taking
# the argmax of the softmax outputs yields the same class labels.
preds  = np.argmax(pred_y, axis = 1)

print("Accuracy: %.4f" % accuracy_score(y, preds))
print_confusion_matrix(y, preds)

In [None]:
# Plot the error over time (training loss recorded by fit, one value per epoch)
epoch_losses = loss.history["loss"]

fig, ax = plt.subplots(figsize = (10, 5))
ax.plot(range(len(epoch_losses)), epoch_losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_title("MSE by Epoch")
plt.show()

## Abalone data set

In [None]:
columns = ["Sex",            "Length",       "Diameter",
           "Height",         "Whole Weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Rings"]
# header = None: every line of the file is data and `names` supplies the labels.
# The previous `header = 1` made pandas treat the second line as a header and
# discard it together with the first line, silently dropping two samples.
# NOTE(review): assumes the file is the raw UCI abalone.data, which has no header row.
df = pd.read_csv("../../../../data/abalone.data", header = None, names = columns)
df.head()

In [None]:
import seaborn as sns

# Pairwise plots of every column except "Sex", coloured by "Sex"
plotted_columns = columns[1:]
sns.pairplot(df, vars = plotted_columns, hue = "Sex")
plt.show()

In [None]:
# Map the sex letters to integer class ids.
d = {"M": 0, "F": 1, "I": 2}
# Encode only while the column still holds the raw letters, so re-running this
# cell does not raise a KeyError on the already-encoded integers.
if not pd.api.types.is_numeric_dtype(df["Sex"]):
    df["Sex"] = df["Sex"].apply(lambda x: d[x])
df.head()

In [None]:
# Features: every column after "Sex"; target: the integer-coded "Sex" column.
feature_columns = columns[1:]
X = np.array(df[feature_columns])
y = np.array(df["Sex"])

# One indicator column per class
y_cat = pd.get_dummies(y).values
print(X.shape, y_cat.shape)

In [None]:
# Define a model
model = Sequential()

# input_dim = number of neurons in previous layer.
# First positional argument = number of neurons in current layer. It is passed
# positionally because its keyword was renamed between Keras releases
# (`output_dim` in Keras 1, `units` from Keras 2 on).

# First layer - input_dim = k features.
model.add(Dense(6, input_dim = 8))
model.add(Activation("tanh"))

# Second hidden layer
model.add(Dense(6, input_dim = 6))
model.add(Activation("tanh"))

# Output layer - number of neurons = # of output per point (in y).
# Use "softmax" for class probability. "linear" for regression
model.add(Dense(3, input_dim = 6))
model.add(Activation("softmax"))

# Uses Mean Squared Error and Stochastic Gradient Descent. The first argument
# of SGD is the learning rate (keyword `lr` was later renamed `learning_rate`).
model.compile(loss = "mse", optimizer = SGD(0.1))

In [None]:
%%time
# Train the model on the indicator-encoded labels, holding out 10% for validation.
print("Training...")
loss = model.fit(X, y_cat,
                 validation_split = 0.1,
                 epochs = 1000,   # `nb_epoch` in Keras 1, renamed `epochs` in Keras 2
                 batch_size = 16,
                 verbose = False)
print("Complete")
print("MSE at last epoch: %.4f" % loss.history["loss"][-1])   # training MSE recorded for the final epoch

In [None]:
# Model evaluation (on the same data the model was fitted to)
pred_y = model.predict(X, verbose = False)   # softmax outputs, one column per class
# `Sequential.predict_classes` was removed from newer Keras releases; taking
# the argmax of the softmax outputs yields the same class labels.
preds  = np.argmax(pred_y, axis = 1)

print("Accuracy: %.4f" % accuracy_score(y, preds))
print_confusion_matrix(y, preds)

In [None]:
# Plot the error over time (training loss recorded by fit, one value per epoch)
epoch_losses = loss.history["loss"]

fig, ax = plt.subplots(figsize = (10, 5))
ax.plot(range(len(epoch_losses)), epoch_losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_title("MSE by Epoch")
plt.show()

## Exercise
Classify the following data ([source](https://archive.ics.uci.edu/ml/datasets/MAGIC+Gamma+Telescope)). You will need to translate the classes into integers and make dummies. Design a neural network to classify the data and evaluate the results.

In [None]:
# Column labels for the MAGIC data: ten features followed by the class label
names = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym",
         "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv("../../../../data/magic04.data", names = names)
df.head()

In [None]:
# Map the class letters to integer class ids.
d = {"g": 0, "h": 1}
# Encode only while the column still holds the raw letters, so re-running this
# cell does not raise a KeyError on the already-encoded integers.
if not pd.api.types.is_numeric_dtype(df["class"]):
    df["class"] = df["class"].apply(lambda x: d[x])

In [None]:
# Features: every column except the last; target: the integer-coded "class" column.
feature_columns = df.columns[:-1]
X = np.array(df[feature_columns])
y = np.array(df["class"])

# One indicator column per class
y_cat = pd.get_dummies(y).values
print(X.shape, y_cat.shape)

In [None]:
# Define a model
# <Code Here>

# Layers
# input_dim = number of neurons in previous layer.
# output_dim = number of neurons in current layer.

# First layer - input_dim = k features.
# <Code Here>

# Output layer - output_dim = # of output per point (in y).
# Use "softmax" for class probability. "linear" for regression
# <Code Here>

# Uses Mean Squared Error and Stochastic Gradient Descent
# <Code Here>

In [None]:
%%time
# Train the model
# <Code Here>

In [None]:
# Model evaluation
# <Code Here>

In [None]:
# Plot the error over time
# <Code Here>