# Business Case: Predicting Abalone Age

> __NOTE:__ This Jupyter Notebook uses a Python 3.6 kernel.

In [None]:
import sys
# Show the version string of the interpreter backing this kernel.
print(f"Python Version: {sys.version}.")

In [None]:
%%capture
# Upgrade pip and the notebook's dependencies using the interpreter that runs
# this kernel (sys.executable); the %%capture cell magic hides pip's output.
!{sys.executable} -m pip install -U pip matplotlib numpy pandas scikit-learn tensorflow

In [None]:
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

class cleanPrint(keras.callbacks.Callback):
    """Keras callback that prints a compact, one-character-per-epoch progress trace.

    A "-" is printed (without a newline) after each epoch, and every 100th
    epoch prints "!" followed by a newline instead, so each finished output
    line corresponds to 100 epochs.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Print the progress marker for the epoch that just finished.

        Args:
            epoch: Index of the finished epoch as supplied by Keras
                (zero-based, hence the `+ 1` below).
            logs: Metric values for the epoch; not used here.
        """
        # The parentheses matter: `epoch+1 % 100` parses as `epoch + (1 % 100)`,
        # i.e. `epoch + 1`, which is never 0, so the "!" branch was unreachable.
        if (epoch + 1) % 100 == 0:
            print("!")
        else:
            print("-", end="")

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Suppress all Python warnings for the rest of the session. Note that this
# also hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings("ignore")

---
## Exploratory Data Analysis: Abalone Dataset

In [None]:
# Column labels for the raw file; they are passed to read_csv via `names=`.
column_names = [
    "sex",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
    "rings",
]
abalone_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
abalone_data = pd.read_csv(abalone_url, names=column_names)
# Preview the first rows as the cell output.
abalone_data.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
abalone_data.describe()

---

## Data Preparation

In [None]:
# Select the target ("rings") together with the predictor columns.
data = abalone_data[["rings", "sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight"]]
# One-hot encode the non-numeric columns (presumably only "sex"; the model
# below expects 10 input features in total).
data = pd.get_dummies(data)
# Split the regression target off from the feature columns.
y = data.rings.values
del data["rings"]
# `np.float` was only an alias for the builtin float and was removed in
# NumPy 1.24; np.float64 is the same dtype and exists in every NumPy version.
X = data.values.astype(np.float64)
# Rescale the feature matrix with scikit-learn's `normalize` (default settings).
X = preprocessing.normalize(X)
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
training_features, testing_features, training_labels, testing_labels = train_test_split(X, y, test_size=0.2, random_state=42)

---
## Model Training

In [None]:
# Baseline regression network: four ReLU hidden layers (256 -> 128 -> 64 -> 32)
# followed by a single linear unit that outputs the predicted ring count.
network_layers = [
    # Input width is taken from the prepared feature matrix rather than hard-coded.
    Dense(256, activation='relu', kernel_initializer="normal", input_dim=training_features.shape[1]),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
]

model = Sequential(network_layers)
# Only MAE is tracked next to the MSE loss: "accuracy" measures exact matches
# between prediction and label, which is not meaningful for a continuous target.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()

In [None]:
# Train for 2000 epochs with Keras' built-in logging switched off (verbose=0);
# the cleanPrint callback emits the per-epoch progress trace instead.
# The held-out test split doubles as the validation set.
fit_options = dict(
    validation_data=(testing_features, testing_labels),
    batch_size=32,
    epochs=2000,
    shuffle=True,
    verbose=0,
    callbacks=[cleanPrint()],
)
training_results = model.fit(training_features, training_labels, **fit_options)

---
## Model Evaluation (Before Optimization)

### Plot Model Evaluation Metrics (RMSE)

In [None]:
# Run inference once and reuse the result for both the scatter plot and the RMSE.
predicted_rings = model.predict(testing_features)
# RMSE computed as the square root of the MSE. This avoids the `squared=False`
# keyword, which newer scikit-learn releases deprecated and then removed.
rmse = mean_squared_error(testing_labels, predicted_rings) ** 0.5

fig, ax = plt.subplots(figsize=(15, 10))
# Blue dots: predicted vs. actual ring counts on the test split.
ax.plot(testing_labels, predicted_rings, "ob")
# Red y = x reference line: points on it are perfect predictions.
ax.plot([0, 25], [0, 25], "-r")
ax.text(8, 1, f"RMSE = {rmse}", color="r", fontsize=14, weight="bold")
plt.title("Abalone Model Evaluation", fontweight="bold", fontsize=12)
plt.xlabel("Actual 'Rings'", fontweight="bold", fontsize=12)
plt.ylabel("Predicted 'Rings'", fontweight="bold", fontsize=12)
plt.legend(["Predictions", "Regression Line"], loc="upper left", prop={"weight": "bold"})
plt.show()

### Plot additional performance summaries

#### Training vs. Testing Loss

In [None]:
# Learning curves: per-epoch MSE loss on the training data and on the
# validation data passed to fit (the test split).
plt.rcParams["figure.figsize"] = (15, 10)
bold_14 = {"fontweight": "bold", "fontsize": 14}
for history_key in ("loss", "val_loss"):
    plt.plot(training_results.history[history_key])
plt.title("Training vs. Testing Loss", **bold_14)
plt.xlabel("Epochs", **bold_14)
plt.ylabel("Loss", **bold_14)
plt.legend(["Training Loss", "Testing Loss"], loc="upper right", prop={"weight": "bold"})
plt.grid()
plt.show()

In [None]:
# Learning curves: per-epoch mean absolute error on the training data and on
# the validation data passed to fit (the test split).
plt.rcParams["figure.figsize"] = (15, 10)
history = training_results.history
training_mae, testing_mae = history["mae"], history["val_mae"]
plt.plot(training_mae)
plt.plot(testing_mae)
text_style = dict(fontweight="bold", fontsize=14)
plt.title("Training vs. Testing Mean Absolute Error", **text_style)
plt.ylabel("mae", **text_style)
plt.xlabel("Epochs", **text_style)
plt.legend(["Training MAE", "Testing MAE"], loc="upper right", prop={"weight": "bold"})
plt.grid()
plt.show()

---

## Model Evaluation (After Optimization)

In [None]:
# Smaller network than the baseline: two 64-unit ReLU hidden layers and a
# single linear output unit.
network_layers = [
    # Input width is taken from the prepared feature matrix rather than hard-coded.
    Dense(64, activation='relu', kernel_initializer="normal", input_dim=training_features.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')
]

model = Sequential(network_layers)
# Only MAE is tracked next to the MSE loss: "accuracy" measures exact matches
# between prediction and label, which is not meaningful for a continuous target.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()
# Train for 200 epochs with batch size 8 and Keras' per-epoch logging enabled;
# the held-out test split doubles as the validation set.
training_results = model.fit(
    training_features,
    training_labels,
    validation_data=(testing_features, testing_labels),
    batch_size=8,
    epochs=200,
    shuffle=True,
    verbose=1,
)


In [None]:
# Run inference once and reuse the result for both the scatter plot and the RMSE.
predicted_rings = model.predict(testing_features)
# RMSE computed as the square root of the MSE. This avoids the `squared=False`
# keyword, which newer scikit-learn releases deprecated and then removed.
rmse = mean_squared_error(testing_labels, predicted_rings) ** 0.5

fig, ax = plt.subplots(figsize=(15, 10))
# Blue dots: predicted vs. actual ring counts on the test split.
ax.plot(testing_labels, predicted_rings, "ob")
# Red y = x reference line: points on it are perfect predictions.
ax.plot([0, 25], [0, 25], "-r")
ax.text(8, 1, f"RMSE = {rmse}", color="r", fontsize=14, weight="bold")
plt.grid()
plt.title("Abalone Model Evaluation", fontweight="bold", fontsize=12)
plt.xlabel("Actual 'Rings'", fontweight="bold", fontsize=12)
plt.ylabel("Predicted 'Rings'", fontweight="bold", fontsize=12)
plt.legend(["Predictions", "Regression Line"], loc="upper left", prop={"weight": "bold"})
plt.show()

In [None]:
# Loss curves for the most recent training run: training loss first, then the
# loss on the validation data passed to fit.
plt.rcParams["figure.figsize"] = (15, 10)
loss_history = training_results.history
for curve in (loss_history["loss"], loss_history["val_loss"]):
    plt.plot(curve)
heading_font = {"fontweight": "bold", "fontsize": 14}
plt.title("Training vs. Testing Loss", **heading_font)
plt.ylabel("Loss", **heading_font)
plt.xlabel("Epochs", **heading_font)
legend_labels = ["Training Loss", "Testing Loss"]
plt.legend(legend_labels, loc="upper right", prop={"weight": "bold"})
plt.grid()
plt.show()

In [None]:
# MAE curves for the most recent training run: training MAE first, then the
# MAE on the validation data passed to fit.
plt.rcParams["figure.figsize"] = (15, 10)
for metric_name in ["mae", "val_mae"]:
    plt.plot(training_results.history[metric_name])
bold = "bold"
plt.title("Training vs. Testing Mean Absolute Error", fontsize=14, fontweight=bold)
plt.xlabel("Epochs", fontsize=14, fontweight=bold)
plt.ylabel("mae", fontsize=14, fontweight=bold)
plt.legend(["Training MAE", "Testing MAE"], prop={"weight": "bold"}, loc="upper right")
plt.grid()
plt.show()