# Environment Setup

In [5]:
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
import kerastuner as kt

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# Ask TensorFlow to grow GPU memory on demand. The loop (instead of indexing
# [0]) keeps the notebook runnable on CPU-only machines, where the device list
# is empty, and applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

print('Running TensorFlow ', tf.__version__)
print('Running KerasTuner ', kt.__version__)
# Reuse the list queried above rather than asking TensorFlow a second time.
print("GPUs Available: ", len(physical_devices))

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns

%matplotlib inline
# Notebook-wide matplotlib defaults: wide figures, DejaVu Sans at size 12,
# and the 'rainbow' colormap for images.
plt.rcParams.update({
    'figure.figsize': (15, 5),
    'font.family': 'DejaVu Sans',
    'font.size': '12',
    'image.cmap': 'rainbow',
})

Running TensorFlow2.2.0
Running KerasTuner1.0.1
GPUs Available:1


# Understanding the Data

In [6]:
# Read the concrete mix data from the CSV in the working directory.
dataset = pd.read_csv(filepath_or_buffer='Concrete_Data.csv')

# Preview the first five rows, then report the (rows, columns) shape.
print(dataset.head(n=5))
print("Shape of data set:", dataset.shape)

cement   slag  flyash  water  superplas  coarse_agg  fine_agg  age  \
0   540.0    0.0     0.0  162.0        2.5      1040.0     676.0   28   
1   540.0    0.0     0.0  162.0        2.5      1055.0     676.0   28   
2   332.5  142.5     0.0  228.0        0.0       932.0     594.0  270   
3   332.5  142.5     0.0  228.0        0.0       932.0     594.0  365   
4   198.6  132.4     0.0  192.0        0.0       978.4     825.5  360   

   strength  
0     79.99  
1     61.89  
2     40.27  
3     41.05  
4     44.30
Shape of data set:(1030, 9)


In [None]:
def test_normality(x):
    """Return True when `stats.shapiro` gives p < 0.01 for `x` (NaNs filled with 0).

    A True result therefore means normality was *rejected* for the column.
    """
    _, p_value = stats.shapiro(x.fillna(0))
    return p_value < 0.01


# Despite its name, `normal` flags the columns for which normality was rejected.
normal = dataset.apply(test_normality)
# Prints True only when no column was flagged.
print(not normal.any())

In [None]:
# Every non-object (numeric) column gets one row with two panels.
keys = [f for f in dataset.columns if dataset.dtypes[f] != 'object']

# squeeze=False keeps `ax` two-dimensional even when only one column
# qualifies; without it `ax[n, 0]` below raises IndexError for a single row.
fig, ax = plt.subplots(len(keys), 2, figsize=(20, len(keys) * 6), squeeze=False)

for n, feat in enumerate(keys):
    # Left panel: the feature plotted against the target (strength).
    ax[n, 0].scatter(dataset[feat].values, dataset.strength.values, s=4)
    ax[n, 0].set_ylabel("Strength")
    ax[n, 0].set_xlabel(feat)
    # Right panel: the feature's own distribution.
    # NOTE(review): distplot is deprecated in newer seaborn releases; switch to
    # histplot once the seaborn version used by this notebook is confirmed.
    sns.distplot(dataset[feat].dropna(), kde=True, ax=ax[n, 1], color="limegreen")
    ax[n, 1].set_title("Distribution")

In [None]:
import os
import random as rn

# One seed shared by Python's `random`, NumPy and TensorFlow.
seed = 6

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here may not affect the already-running kernel - confirm.
os.environ['PYTHONHASHSEED'] = '0'

rn.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Hold out 20% of the rows for testing: draw a reproducible 80% sample for
# training and keep every row that was not drawn as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=69)
test_dataset = dataset.drop(index=train_dataset.index)

# Preview each split together with its shape (training first, then test).
for split in (train_dataset, test_dataset):
    print(split.head(), split.shape)

In [None]:
# Summary statistics of the training features, one row per feature.
# The target column is excluded so that only model inputs remain.
train_stats = (
    train_dataset
    .describe()
    .drop(columns='strength')
    .transpose()
)
train_stats

In [None]:
# Detach the target from both splits. pop() removes 'strength' from the frames
# in place, so this cell cannot be re-run without re-creating the splits.
train_labels, test_labels = (
    frame.pop('strength') for frame in (train_dataset, test_dataset)
)

print(train_labels.head(), train_labels.shape)

## Standardise Data 

In [None]:
def norm(x, reference_stats=None):
    """Standardise `x` column-wise: subtract the mean, divide by the std.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame whose columns match the index of the statistics table.
    reference_stats : pandas.DataFrame, optional
        Table with 'mean' and 'std' columns indexed by feature name.
        Defaults to the notebook-level `train_stats`, so test data is scaled
        with the training statistics.

    Returns
    -------
    pandas.DataFrame
        `(x - mean) / std`, aligned on column names.
    """
    if reference_stats is None:
        # Resolved at call time so the function picks up the current table.
        reference_stats = train_stats
    return (x - reference_stats['mean']) / reference_stats['std']
# Scale both splits through the same norm() helper.
normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)

print(normed_train_data.head())

# Distribution of the standardised 'age' feature.
sns.distplot(normed_train_data['age'], kde=True, color="limegreen")


# Train Model

In [None]:
def train_network(structure, activation, optimizer, epochs, dropout_rate=0.5):
    """Build, compile and fit a fully connected regression network.

    Parameters
    ----------
    structure : sequence of int
        Layer widths: ``structure[0]`` is the number of input features,
        ``structure[-1]`` the number of outputs, and everything in between is
        one hidden Dense layer each.
    activation : str
        Activation used by every hidden layer (the output layer is linear).
    optimizer : str or keras optimizer
        Passed straight to ``model.compile``.
    epochs : int
        Upper bound on training epochs; early stopping may end training sooner.
    dropout_rate : float, optional
        Dropout applied after every hidden layer. Pass 0 to disable dropout.

    Returns
    -------
    (History, Sequential)
        The object returned by ``model.fit`` and the fitted model.

    Notes
    -----
    Trains on the notebook-level ``normed_train_data`` / ``train_labels``.
    """
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(structure[0],)))

    for units in structure[1:-1]:
        model.add(layers.Dense(units=units, activation=activation,
                               kernel_regularizer=regularizers.l2(0.001)))
        # The layer must be add()-ed to take effect; a bare `layers.Dropout(...)`
        # statement builds a layer and throws it away.
        if dropout_rate:
            model.add(layers.Dropout(dropout_rate))

    # Linear output for regression.
    model.add(layers.Dense(units=structure[-1], activation=None))

    # 'mae' and 'mse' are reported in this order next to the loss.
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    print()

    # Tells us training has started, so we know it is actually running.
    print('training... ')

    # Stop once the validation loss has not improved for 20 epochs.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

    # 20% of the training data is held out for validation during the fit.
    training_stats = model.fit(normed_train_data, train_labels, batch_size=32,
                               epochs=epochs, validation_split=0.2, verbose=0,
                               callbacks=[early_stop])

    # Return the results and the model for use outside the function.
    return training_stats, model

# Plots our evaluations in a line graph to see how they compare
def plot_acc(train_acc, test_acc, title):
    """Draw training and testing scores as two lines on one chart.

    Parameters
    ----------
    train_acc, test_acc : sequence of float
        One value per evaluation. Lists and NumPy arrays are both accepted
        (the length is taken with ``len`` rather than ``.shape``).
    title : str
        Chart title.
    """
    n_points = len(train_acc)

    # Plots the training and testing accuracy lines
    training_accuracy, = plt.plot(train_acc, label='Training Accuracy')
    testing_accuracy, = plt.plot(test_acc, label='Testing Accuracy')
    plt.legend(handles=[training_accuracy, testing_accuracy])

    # Dotted guide lines along y = 1 and y = 0 to help visualise
    for level in (1, 0):
        plt.axhline(level, color='k', linestyle=':', alpha=0.5)

    # Tick labels are 1-based while the plotted positions are 0-based.
    plt.xticks(range(n_points), range(1, n_points + 1))
    plt.ylim(0, 1)
    plt.title(title)

    plt.show()

# Plots our evaluations in a bar chart to see how they compare
def bar_acc(train_acc, test_acc, title, xticks):
    """Draw training and testing scores as paired bars.

    Parameters
    ----------
    train_acc, test_acc : sequence of float
        One value per bar pair. Lists and NumPy arrays are both accepted
        (the length is taken with ``len`` rather than ``.shape``).
    title : str
        Chart title.
    xticks : sequence
        Labels for the bar pairs, one per entry of ``train_acc``.
    """
    index = range(1, len(train_acc) + 1)

    # Training bars are centred on the tick; testing bars start at the tick
    # (align='edge'), which places the two bars of a pair next to each other.
    training_accuracy = plt.bar(index, train_acc, 0.4, align='center')
    testing_accuracy = plt.bar(index, test_acc, 0.4, align='edge')
    plt.legend((training_accuracy[0], testing_accuracy[0]),
               ('Training Accuracy', 'Testing Accuracy'))

    plt.xticks(index, xticks)
    plt.title(title)

    plt.show()

In [None]:
# Sweep the hidden-layer width over 128, 256, 384 and 512 units.
# `model` and `training_stats` are overwritten on every pass, so after the
# loop they refer to the last (512-unit) network.
for hidden1 in range(128, 513, 128):
    print('Evaluating model with %i hidden neurons... ' %hidden1, '\n')
    training_stats, model = train_network(
        structure=[8] + [hidden1] * 3 + [1],
        activation='relu',
        optimizer='RMSprop',
        epochs=1000,
    )
    # Per-epoch metrics with the epoch number appended as the last column.
    hist = pd.DataFrame(training_stats.history).assign(epoch=training_stats.epoch)
    print(hist.tail(), '\n')

In [None]:
# Score the current `model` on the held-out split; the three values are
# unpacked as loss, MAE and MSE.
loss, mae, mse = model.evaluate(x=normed_test_data, y=test_labels, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} MPa".format(mae))

In [None]:
# Predicted strength for every held-out row, flattened to 1-D.
test_predictions = model.predict(normed_test_data).flatten()

# Both axes share the same limits so the diagonal marks perfect predictions.
lims = [0, 90]

a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
a.set_xlabel('True Values Strength')
a.set_ylabel('Predictions Strength')
a.set_xlim(lims)
a.set_ylim(lims)
_ = a.plot(lims, lims)


In [None]:
# Residuals: prediction minus true value, so positive means over-prediction.
error = test_predictions - test_labels

plt.hist(x=error, bins=25)
plt.xlabel(xlabel="Prediction Error Strength")
_ = plt.ylabel(ylabel="Count")