# TMA4267 - DOE
Code used for assignment 3 in TMA4267.

In [1]:
import numpy as np
import tensorflow as tf
import keras
from sklearn.metrics import mean_absolute_error
import pandas as pd
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

## Loading data
We start by loading the data, remove missing values, and divide origin into three columns (USA, Europe and Japan).

In [2]:
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\t', sep=' ', skipinitialspace=True)
dataset = dataset.dropna()
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='')

train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

train_features = train_dataset.copy()
test_features = test_dataset.copy()
train_labels = train_features.pop('MPG')
test_labels = test_features.pop('MPG')

## Neural network

In [3]:
# Normalize the data before we feed it into to the neural network
def normalized(x):
    return (x - np.min(x)) / (np.max(x) - np.min(x))


def build_and_compile_model(optim, hid1, hid2, seed):
    tf.random.set_seed(seed)
    model=Sequential()
    model.add(Dense(hid1, input_dim=9, activation='relu'))
    model.add(Dense(hid2, activation='relu'))
    model.add(Dense(1))

    model.compile(loss='mean_absolute_error',
                optimizer=optim)
    return model

def train(optim, lr, hid1, hid2, seed):
    if optim == 'Adam':
        dnn_model = build_and_compile_model(tf.keras.optimizers.Adam(lr), hid1, hid2, seed)
    elif optim == 'SGD0' or optim == 'SGD':
        dnn_model = build_and_compile_model(tf.keras.optimizers.SGD(lr), hid1, hid2, seed)
        
    # We will train the network for 100 epochs. When the valdiation loss hasn't decreasing for 5 epochs, 
    # we will stop the training (to reduce overfitting) and use the best model (i.e., the weights of the 
    # epoch with the lowest validation loss)
    callback = EarlyStopping(
        monitor='val_loss',
        patience = 5,
        restore_best_weights=True
    )
    
    # Train the model
    dnn_model.fit(
        normalized(train_features),
        train_labels,
        validation_split=0.25,
        verbose=0, epochs= 100, 
        callbacks = [callback]
    )
    
    # We make predictions on the test set and calculate the MAE
    test_pred = dnn_model.predict(normalized(test_features)).flatten()
    return np.round(mean_absolute_error(test_labels, test_pred),3)

We will test the different factors (with 2 levels each):
1. Optimizer: Adam vs SGD
2. Learning rate: 0.01 vs 0.001
3. Hidden layer 1: 32 or 64
4. Hidden layer 2: 32 or 64

In [5]:
seeds = [0, 1, 11, 111]

for seed in seeds:
    MAE = []
    
    # Use SGD0 instead of SGD to get it the same length as ADAM (looks nicer)
    MAE.append(train('SGD0', 5e-3, 32, 32,seed))
    MAE.append(train('Adam', 5e-3, 32, 32,seed))
    MAE.append(train('SGD0', 1e-2, 32, 32,seed))
    MAE.append(train('Adam', 1e-2, 32, 32,seed))

    MAE.append(train('SGD0', 5e-3, 64, 32,seed))
    MAE.append(train('Adam', 5e-3, 64, 32,seed))
    MAE.append(train('SGD0', 1e-2, 64, 32,seed))
    MAE.append(train('Adam', 1e-2, 64, 32,seed))

    MAE.append(train('SGD0', 5e-3, 32, 64,seed))
    MAE.append(train('Adam', 5e-3, 32, 64,seed))
    MAE.append(train('SGD0', 1e-2, 32, 64,seed))
    MAE.append(train('Adam', 1e-2, 32, 64,seed))

    MAE.append(train('SGD0', 5e-3, 64, 64,seed))
    MAE.append(train('Adam', 5e-3, 64, 64,seed))
    MAE.append(train('SGD0', 1e-2, 64, 64,seed))
    MAE.append(train('Adam', 1e-2, 64, 64,seed))
    
    print('Seed:', seed)
    print(MAE)

Seed: 0
[2.221, 2.009, 1.923, 2.124, 2.464, 1.879, 1.971, 1.998, 2.284, 1.93, 1.98, 1.915, 2.325, 2.035, 2.016, 1.837]
Seed: 1
[2.294, 1.799, 1.856, 1.907, 2.337, 1.937, 1.867, 2.105, 2.161, 1.911, 1.807, 1.914, 2.293, 1.983, 2.039, 1.951]
Seed: 11
[2.267, 1.832, 1.915, 1.876, 2.217, 1.97, 1.948, 1.906, 2.199, 1.926, 1.969, 2.148, 2.233, 1.89, 1.895, 1.939]
Seed: 111
[2.176, 1.947, 1.905, 1.839, 2.161, 1.885, 2.087, 1.953, 2.161, 1.902, 1.817, 2.0, 2.317, 1.848, 1.982, 1.94]
