# Predicting MPG with SDC

**Author:** Richard Hemphill<br>
**ID:** 903877709<br>
**Class:** ECE5268 Theory of Neural Networks<br>
**Instructor:** Dr. Georgios C. Anagnostopoulos<br>
**Description:** Utilize characteristics from various cars to predict miles-per-gallon fuel consumption.  The prediction equation is determined using Stocastic Gradient Descent minimization method.

In [201]:
# CONSTANTS
DATASET_FILE = 'autompg_dataset.csv'
NUMBER_FOR_TRAINING = 200
NUMBER_FOR_VALIDATION = 100
OUTPUT_FEATURE='mpg'
INPUT_FEATURES=['horsepower', 'weight']
EPOCHS = 1000

In [202]:
# LIBRARIES
import numpy as np                  # matrix manipulation
import random                       # shuffle data
import matplotlib.pyplot as plt     # surface plot

In [203]:
# FUNCTIONS
# Create Augmented Design Matrix
def AugmentedDesignMatrix(dataSet, features):
    # Create the design matrix.
    adm = dataSet[features[0]]
    for feature in features[1:]:
        adm = np.column_stack((adm,dataSet[feature]))
     # Augment the design matrix to accomodate the bias term.
    adm = np.column_stack((adm,np.ones(len(adm))))
    return adm

In [204]:
# Calculate Mean Squared Error
def MSE(actual, predicted):
    return np.square(np.subtract(actual, predicted)).mean()

In [205]:
def PredictionEquation(y, xs, w):
    eq = '{} = '.format(y)
    wfmat = lambda i: ('+' if i > 0 else '') + '{:0.6}'.format(i)
    for idx, x in enumerate(xs):
        eq = eq + '{}*{}'.format(wfmat(w[idx]), x)
    eq = eq + wfmat(w[-1])
    return eq

In [206]:
# Load data file
csvFile = open(DATASET_FILE, 'r')
dataSet = np.genfromtxt(csvFile, delimiter=',', names=True, case_sensitive=True)
csvFile.close()

In [207]:
# shuffle data randomly so that training will not use same sets every time.
random.shuffle(dataSet)

In [208]:
# Split the data set into groups for training, validation and test.
trainData = dataSet[:NUMBER_FOR_TRAINING]
valData = dataSet[NUMBER_FOR_TRAINING+1:NUMBER_FOR_TRAINING+NUMBER_FOR_VALIDATION]
testData = dataSet[NUMBER_FOR_TRAINING+NUMBER_FOR_VALIDATION+1:]

## Part (a): Batch Size 1
tbd

In [215]:
EPOCHS = 1
maxLearningRate = 0.1
decay = 1
batchSize = 1
iterations = 100#round(len(trainData)/batchSize)
Wa = np.zeros(len(INPUT_FEATURES)+1)
for e in range(EPOCHS):
    #print('epoch({})'.format(e))
    for i in range(iterations):
        #print('iteration({})'.format(i))
        batch = trainData[batchSize*i:batchSize*(i+1)]
        Y = batch[OUTPUT_FEATURE]
        X = AugmentedDesignMatrix(dataSet=batch,features=INPUT_FEATURES)
        dE = np.dot(Xs.T,(np.dot(Xs,Wa)-Ys))
        g = dE/batchSize
        learningRate = maxLearningRate/(1+(decay*i))
        dW = -1 * learningRate * g
        Wa = Wa + dW
        print(Wa)

[2.3400e+02 6.3072e+03 1.8000e+00]
[-1.43850178e+08 -3.87731556e+09 -1.10653983e+06]
[5.89540561e+13 1.58903856e+15 4.53492739e+11]
[-1.81208217e+19 -4.88425840e+20 -1.39390936e+17]
[4.45586186e+24 1.20102615e+26 3.42758604e+22]
[-9.13069739e+29 -2.46107413e+31 -7.02361337e+27]
[1.60372182e+35 4.32264712e+36 1.23363217e+33]
[-2.46468730e+40 -6.64328022e+41 -1.89591330e+38]
[3.36698932e+45 9.07533122e+46 2.58999179e+43]
[-4.13965176e+50 -1.11579537e+52 -3.18434751e+48]
[4.62692904e+55 1.24713533e+57 3.55917618e+53]
[-4.74059610e+60 -1.27777298e+62 -3.64661239e+58]
[4.48343226e+65 1.20845743e+67 3.44879404e+63]
[-3.93734284e+70 -1.06126533e+72 -3.02872526e+68]
[3.22724752e+75 8.69867332e+76 2.48249809e+73]
[-2.47988900e+80 -6.68425467e+81 -1.90760693e+78]
[1.79350635e+85 4.83418942e+86 1.37962027e+83]
[-1.22503827e+90 -3.30194930e+91 -9.42337130e+87]
[7.92711110e+94 2.13666133e+96 6.09777777e+92]
[-4.87307938e+099 -1.31348232e+101 -3.74852260e+097]
[2.85300396e+104 7.68994298e+105 2.1946

In [210]:
#print(PredictionEquation(y=OUTPUT_FEATURE, xs=INPUT_FEATURES, w=Wa))