## Fuel Efficiency Regression
Predict a car's fuel efficiency / MPG<br>
Last update: 1/1/24

In [1]:
from importlib import reload
import numpy as np
import pandas as pd
from sandbox import activations, costs, initializers, layers, model, optimizers, utils

In [2]:
# Fetch fuel efficiency data
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

raw_dataset = pd.read_csv(url, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)

# Remove missing values, drop column I don't want to deal with
dataset = raw_dataset.copy()
dataset = dataset.dropna().drop('Origin', axis=1)

# Seperate labels from features
features = dataset.copy()
labels = dataset.pop('MPG')

# Normalize feature set
features = (features - features.mean()) / features.std()

# Normalize label set
labels_mean = labels.mean()
labels_std = labels.std()
labels = (labels - labels_mean) / labels_std

# Convert dataframes to CuPy arrays
input = features.to_numpy()
labels = labels.to_numpy().reshape(-1, 1)

# Split data into training and test sets
train_x = input[50:, :]
train_y = labels[50:, :]
test_x = input[:50, :]
test_y = labels[:50, :]

In [3]:
reload(model)

# Create model
mpg = model.Model()
mpg.add(layers.Dense(units=16, activation=activations.ReLU()))
mpg.add(layers.Dense(units=4, activation=activations.ReLU()))
mpg.add(layers.Dense(units=1, activation=activations.Linear()))

mpg.configure(
    input_size=train_x.shape[1],
    cost_type=costs.MSE(),
    optimizer=optimizers.Adam()
)

# Train model
mpg.train(train_x, train_y, epochs=4, batch_size=32, verbose=True)

Cost on epoch 1: 0.17636
Cost on epoch 2: 0.16697
Cost on epoch 3: 0.05517
Cost on epoch 4: 0.03521


In [6]:
pred = mpg.predict(test_x)
loss = costs.MSE().forward(pred, test_y)
print('Test MSE loss: ', loss)

# Note - because the labels were normalized, model outputs must be adjusted before being used, as so:
pred_actual = pred * labels_std + labels_mean

print('\nPredicted MPG:\n', np.squeeze(np.round(pred_actual.T)))
print('Actual MPG:\n', np.squeeze(test_y.T * labels_std + labels_mean))

Test MSE loss:  0.032852223087920907

Predicted MPG:
 [17. 15. 17. 16. 16. 14. 13. 13. 13. 15. 15. 15. 15. 16. 22. 20. 19. 20.
 24. 24. 22. 22. 22. 23. 20. 11. 12. 12. 11. 25. 25. 23. 19. 18. 18. 19.
 18. 14. 13. 14. 14. 12. 13. 12. 19. 22. 19. 19. 22. 26.]
Actual MPG:
 [18. 15. 18. 16. 17. 15. 14. 14. 14. 15. 15. 14. 15. 14. 24. 22. 18. 21.
 27. 26. 25. 24. 25. 26. 21. 10. 10. 11.  9. 27. 28. 25. 19. 16. 17. 19.
 18. 14. 14. 14. 14. 12. 13. 13. 18. 22. 19. 18. 23. 28.]
