In [27]:
#pip install keras-tuner --upgrade

In [28]:
# Importing Libraries
from numpy import argmax
from numpy import unique
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import pandas as pd
from pandas import read_csv
import numpy as np
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [29]:
# Placing URL for getting the dataset from the website 
url_link = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
col_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
df = pd.read_csv(url_link, names=col_names, na_values='?', comment='\t', sep=' ', skipinitialspace=True)
print(df)

      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  \
0    18.0          8         307.0       130.0  3504.0          12.0   
1    15.0          8         350.0       165.0  3693.0          11.5   
2    18.0          8         318.0       150.0  3436.0          11.0   
3    16.0          8         304.0       150.0  3433.0          12.0   
4    17.0          8         302.0       140.0  3449.0          10.5   
..    ...        ...           ...         ...     ...           ...   
393  27.0          4         140.0        86.0  2790.0          15.6   
394  44.0          4          97.0        52.0  2130.0          24.6   
395  32.0          4         135.0        84.0  2295.0          11.6   
396  28.0          4         120.0        79.0  2625.0          18.6   
397  31.0          4         119.0        82.0  2720.0          19.4   

     Model Year  Origin  
0            70       1  
1            70       1  
2            70       1  
3            70       1  
4    

In [30]:
df.isnull().sum()

MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [31]:
# Dropping the last two columns Origin and Car Names by using iloc
df.drop(df.iloc[:,7:8], inplace=True, axis=1)
print(df)

      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  \
0    18.0          8         307.0       130.0  3504.0          12.0   
1    15.0          8         350.0       165.0  3693.0          11.5   
2    18.0          8         318.0       150.0  3436.0          11.0   
3    16.0          8         304.0       150.0  3433.0          12.0   
4    17.0          8         302.0       140.0  3449.0          10.5   
..    ...        ...           ...         ...     ...           ...   
393  27.0          4         140.0        86.0  2790.0          15.6   
394  44.0          4          97.0        52.0  2130.0          24.6   
395  32.0          4         135.0        84.0  2295.0          11.6   
396  28.0          4         120.0        79.0  2625.0          18.6   
397  31.0          4         119.0        82.0  2720.0          19.4   

     Model Year  
0            70  
1            70  
2            70  
3            70  
4            70  
..          ...  
393      

In [32]:
# Attribute horsepower has 6 missing values. For filling these up with the average horsepower for the respective number of cylinders I have used the mean value. 
mean_value = df['Horsepower'].mean()
print('Mean of values in horsepower :')
print(mean_value)

Mean of values in horsepower :
104.46938775510205


In [33]:
# Replacing missing attributes from the horsepower column with the mean value. 
df['Horsepower'].fillna(value=df['Horsepower'].mean(), inplace=True)
print('Updated Dataframe:')
print(df)

Updated Dataframe:
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  \
0    18.0          8         307.0       130.0  3504.0          12.0   
1    15.0          8         350.0       165.0  3693.0          11.5   
2    18.0          8         318.0       150.0  3436.0          11.0   
3    16.0          8         304.0       150.0  3433.0          12.0   
4    17.0          8         302.0       140.0  3449.0          10.5   
..    ...        ...           ...         ...     ...           ...   
393  27.0          4         140.0        86.0  2790.0          15.6   
394  44.0          4          97.0        52.0  2130.0          24.6   
395  32.0          4         135.0        84.0  2295.0          11.6   
396  28.0          4         120.0        79.0  2625.0          18.6   
397  31.0          4         119.0        82.0  2720.0          19.4   

     Model Year  
0            70  
1            70  
2            70  
3            70  
4            70  
..      

In [34]:
df['Horsepower'].describe

<bound method NDFrame.describe of 0      130.0
1      165.0
2      150.0
3      150.0
4      140.0
       ...  
393     86.0
394     52.0
395     84.0
396     79.0
397     82.0
Name: Horsepower, Length: 398, dtype: float64>

In [35]:
df.isnull().sum()

MPG             0
Cylinders       0
Displacement    0
Horsepower      0
Weight          0
Acceleration    0
Model Year      0
dtype: int64

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MPG           398 non-null    float64
 1   Cylinders     398 non-null    int64  
 2   Displacement  398 non-null    float64
 3   Horsepower    398 non-null    float64
 4   Weight        398 non-null    float64
 5   Acceleration  398 non-null    float64
 6   Model Year    398 non-null    int64  
dtypes: float64(5), int64(2)
memory usage: 21.9 KB


In [37]:
df.dtypes

MPG             float64
Cylinders         int64
Displacement    float64
Horsepower      float64
Weight          float64
Acceleration    float64
Model Year        int64
dtype: object

In [38]:
#The year attribute has two digits. Adding 1900 so that the year has four digits. For example, 70 to 1970. 
df['Model Year'] = pd.to_datetime(df['Model Year'].astype(str).str.zfill(2), format='%y').dt.year
print(df)

      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  \
0    18.0          8         307.0       130.0  3504.0          12.0   
1    15.0          8         350.0       165.0  3693.0          11.5   
2    18.0          8         318.0       150.0  3436.0          11.0   
3    16.0          8         304.0       150.0  3433.0          12.0   
4    17.0          8         302.0       140.0  3449.0          10.5   
..    ...        ...           ...         ...     ...           ...   
393  27.0          4         140.0        86.0  2790.0          15.6   
394  44.0          4          97.0        52.0  2130.0          24.6   
395  32.0          4         135.0        84.0  2295.0          11.6   
396  28.0          4         120.0        79.0  2625.0          18.6   
397  31.0          4         119.0        82.0  2720.0          19.4   

     Model Year  
0          1970  
1          1970  
2          1970  
3          1970  
4          1970  
..          ...  
393      

In [39]:
df['Model Year'].describe

<bound method NDFrame.describe of 0      1970
1      1970
2      1970
3      1970
4      1970
       ... 
393    1982
394    1982
395    1982
396    1982
397    1982
Name: Model Year, Length: 398, dtype: int64>

In [40]:
# Splitting Dataset into x and y. Pandas to Numpy:
x = df[['Cylinders', 'Displacement', 'Horsepower', 'Weight',
       'Acceleration', 'Model Year']].values
y = df['MPG'].values 

n_features = x.shape[1]
n_class = len(unique(y))

In [41]:
# Split into validation and training sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=1)

In [42]:
# Deep Nural Network Model (DNN)
model = Sequential()
model.add(Dense(20, input_dim=n_features, activation='tanh', kernel_initializer='normal'))
model.add(Dense(10, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(n_class, activation='softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 20)                140       
                                                                 
 dense_4 (Dense)             (None, 10)                210       
                                                                 
 dense_5 (Dense)             (None, 129)               1419      
                                                                 
Total params: 1,769
Trainable params: 1,769
Non-trainable params: 0
_________________________________________________________________


In [43]:
model.compile(loss='mean_squared_error', optimizer='adam')

In [44]:
# This callback will stop the training when there is no improvement in the loss for consecutive epochs.
monitor = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=0,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
)
history = model.fit(x_train, y_train, epochs=100, batch_size=30, validation_data=(x_val, y_val))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [45]:
predict = model.predict(x_test)
predict = argmax(predict, axis=-1).astype('int')
score = np.sqrt(metrics.mean_squared_error(predict,y_test))
print(f"After load score: {score}")

After load score: 81.10931281301795


In [46]:
pd.DataFrame(list(zip(y_test,predict)), columns = ['Actual Values', 'Predicted Values'])

Unnamed: 0,Actual Values,Predicted Values
0,18.0,104
1,28.1,104
2,19.4,104
3,20.3,104
4,20.2,104
...,...,...
75,28.0,104
76,36.4,104
77,15.5,104
78,14.0,104


In [47]:
x_test

array([[6.000e+00, 1.710e+02, 9.700e+01, 2.984e+03, 1.450e+01, 1.975e+03],
       [4.000e+00, 1.410e+02, 8.000e+01, 3.230e+03, 2.040e+01, 1.981e+03],
       [8.000e+00, 3.180e+02, 1.400e+02, 3.735e+03, 1.320e+01, 1.978e+03],
       [5.000e+00, 1.310e+02, 1.030e+02, 2.830e+03, 1.590e+01, 1.978e+03],
       [6.000e+00, 2.320e+02, 9.000e+01, 3.265e+03, 1.820e+01, 1.979e+03],
       [8.000e+00, 3.510e+02, 1.490e+02, 4.335e+03, 1.450e+01, 1.977e+03],
       [4.000e+00, 6.800e+01, 4.900e+01, 1.867e+03, 1.950e+01, 1.973e+03],
       [4.000e+00, 8.900e+01, 6.200e+01, 2.050e+03, 1.730e+01, 1.981e+03],
       [8.000e+00, 3.040e+02, 1.200e+02, 3.962e+03, 1.390e+01, 1.976e+03],
       [8.000e+00, 3.500e+02, 1.800e+02, 4.499e+03, 1.250e+01, 1.973e+03],
       [4.000e+00, 1.350e+02, 8.400e+01, 2.525e+03, 1.600e+01, 1.982e+03],
       [8.000e+00, 2.600e+02, 1.100e+02, 4.060e+03, 1.900e+01, 1.977e+03],
       [6.000e+00, 2.580e+02, 1.200e+02, 3.410e+03, 1.510e+01, 1.978e+03],
       [4.000e+00, 1.210e

In [48]:
# initializing the data
data = [[6,2170,502,3164,4.2,2022], 
        [12,6498,730,3472,3.2,2022], 
        [8,3902,986,3020,2.5,2022], 
        [8,6162,670,3721,2.6,2022], 
        [4,122,181,2496,8.3,2022], 
        [6,3232,155,3232,11.5,1969], 
        [3,598,89,1550,10.1,2022], 
        [3,900,50,642,5.8,2022], 
        [4,1189,60,2355,28.1,1964], 
        [4,201,40,2265,32,1908], 
        ]
  
newDF = pd.DataFrame(data, columns=['Cylinder', 'Displacement','Horsepower', 'Weight','Acceleration', 'Model Year'])
  
# print dataframe.
newDF

Unnamed: 0,Cylinder,Displacement,Horsepower,Weight,Acceleration,Model Year
0,6,2170,502,3164,4.2,2022
1,12,6498,730,3472,3.2,2022
2,8,3902,986,3020,2.5,2022
3,8,6162,670,3721,2.6,2022
4,4,122,181,2496,8.3,2022
5,6,3232,155,3232,11.5,1969
6,3,598,89,1550,10.1,2022
7,3,900,50,642,5.8,2022
8,4,1189,60,2355,28.1,1964
9,4,201,40,2265,32.0,1908


In [49]:
model.predict(newDF)



array([[0.0071657 , 0.00636318, 0.00739182, ..., 0.00771692, 0.00913771,
        0.00774939],
       [0.01151229, 0.00467432, 0.00628825, ..., 0.00505048, 0.00671231,
        0.00961767],
       [0.00775696, 0.00656061, 0.00791507, ..., 0.00733536, 0.00852241,
        0.00672013],
       ...,
       [0.00905145, 0.00653295, 0.00688296, ..., 0.00973148, 0.00834647,
        0.00758633],
       [0.00716571, 0.0063632 , 0.00739182, ..., 0.00771693, 0.0091377 ,
        0.0077494 ],
       [0.00775375, 0.00775075, 0.0077545 , ..., 0.00775314, 0.00774724,
        0.00775078]], dtype=float32)