In [117]:
import tensorflow as tf 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

#### Load Dataset

In [102]:
columns=['mpg','cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin', 
         'car name']  #The Column names as given in the UCL Machine Learning Repo

df=pd.read_csv(r"C:\Users\Soumyajit Sarkar\Desktop\Tensorflow\Auto-mpg\Dataset\auto-mpg.data-original.csv",
               sep='\s+',names=columns)  # Reading the Dataset and storing it in a dataframe called 'df'

print(df.shape)  #Gives the shape of the dataset i.e rows x columns

df.head()

(406, 9)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0,chevrolet chevelle malibu
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0,buick skylark 320
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0,plymouth satellite
3,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0,amc rebel sst
4,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0,ford torino


#### Pre-Process the Dataset

##### 1. Missing value replacement

In [103]:
df.isnull().sum()  # The total number of null or missing values collumn wise

mpg             8
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model year      0
origin          0
car name        0
dtype: int64

In [104]:
df['mpg']=df['mpg'].replace(np.nan,df['mpg'].mean())  #replace the missing values by mean for mpg column
df['horsepower']=df['horsepower'].replace(np.nan,df['horsepower'].mean())  # By mean again for horsepower column

##### 2. Changing Country of Origin to Categorical Data

In [105]:
df=pd.concat([df,pd.get_dummies(df['origin'])],axis=1)  # This adds three more rows corresponding to each 
                                                        # unique value in origins collumns
df.head(2)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name,1.0,2.0,3.0
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0,chevrolet chevelle malibu,1,0,0
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0,buick skylark 320,1,0,0


In [106]:
"""
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Changing The names of the columns :
    
    1: USA
    2: Europe
    3: Japan
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
df_changed=df.drop(['origin'],axis=1)
df_changed=df_changed.drop(['car name'],axis=1)
df_changed=df_changed.rename(columns={1:'USA',2:'Europe',3:'Japan'})
df_changed.head(3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,USA,Europe,Japan
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1,0,0
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1,0,0
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1,0,0


##### 3. Data Prep. for Model ( Y=Mpg)

In [118]:
"""
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Preparing the input for the model :

x : all the columns in df exclusing 
    'mpg', 'car name', 'origin'
    
y : 'mpg'

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
columns=df_changed.columns
columns=columns.tolist()
columns.remove('mpg')
print('Model Input Length : {}'.format(len(columns)))
x=df_changed[columns]
y=df_changed[['mpg']]

x_train,x_test,y_train,y_test=train_test_split(x,y, train_size=0.8)

Model Input Length : 9


#### Model Design 

In [119]:
"""
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Model Input : 9 features per data

1. A Dense Layer with 18 neurons
    Activation : ReLU

2. A Dense Layer with 40 neurons
    Activation : ReLU

3. A Dense Layer with 10 neurons
    Activation : ReLU

4. A Dense Layer with 1 neuron as output

loss : mean squared error 
optimizer : Adam optimizer with 0.1 Learning rate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
model=tf.keras.Sequential([tf.keras.layers.Dense(units=18, activation=tf.nn.relu , input_shape=[9]),
                           tf.keras.layers.Dense(units=40, activation=tf.nn.relu),
                           tf.keras.layers.Dense(units=10, activation=tf.nn.relu),
                           tf.keras.layers.Dense(units=1)])

model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(learning_rate=0.1))

#### Model Training : 

In [129]:
"""
~~~~~~~~~~~~~~~
Batch Size = 32

Epochs = 100
~~~~~~~~~~~~~~~
"""
rows, columns=df_changed.shape

batch = 32

import math
model.fit(x,y,epochs=1, batch_size=batch)

Train on 406 samples


<tensorflow.python.keras.callbacks.History at 0x19973ee6828>