## Import Dataset

In [1]:
import pandas as pd

# Load the petrol consumption dataset from the CSV file in the local data folder.
data = pd.read_csv('data/petrol_consumption.csv')

# Report how many rows were read, then preview the first few of them.
print(len(data), 'records')
data.head()

48 records


Unnamed: 0,Petrol_tax,Average_income,Paved_Highways,Population_Driver_licence(%),Petrol_Consumption
0,9.0,3571,1976,0.525,541
1,9.0,4092,1250,0.572,524
2,9.0,3865,1586,0.58,561
3,7.5,4870,2351,0.529,414
4,8.0,4399,431,0.544,410


## Data Preprocessing

### Prepare features and labels
* Features = Petrol_tax, Average_income, Paved_Highways, Population_Driver_licence(%)
* Labels = Petrol_Consumption

In [2]:
# Select columns by name rather than by position, so that an extra or reordered
# column in the CSV (for example a saved index column) cannot silently shift
# the features or the label.
FEATURE_COLUMNS = [
    'Petrol_tax',
    'Average_income',
    'Paved_Highways',
    'Population_Driver_licence(%)',
]
LABEL_COLUMN = 'Petrol_Consumption'

X = data[FEATURE_COLUMNS].values  # 2-D array: one row per record, one column per feature
y = data[LABEL_COLUMN].values     # 1-D array holding the target value of each record

### Split data into training and test sets

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the records as the test set; random_state pins the shuffle
# so the same split is produced on every run.
X_trn, X_tst, y_trn, y_tst = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

### Normalize the training and test features

In [4]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both the training and the test features.
sc = StandardScaler()
sc.fit(X_trn)

X_trn_norm = sc.transform(X_trn)
X_tst_norm = sc.transform(X_tst)

## Create model
* Using **Keras** functional API
* Input layer
* 3 hidden Dense layers with 100, 50 and 25 neurons respectively and ReLU activation function
* Output layer with 1 neuron for 1 output value
* Loss function = mean squared error
* Optimizer = Adam
* Evaluation metric = mean squared error

In [5]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

import tensorflow as tf

# Seed TensorFlow's global random generator so that weight initialisation is
# more repeatable across fresh kernel runs (same seed value as the data split).
tf.random.set_seed(4)

# Functional-API network: one input per feature column, three ReLU hidden
# layers (100 -> 50 -> 25 units) and a single output unit.
input_layer = Input(shape=(X.shape[1],))
dense_layer_1 = Dense(100, activation='relu')(input_layer)
dense_layer_2 = Dense(50, activation='relu')(dense_layer_1)
dense_layer_3 = Dense(25, activation='relu')(dense_layer_2)
output = Dense(1)(dense_layer_3)  # no activation argument: plain regression output

model = Model(inputs=input_layer, outputs=output)
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

# summary() prints the table itself and returns None, so wrapping it in print()
# would append a stray "None" line to the cell output.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense (Dense)                (None, 100)               500       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_2 (Dense)              (None, 25)                1275      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 26        
Total params: 6,851
Trainable params: 6,851
Non-trainable params: 0
_________________________________________________________________
None


## Train the model
* Epochs = 100
* Validation data = 20% of training data

In [6]:
# Train on the scaled training features in mini-batches of 2 for 100 epochs,
# holding out 20% of the training rows as validation data. The object returned
# by fit() is kept in `training`.
training = model.fit(
    X_trn_norm,
    y_trn,
    batch_size=2,
    epochs=100,
    verbose=1,
    validation_split=0.2,
)

Train on 30 samples, validate on 8 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100


Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


## Evaluate the model
Using ***Root mean squared error***.  
* Find mean squared error between the predicted and actual values
* Find the square root of the mean squared error  

In [7]:
from sklearn.metrics import mean_squared_error
import numpy as np

def rmse(actual, predicted):
    """Return the square root of the mean squared error between two value sets."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Error on the data the model was trained on.
pred_trn = model.predict(X_trn_norm)
err_trn = rmse(y_trn, pred_trn)
print('Training error:', err_trn)

# Error on the held-out test data.
pred = model.predict(X_tst_norm)
err = rmse(y_tst, pred)
print('Test error:', err)

Training error: 64.27488983009766
Test error: 98.91566153871884
