# Fuel Efficiency End-to-End Project

### Import Libraries

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import tensorflow as tf
import tensorflow.keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential


In [None]:
# check the GPU
# Enable memory growth on every GPU that TensorFlow can see.
# Looping over the list (instead of indexing [0]) keeps this cell from
# raising IndexError on CPU-only machines, where the list is empty, and
# applies the same setting to all GPUs when more than one is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [None]:
# load the dataset; a literal '?' in the file is read as a missing value
# NOTE(review): the path is an empty string here -- point it at the CSV file.
DATA_PATH = ''
df = pd.read_csv(DATA_PATH, na_values='?')
# preview the first rows
df.head()

### EDA

In [None]:
# print a summary of the dataframe (columns, non-null counts and dtypes)
df.info()

In [None]:
# check for null values
df.isnull().sum()

In [None]:
# if there are few values short, drop those rows
df.dropna(inplace = True)

In [None]:
# check if the null values are still available
df.isnull().sum()

In [None]:
# show descriptive statistics for the dataframe's columns
df.describe()

In [None]:
# get the names of all the types of cars in the ddataset
df['car name'].unique()

In [None]:
# drop the car name column
df.drop(['car name'], axis =1, inplace = True)

In [None]:
# separate the dataset into the target (mpg) and the feature matrix X
y = df['mpg']
X = df.drop(columns=['mpg'])

In [None]:
# hold out 20% of the rows as the test set; the fixed random_state keeps
# the split reproducible between runs
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# standardise the datasets for train and test sets
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [None]:
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

In [None]:
# save the fitted standard scaler to disk
# The context manager guarantees the file is flushed and closed once the
# dump finishes (the bare open() call previously left the handle open).
# NOTE(review): the path starts with a backslash -- confirm the intended
# output location.
with open(r'\scaler.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [None]:
# chckk the keys needed for X matrix of features
X_train.keys()

In [None]:
len(X_train.keys())

### Initialize ANN

In [None]:
# Fully connected network: four hidden ReLU layers of 64 units each,
# followed by a single output unit with no activation.
n_features = len(X_train.keys())

model = Sequential([
    # input layer and first hidden layer
    Dense(units=64, activation='relu', input_shape=[n_features]),
    # second, third and fourth hidden layers
    Dense(units=64, activation='relu'),
    Dense(units=64, activation='relu'),
    Dense(units=64, activation='relu'),
    # output layer
    Dense(units=1),
])

In [None]:
# Compile the ANN
# 'mean_squared_error' is the identifier Keras recognises for MSE; the
# earlier spellings ('mean_square_error' / 'mean_sqaure_error') are not
# valid loss or metric names and make compile/fit fail.
model.compile(loss='mean_squared_error',
              optimizer='Adam',
              metrics=['mean_squared_error', 'mean_absolute_error'])

In [None]:
# print the model summary
model.summary()

In [None]:
# train the ANN on the standardised training features
# (1000 epochs, batches of 100 samples); the returned object is kept in
# model_history
model_history = model.fit(
    X_train_sc,
    y_train,
    batch_size=100,
    epochs=1000,
)

In [None]:
# write the trained model to disk
# NOTE(review): the path starts with a backslash -- confirm the intended
# output location.
MODEL_PATH = r"\fuel_model.h5"
model.save(MODEL_PATH)

### Model Evaluation


In [None]:
# predict on the standardised test features
# (the scaled test set is named X_test_sc; X_test_std was never defined
# and raised NameError)
prediction = model.predict(X_test_sc)

In [None]:
# plot predictions against the true values; the straight line marks
# where prediction equals the true value
ax = plt.gca()
ax.scatter(y_test, prediction)
ax.set_xlabel('True Values [MPG]')
ax.set_ylabel('Predictions [MPG]')
ax.axis('equal')
ax.axis('square')
# start both axes at 0 while keeping their current upper limits
ax.set_xlim([0, ax.get_xlim()[1]])
ax.set_ylim([0, ax.get_ylim()[1]])
_ = ax.plot([-100, 100], [-100, 100])

In [None]:
# check the metrics on the test set
# (the test target is named y_test; Y_test was never defined and raised
# NameError)
rmse = np.sqrt(mean_squared_error(y_test, prediction))

print('RMSE: ', rmse)

print('R_square:', r2_score(y_test, prediction))

### Prediction

In [None]:
input_data_1=[[4,97,100,5000,14.5,70,3]]
input_data_1=sc.transform(input_data_1)
input_data_1

In [None]:
y_pred_1 = model.predict(input_data_1)

In [None]:
y_pred_1

In [None]:
input_data_2=[[8,307,130,3504,12,70,1]]
input_data_2=sc.transform(input_data_2)
input_data_2

In [None]:
y_pred_2 = model.predict(input_data_2)

In [None]:
y_pred_2