In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tensorflow import keras

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names_in_folder)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation split (and every number derived from it)
# is the same on each "Restart & Run All".
RANDOM_STATE = 42

# Load the car-price table, using its `car_ID` column as the index.
df = pd.read_csv('../input/car-price-prediction/CarPrice_Assignment.csv', index_col = 'car_ID')

# Target is the `price` column; the features are every other column.
# `drop` returns a new frame, so `df` keeps the raw data (including `price`)
# instead of being mutated through an alias.
y = df['price']
X = df.drop(columns = ['price'])

# 80/20 hold-out split of the feature frame and the target.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, train_size = 0.8, test_size = 0.2, random_state = RANDOM_STATE
)
print(f"Shape of X_train = {X_train.shape}\nShape of y_train = {y_train.shape}\nShape of X_valid : {X_valid.shape}\nShape of y_valid = {y_valid.shape}")

In [None]:
# Preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Split the feature columns by dtype: int64/float64 columns are treated as
# numeric, object columns as categorical. A column of any other dtype lands in
# neither list and is therefore not passed to either transformer below.
num_cols = [cname for cname in X_train.columns
            if X_train[cname].dtype in ['int64', 'float64']]
cat_cols = [cname for cname in X_train.columns
            if X_train[cname].dtype in ['object']]

# Numeric columns: fill missing values with the imputer's constant fill value.
si = SimpleImputer(strategy = 'constant')

# Categorical columns: dense one-hot encoding; categories unseen during `fit`
# are ignored at transform time instead of raising.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and later removed the old
# keyword, so try the new spelling first and fall back for older installs.
try:
    ohe = OneHotEncoder(handle_unknown = 'ignore', sparse_output = False)
except TypeError:
    ohe = OneHotEncoder(handle_unknown = 'ignore', sparse = False)

pp_pipeline = ColumnTransformer(
    transformers = [
        ('num', si, num_cols),
        ('cat', ohe, cat_cols)
    ])

# Fit on the training split only, then apply the fitted transformer to the
# validation split so no validation information leaks into preprocessing.
pp_X_train = pp_pipeline.fit_transform(X_train)
pp_X_valid = pp_pipeline.transform(X_valid)
print(pp_X_train.shape)
print(pp_X_valid.shape)

### Preprocessing done here. Creating a DNN using Keras


In [None]:
# Import from `tensorflow.keras` so these classes come from the same Keras
# implementation as the `keras` module imported at the top of the notebook.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Width of the preprocessed feature matrix (its last axis).
n_features = pp_X_train.shape[-1]

# Baseline MLP regressor: a linear layer as wide as the input, one ReLU hidden
# layer of 45 units, and a single linear output unit.
model = Sequential()
model.add(Dense(n_features, input_shape = (n_features,)))
model.add(Dense(45, activation = 'relu'))
model.add(Dense(1, activation  = 'linear'))

# MAE is used both as the training loss and as the reported metric.
model.compile(optimizer = 'adam', loss = 'mean_absolute_error', metrics = ['mean_absolute_error'])

# `summary()` prints the table itself and returns None, so it is called directly
# rather than wrapped in `print` (which would emit a stray "None").
model.summary()

## Fitting the model

In [None]:
# Train the baseline network and keep the object returned by `fit`; the cells
# below read the per-epoch `loss` / `val_loss` series from its `.history`.
epochs = 100
trained_model = model.fit(
    pp_X_train,
    y_train,
    validation_data = (pp_X_valid, y_valid),
    epochs = epochs,
)

## Checking for overfitting

In [None]:
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Per-epoch training and validation loss recorded by `fit`. The baseline model
# is compiled with mean absolute error as its loss, so these values are MAEs.
train_loss = trained_model.history['loss']
valid_loss = trained_model.history['val_loss']
print(f"Training MAE : {train_loss[-1]}")
print(f"Validation MAE : {valid_loss[-1]}")

# Plot the decrease of MAE with each iteration.
# The x-axis is derived from the recorded history, so it always matches the
# number of epochs that actually ran, independent of the global `epochs`.
epoch_axis = range(len(train_loss))

# `plt.subplots()` creates exactly one figure; calling `plt.clf()` first would
# create (and display) an additional empty figure.
fig, ax = plt.subplots()
fig.suptitle('Graph of training loss and validation loss')
ax.plot(epoch_axis, train_loss, 'b', label = 'training loss')
ax.plot(epoch_axis, valid_loss, 'r', label = 'validation loss')
ax.set_xlabel('epoch')
ax.set_ylabel('mean absolute error')
ax.legend()
plt.show()

# Creating an auto-tuned model with Keras Tuner


In [None]:
try:
    import keras_tuner as kt
except ImportError:
    # Fall back to the legacy module name used by older Keras Tuner releases.
    import kerastuner as kt
import IPython
import tensorflow as tf

def model_builder(hp):
    """Build and compile an MLP regressor from a set of tunable hyperparameters.

    The network is a linear layer as wide as the preprocessed input, two ReLU
    hidden layers that share one tunable width, and a single linear output
    unit. It is compiled with Adam and mean absolute error as both loss and
    metric.

    Parameters
    ----------
    hp
        Hyperparameter container supplied by the tuner. Only its `Int` and
        `Choice` methods are used, to declare:
        - 'units': hidden-layer width (min 10, max 180, step 45);
        - 'learning rate': Adam learning rate, one of 1e-4, 1e-3, 1e-2, 1e-1.

    Returns
    -------
    The compiled (untrained) model.

    Notes
    -----
    Reads the module-level `pp_X_train` to size the input layer.
    """
    n_features = pp_X_train.shape[-1]

    model = Sequential()
    model.add(Dense(n_features, input_shape = (n_features,)))

    # Both hidden layers use the same tunable width.
    hp_units = hp.Int('units', min_value = 10, max_value = 180, step = 45)
    model.add(Dense(units = hp_units, activation = 'relu'))
    model.add(Dense(units = hp_units, activation = 'relu'))
    # Give the last layer
    model.add(Dense(1, activation = 'linear'))

    #Create HP for learning rate
    hp_learn_rate = hp.Choice('learning rate', values = [1e-4, 1e-3, 1e-2, 1e-1])

    # No explicit `reduction` is passed: the library default is used. The
    # literal "auto" is rejected by Keras 3, while in Keras 2 it is the default
    # anyway, so omitting it keeps the behavior and works on both.
    model.compile(optimizer = keras.optimizers.Adam(learning_rate = hp_learn_rate),
                  loss = keras.losses.MeanAbsoluteError(name="mean_absolute_error"),
                  metrics = [keras.metrics.MeanAbsoluteError(name="mean_absolute_error", dtype=None)])
    return model

## Use the custom function to build an auto-tuned MLP

In [None]:
# Random search over the space declared in `model_builder`, ranking trials by
# validation loss. Each of the 5 trials trains the model 4 times.
tuner = kt.RandomSearch(model_builder,
                        objective = 'val_loss',
                        max_trials=5,
                        executions_per_trial=4,
                        directory = 'output',
                        project_name = 'MLPRegressor')

# Code Taken from intro to tensorflow core
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    """Callback that clears the cell output each time a training run ends."""

    def on_train_end(self, logs = None):
        # wait=True defers the clear until new output is available.
        IPython.display.clear_output(wait = True)

tuner.search(pp_X_train, y_train, epochs = 10, validation_data = (pp_X_valid, y_valid), callbacks = [ClearTrainingOutput()])

# Get the optimal hyperparameters: only the single best set is used, so only
# one is requested.
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

In [None]:
# Build a model from the best hyperparameters found by the search and train it
# on the same data as the tuner used, for 100 epochs.
at_model = tuner.hypermodel.build(best_hps)

# `summary()` prints the table itself and returns None, so it is called directly
# rather than wrapped in `print` (which would emit a stray "None").
at_model.summary()

epochs = 100
at_trained_model = at_model.fit(pp_X_train, y_train, epochs = epochs, validation_data = (pp_X_valid, y_valid))

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training and validation loss recorded by `fit` for the tuned model.
at_train_loss = at_trained_model.history['loss']
at_valid_loss = at_trained_model.history['val_loss']
print(f"Training MAE : {at_train_loss[-1]}")
print(f"Validation MAE : {at_valid_loss[-1]}")

# Plot the decrease of MAE with each iteration.
# The x-axis is derived from the recorded history, so it always matches the
# number of epochs that actually ran, independent of the global `epochs`.
at_epoch_axis = range(len(at_train_loss))

# `plt.subplots()` creates exactly one figure; calling `plt.clf()` first would
# create (and display) an additional empty figure.
fig, ax = plt.subplots()
fig.suptitle('Graph of training loss and validation loss')
ax.plot(at_epoch_axis, at_train_loss, 'b', label = 'training loss')
ax.plot(at_epoch_axis, at_valid_loss, 'r', label = 'validation loss')
ax.set_xlabel('epoch')
ax.set_ylabel('mean absolute error')
ax.legend()
plt.show()