# Importing the initial classes and libraries

In [None]:
import numpy as np
import pandas as pd
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
# Mount Google Drive at /content/drive (Colab-only API) so that the dataset
# path under /content/drive used by the loading cell resolves.
from google.colab import drive
drive.mount('/content/drive')

# Data pre-process

Importing the file from google drive

In [None]:
# Load the raw car-evaluation table from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/cars_data.csv')
# A bare `df.shape` in the middle of a cell is never displayed; print it explicitly.
print(df.shape)
# Shuffle the rows to remove any ordering pattern in the file. The seed makes
# the shuffle (and everything downstream of it) reproducible across runs.
df = df.sample(frac=1, random_state=1)

Show first 20 values

In [None]:
# Preview the first 20 rows of the shuffled frame.
df.head(20)

Processing string data into numbers 

In [None]:
# List the distinct values of every column to see which categories need encoding.
for column_name in df.columns:
    print(df[column_name].unique())

In [None]:
# Ordinal integer codes for each categorical column, keyed by the column name
# exactly as it is used in the frame (note the trailing space in 'buying ').
ordinal_codes = {
    'class_values': {'unacc':0, 'acc':1, 'good':2, 'vgood':3},
    'buying ': {'low':0, 'med':1, 'high':2, 'vhigh':3},
    'maint': {'low':0, 'med':1, 'high':2, 'vhigh':3},
    'doors': {'2':0, '3':1, '4':2, '5more':3},
    'persons': {'2':0, '4':1, 'more':2},
    'lug_boots': {'small':0, 'med':1, 'big':2},
    'safety': {'low':0, 'med':1, 'high':2},
}

# Apply the mappings column by column, in the order listed above.
for column, codes in ordinal_codes.items():
    df[column] = df[column].replace(codes)

In [None]:
# Draw one histogram per column of the frame.
df.hist()

Plotting the class values to show the discrepancy between the classes

In [None]:
# Re-list the distinct values per column to check the result of the encoding step.
for column_name in df.columns:
    print(df[column_name].unique())

In [None]:
# Histogram of the target column to visualise how the classes are distributed.
plt.hist(df['class_values'])

# Assigning train and test data

Splitting samples and labels

In [None]:
# Positional split: the first six columns are the features, the seventh is the target.
samples, labels = df.iloc[:, :6], df.iloc[:, 6]

Encoding the labels

In [None]:
# Encode the target with scikit-learn's LabelEncoder; fit_transform returns the
# encoded labels as a NumPy array, replacing the pandas Series in `labels`.
# NOTE(review): class_values was already mapped to the integers 0-3 earlier, so
# this step presumably leaves the values unchanged - confirm.
from sklearn.preprocessing import LabelEncoder
lb = LabelEncoder()
labels = lb.fit_transform(labels)

Scaling the samples

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column; `sc` keeps the fitted scaler for later use.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split below, so test-set statistics leak into the scaling - consider fitting
# on the training split only.
sc = StandardScaler()
samples = sc.fit_transform(samples)

Splitting train and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
split_parts = train_test_split(samples, labels, test_size=0.20, random_state=1)
train_samples, test_samples, train_labels, test_labels = split_parts

Shape of train samples

In [None]:
# Dimensions of the training feature array.
train_samples.shape

Shape of train labels

In [None]:
# Dimensions of the training label array.
train_labels.shape

Shape of test samples

In [None]:
# Dimensions of the test feature array.
test_samples.shape

Shape of test labels

In [None]:
# Dimensions of the test label array.
test_labels.shape

# Use GPU for computing (optional)

**WARNING !**
USE ONLY IF YOUR INSTANCE SUPPORTS GPU USAGE



In [None]:

# Enable on-demand GPU memory allocation. Looping over the detected devices
# makes this cell a no-op on a CPU-only runtime (indexing physical_devices[0]
# raised IndexError there) and applies the same setting to every GPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPU", len(physical_devices))
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Plotting functions

In [None]:
def plotacc(history, epochs):
  """Plot the training accuracy curve.

  history: object whose ``history`` dict contains an 'accuracy' series.
  epochs: number of epochs; the x axis runs from 1 to ``epochs`` inclusive.
  """
  train_accuracy = history.history['accuracy']
  epoch_axis = range(1, epochs + 1)
  plt.plot(epoch_axis, train_accuracy, 'g', label='Training accuracy')
  plt.xlabel('Epochs')
  plt.ylabel('Accuracy')
  plt.title('Training accuracy')
  plt.legend()
  plt.show()

In [None]:
def plotloss(history, epochs):
  """Plot the training loss curve.

  history: object whose ``history`` dict contains a 'loss' series.
  epochs: number of epochs; the x axis runs from 1 to ``epochs`` inclusive.
  """
  train_loss = history.history['loss']
  epoch_axis = range(1, epochs + 1)
  plt.plot(epoch_axis, train_loss, 'r', label='Training loss')
  plt.xlabel('Epochs')
  plt.ylabel('Loss')
  plt.title('Training loss')
  plt.legend()
  plt.show()

In [None]:
def plotlosswval(history, epochs):
  """Plot training loss against validation loss on one set of axes.

  history: object whose ``history`` dict contains 'loss' and 'val_loss' series.
  epochs: number of epochs; the x axis runs from 1 to ``epochs`` inclusive.
  """
  recorded = history.history
  epoch_axis = range(1, epochs + 1)
  plt.plot(epoch_axis, recorded['loss'], 'g', label='Training loss')
  plt.plot(epoch_axis, recorded['val_loss'], 'b', label='validation loss')
  plt.xlabel('Epochs')
  plt.ylabel('Loss')
  plt.title('Training and Validation loss')
  plt.legend()
  plt.show()

In [None]:
def plotaccwval(history, epochs):
  """Plot training accuracy against validation accuracy on one set of axes.

  history: object whose ``history`` dict contains 'accuracy' and
    'val_accuracy' series.
  epochs: number of epochs; the x axis runs from 1 to ``epochs`` inclusive.
  """
  recorded = history.history
  epoch_axis = range(1, epochs + 1)
  plt.plot(epoch_axis, recorded['accuracy'], 'g', label='Training accuracy')
  plt.plot(epoch_axis, recorded['val_accuracy'], 'b', label='validation accuracy')
  plt.xlabel('Epochs')
  plt.ylabel('Accuracy')
  plt.title('Training and Validation accuracy')
  plt.legend()
  plt.show()

# Keras Sequential Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, Conv3D
from tensorflow.keras.optimizers import SGD, Adam, Nadam
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers

Testing the model to be used in 4-fold cross validation

In [None]:
# Fully connected network over the six input features.
# NOTE(review): the head is a single ReLU unit, i.e. the four class indices are
# fitted as one regression target rather than with a 4-way softmax - confirm
# that this is intended.
model = keras.Sequential([
    # Input block: L2 penalties on both kernel and bias.
    layers.Dense(
        30,
        input_dim=6,
        kernel_initializer='uniform',
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.L2(0.5),
        bias_regularizer=tf.keras.regularizers.L2(0.9),
    ),
    layers.BatchNormalization(),
    # Hidden blocks: kernel norms constrained to a fixed range, then dropout.
    layers.Dense(
        60,
        kernel_initializer='uniform',
        activation='relu',
        kernel_constraint=tf.keras.constraints.MinMaxNorm(min_value=0.9, max_value=2, axis=0),
    ),
    layers.Dropout(0.2),
    layers.Dense(
        10,
        kernel_initializer='uniform',
        activation='relu',
        kernel_constraint=tf.keras.constraints.MinMaxNorm(min_value=1.3, max_value=1.8, axis=0),
    ),
    layers.Dropout(0.1),
    # Single-unit output.
    layers.Dense(1, kernel_initializer='uniform', activation='relu'),
])

Summary of the model to be used

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

Choosing the number of epochs

In [None]:
# Number of training epochs; also passed to the plotting helpers to size the x axis.
epochs = 600

Declaring the optimiser

Compiling the model

In [None]:
def accuracy(y_true, y_pred):
  """Per-sample hit/miss of the rounded network output against the class index.

  The model ends in a single output unit, so the string metric 'accuracy'
  would be resolved by Keras to binary accuracy (prediction thresholded at
  0.5), which can never match the class indices 2 and 3. Rounding the output
  to the nearest integer gives an accuracy that is meaningful for all four
  classes. The function is deliberately named ``accuracy`` so the history keys
  ('accuracy', 'val_accuracy') read by the plotting helpers stay the same.

  y_true: class indices, any shape that flattens to one value per sample.
  y_pred: network output, one value per sample.
  Returns a float tensor of 0/1 values (one per sample); Keras averages it.
  """
  # Flatten both tensors so (batch,) labels and (batch, 1) outputs compare
  # element-wise instead of broadcasting against each other.
  y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
  y_pred = tf.round(tf.reshape(y_pred, [-1]))
  return tf.cast(tf.equal(y_true, y_pred), tf.float32)


model.compile(loss='mse', optimizer='adam', metrics=[accuracy])

In [None]:
# Train for the number of epochs configured in `epochs` (previously a second,
# hard-coded 600 that could silently drift from that setting).
# NOTE(review): the "Fitting the model" cell below calls fit() on this same
# model again, so that run continues from the weights trained here.
model.fit(train_samples, train_labels, epochs=epochs, batch_size=128, shuffle=True)
# evaluate() returns the loss followed by the compiled metric.
scores = model.evaluate(test_samples, test_labels)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Fitting the model

In [None]:
# Keep the returned History object; the plotting helpers read its per-epoch series.
# NOTE(review): the held-out test split doubles as the validation data here.
history = model.fit(
    x=train_samples,
    y=train_labels,
    validation_data=(test_samples, test_labels),
    epochs=epochs,
    batch_size=500,
    shuffle=True,
    verbose=1,
)

Evaluating the model with a different method

In [None]:
# evaluate() yields the loss first, then the compiled metric.
evaluation = model.evaluate(test_samples, test_labels)
test_loss, test_acc = evaluation

# Report both figures for the held-out split.
print('Test Accuracy: ', test_acc, '\nTest Loss: ', test_loss)

# Results of the model with basic validation

Comparing training accuracy with validation accuracy

In [None]:
# Training vs. validation accuracy for the run stored in `history`.
plotaccwval(history, epochs)

Comparing training loss with validation loss

In [None]:
# Training vs. validation loss for the run stored in `history`.
plotlosswval(history, epochs)

# Defining K-Fold cross validator and metrics

In [None]:
from sklearn.model_selection import KFold

# 4-fold splitter. The shuffle is seeded so the folds, and therefore the
# per-fold scores, are the same on every run.
kfold = KFold(n_splits=4, shuffle=True, random_state=1)
acc_per_fold = []  # accuracy (in percent) recorded for each fold
loss_per_fold = []  # loss recorded for each fold
# Placeholders for the overall accuracy and loss. These were previously bound
# to the `np.array` function itself rather than to a value.
over_acc = np.array([])
over_loss = np.array([])

# K-Fold cross validation

In [None]:
from sklearn.model_selection import train_test_split


def accuracy(y_true, y_pred):
  """Per-sample hit/miss of the rounded network output against the class index.

  The model ends in a single output unit, so the string metric 'accuracy'
  would be resolved by Keras to binary accuracy (prediction thresholded at
  0.5), which can never match the class indices 2 and 3. Rounding the output
  to the nearest integer gives an accuracy that is meaningful for all four
  classes. The function is deliberately named ``accuracy`` so the
  'accuracy' history key read by ``plotacc`` stays the same.

  y_true: class indices, any shape that flattens to one value per sample.
  y_pred: network output, one value per sample.
  Returns a float tensor of 0/1 values (one per sample); Keras averages it.
  """
  # Flatten both tensors so (batch,) labels and (batch, 1) outputs compare
  # element-wise instead of broadcasting against each other.
  y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
  y_pred = tf.round(tf.reshape(y_pred, [-1]))
  return tf.cast(tf.equal(y_true, y_pred), tf.float32)


def _build_model():
  """Return a new, uncompiled copy of the network so every fold starts from fresh weights."""
  return keras.Sequential(
      [
          layers.Dense(30, input_dim=6, kernel_initializer='uniform', activation='relu', kernel_regularizer= tf.keras.regularizers.L2(0.5),bias_regularizer= tf.keras.regularizers.L2(0.9)),
          layers.BatchNormalization(),
          layers.Dense(60, kernel_initializer='uniform', activation='relu', kernel_constraint=tf.keras.constraints.MinMaxNorm(min_value=0.9, max_value=2, axis=0)),
          layers.Dropout(0.2),
          layers.Dense(10, kernel_initializer='uniform', activation='relu', kernel_constraint=tf.keras.constraints.MinMaxNorm(min_value=1.3, max_value=1.8, axis=0)),
          layers.Dropout(0.1),
          layers.Dense(1, kernel_initializer='uniform', activation='relu'),
      ]
  )


acc_per_fold = [] # accuracy (in percent) recorded for each fold
loss_per_fold = [] # loss recorded for each fold
epochs = 600 # epochs per fold; loop-invariant, so set once before the loop

# `train` and `test` are index arrays into `samples` / `labels` for each fold.
for fold_no, (train, test) in enumerate(kfold.split(samples, labels), start=1):

  # Define and compile a fresh model for this fold
  model = _build_model()
  model.compile(optimizer='Adam', loss='mean_squared_error', metrics=[accuracy])

  print('\n-------------------------------------------------------------------------------------------------')
  print(f'Training for fold {fold_no}')

  # Fit on the training part of the fold
  history = model.fit(x=samples[train], y=labels[train], batch_size=256, epochs=epochs, shuffle=True, verbose=0)

  # Score on the held-out part of the fold; scores = [loss, accuracy]
  scores = model.evaluate(x=samples[test], y=labels[test], verbose=2)
  print('\n')
  print(f'Prediction score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
  acc_per_fold.append(scores[1]*100)
  loss_per_fold.append(scores[0])

  # Training curves for this fold
  plotacc(history, epochs)
  plotloss(history,epochs)


# Results

Accuracy per fold

In [None]:
# One line per fold, with the accuracy formatted to four decimals.
print("Accuracy for each fold : ")
for fold_index, fold_accuracy in enumerate(acc_per_fold, start=1):
    print('Fold ', fold_index, 'is ', "%.4f" % fold_accuracy)

In [None]:
# One x position per recorded fold. Deriving the range from the list keeps the
# plot valid if the number of splits changes (it was hard-coded to four folds).
folds = range(1, len(acc_per_fold) + 1)
plt.plot(folds, acc_per_fold, 'g', label='Accuracy')
plt.title('Accuracy per fold')
plt.xlabel('Fold')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

Loss per fold

In [None]:
# One line per fold, with the loss formatted to four decimals.
print("Loss for each fold : ")
for fold_index, fold_loss in enumerate(loss_per_fold, start=1):
    print('Fold ', fold_index, 'is ', "%.4f" % fold_loss)

In [None]:
# One x position per recorded fold. Deriving the range from the list keeps the
# plot valid if the number of splits changes (it was hard-coded to four folds).
folds = range(1, len(loss_per_fold) + 1)
plt.plot(folds, loss_per_fold, 'r', label='Loss')
plt.title('Loss per fold')
plt.xlabel('Fold')
plt.ylabel('Loss')
plt.legend()
plt.show()