<a href="https://colab.research.google.com/github/shraddha-an/nn-practice/blob/main/car_owner_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Classifying the number of car owners with a classification neural network**

## **1) Importing libraries**
---

In [None]:
# Importing required libraries
# Data Handling/ Manipulation
import pandas as pd
import numpy as np

# Torch
import torch
import torch.nn as nn, torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Time 
import time
from datetime import datetime as dt

# Plotting
import matplotlib.pyplot as plt
import seaborn as sb

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

## **2) Data Preprocessing**
---
**Steps:**

1) Columns to encode: Fuel_Type, Seller_Type, Transmission

2) Feature Extraction: Extract Age of the vehicle from the Year column

3) Columns to delete: Car_Name, Year.






In [None]:
# Load the raw car listings
dataset = pd.read_csv('car_data.csv')

# Vehicle age in years, measured from the model year to the current calendar year
dataset['Age'] = dt.today().year - dataset['Year']

# Car name and model year are not used as features
dataset = dataset.drop(columns = ['Car_Name', 'Year'])

# Owner == 3 appears in only one example, so that row is removed
dataset = dataset.loc[dataset['Owner'] != 3]
print(len(dataset))

# Features are positional columns 0-5 and 7; the target is the second-to-last
# column (expected to be Owner - confirm against the CSV column order)
feature_columns = [0, 1, 2, 3, 4, 5, 7]
X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, -2].values.reshape(-1, 1)

# Hold out 20% of the rows as the test subset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

300


((240, 7), (240, 1), (60, 7), (60, 1))

In [None]:
# Display one training row (index 98) to inspect the raw feature values
X_train[98]

array([3.1, 4.43, 11849, 'Petrol', 'Dealer', 'Manual', 5], dtype=object)

In [None]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer 

enc = LabelEncoder()

# Columns 3, 4 and 5 hold the string categories (Fuel_Type, Seller_Type, Transmission).
# The encoder is fitted on the training split only and then re-used on the test split.
for col in (3, 4, 5):
  X_train[:, col] = enc.fit_transform(X_train[:, col])
  X_test[:, col] = enc.transform(X_test[:, col])

# OHE the Fuel_Type column
ohe = ColumnTransformer([('encoder', OneHotEncoder(), [3])], remainder = 'passthrough') 

X_train = ohe.fit_transform(X_train)
X_test = ohe.transform(X_test)


# Label encoding the target so the classes are consecutive integers starting at 0
y_train[:, 0] = enc.fit_transform(y_train[:, 0])
y_test[:, 0] = enc.transform(y_test[:, 0])

# Standardization
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Fit the scaler on the training split only; the test split must be transformed with
# the training mean/variance, otherwise train and test features live on different scales.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

len(X_test[0])

9

## **3) PyTorch Neural Network**

In [None]:
# Converting numpy arrays to torch tensors of dtype float32
%time
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

train_data = TensorDataset(torch.tensor(X_train, dtype = torch.float32), torch.tensor(y_train, dtype = torch.float32))
test_data = TensorDataset(torch.tensor(X_test, dtype = torch.float32), torch.tensor(y_test, dtype = torch.float32))

# Creating data loader objects that'll supply batches of data to our model
train = DataLoader(dataset = train_data, sampler = RandomSampler(train_data), batch_size = 16)
test = DataLoader(dataset = test_data, batch_size = 1)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.72 µs


In [None]:
# Defining the Classification Neural Network: 1 input layer, 1 hidden layer, 1 output layer
class ClassificationNN(nn.Module):
  """Fully connected network: two ReLU-activated linear layers (100 and 200
  units) followed by a linear output layer that returns un-activated scores.

  Args:
    input_dim: number of input features per example.
    output_dim: number of values produced per example.
  """

  def __init__(self, input_dim, output_dim):
    # Initialise nn.Module before registering any layers
    super().__init__()

    # Layers are created in input -> hidden -> output order
    self.input1 = nn.Linear(input_dim, 100)
    self.hidden2 = nn.Linear(100, 200)
    self.output3 = nn.Linear(200, output_dim)

  def forward(self, x):
    """Run the forward pass and return the raw output of the last linear layer."""
    first = F.relu(self.input1(x))
    second = F.relu(self.hidden2(first))
    return self.output3(second)

# Creating an object of our neural network class
input_dim = len(X_train[0])   # number of feature columns in a training row
output_dim = 1                # a single output value per example

# The training and testing loops push every batch to `device`, so the model's
# parameters must live on the same device or the forward pass fails on CUDA.
model = ClassificationNN(input_dim = input_dim, output_dim = output_dim).to(device)
print(model)

ClassificationNN(
  (input1): Linear(in_features=9, out_features=100, bias=True)
  (hidden2): Linear(in_features=100, out_features=200, bias=True)
  (output3): Linear(in_features=200, out_features=1, bias=True)
)


In [None]:
# Optimizer
%time
from torch.optim import Adam

optimizer = Adam(params = model.parameters(), lr = 0.02, eps = 2e-3, amsgrad = True)

# Loss Function: Cross Entropy Loss
criterion = nn.BCEWithLogitsLoss()

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.63 µs


In [None]:
# Training loop
%time

# Putting model in train mode
model.train()

# Epochs 
epochs = 20

for epoch in range(epochs):
  print('\nEpoch {}/{}'.format(epoch + 1, epochs))
  print('------------')

  # Calculating training loss for every epoch
  train_loss = 0.0

  # Training batches
  for features, target in train:
    
    # Push variables to device
    features, target = features.to(device), target.to(device)

    # Clear out gradients from previous training batch
    optimizer.zero_grad()

    # Forward pass; feed inputs to model & get outputs
    outputs = model(features)
    
    # Calculate loss between model's predictions & actual target
    loss = criterion(outputs, target)
    
    train_loss += loss.item()

    # Back propagate loss throughout the neural network
    loss.backward()

    # Update parameters based on the current gradient
    optimizer.step()
  
  print('Training Loss: ', train_loss/len(train))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.48 µs

Epoch 1/20
------------
Training Loss:  0.3237581169931218

Epoch 2/20
------------
Training Loss:  0.19171019072333972

Epoch 3/20
------------
Training Loss:  0.1645514545030892

Epoch 4/20
------------
Training Loss:  0.14227132067705195

Epoch 5/20
------------
Training Loss:  0.14220010687907536

Epoch 6/20
------------
Training Loss:  0.1531794733988742

Epoch 7/20
------------
Training Loss:  0.14538309685885906

Epoch 8/20
------------
Training Loss:  0.12394413854926825

Epoch 9/20
------------
Training Loss:  0.1244189123933514

Epoch 10/20
------------
Training Loss:  0.11477905033777157

Epoch 11/20
------------
Training Loss:  0.12007809989154339

Epoch 12/20
------------
Training Loss:  0.11955614139636357

Epoch 13/20
------------
Training Loss:  0.11661903696755568

Epoch 14/20
------------
Training Loss:  0.11131213294963042

Epoch 15/20
------------
Training Loss:  0.11643943358212709

Epoch 16/20
------

In [None]:
# Testing loop
%time

# Variables to track predictions & target
preds, targets = [], []

# Putting model in test mode
model.eval()

# Disabling gradient calculation with no_grad(). Deactivates auto_grad engine 
# and reduces memory usage and speeds up computations but back propagation not possible.
with torch.no_grad():
  for features, target in test:

    # Pushing data to the device
    features = features.to(device)

    # Perform forward pass, collect outputs
    output = model(features)

    # Deatch predictions from the graph and append to list
    preds.append(output.detach().cpu().numpy())
    targets.append(target.numpy())

# Converting preds to a simple list from a list of arrays
preds = [0 if x[0][0] < 0 else 1 for x in preds]
y_test = [x for x in y_test]

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 8.58 µs


In [None]:
# Scoring the PyTorch predictions against the held-out targets
from sklearn.metrics import accuracy_score, classification_report
from pprint import pprint

# Overall accuracy plus the per-class report
acc, report = accuracy_score(y_test, preds), classification_report(y_test, preds)

print('Accuracy: {}\n\n{}'.format(acc, report))


Accuracy: 0.9833333333333333

              precision    recall  f1-score   support

           0       0.98      1.00      0.99        59
           1       0.00      0.00      0.00         1

    accuracy                           0.98        60
   macro avg       0.49      0.50      0.50        60
weighted avg       0.97      0.98      0.98        60



## **4) Keras Neural Network**

In [None]:
# Importing keras modules
from keras.models import Sequential
from keras.layers import Dense

# Same layer sizes as the PyTorch network (100 -> 200 -> 1), with a sigmoid output.
# NOTE: the variable is called `regressor` but the model is a binary classifier;
# the name is kept so other cells that reference it keep working.
regressor = Sequential()
regressor.add(Dense(units = 100, activation = 'relu', input_dim = input_dim))
regressor.add(Dense(units = 200, activation = 'relu'))
regressor.add(Dense(units = 1, activation = 'sigmoid'))

# Compiling the model with adam optimizer
regressor.compile(optimizer = 'adam', metrics = ['accuracy'], loss = 'binary_crossentropy')

# Training the model
history = regressor.fit(X_train, y_train, batch_size = 16, epochs = 20, verbose = 0)

# Evaluating on test set.
# Sequential.predict_classes was removed in TensorFlow 2.6; for a sigmoid output the
# equivalent is thresholding the predicted probability at 0.5.
y_pred = (regressor.predict(X_test) > 0.5).astype('int32')

# Printing metrics
acc_k = accuracy_score(y_test, y_pred)
report_k = classification_report(y_test, y_pred)

print('Accuracy: {}\n\n{}'.format(acc_k, report_k))

Accuracy: 0.9833333333333333

              precision    recall  f1-score   support

           0       0.98      1.00      0.99        59
           1       0.00      0.00      0.00         1

    accuracy                           0.98        60
   macro avg       0.49      0.50      0.50        60
weighted avg       0.97      0.98      0.98        60



## **5) TensorFlow Neural Network**

In [None]:
# Wrapping the numpy arrays in tf.data datasets
import tensorflow as tf

# Training pairs are served in batches of 16
train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size = 16)

# The test pairs are served as one batch containing every test example
test_data = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size = len(X_test))

In [None]:
# Creating a classification neural network
from tensorflow.keras import Model, losses, metrics, optimizers, activations
from tensorflow.keras.layers import Dense

# Classification class
class Classification(Model):
  """Binary classifier: Dense(100, relu) -> Dense(200, relu) -> Dense(1, sigmoid).

  The output is a single value in [0, 1] per example.
  """

  # Defining layer architectures in the constructor
  def __init__(self):
    # Calling the correct parent constructors in the Method Resolution Order
    super().__init__()

    self.input1 = Dense(units = 100, activation = activations.relu)
    self.hidden2 = Dense(units = 200, activation = activations.relu)
    # sigmoid instead of hard_sigmoid: hard_sigmoid is piecewise linear and clamps to
    # exactly 0 or 1 outside a narrow input range, where its gradient is zero. Once the
    # output saturates the weights stop updating and the loss stays flat.
    self.output3 = Dense(units = 1, activation = activations.sigmoid)

  # Defining forward pass computations
  def call(self, x):
    """Apply the three layers in order and return the final layer's output."""
    x = self.input1(x)
    x = self.hidden2(x)
    x = self.output3(x)

    return x

# Build the TensorFlow classifier
model = Classification()

# Adam optimizer with an explicit learning rate and epsilon
optimizer = optimizers.Adam(
  learning_rate = 0.01,
  epsilon = 2e-3,
)

# Binary cross-entropy (default settings) as the training loss
loss_object = losses.BinaryCrossentropy()

In [None]:
# Custom TensorFlow training loop
epochs = 20

for epoch in range(epochs):
  print('Epoch {}/{}'.format(epoch + 1, epochs))
  print('-----------')

  # Sum of the batch losses seen during this epoch
  train_loss = 0.0

  # One optimisation step per batch
  for x, y in train_data:

    # The tape records the forward pass so the loss can be differentiated afterwards
    with tf.GradientTape() as tape:
      outputs = model(x)
      loss = loss_object(y, outputs)

    # Bookkeeping only; kept outside the tape since it is not differentiated
    train_loss += loss

    # Gradients of the loss with respect to every trainable weight
    weights = model.trainable_weights
    grads = tape.gradient(loss, weights)

    # Apply one optimizer update using those gradients
    optimizer.apply_gradients(zip(grads, weights))

  # Mean batch loss for the epoch (len(train_data) is the number of batches)
  print('Average train loss: {}\n'.format(train_loss/len(train_data)))


Epoch 1/20
-----------
Average train loss: 0.5607510805130005

Epoch 2/20
-----------
Average train loss: 0.5784355998039246

Epoch 3/20
-----------
Average train loss: 0.5784355998039246

Epoch 4/20
-----------
Average train loss: 0.5784355998039246

Epoch 5/20
-----------
Average train loss: 0.5784355998039246

Epoch 6/20
-----------
Average train loss: 0.5784355998039246

Epoch 7/20
-----------
Average train loss: 0.5784355998039246

Epoch 8/20
-----------
Average train loss: 0.5784355998039246

Epoch 9/20
-----------
Average train loss: 0.5784355998039246

Epoch 10/20
-----------
Average train loss: 0.5784355998039246

Epoch 11/20
-----------
Average train loss: 0.5784355998039246

Epoch 12/20
-----------
Average train loss: 0.5784355998039246

Epoch 13/20
-----------
Average train loss: 0.5784355998039246

Epoch 14/20
-----------
Average train loss: 0.5784355998039246

Epoch 15/20
-----------
Average train loss: 0.5784355998039246

Epoch 16/20
-----------
Average train loss: 0.578

In [None]:
# Testing loop
# BinaryAccuracy thresholds the predicted probability (0.5 by default) before comparing
# it with the label. metrics.Accuracy checks exact equality instead, so it only counts a
# prediction as correct when the float output is exactly 0.0 or 1.0.
accuracy = metrics.BinaryAccuracy()
precision = metrics.Precision()
recall = metrics.Recall()

for x, y in test_data:
  # Feed inputs to the model, collect outputs
  output = model(x)

  # Accumulate accuracy, precision & recall over this batch
  accuracy.update_state(y, output)
  precision.update_state(y, output)
  recall.update_state(y, output)

# Read the aggregated values once, after every batch has been accumulated
acc = accuracy.result().numpy()
prec = precision.result().numpy()
rec = recall.result().numpy()

# Printing metrics
print('Performance of TensorFlow Model\n\nAccuracy: {}\nPrecision: {}\nRecall: {}\n'.format(acc, prec, rec))

(<tf.Tensor: shape=(60, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]], dtype=float32)>, <tf.Tensor: shape=(60, 1), dtype=int64, numpy=
array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0

array([0, 1])