In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('CO2.csv')

del df['Model Year']
del df['Make']
del df['Model']
del df['Vehicle Class']
del df['Smog Rating']
del df['Transmission']

df = df.rename({'Engine Size(L)': 'Engine_Size','Fuel Type':'Fuel_Type','Fuel Consumption (City (L/100 km)':'Fuel_Consumption_City',
                'Fuel Consumption(Hwy (L/100 km))':'Fuel_Consumption_Higway','Fuel Consumption(Comb (mpg))':'Fuel_Consumption_miles',
                'CO2 Emissions(g/km)':'CO2_Emission_KM','CO2 Rating':'CO2_Rating','Fuel Consumption(Comb (L/100 km))':'Fuel_Consumption_km'}, axis=1) 

df.head()

Unnamed: 0,Engine_Size,Cylinders,Fuel_Type,Fuel_Consumption_City,Fuel_Consumption_Higway,Fuel_Consumption_km,Fuel_Consumption_miles,CO2_Emission_KM,CO2_Rating
0,2.4,4,Z,9.9,7.0,8.6,33,200,6
1,3.5,6,Z,12.6,9.4,11.2,25,263,4
2,2.0,4,Z,11.0,8.6,9.9,29,232,5
3,2.0,4,Z,11.3,9.1,10.3,27,242,5
4,2.0,4,Z,11.2,8.0,9.8,29,230,5


In [3]:
df.drop(df.index[df['CO2_Rating'] == 10], inplace=True)
df.drop(df.index[df['CO2_Rating'] == 1], inplace=True)
df.drop(df.index[df['CO2_Rating'] == 8], inplace=True)
df.drop(df.index[df['CO2_Rating'] == 9], inplace=True)
df.drop(df.index[df['CO2_Rating'] == 2], inplace=True)
df.drop(df.index[df['CO2_Rating'] == 7], inplace=True)

In [4]:
df['CO2_Rating'] = df['CO2_Rating'].map({3:1,4:2,5:3,6:4})

In [5]:
df=df.dropna()

In [6]:
df['CO2_Rating'].value_counts()

CO2_Rating
3    266
2    228
1    189
4    113
Name: count, dtype: int64

In [7]:
df.columns

Index(['Engine_Size', 'Cylinders', 'Fuel_Type', 'Fuel_Consumption_City',
       'Fuel_Consumption_Higway', 'Fuel_Consumption_km',
       'Fuel_Consumption_miles', 'CO2_Emission_KM', 'CO2_Rating'],
      dtype='object')

In [8]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

var = ['Fuel_Type']
  
for i in var:
    df[i] = le.fit_transform(df[i]).astype(int)

In [9]:
df.head()

Unnamed: 0,Engine_Size,Cylinders,Fuel_Type,Fuel_Consumption_City,Fuel_Consumption_Higway,Fuel_Consumption_km,Fuel_Consumption_miles,CO2_Emission_KM,CO2_Rating
0,2.4,4,3,9.9,7.0,8.6,33,200,4
1,3.5,6,3,12.6,9.4,11.2,25,263,2
2,2.0,4,3,11.0,8.6,9.9,29,232,3
3,2.0,4,3,11.3,9.1,10.3,27,242,3
4,2.0,4,3,11.2,8.0,9.8,29,230,3


In [10]:
x1 = df.drop(labels='CO2_Rating', axis=1).values
y1 = df.loc[:,'CO2_Rating'].values

In [11]:
# Data Preprocessing
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features = scaler.fit_transform(x1)

In [12]:
import imblearn
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

ros =RandomOverSampler(random_state=42)
x,y=ros.fit_resample(x1,y1)
print("OUR DATASET COUNT         : ", Counter(y1))
print("OVER SAMPLING DATA COUNT  : ", Counter(y))

OUR DATASET COUNT         :  Counter({3: 266, 2: 228, 1: 189, 4: 113})
OVER SAMPLING DATA COUNT  :  Counter({4: 266, 2: 266, 3: 266, 1: 266})


In [13]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42, stratify=y)
print("NUMBER OF TRAIN DATASET    : ", len(x_train))
print("NUMBER OF TEST DATASET      : ", len(x_test))
print("TOTAL NUMBER OF DATASET    : ", len(x_train)+len(x_test))

NUMBER OF TRAIN DATASET    :  851
NUMBER OF TEST DATASET      :  213
TOTAL NUMBER OF DATASET    :  1064


In [14]:
print("NUMBER OF TRAIN DATASET    : ", len(y_train))
print("NUMBER OF TEST DATASET      : ", len(y_test))
print("TOTAL NUMBER OF DATASET    : ", len(y_train)+len(y_test))

NUMBER OF TRAIN DATASET    :  851
NUMBER OF TEST DATASET      :  213
TOTAL NUMBER OF DATASET    :  1064


In [15]:
# Convert features and labels to torch tensors
features = torch.tensor(x, dtype=torch.float32)
labels = torch.tensor(y, dtype=torch.long)


In [16]:
# Step 2: Split the dataset into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42
)


In [17]:
# Step 3: Create DataLoader for batch processing
batch_size = 32
train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
test_dataset = torch.utils.data.TensorDataset(test_features, test_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [18]:

# Step 4: Define the neural network model
class MyModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [21]:
# Step 5: Create the model and set hyperparameters
input_size = 8
hidden_size = 128
num_classes = 5
learning_rate = 0.001
num_epochs = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = MyModel(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initialize lists to store loss and accuracy for each epoch
epoch_losses = []
epoch_accuracies = []

In [22]:
from sklearn.metrics import accuracy_score

# Step 6: Training the model
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0
    
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    average_loss = running_loss / num_batches
    
    # Calculate accuracy at the end of each epoch
    model.eval()  # Switch to evaluation mode
    with torch.no_grad():
        test_predictions = []
        for inputs, _ in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            test_predictions.extend(predicted.cpu().numpy())

        # Calculate accuracy
        accuracy = accuracy_score(test_labels, test_predictions)
        epoch_accuracies.append(accuracy)
        epoch_losses.append(average_loss)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Accuracy: {accuracy * 100:.2f}%, Loss: {average_loss:.4f}')

    model.train()  # Switch back to training mode

print('Training finished.')

Epoch [1/20], Accuracy: 53.05%, Loss: 7.8214
Epoch [2/20], Accuracy: 48.83%, Loss: 1.5057
Epoch [3/20], Accuracy: 46.48%, Loss: 0.9768
Epoch [4/20], Accuracy: 52.11%, Loss: 0.9534
Epoch [5/20], Accuracy: 69.95%, Loss: 0.8017
Epoch [6/20], Accuracy: 68.54%, Loss: 0.7355
Epoch [7/20], Accuracy: 86.85%, Loss: 0.7274
Epoch [8/20], Accuracy: 65.26%, Loss: 0.6661
Epoch [9/20], Accuracy: 84.04%, Loss: 0.6476
Epoch [10/20], Accuracy: 86.38%, Loss: 0.6155
Epoch [11/20], Accuracy: 92.49%, Loss: 0.5717
Epoch [12/20], Accuracy: 91.55%, Loss: 0.5435
Epoch [13/20], Accuracy: 65.73%, Loss: 0.5638
Epoch [14/20], Accuracy: 92.02%, Loss: 0.5317
Epoch [15/20], Accuracy: 77.46%, Loss: 0.5483
Epoch [16/20], Accuracy: 77.93%, Loss: 0.5840
Epoch [17/20], Accuracy: 80.28%, Loss: 0.4676
Epoch [18/20], Accuracy: 91.08%, Loss: 0.4468
Epoch [19/20], Accuracy: 83.10%, Loss: 0.4605
Epoch [20/20], Accuracy: 84.98%, Loss: 0.4803
Training finished.
