In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd
import numpy as np
import datetime
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [2]:
url = 'https://raw.githubusercontent.com/bala-1409/Price-Prediction-for-Used-Cars-Datascience-Project/refs/heads/main/Dataset/dataset.csv'
df = pd.read_csv(url)

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


In [4]:
df.drop(['Unnamed: 0','Location','New_Price'],axis=1,inplace=True)

In [5]:
df.isnull().sum()

Name                  0
Year                  0
Kilometers_Driven     0
Fuel_Type             0
Transmission          0
Owner_Type            0
Mileage               2
Engine               36
Power                36
Seats                42
Price                 0
dtype: int64

In [6]:
manufacturer = df['Name'].str.split(' ',expand=True)
df['Manufacturer'] = manufacturer[0]

In [7]:
curr_time = datetime.datetime.now()
df['Year Used']=df['Year'].apply(lambda x:curr_time.year-x)

In [8]:
df.drop(['Name','Year'],axis=1,inplace=True)

In [9]:
mileage = df['Mileage'].str.split(' ',expand=True)
df['Mileage'] = pd.to_numeric(mileage[0],errors='coerce')
df['Mileage'].fillna(df['Mileage'].astype('float').mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Mileage'].fillna(df['Mileage'].astype('float').mean(),inplace=True)


In [10]:
engine = df['Engine'].str.split(' ',expand=True)
df['Engine']=pd.to_numeric(engine[0],errors='coerce')
df['Engine'].fillna(df['Engine'].astype('float').mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Engine'].fillna(df['Engine'].astype('float').mean(),inplace=True)


In [11]:
power = df['Power'].str.split(' ',expand=True)
df['Power']=pd.to_numeric(power[0],errors='coerce')
df['Power'].fillna(df['Power'].astype('float').mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Power'].fillna(df['Power'].astype('float').mean(),inplace=True)


In [12]:
df['Seats'].fillna(df['Seats'].astype('float').mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Seats'].fillna(df['Seats'].astype('float').mean(),inplace=True)


In [13]:
df.isnull().sum()

Kilometers_Driven    0
Fuel_Type            0
Transmission         0
Owner_Type           0
Mileage              0
Engine               0
Power                0
Seats                0
Price                0
Manufacturer         0
Year Used            0
dtype: int64

In [14]:
df = pd.get_dummies(df,columns=['Fuel_Type','Transmission','Owner_Type','Manufacturer'],drop_first=True)

In [15]:
X = df.drop(['Price'],axis=1)
y = df['Price']

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((4815, 44), (1204, 44), (4815,), (1204,))

In [18]:
scaler = StandardScaler()

In [19]:
X_train = scaler.fit_transform(X_train)
x_test = scaler.transform(X_test)

In [20]:
with open('scaler.pkl','wb') as file:
  pickle.dump(scaler,file)

In [21]:
# X_train = X_train.astype('float32')
# X_test = X_test.astype('float32')

In [22]:
class CarPriceModel(nn.Module):
    def __init__ (self, NUM_FEATURES, NUM_HIDDEN):
        super(CarPriceModel, self).__init__()
        self.layer1 = nn.Linear(NUM_FEATURES, NUM_HIDDEN)
        self.layer2 = nn.Linear(NUM_HIDDEN, 10)
        self.layer3 = nn.Linear(10, 6)
        self.layer4 = nn.Linear(6, 1)
        self.relu = nn.ReLU()

        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.relu(x)
        x = self.layer3(x)
        x = self.relu(x)
        x = self.layer4(x)
        x = self.relu(x)
        return x

In [23]:
NUM_FEATURES = X_train.shape[1]
NUM_HIDDEN = 20

NUM_FEATURES, NUM_HIDDEN

(44, 20)

In [24]:
model = CarPriceModel(NUM_FEATURES, NUM_HIDDEN)

In [25]:
from torchsummary import summary

summary(model, (1, NUM_FEATURES))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 20]             900
              ReLU-2                [-1, 1, 20]               0
            Linear-3                [-1, 1, 10]             210
              ReLU-4                [-1, 1, 10]               0
            Linear-5                 [-1, 1, 6]              66
              ReLU-6                 [-1, 1, 6]               0
            Linear-7                 [-1, 1, 1]               7
              ReLU-8                 [-1, 1, 1]               0
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [26]:
loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [27]:
EPOCHS = 500

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

for epoch in range(EPOCHS):
    optimizer.zero_grad()
    y_pred = model(X_train_tensor)
    loss_value = loss(y_pred, y_train_tensor)
    loss_value.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'epoch: {epoch} loss: {loss_value.item()}')

epoch: 0 loss: 216.06275939941406
epoch: 10 loss: 216.04632568359375
epoch: 20 loss: 215.99899291992188
epoch: 30 loss: 215.72193908691406
epoch: 40 loss: 214.36204528808594
epoch: 50 loss: 211.65196228027344
epoch: 60 loss: 207.7049102783203
epoch: 70 loss: 202.0399169921875
epoch: 80 loss: 193.8059844970703
epoch: 90 loss: 181.80401611328125
epoch: 100 loss: 164.8983154296875
epoch: 110 loss: 142.2626495361328
epoch: 120 loss: 113.83426666259766
epoch: 130 loss: 81.98397064208984
epoch: 140 loss: 53.5606575012207
epoch: 150 loss: 38.06221389770508
epoch: 160 loss: 34.547935485839844
epoch: 170 loss: 32.3229866027832
epoch: 180 loss: 30.181638717651367
epoch: 190 loss: 28.86027717590332
epoch: 200 loss: 27.72224235534668
epoch: 210 loss: 26.721446990966797
epoch: 220 loss: 25.859773635864258
epoch: 230 loss: 25.098072052001953
epoch: 240 loss: 24.420686721801758
epoch: 250 loss: 23.81588363647461
epoch: 260 loss: 23.263214111328125
epoch: 270 loss: 22.7510929107666
epoch: 280 loss: 22

In [28]:
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test.astype(np.float32).values)
    y_pred = model(X_test_tensor)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)
    mse = F.mse_loss(y_pred, y_test_tensor)
    # print(f'Mean Squared Error: {mse.item()}')

Mean Squared Error: 22900508672.0


In [29]:
torch.save(model.state_dict(), 'CarPriceModel.pth')

In [30]:
torch.save(model, 'CarPriceModel.h5')

In [31]:
load_model = torch.load('CarPriceModel.h5')
load_model.eval()

  load_model = torch.load('CarPriceModel.h5')


CarPriceModel(
  (layer1): Linear(in_features=44, out_features=20, bias=True)
  (layer2): Linear(in_features=20, out_features=10, bias=True)
  (layer3): Linear(in_features=10, out_features=6, bias=True)
  (layer4): Linear(in_features=6, out_features=1, bias=True)
  (relu): ReLU()
)

In [32]:
sample = X_test.iloc[1]
sample = torch.tensor(sample.values.astype(np.float32)).unsqueeze(0)

with torch.no_grad():
    y_pred = load_model(sample)
    print(f'Predicted Price: {y_pred.item()}')

Predicted Price: 21393.142578125


In [38]:
# calculate mean_absolute_error
y_pred = model(X_test_tensor).detach()
mean_absolute_error(y_test_tensor, y_pred)



49263.05633783538