<a href="https://colab.research.google.com/github/saicharan7479/pytorch-notes/blob/main/automobile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Car price predictor by using machine learning
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [2]:
# Define column names for the automobile dataset
#In the csv file there are no column names i restored from chat gpt
column_names = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight',
    'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system',
    'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm',
    'city-mpg', 'highway-mpg', 'price'
]

In [3]:
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
                 names=column_names, na_values='?')

In [4]:

#In the csv file there are no column names i restored from chat gpt
# Convert numeric columns to proper data types
numeric_columns = ['normalized-losses', 'wheel-base', 'length', 'width', 'height',
                   'curb-weight', 'engine-size', 'bore', 'stroke', 'compression-ratio',
                   'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']


In [5]:
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

In [6]:
# removing null values
df = df.dropna()

In [7]:
# Select features for prediction - using important car specifications
X = df[["engine-size", "horsepower", "curb-weight", "city-mpg"]].values
y = df[["price"]].values

In [8]:
# Manual normalization (Min-Max)
X_min = X.min(axis=0)
X_max = X.max(axis=0)

In [9]:
# got error here so used normalization, here the colab itself recommended this thing, it basically sort things from top to bottom
X_scaled = (X - X_min) / (X_max - X_min)

In [10]:
# converting into tensors
X_tensor = torch.tensor(X_scaled,
                        dtype=torch.float32)  # we take this here as float32 because X is like tensor it gives us long or float 64 we need to convert into float 32
y_tensor = torch.tensor(y, dtype=torch.float32)

In [11]:
# splitting into 80 20 80-training 20-testing
split_index = int(0.8 * len(X_tensor))
X_train = X_tensor[:split_index]
X_test = X_tensor[split_index:]
y_min, y_max = y.min(), y.max()
y_train = y_tensor[:split_index]
y_test = y_tensor[split_index:]

In [12]:
# Define model
class CarPricePredictor(nn.Module):
    def __init__(self):
        super(CarPricePredictor, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(4, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
          )

    def forward(self, x):
        return self.model(x)

In [13]:
model = CarPricePredictor()

In [14]:
# Loss function or criterion and optimizer
criterion =(nn.SmoothL1Loss())
optimizer = torch.optim.Adam(model.parameters(), lr=.01)#lr means learning rate

In [15]:
for epoch in range(300):
    model.train()
    optimizer.zero_grad()
    output = model(X_train) # code generated by chatgpt
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 50 == 0:
        model.eval()
        with torch.inference_mode():
            val_output = model(X_test)
            val_loss = criterion(val_output, y_test)
        print(f"Epoch {epoch+1} - Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

Epoch 50 - Train Loss: 10559.1680, Val Loss: 12928.1172
Epoch 100 - Train Loss: 5516.3945, Val Loss: 7147.0244
Epoch 150 - Train Loss: 2777.1370, Val Loss: 3096.5161
Epoch 200 - Train Loss: 2379.4817, Val Loss: 2605.7593
Epoch 250 - Train Loss: 2086.6260, Val Loss: 2168.2329
Epoch 300 - Train Loss: 1906.0265, Val Loss: 2087.0225


In [16]:
# Evaluation
model.eval()  # this will stop training then it starts predicting
with torch.inference_mode():  # start predicting
    predictions = model(X_test)
    for i in range(5):
        actual = y_test[i].item()
        predicted = predictions[i].item()
        print(f"Predicted: {predicted:.2f} | Actual: {actual:.2f}")

Predicted: 12897.12 | Actual: 9639.00
Predicted: 12942.09 | Actual: 9989.00
Predicted: 13325.87 | Actual: 11199.00
Predicted: 13430.81 | Actual: 11549.00
Predicted: 14213.36 | Actual: 17669.00


In [19]:
mape = torch.mean(torch.abs((y_test - predictions) / y_test)) * 100
print(f"\nMean Absolute Percentage Error (MAPE): {mape.item():.2f}%")


Mean Absolute Percentage Error (MAPE): 14.27%
