In [5]:
# We usually don't use PyTorch for simple linear regression (standalone linear classifier; not in neural networks). Scikit-learn is typically preferred due to its simplicity. PyTorch is mainly used for neural network for complex datasets

In [6]:
# Linear Regression with multiple targets (outputs)

In [7]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('FuelConsumption.csv')
df.head()

x_num = df[['ENGINESIZE', 'CYLINDERS']]
x_cat = df[['MODELYEAR', 'VEHICLECLASS', 'TRANSMISSION', 'FUELTYPE']]
x_cat = pd.get_dummies(x_cat, drop_first=True)
X = pd.concat([x_num, x_cat], axis=1)
X = X.values
X = StandardScaler().fit_transform(X)

y = df[['FUELCONSUMPTION_COMB_MPG', 'CO2EMISSIONS']].values

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

X_train = torch.FloatTensor(X_train)
X_val = torch.FloatTensor(X_val)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_val = torch.FloatTensor(y_val)
y_test = torch.FloatTensor(y_test)

In [8]:
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn

    
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


class MultLinRegModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, 2)  # 2: output size

    def forward(self, x):
        return self.linear(x)  # takes your input data x and passes it through the linear model (which applies the equation y = wx + b, where w and b are the learned weights and bias) to get the predicted output.
    

input_size = X_train.shape[1]
model = MultLinRegModel(input_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        train_pred = model(batch_X)
        loss = criterion(train_pred, batch_y)
        loss.backward()
        optimizer.step()

        # Accumulate batch losses
        total_train_loss += loss.item()
    
    # Calculate average training loss for the epoch
    avg_train_loss = total_train_loss / len(train_loader)       

    model.eval()
    with torch.no_grad():
        val_pred = model(X_val)
        val_loss = criterion(val_pred, y_val)

    if (epoch + 1) % 10 == 0:  # print losses for each 10 epochs
         print(f'Epoch [{epoch+1}/{num_epochs}], Average Train Loss: {avg_train_loss:.4f}, Val Loss: {val_loss.item():.4f}')


model.eval()
with torch.no_grad():
    test_pred = model(X_test)
    test_loss = criterion(test_pred, y_test)
    print(f'\nTest Loss: {test_loss.item():.4f}')

Epoch [10/100], Average Train Loss: 34290.4996, Val Loss: 33828.0547
Epoch [20/100], Average Train Loss: 33189.0681, Val Loss: 33043.9766
Epoch [30/100], Average Train Loss: 32806.5077, Val Loss: 32319.5566
Epoch [40/100], Average Train Loss: 31869.1886, Val Loss: 31611.3535
Epoch [50/100], Average Train Loss: 31079.8079, Val Loss: 30954.1816
Epoch [60/100], Average Train Loss: 30281.9147, Val Loss: 30328.3555
Epoch [70/100], Average Train Loss: 29542.9737, Val Loss: 29725.9414
Epoch [80/100], Average Train Loss: 28908.5719, Val Loss: 29148.4258
Epoch [90/100], Average Train Loss: 28428.7201, Val Loss: 28591.6934
Epoch [100/100], Average Train Loss: 27814.9784, Val Loss: 28049.7910

Test Loss: 28103.8301
