In [24]:
# Regression using ANN in PyTorch

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [25]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [26]:
# Load the dataset
df = pd.read_csv('50_Startups.csv')
df.dropna(inplace=True)

print(df.head())

   R&D Spend  Administration  Marketing Spend       State     Profit
0  165349.20       136897.80        471784.10    New York  192261.83
1  162597.70       151377.59        443898.53  California  191792.06
2  153441.51       101145.55        407934.54     Florida  191050.39
3  144372.41       118671.85        383199.62    New York  182901.99
4  142107.34        91391.77        366168.42     Florida  166187.94


In [27]:

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   R&D Spend        50 non-null     float64
 1   Administration   50 non-null     float64
 2   Marketing Spend  50 non-null     float64
 3   State            50 non-null     object 
 4   Profit           50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


In [28]:
df.columns

Index(['R&D Spend', 'Administration', 'Marketing Spend', 'State', 'Profit'], dtype='object')

In [29]:
# df[["State"]].get_values()

In [30]:
# Perform one-hot encoding
data = pd.get_dummies(df, columns=["State"])

# Display the encoded DataFrame
print(data)

    R&D Spend  Administration  Marketing Spend     Profit  State_California  \
0   165349.20       136897.80        471784.10  192261.83             False   
1   162597.70       151377.59        443898.53  191792.06              True   
2   153441.51       101145.55        407934.54  191050.39             False   
3   144372.41       118671.85        383199.62  182901.99             False   
4   142107.34        91391.77        366168.42  166187.94             False   
5   131876.90        99814.71        362861.36  156991.12             False   
6   134615.46       147198.87        127716.82  156122.51              True   
7   130298.13       145530.06        323876.68  155752.60             False   
8   120542.52       148718.95        311613.29  152211.77             False   
9   123334.88       108679.17        304981.62  149759.96              True   
10  101913.08       110594.11        229160.95  146121.95             False   
11  100671.96        91790.61        249744.55  1442

In [31]:
data.columns

Index(['R&D Spend', 'Administration', 'Marketing Spend', 'Profit',
       'State_California', 'State_Florida', 'State_New York'],
      dtype='object')

In [32]:
# Features and label
X = data[['R&D Spend', 'Administration', 'Marketing Spend', 'State_California', 'State_Florida', 'State_New York']].values  # input feature
y = data['Profit'].values  # target variable

In [33]:
X

array([[165349.2, 136897.8, 471784.1, False, False, True],
       [162597.7, 151377.59, 443898.53, True, False, False],
       [153441.51, 101145.55, 407934.54, False, True, False],
       [144372.41, 118671.85, 383199.62, False, False, True],
       [142107.34, 91391.77, 366168.42, False, True, False],
       [131876.9, 99814.71, 362861.36, False, False, True],
       [134615.46, 147198.87, 127716.82, True, False, False],
       [130298.13, 145530.06, 323876.68, False, True, False],
       [120542.52, 148718.95, 311613.29, False, False, True],
       [123334.88, 108679.17, 304981.62, True, False, False],
       [101913.08, 110594.11, 229160.95, False, True, False],
       [100671.96, 91790.61, 249744.55, True, False, False],
       [93863.75, 127320.38, 249839.44, False, True, False],
       [91992.39, 135495.07, 252664.93, True, False, False],
       [119943.24, 156547.42, 256512.92, False, True, False],
       [114523.61, 122616.84, 261776.23, False, False, True],
       [78013.11, 

In [34]:
# Feature Scaling
scaler_X = StandardScaler()
scaler_y = StandardScaler()

In [35]:
X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y.reshape(-1, 1))  # Reshape y for scaling

In [36]:
# Convert to tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [37]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:

# Send data to device
X_train, X_test = X_train.to(device), X_test.to(device)
y_train, y_test = y_train.to(device), y_test.to(device)

In [42]:
# Define a simple ANN model for regression
class RegressionANN(nn.Module):
    def __init__(self):
        super(RegressionANN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(6, 20),
            nn.ReLU(),
            nn.Linear(20, 10),
            nn.ReLU(),
            nn.Linear(10, 5),
            nn.ReLU(),
            nn.Linear(5, 1)
            
            # No output function because we want to take the output as it is without modification of the linear function
        )

    def forward(self, x):
        return self.model(x)

In [43]:
# Instantiate model, loss function and optimizer
model = RegressionANN().to(device)
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [44]:
# Training loop
n_epochs = 100
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Calculate training R2 Score
    with torch.no_grad():
        train_preds = scaler_y.inverse_transform(outputs.cpu().numpy())
        y_train_actual = scaler_y.inverse_transform(y_train.cpu().numpy())
        train_r2 = r2_score(y_train_actual, train_preds)

    # Evaluation on validation set
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test)
        val_loss = criterion(val_outputs, y_test)

        val_preds = scaler_y.inverse_transform(val_outputs.cpu().numpy())
        y_val_actual = scaler_y.inverse_transform(y_test.cpu().numpy())
        val_r2 = r2_score(y_val_actual, val_preds)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{n_epochs}], Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Train R2: {train_r2:.4f}, Val R2: {val_r2:.4f}")


Epoch [10/100], Train Loss: 0.6396, Val Loss: 0.1575, Train R2: 0.4084, Val R2: 0.6903
Epoch [20/100], Train Loss: 0.2682, Val Loss: 0.5719, Train R2: 0.7520, Val R2: -0.1245
Epoch [30/100], Train Loss: 0.1302, Val Loss: 0.1832, Train R2: 0.8796, Val R2: 0.6398
Epoch [40/100], Train Loss: 0.0774, Val Loss: 0.1978, Train R2: 0.9285, Val R2: 0.6111
Epoch [50/100], Train Loss: 0.0475, Val Loss: 0.1002, Train R2: 0.9560, Val R2: 0.8030
Epoch [60/100], Train Loss: 0.0343, Val Loss: 0.0522, Train R2: 0.9683, Val R2: 0.8974
Epoch [70/100], Train Loss: 0.0282, Val Loss: 0.0582, Train R2: 0.9739, Val R2: 0.8857
Epoch [80/100], Train Loss: 0.0236, Val Loss: 0.0689, Train R2: 0.9782, Val R2: 0.8645
Epoch [90/100], Train Loss: 0.0199, Val Loss: 0.0721, Train R2: 0.9816, Val R2: 0.8582
Epoch [100/100], Train Loss: 0.0169, Val Loss: 0.0810, Train R2: 0.9844, Val R2: 0.8407


In [None]:
# Final evaluation
model.eval()
with torch.no_grad():
    predictions = model(X_test)
    predictions = predictions.cpu().numpy()
    y_test_np = y_test.cpu().numpy()

    # Inverse scaling
    predictions = scaler_y.inverse_transform(predictions)
    y_actual = scaler_y.inverse_transform(y_test_np)

    mse = mean_squared_error(y_actual, predictions)
    r2 = r2_score(y_actual, predictions)
    

[[134429.23 ]
 [ 85303.05 ]
 [106222.55 ]
 [ 55579.703]
 [121513.555]
 [ 65073.992]
 [108769.16 ]
 [111240.88 ]
 [105971.21 ]
 [109801.8  ]]  :  [[134307.34 ]
 [ 81005.76 ]
 [ 99937.586]
 [ 64926.082]
 [125370.37 ]
 [ 35673.414]
 [105733.54 ]
 [107404.34 ]
 [ 97427.836]
 [122776.86 ]]


In [46]:
print("\nTest MSE:", mse)
print("R2 Score:", r2)


Test MSE: 128988160.0
R2 Score: 0.8407144510036823
