In [2]:
# Build an ANN regression model for the 50_Startups dataset.
# Dataset: how each startup splits its spending (R&D, administration, marketing), its state, and the profit it earns.
# Goal: predict the profit of a new startup from its spending and the other features.

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

# 1. preprocessing

In [4]:
# Read the raw dataset from the current working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="./50_Startups.csv")
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [5]:
# The dataset has no missing values, so no imputation is needed.
# Features = every column except the last one; target = the last column,
# reshaped into a column vector of shape (n_samples, 1).
X_independent = df.iloc[:, :-1].to_numpy()
y_dependent = df.iloc[:, -1].to_numpy().reshape(-1, 1)

print(X_independent.shape)
print(X_independent[:10])
print("")
print("y_dependent: ", y_dependent[:10])
# Container type and element type of the feature matrix.
print(type(X_independent))
print(type(X_independent[0, 0]))

(50, 4)
[[165349.2 136897.8 471784.1 'New York']
 [162597.7 151377.59 443898.53 'California']
 [153441.51 101145.55 407934.54 'Florida']
 [144372.41 118671.85 383199.62 'New York']
 [142107.34 91391.77 366168.42 'Florida']
 [131876.9 99814.71 362861.36 'New York']
 [134615.46 147198.87 127716.82 'California']
 [130298.13 145530.06 323876.68 'Florida']
 [120542.52 148718.95 311613.29 'New York']
 [123334.88 108679.17 304981.62 'California']]

y_dependent:  [[192261.83]
 [191792.06]
 [191050.39]
 [182901.99]
 [166187.94]
 [156991.12]
 [156122.51]
 [155752.6 ]
 [152211.77]
 [149759.96]]
<class 'numpy.ndarray'>
<class 'float'>


In [6]:
# One-hot encode the categorical column at index 3 (the state name).
# remainder="passthrough" keeps the other columns instead of dropping them.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [3])],
    remainder="passthrough",
)
encoded_features = ct.fit_transform(X_independent)
X_independent = np.array(encoded_features)
print(X_independent[:10])



[[0.0 0.0 1.0 165349.2 136897.8 471784.1]
 [1.0 0.0 0.0 162597.7 151377.59 443898.53]
 [0.0 1.0 0.0 153441.51 101145.55 407934.54]
 [0.0 0.0 1.0 144372.41 118671.85 383199.62]
 [0.0 1.0 0.0 142107.34 91391.77 366168.42]
 [0.0 0.0 1.0 131876.9 99814.71 362861.36]
 [1.0 0.0 0.0 134615.46 147198.87 127716.82]
 [0.0 1.0 0.0 130298.13 145530.06 323876.68]
 [0.0 0.0 1.0 120542.52 148718.95 311613.29]
 [1.0 0.0 0.0 123334.88 108679.17 304981.62]]


In [7]:
# Hold out 30% of the rows as a test set; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_independent,
    y_dependent,
    test_size=0.3,
    random_state=101,
)
print("X_train shape: ", X_train.shape)
print("X_train: ", X_train)
print("")
print("y_test shape: ", y_test.shape)
print("y_test: ", y_test)
print(type(X_train))
print(type(X_train[1]))

# Author's note: the arrays obtained through df.iloc[...].values carry an
# unsuitable dtype and must be converted explicitly. The cast is only required
# when the scaling step is skipped (the scalers take care of it); without it
# torch raises: TypeError: can't convert np.ndarray of type numpy.object_.
X_train, X_test, y_train, y_test = (
    split.astype(np.float32) for split in (X_train, X_test, y_train, y_test)
)

print(type(X_train[0][0]))

X_train shape:  (35, 6)
X_train:  [[0.0 0.0 1.0 144372.41 118671.85 383199.62]
 [0.0 1.0 0.0 27892.92 84710.77 164470.71]
 [0.0 1.0 0.0 61994.48 115641.28 91131.24]
 [0.0 0.0 1.0 114523.61 122616.84 261776.23]
 [1.0 0.0 0.0 76253.86 113867.3 298664.47]
 [0.0 0.0 1.0 15505.73 127382.3 35534.17]
 [0.0 0.0 1.0 20229.59 65947.93 185265.1]
 [1.0 0.0 0.0 22177.74 154806.14 28334.72]
 [1.0 0.0 0.0 38558.51 82982.09 174999.3]
 [0.0 1.0 0.0 101913.08 110594.11 229160.95]
 [1.0 0.0 0.0 0.0 116983.8 45173.06]
 [1.0 0.0 0.0 64664.71 139553.16 137962.62]
 [0.0 1.0 0.0 55493.95 103057.49 214634.81]
 [0.0 1.0 0.0 28663.76 127056.21 201126.82]
 [0.0 1.0 0.0 153441.51 101145.55 407934.54]
 [0.0 0.0 1.0 72107.6 127864.55 353183.81]
 [1.0 0.0 0.0 46426.07 157693.92 210797.67]
 [0.0 0.0 1.0 46014.02 85047.44 205517.64]
 [0.0 0.0 1.0 120542.52 148718.95 311613.29]
 [0.0 0.0 1.0 86419.7 153514.11 0.0]
 [0.0 0.0 1.0 65605.48 153032.06 107138.38]
 [0.0 1.0 0.0 93863.75 127320.38 249839.44]
 [0.0 0.0 1.0 13187

In [8]:
# Experiment ideas:
#     - scale X and y with the same scaler.
#     - do not scale the categorical (one-hot) columns, only the numerical ones.
#
# What this cell does: standardize every column of X (one-hot columns included)
# and min-max scale y. Each scaler is fitted on the training split only and
# then applied to both splits.

std_scaler = StandardScaler()
std_scaler.fit(X_train)
X_train = std_scaler.transform(X_train)
X_test = std_scaler.transform(X_test)

minmax_scaler = MinMaxScaler()
minmax_scaler.fit(y_train)
y_train = minmax_scaler.transform(y_train)
y_test = minmax_scaler.transform(y_test)

print("X_train scaled: ", X_train)
print("")
print("y_test scaled: ", y_test)
# Container, row and element types after scaling.
print(type(X_train))
print(type(X_train[0]))
print(type(X_train[0][0]))

X_train scaled:  [[-6.7700320e-01 -6.3245559e-01  1.2247449e+00  1.5258845e+00
  -5.3207688e-02  1.4378211e+00]
 [-6.7700320e-01  1.5811388e+00 -8.1649661e-01 -1.0793741e+00
  -1.2595519e+00 -3.9302912e-01]
 [-6.7700320e-01  1.5811388e+00 -8.1649661e-01 -3.1663558e-01
  -1.6085771e-01 -1.0069102e+00]
 [-6.7700320e-01 -6.3245559e-01  1.2247449e+00  8.5826629e-01
   8.6923853e-02  4.2145783e-01]
 [ 1.4770980e+00 -6.3245559e-01 -8.1649661e-01  2.2992694e-03
  -2.2387207e-01  7.3022741e-01]
 [-6.7700320e-01 -6.3245559e-01  1.2247449e+00 -1.3564342e+00
   2.5619930e-01 -1.4722804e+00]
 [-6.7700320e-01 -6.3245559e-01  1.2247449e+00 -1.2507771e+00
  -1.9260339e+00 -2.1897160e-01]
 [ 1.4770980e+00 -6.3245559e-01 -8.1649661e-01 -1.2072036e+00
   1.2303319e+00 -1.5325427e+00]
 [ 1.4770980e+00 -6.3245559e-01 -8.1649661e-01 -8.4082019e-01
  -1.3209569e+00 -3.0490050e-01]
 [-6.7700320e-01  1.5811388e+00 -8.1649661e-01  5.7621062e-01
  -3.4014019e-01  1.4845464e-01]
 [ 1.4770980e+00 -6.3245559e-01 -

In [9]:
# Convert all four NumPy splits to float32 torch tensors.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split).float()
    for split in (X_train, X_test, y_train, y_test)
)

print(X_train)

tensor([[-6.7700e-01, -6.3246e-01,  1.2247e+00,  1.5259e+00, -5.3208e-02,
          1.4378e+00],
        [-6.7700e-01,  1.5811e+00, -8.1650e-01, -1.0794e+00, -1.2596e+00,
         -3.9303e-01],
        [-6.7700e-01,  1.5811e+00, -8.1650e-01, -3.1664e-01, -1.6086e-01,
         -1.0069e+00],
        [-6.7700e-01, -6.3246e-01,  1.2247e+00,  8.5827e-01,  8.6924e-02,
          4.2146e-01],
        [ 1.4771e+00, -6.3246e-01, -8.1650e-01,  2.2993e-03, -2.2387e-01,
          7.3023e-01],
        [-6.7700e-01, -6.3246e-01,  1.2247e+00, -1.3564e+00,  2.5620e-01,
         -1.4723e+00],
        [-6.7700e-01, -6.3246e-01,  1.2247e+00, -1.2508e+00, -1.9260e+00,
         -2.1897e-01],
        [ 1.4771e+00, -6.3246e-01, -8.1650e-01, -1.2072e+00,  1.2303e+00,
         -1.5325e+00],
        [ 1.4771e+00, -6.3246e-01, -8.1650e-01, -8.4082e-01, -1.3210e+00,
         -3.0490e-01],
        [-6.7700e-01,  1.5811e+00, -8.1650e-01,  5.7621e-01, -3.4014e-01,
          1.4845e-01],
        [ 1.4771e+00, -6.3246e

# 2. building the model

In [10]:
# Log of evaluated runs: one dict (scores + hyperparameters) per run.
models = list()

In [11]:
# Hyperparameters for the network and the training loop.

n_neurons = 16                           # units in each hidden layer
n_epochs = 600                           # number of training iterations
input_size = X_train.shape[1]            # number of feature columns
learning_batch_size = X_train.shape[0]   # BGD -> batch gradient descent, because of small dataset
alpha_learning_rate = 1e-3               # step size handed to the optimizer

In [12]:
class Network(nn.Module):
    """Fully connected regression network with two ReLU hidden layers.

    Layout: input_size -> hidden -> hidden -> 1. The output layer has no
    activation, so the prediction is a single unbounded real value per sample.
    """

    def __init__(self, input_size, seed=101, hidden_size=None):
        """Create the three linear layers.

        Args:
            input_size: number of input features per sample.
            seed: passed to ``torch.manual_seed`` before the layers are
                created so the weight initialisation is reproducible.
                Note: this reseeds torch's *global* random generator.
            hidden_size: number of units in each hidden layer. When ``None``
                (the default) the notebook-level ``n_neurons`` variable is
                used, which is the original behaviour. Passing it explicitly
                removes the dependency on that global.
        """
        super().__init__()
        if hidden_size is None:
            # Backward-compatible fallback to the hyperparameter cell.
            hidden_size = n_neurons
        # torch.manual_seed returns the global Generator object; it is kept
        # under the original attribute name for compatibility.
        self.seed = torch.manual_seed(seed)
        self.fcl1 = nn.Linear(input_size, hidden_size)
        self.fcl2 = nn.Linear(hidden_size, hidden_size)
        self.fcl3 = nn.Linear(hidden_size, 1)

    def forward(self, data):
        """Run the forward pass.

        Args:
            data: tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with a last dimension of size 1 (the raw prediction).
        """
        signal = self.fcl1(data)
        signal = F.relu(signal)
        signal = self.fcl2(signal)
        signal = F.relu(signal)
        return self.fcl3(signal)


In [13]:
# Build the model. No DataLoader is used in this notebook: the training loop
# feeds the whole training set to the network in every epoch.
brain = Network(input_size=input_size)

In [14]:
# Adam optimizer over all parameters of the network, using the configured learning rate.
optimizer = Adam(params=brain.parameters(), lr=alpha_learning_rate)

# 3. train

In [15]:
# Full-batch training: every epoch is one forward/backward pass over X_train.
for epoch in range(1, n_epochs + 1):
    # Drop the gradients left over from the previous step.
    optimizer.zero_grad()

    predictions = brain(X_train)
    loss = F.mse_loss(predictions, y_train)
    loss.backward()
    optimizer.step()

    # Report the loss (computed before this epoch's update) every 10th epoch.
    if not epoch % 10:
        print("\r Epoch: {} \t Loss: {:.4f}".format(epoch, loss.item()))


 Epoch: 10 	 Loss: 0.2049
 Epoch: 20 	 Loss: 0.1642
 Epoch: 30 	 Loss: 0.1324
 Epoch: 40 	 Loss: 0.1032
 Epoch: 50 	 Loss: 0.0749
 Epoch: 60 	 Loss: 0.0509
 Epoch: 70 	 Loss: 0.0332
 Epoch: 80 	 Loss: 0.0214
 Epoch: 90 	 Loss: 0.0139
 Epoch: 100 	 Loss: 0.0093
 Epoch: 110 	 Loss: 0.0064
 Epoch: 120 	 Loss: 0.0045
 Epoch: 130 	 Loss: 0.0033
 Epoch: 140 	 Loss: 0.0026
 Epoch: 150 	 Loss: 0.0022
 Epoch: 160 	 Loss: 0.0020
 Epoch: 170 	 Loss: 0.0018
 Epoch: 180 	 Loss: 0.0017
 Epoch: 190 	 Loss: 0.0016
 Epoch: 200 	 Loss: 0.0015
 Epoch: 210 	 Loss: 0.0014
 Epoch: 220 	 Loss: 0.0013
 Epoch: 230 	 Loss: 0.0012
 Epoch: 240 	 Loss: 0.0012
 Epoch: 250 	 Loss: 0.0011
 Epoch: 260 	 Loss: 0.0010
 Epoch: 270 	 Loss: 0.0010
 Epoch: 280 	 Loss: 0.0009
 Epoch: 290 	 Loss: 0.0009
 Epoch: 300 	 Loss: 0.0008
 Epoch: 310 	 Loss: 0.0008
 Epoch: 320 	 Loss: 0.0008
 Epoch: 330 	 Loss: 0.0007
 Epoch: 340 	 Loss: 0.0007
 Epoch: 350 	 Loss: 0.0007
 Epoch: 360 	 Loss: 0.0007
 Epoch: 370 	 Loss: 0.0007
 Epoch: 38

# check accuracy

In [18]:
from sklearn.metrics import r2_score

In [20]:
# Evaluate on the held-out test split. y_test and y_pred are both in the
# min-max-scaled target space, so the scores below refer to the scaled target.
brain.eval()
with torch.no_grad():
    y_pred = brain(X_test)

# Hand explicit NumPy arrays to NumPy / scikit-learn instead of relying on
# implicit tensor conversion.
y_true_np = y_test.numpy()
y_pred_np = y_pred.numpy()

concatenated = np.concatenate((y_true_np, y_pred_np), axis=1)
score_r2 = float(r2_score(y_true_np, y_pred_np))
# .item() yields a plain Python float, so the log holds numbers, not 0-dim tensors.
score_mse = F.mse_loss(y_pred, y_test).item()

run_record = {
    "score_r2" : score_r2,
    "score_mse" : score_mse,
    "n_neurons" : n_neurons,
    "n_epochs" : n_epochs,
    "alpha" : alpha_learning_rate,
    # Must describe the preprocessing actually applied in the scaling cell
    # (StandardScaler on X, MinMaxScaler on y).
    "scaler" : "features=std, targets=minmax"
}
# Re-running this cell must not log the same run twice.
if run_record not in models:
    models.append(run_record)

print("concatenated: \n", pd.DataFrame(concatenated, columns=["y_true", "y_pred"]))
print("score_r2: ", score_r2)
print("score_mse: ", score_mse)

concatenated: 
       y_true    y_pred
0   0.423852  0.440186
1   0.664044  0.796265
2   0.544157  0.591399
3   0.465966  0.473294
4   0.538747  0.590854
5   0.997355  1.013072
6   0.512737  0.535591
7   0.196020  0.420845
8   0.319951  0.251292
9   0.156990  0.047351
10  0.632454  0.590646
11  0.528609  0.513314
12  0.794407  0.823783
13  0.282940  0.212694
14  0.617103  0.686122
score_r2:  0.8504735408826221
score_mse:  tensor(0.0069)


In [100]:
# Custom "accuracy" for a regression model (originally suggested by ChatGPT).
# A prediction counts as correct when |y_pred - y_true| <= threshold.
# The threshold is an absolute difference in the units of y_test / y_pred,
# not a percentage of the true value.

threshold = 0.1  # adjust to the tolerance that is acceptable for the problem

correct_predictions = (y_pred - y_test).abs() <= threshold
accuracy = int(correct_predictions.sum()) / y_test.shape[0]

print("Custom Accuracy:", accuracy * 100, "%")

Custom Accuracy: 80.0 %


In [113]:
# Show every logged run, one record per line.
for run_entry in models:
    print(run_entry)

{'score_r2': 0.7812195667807333, 'score_mse': tensor(0.0101), 'n_neurons': 8, 'n_epochs': 400, 'alpha': 0.001, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.7812195667807333, 'score_mse': tensor(0.0101), 'n_neurons': 8, 'n_epochs': 400, 'alpha': 0.001, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.8478386686655506, 'score_mse': tensor(0.0070), 'n_neurons': 8, 'n_epochs': 600, 'alpha': 0.001, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.8475171497038081, 'score_mse': tensor(0.0070), 'n_neurons': 8, 'n_epochs': 800, 'alpha': 0.001, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.7950949183930469, 'score_mse': tensor(0.0094), 'n_neurons': 8, 'n_epochs': 600, 'alpha': 0.003, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.7976605602500896, 'score_mse': tensor(0.0093), 'n_neurons': 8, 'n_epochs': 800, 'alpha': 0.003, 'scaler': 'features=std, targets=minmax'}
{'score_r2': 0.8504687281818312, 'score_mse': tensor(0.0069), 'n_neurons': 16, 'n_

In [None]:
# Persist the learned parameters (the state_dict only, not the whole module).
save_name = "50_Startups_checkpoint.pth"
torch.save(obj=brain.state_dict(), f=save_name)
print("saved as: ", save_name)