In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [2]:
# read in data and turn to tensor

class JointDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

df = pd.read_csv("mldata_log_est.csv")
colnames = df.columns
X = df[colnames[:-1]].to_numpy(dtype=float)
y = df[colnames[-1]].to_numpy(dtype=float)

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3)

sc_x = StandardScaler()
sc_y = StandardScaler()

X_train_std = sc_x.fit_transform(X_train)
y_train_std = sc_y.fit_transform(y_train.reshape(-1,1))

X_test_std_tensor = torch.from_numpy(sc_x.transform(X_test))

X_train_std_tensor = torch.from_numpy(X_train_std)
y_train_std_tensor = torch.from_numpy(y_train_std)

train_ds = JointDataset(X_train_std_tensor, y_train_std_tensor)

batch_size=3 # can do 3, 9697, 
train_dl = DataLoader(dataset=train_ds, batch_size=3, shuffle=True)

In [3]:
# Just checking if my GPU has CUDA capability. It does not :/

torch.cuda.is_available()

False

In [4]:
# Making the model

class zGuess_dNN(nn.Module):
    def __init__(self):
        super().__init__()
        # got idea from https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-create-a-neural-network-for-regression-with-pytorch.md
        self.layers = nn.Sequential(
            nn.Linear(7,64),
            nn.Dropout(p=0.5),
            nn.Linear(64,16),
            nn.Dropout(p=0.5),
            nn.Linear(16,8),
            nn.Dropout(p=0.5),
            nn.Linear(8,1)
        )
    def forward(self, x):
        return self.layers(x)
    
model = zGuess_dNN()
loss_fn = nn.CrossEntropyLoss(reduction='mean')
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [5]:
# Training

num_epochs = 100
mse_train_arr = []
epoch_train_arr = []
for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        pred = model(x_batch.float())
        loss = loss_fn(pred, y_batch.float())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        mse_train_arr.append(loss)
        epoch_arr.append(epoch)

In [6]:
y_pred_tensor = model(X_test_std_tensor.float())
y_pred = y_pred_tensor.cpu().detach().numpy()
print(mean_squared_error( sc_y.inverse_transform(y_pred), y_test))
print(r2_score( sc_y.inverse_transform(y_pred), y_test))



0.9636027476140815
-6.078607093752186
