# LAB 02 - Mini projekt
## Filip Ryniewicz, Miłosz Cieśla

In [76]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import torch.utils.data as data

In [None]:
def normalize(data):
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - np.mean(data[:, i])) / np.std(data[:, i])
    return data

def prepare_data(batch_size, train_path, test_X_path, test_y_path, delimiter=","):
    train = pd.read_csv(train_path, delimiter=delimiter)
    test_X = pd.read_csv(test_X_path, delimiter=delimiter)
    test_y = pd.read_csv(test_y_path, header=None)
    train_dataset = data.TensorDataset(
        torch.from_numpy(normalize(np.delete(train.values[:, 2:-3], 1, axis=1).astype(float))),
        torch.from_numpy(train.values[:, -1].astype(float)),
    )
    test_dataset = data.TensorDataset(
        torch.from_numpy(test_X.values[:, 1:].astype(float)),
        torch.from_numpy(test_y.values.astype(float)),
    )
    return (
        data.DataLoader(
            train_dataset, batch_size=batch_size, drop_last=True, shuffle=False
        ),
        test_dataset,
    )

In [78]:
class Perceptron(nn.Module):
    def __init__(
        self,
        num_inputs=12,
        num_hidden1=64,
        num_hidden2=32,
        num_hidden3=16,
        num_outputs=1,
    ):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hidden1)
        self.act_fn1 = nn.ReLU()
        self.linear2 = nn.Linear(num_hidden1, num_hidden2)
        self.act_fn2 = nn.ReLU()
        self.linear3 = nn.Linear(num_hidden2, num_hidden3)
        self.act_fn3 = nn.ReLU()
        self.linear4 = nn.Linear(num_hidden3, num_outputs)

    def forward(self, x):
        x = self.linear1(x)
        x = self.act_fn1(x)
        x = self.linear2(x)
        x = self.act_fn2(x)
        x = self.linear3(x)
        x = self.act_fn3(x)
        x = self.linear4(x)
        return x

In [79]:
train_loader, test_loader = prepare_data(
    24, "data.csv", "evaluation_data.csv", "example.csv"
)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [None]:
model = Perceptron()
optimizer = torch.optim.SGD(model.parameters(), lr=5e-6)
loss_module = nn.MSELoss()

In [None]:
model.train()

for epoch in range(1500):
    for data_inputs, data_labels in train_loader:
        preds = model(data_inputs.float())
        preds = preds.squeeze(dim=1)
        loss = loss_module(preds.float(), data_labels.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch}, loss: {loss.item():.3}")


Epoch: 0, loss: 8.31e+04
Epoch: 1, loss: 3.12e+04
Epoch: 2, loss: 3.03e+04
Epoch: 3, loss: 2.89e+04
Epoch: 4, loss: 2.54e+04
Epoch: 5, loss: 1.98e+04
Epoch: 6, loss: 1.55e+04
Epoch: 7, loss: 1.42e+04
Epoch: 8, loss: 1.37e+04
Epoch: 9, loss: 1.39e+04
Epoch: 10, loss: 1.25e+04
Epoch: 11, loss: 1.27e+04
Epoch: 12, loss: 1.3e+04
Epoch: 13, loss: 1.46e+04
Epoch: 14, loss: 1.04e+04
Epoch: 15, loss: 7.53e+03
Epoch: 16, loss: 9.91e+03
Epoch: 17, loss: 1.16e+04
Epoch: 18, loss: 9.99e+03
Epoch: 19, loss: 5.9e+03
Epoch: 20, loss: 3.98e+03
Epoch: 21, loss: 5.4e+03
Epoch: 22, loss: 4.06e+03
Epoch: 23, loss: 3.7e+03
Epoch: 24, loss: 2.64e+03
Epoch: 25, loss: 2.34e+03
Epoch: 26, loss: 2.25e+03
Epoch: 27, loss: 2.73e+03
Epoch: 28, loss: 1.34e+03
Epoch: 29, loss: 1.76e+03
Epoch: 30, loss: 1.52e+03
Epoch: 31, loss: 1.4e+03
Epoch: 32, loss: 1.46e+03
Epoch: 33, loss: 1.34e+03
Epoch: 34, loss: 1.26e+03
Epoch: 35, loss: 1.13e+03
Epoch: 36, loss: 1.26e+03
Epoch: 37, loss: 1.12e+03
Epoch: 38, loss: 1.13e+03
E

In [None]:
model.eval()

total_predictions = np.array([])
test_data = pd.read_csv("evaluation_data.csv").values[:, 1:].astype(float)
test_data = torch.tensor(test_data).float()
test_data = np.delete(test_data, 1, axis=1)
with torch.no_grad():
    preds = model(test_data)

test_labels = pd.read_csv("example.csv").values[:, 1:].astype(float)

# print(f"RMSLE: {rmsle(test_labels, preds.values)}")
pred_df = pd.DataFrame(preds.detach().numpy())
pred_df.to_csv("predictions.csv", index=False, header=None)