In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error



In [2]:
data = pd.read_pickle("data/ready_dataset.pickle")
print(type(data))
test_data = data.iloc[:40]
train_data = data.iloc[40:]

<class 'pandas.core.frame.DataFrame'>


In [3]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor(np.stack(data.loc[idx, data.columns != "target_value"].values, axis=0), dtype=torch.float32)       
        y = torch.tensor(np.array(data.iloc[idx]["target_value"], dtype=np.float32), dtype=torch.float32)
        return x, y


In [4]:
my_dataset = MyDataset(train_data)

In [5]:
my_dataset.__getitem__(0)

(tensor([[-2.7116e-04, -2.7116e-04, -2.3580e-04,  ..., -2.1271e-04,
          -2.3671e-04, -2.3671e-04],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           1.0000e+00,  1.0000e+00],
         [ 2.5207e-01,  2.5207e-01,  2.5038e-01,  ...,  3.4124e-01,
           2.6794e-01,  2.6794e-01],
         ...,
         [ 0.0000e+00,  1.0000e+00,  1.0000e+00,  ...,  1.0000e+00,
           1.0000e+00,  1.0000e+00],
         [ 0.0000e+00,  1.0000e+00,  1.0000e+00,  ...,  1.0000e+00,
           1.0000e+00,  1.0000e+00],
         [-1.0000e+07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00]]),
 tensor([31.9400,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.

In [42]:

class MyLSTM(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MyLSTM, self).__init__()
        self.lstm = torch.nn.LSTM(input_size, hidden_size)
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])
        return x


# konwersja danych na tensor i załadowanie do DataLoader


def collate_fn(batch):
    x = [item[0] for item in batch]
    y = [item[1] for item in batch]
    x = torch.nn.utils.rnn.pad_sequence(x, batch_first=True)
    y = torch.stack(y)
    return x, y

def collate_fn_lstm(batch):
    """
    Funkcja collate dla sekwencji wejściowych dla sieci LSTM.
    """
    # Sortujemy batch względem długości sekwencji wejściowych
    batch = sorted(batch, key=lambda x: x[0].shape[0], reverse=True)
    
    # Tworzymy mini-batche z wyrównaniem długości sekwencji wejściowych
    inputs = [torch.FloatTensor(item[0]) for item in batch]
    inputs = torch.nn.utils.rnn.pad_sequence(inputs, batch_first=True)
    # Tworzymy mini-batche z etykietami
    targets = [torch.FloatTensor(item[1]) for item in batch]
    targets = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True)
    
    return inputs, targets


batch_size = 4
my_dataloader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn_lstm)

# inicjalizacja modelu i uruchomienie treningu
model = MyLSTM(input_size=104, hidden_size=16, output_size=104)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1000):
    for batch_x, batch_y in my_dataloader:
        optimizer.zero_grad()
        y_pred = model(batch_x)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()

In [43]:
test_dataset = MyDataset(test_data)

In [52]:
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

y_true = []
y_pred = []
model.eval()
with torch.no_grad():
    for batch_x, batch_y in test_dataloader:
        #print(model(batch_x))
        y_true += batch_y.tolist()
        y_pred += model(batch_x).tolist()

mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"MSE: {mse:.4f}")
print(f"R^2: {r2:.4f}")
print(f"MAE: {mae:.4f}")
print(y_pred[3], y_true[3])

MSE: 0.1293
R^2: -7.7303
MAE: 0.0069
[31.785707473754883, -0.0008731884299777448, -9.0229426859878e-05, -0.0006052218959666789, -0.0010004419600591063, -0.0004416684969328344, 8.274368155980483e-05, -0.00019567205163184553, 0.0008994225645437837, -0.0005972381331957877, 0.0005493167554959655, -0.0010852625127881765, -0.0003815979871433228, -0.0003458455321379006, 0.0006906923372298479, -0.0012409038608893752, 0.0001335963752353564, -0.0001361480390187353, 0.0008032976766116917, -0.00013300523278303444, -0.000337424804456532, 0.00041309872176498175, 0.0007904527010396123, 0.0006842159782536328, -0.0007170976605266333, -0.0009840712882578373, -0.0003008514759130776, 0.001147774513810873, -0.0015518374275416136, 4.0720246033743024e-05, 0.0009592900169081986, 0.000558712228666991, -0.0008080498082563281, -0.0007261523278430104, 0.00012980740575585514, -0.000947465596254915, -0.0008084133733063936, -0.0007796541904099286, -0.0007858152384869754, 0.0006653349264524877, 0.001420430839061737, 