In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import copy
import ray
from ray import tune
from ray.air import session
from ray.air.checkpoint import Checkpoint
from ray.tune.schedulers import ASHAScheduler


ImportError: Failed to import grpc on Apple Silicon. On Apple Silicon machines, try `pip uninstall grpcio; conda install grpcio`. Check out https://docs.ray.io/en/master/ray-overview/installation.html#m1-mac-apple-silicon-support for more details.

In [2]:
class EarlyStopping:
    def __init__(self, tolerance=5, min_delta=0):

        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False

    def __call__(self, train_loss, validation_loss):
        if (validation_loss - train_loss) > self.min_delta:
            self.counter +=1
            if self.counter >= self.tolerance:  
                self.early_stop = True
        else:
            self.counter = 0


In [3]:
data = pd.read_pickle("data/ready_dataset.pickle")
#print(len(data.merged[0]))
test_data = data.iloc[:40]
train_data = data.iloc[40:]

In [4]:
class LSTMModel(torch.nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Inicjalizacja stanu ukrytego LSTM
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Przekazanie danych wejściowych przez warstwę LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Przekazanie ostatniego kroku czasowego przez warstwę Fully Connected
        out = self.fc(out[:, -1, :])
        return out


In [5]:
data_arr = data.to_numpy()

feat_data = data.loc[:, data.columns != "target_value"]
target_data = data.loc[:, data.columns == "target_value"]

feat_arr = feat_data.to_numpy()
target_arr = target_data.to_numpy()

target_arr = np.array(target_arr, dtype=np.float32).reshape(-1, 1)

In [6]:
#data_arr[0][8]
data_arr_2 = []
for row in feat_arr:
    row = np.hstack(row)
    row_2 = []
    for item in row:
        item = item.reshape(8,13)
        row_2.append(item)
    data_arr_2.append(row_2)



In [7]:
target_arr_2 = []
for row in target_arr:
    row = np.hstack(row)
    row_2 = []
    for item in row:
        target_arr_2.append(item)

In [8]:
data_arr_2 = np.array(data_arr_2, dtype=np.float32)
data_flatten = np.array([row.flatten() for row in data_arr_2])

In [9]:
batch_size = 32
seq_len = 4
sequences = []
targets = []
for i in range(len(data_flatten) - seq_len + 1):
        sequences.append(data_flatten[i:i + seq_len])  # Wyłączenie ostatniej kolumny (target) z sekwencji
        targets.append(np.array([target_arr_2[i + seq_len-1]]))

sequences = np.array(sequences, dtype=np.float32)
targets = np.array(targets, dtype=np.float32)
print(targets.shape)
train_sequences = sequences[:int(sequences.shape[0] * 0.8)]
train_targets = targets[:int(targets.shape[0] * 0.8)]
test_sequences = sequences[int(sequences.shape[0] * 0.8):]
test_targets = targets[int(targets.shape[0] * 0.8):]

data_tensor = torch.tensor(train_sequences, dtype=torch.float32)
target_tensor = torch.tensor(train_targets, dtype=torch.float32)

test_data_tensor = torch.tensor(test_sequences, dtype=torch.float32)
test_target_tensor = torch.tensor(test_targets, dtype=torch.float32)


(236, 1)


In [10]:
class CustomDataset(Dataset):
    def __init__(self, data, target):
        self.data = data
        self.target = target

    def __getitem__(self, index):
        return self.data[index], self.target[index]

    def __len__(self):
        return len(self.data)


In [11]:
device = torch.device("cpu")

In [12]:
ds = CustomDataset(data_tensor, target_tensor)
test_ds = CustomDataset(test_data_tensor, test_target_tensor)
weights_path = "model_best_v1.pt"

batch_sizes = [32, 64]
hidden_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
num_layers = [1, 2, 3, 4, 5, 6, 7, 8]
epochs = [20, 40, 60, 80, 100, 150, 200, 500, 700, 1000]
tolerances = [1, 2, 3, 4, 5]
min_deltas = [5, 10, 15, 20, 25, 30]
learning_rates = [0.1, 0.075, 0.05, 0.01, 0.005]

old_loss = 99999999999

for batch_size in batch_sizes:
    print(f"Current batch_size: {batch_size}")
    print("")
    for hidden_size in hidden_sizes:
        print(f"Current hidden_size: {hidden_size}")
        print("")
        for num_layer in num_layers:
            print(f"Current num_layer: {num_layer}")
            print("")
            for num_epoch in epochs:
                for tolerance in tolerances:
                    for min_delta in min_deltas:
                        for learning_rate in learning_rates:
                            train_loader = DataLoader(ds, batch_size=batch_size, shuffle=True)

                            test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

                            early_stopping = EarlyStopping(tolerance=tolerance, min_delta=min_delta)

                            #model = ConvLSTM(8, 4, (7,7), 1, True, True, False)
                            model = LSTMModel(832, hidden_size, num_layer, 1)
                            model.to(device)
                            criterion = torch.nn.MSELoss()
                            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
                            epoch_test_loss = 0

                            for epoch in range(num_epoch):
                                epoch_test_loss = []
                                epoch_loss = []
                                model.train()
                                for batch_x, batch_y in train_loader:
                                    batch_x = batch_x.to(device)
                                    batch_y = batch_y.to(device)
                                    optimizer.zero_grad()
                                    y_pred = model(batch_x)
                                    loss = criterion(y_pred, batch_y).to(device)
                                    loss.backward()
                                    epoch_loss.append(loss.item())
                                    optimizer.step()
                                epoch_loss = np.mean(epoch_loss)

                                model.eval()

                                with torch.no_grad():
                                    for batch_test_x, batch_test_y in test_dataloader:
                                        batch_test_x = batch_test_x.to(device)
                                        batch_test_y = batch_test_y.to(device)
                                        y_test_pred = model(batch_test_x)
                                        test_loss = criterion(y_test_pred, batch_test_y).to(device)
                                        epoch_test_loss.append(test_loss.item())
                                    epoch_test_loss = np.mean(epoch_test_loss)

                                early_stopping(epoch_loss, epoch_test_loss)
                                if early_stopping.early_stop:
                                    print("Early stopping at epoch:", epoch)
                                    break
                        
                            if epoch_test_loss < old_loss:
                                old_loss = epoch_test_loss
                                print("Batch size: ", batch_size)
                                print("Hidden size: ", hidden_size)
                                print("Num of layers: ", num_layer)
                                print("Epochs: ", num_epoch)
                                print("Tolerance: ", tolerance)
                                print("Min delta: ", min_delta)
                                print("Learning rate: ", learning_rate)
                                print("Epoch: ", epoch)
                                print("Train loss: ", epoch_loss)
                                print("Test loss: ", epoch_test_loss)
                                print(" ")
                                print("------------------------------------")
                                print(" ")
                                best_model_wts = copy.deepcopy(model.state_dict())
                                torch.save(best_model_wts, weights_path)


#len(y_pred[0])
#print(len(y_pred))
#print(y_pred[0][1][0])

#y_pred[0][1][0][3]

Current batch_size: 32

Current hidden_size: 1

Current num_layer: 1

Batch size:  32
Hidden size:  1
Num of layers:  1
Epochs:  20
Tolerance:  1
Min delta:  5
Learning rate:  0.1
Epoch:  19
Train loss:  457.15936279296875
Test loss:  458.59544372558594
 
------------------------------------
 
Early stopping at epoch: 2
Early stopping at epoch: 4
Early stopping at epoch: 0
Early stopping at epoch: 0
Batch size:  32
Hidden size:  1
Num of layers:  1
Epochs:  20
Tolerance:  1
Min delta:  10
Learning rate:  0.1
Epoch:  19
Train loss:  430.3636474609375
Test loss:  431.08604431152344
 
------------------------------------
 
Batch size:  32
Hidden size:  1
Num of layers:  1
Epochs:  20
Tolerance:  1
Min delta:  10
Learning rate:  0.075
Epoch:  19
Train loss:  296.51984151204425
Test loss:  298.2155456542969
 
------------------------------------
 
Early stopping at epoch: 1
Early stopping at epoch: 0
Early stopping at epoch: 0
Batch size:  32
Hidden size:  1
Num of layers:  1
Epochs:  20
To

KeyboardInterrupt: 

In [None]:
test_ds = CustomDataset(test_data_tensor, test_target_tensor)
test_dataloader = DataLoader(test_ds, batch_size=32, shuffle=False)

y_true = []
y_pred = []
model.eval()
with torch.no_grad():
    for batch_x, batch_y in test_dataloader:
        y_true += batch_y.tolist()
        y_pred += model(batch_x).tolist()
        print(y_pred)
        break
mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"MSE: {mse:.4f}")
print(f"R^2: {r2:.4f}")
print(f"MAE: {mae:.4f}")
print(y_pred[2], y_true[2])

[[30.99135398864746], [30.99135398864746], [30.99135398864746], [30.99135398864746], [30.99135398864746], [30.99135398864746], [31.07925796508789], [31.99267578125], [31.25542640686035], [32.347808837890625], [31.915220260620117], [30.973243713378906], [31.708707809448242], [31.64101219177246], [31.635414123535156], [31.6422119140625], [31.642210006713867], [31.6422119140625], [31.6422119140625], [31.6422119140625], [31.6422119140625], [31.6422119140625], [31.210721969604492], [31.232046127319336], [31.593223571777344], [31.307003021240234], [31.59640884399414], [31.636672973632812], [31.637203216552734], [31.6422119140625], [31.6422119140625], [31.6422119140625]]
MSE: 0.4430
R^2: -16.6292
MAE: 0.6115
[30.99135398864746] [31.80500030517578]
