In [144]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

In [145]:
mini_df = pd.read_csv('data/nn_data/mini_df.csv')
mini_df_fill = pd.read_csv('data/nn_data/mini_df_fill.csv')
nn_df = pd.read_csv('data/nn_data/nn_df.csv')

In [146]:
dataframes = [mini_df, nn_df, mini_df_fill]

In [147]:
# OneHotEncoding

encoded_dataframes = []

for df in dataframes:
    encoded_df = pd.get_dummies(df)
    encoded_dataframes.append(encoded_df)

In [148]:
# Scaling

scaler = StandardScaler()

scaled_dataframes = []
for encoded_df in encoded_dataframes:
    scaled_array = scaler.fit_transform(encoded_df)
    scaled_df = pd.DataFrame(scaled_array, columns=encoded_df.columns)
    scaled_dataframes.append(scaled_df)

In [149]:
mini_df_scaled_path = "data/ready_dataframes/mini_df_scaled.csv"
nn_df_scaled_path = "data/ready_dataframes/nn_df_scaled.csv"
mini_df_fill_scaled_path = "data/ready_dataframes/mini_df_fill_scaled.csv"

scaled_dataframes[0].to_csv(mini_df_scaled_path, index=False)
scaled_dataframes[1].to_csv(nn_df_scaled_path, index=False)
scaled_dataframes[2].to_csv(mini_df_fill_scaled_path, index=False)

In [150]:
# Model-ready files
nn_df = pd.read_csv(nn_df_scaled_path)
mini_df = pd.read_csv(mini_df_scaled_path)
mini_df_fill = pd.read_csv(mini_df_fill_scaled_path)

In [151]:
# Data Split

# nn_df
target_column = 'Market value'
X = nn_df.drop(columns=[target_column])
y = nn_df[target_column]

# mini_df
# target_column = 'Market value'
# X = mini_df.drop(columns=[target_column])
# y = mini_df[target_column]

# mini_df_fill
# target_column = 'Market value'
# X = mini_df_fill.drop(columns=[target_column])
# y = mini_df_fill[target_column]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [152]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cpu


In [153]:
# data conversion - to tensor
X_train = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
X_test = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
y_train = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1).to(device)
y_test = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1).to(device)


# dataloader creation
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# LSTM

In [154]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

input_size = X_train.shape[2]
hidden_size = 50
num_layers = 2

model = LSTMModel(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [155]:
# model training
def train_model(model, train_loader, test_loader, criterion, optimizer, epochs=50):
    train_losses = []
    test_losses = []
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        
        train_losses.append(epoch_loss)
        
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
        test_loss = test_loss / len(test_loader.dataset)
        test_losses.append(test_loss)
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}')
    
    loss_df = pd.DataFrame({'Epoch': list(range(1, epochs + 1)), 'Train Loss': train_losses, 'Test Loss': test_losses})
    loss_df.to_csv("lstm_training_results.csv", index=False)

In [156]:
train_model(model, train_loader, test_loader, criterion, optimizer, epochs=50)

Epoch 1/50, Train Loss: 0.8456, Test Loss: 1.3417
Epoch 2/50, Train Loss: 0.6563, Test Loss: 0.7708
Epoch 3/50, Train Loss: 0.3463, Test Loss: 0.3089
Epoch 4/50, Train Loss: 0.2279, Test Loss: 0.2044
Epoch 5/50, Train Loss: 0.1831, Test Loss: 0.1734
Epoch 6/50, Train Loss: 0.1580, Test Loss: 0.1604
Epoch 7/50, Train Loss: 0.1380, Test Loss: 0.1578
Epoch 8/50, Train Loss: 0.1219, Test Loss: 0.1424
Epoch 9/50, Train Loss: 0.1101, Test Loss: 0.1384
Epoch 10/50, Train Loss: 0.0971, Test Loss: 0.1347
Epoch 11/50, Train Loss: 0.0928, Test Loss: 0.1471
Epoch 12/50, Train Loss: 0.0842, Test Loss: 0.1356
Epoch 13/50, Train Loss: 0.0749, Test Loss: 0.1344
Epoch 14/50, Train Loss: 0.0694, Test Loss: 0.1436
Epoch 15/50, Train Loss: 0.0654, Test Loss: 0.1492
Epoch 16/50, Train Loss: 0.0620, Test Loss: 0.1495
Epoch 17/50, Train Loss: 0.0573, Test Loss: 0.1432
Epoch 18/50, Train Loss: 0.0526, Test Loss: 0.1471
Epoch 19/50, Train Loss: 0.0507, Test Loss: 0.1441
Epoch 20/50, Train Loss: 0.0497, Test Lo

In [157]:
# model evaluation
def evaluate_model(model, test_loader):
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            predictions.extend(outputs.numpy())
            actuals.extend(targets.numpy())
    return np.array(predictions), np.array(actuals)

y_pred, y_true = evaluate_model(model, test_loader)

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')

RMSE: 0.38958266377449036
MAE: 0.12814141809940338
R²: 0.8988246041030987


# CNN

In [158]:
print("Shapes before tensor conversion:")
print("X_train:", X_train.shape)
print("X_test:", X_test.shape)
print("y_train:", y_train.shape)
print("y_test:", y_test.shape)

X_train_tensor = torch.from_numpy(X_train).float().unsqueeze(1).to(device)
X_test_tensor = torch.from_numpy(X_test).float().unsqueeze(1).to(device)
y_train_tensor = torch.from_numpy(y_train).float().unsqueeze(1).to(device)
y_test_tensor = torch.from_numpy(y_test).float().unsqueeze(1).to(device)

Shapes before tensor conversion:
X_train: torch.Size([1358, 1, 107])
X_test: torch.Size([340, 1, 107])
y_train: torch.Size([1358, 1])
y_test: torch.Size([340, 1])


TypeError: expected np.ndarray (got Tensor)

In [ ]:
print("Shapes after tensor conversion:")
print("X_train_tensor:", X_train_tensor.shape)
print("X_test_tensor:", X_test_tensor.shape)
print("y_train_tensor:", y_train_tensor.shape)
print("y_test_tensor:", y_test_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
class CNNModel(nn.Module):
    def __init__(self, input_length):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=2)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.2)
        self.flatten = nn.Flatten()

        conv_output_size = (input_length - 1) // 2
        self.fc1 = nn.Linear(64 * conv_output_size, 50)
        self.fc2 = nn.Linear(50, 1)
    
    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

input_length = X_train_tensor.shape[2]
model = CNNModel(input_length).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# model training
def train_model(model, train_loader, test_loader, criterion, optimizer, epochs=50):
    train_losses = []
    test_losses = []
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        
        train_losses.append(epoch_loss)
        
        # Ewaluacja na zbiorze testowym
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
        test_loss = test_loss / len(test_loader.dataset)
        test_losses.append(test_loss)
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}')
    
    # Zapisz wyniki do pliku CSV
    loss_df = pd.DataFrame({'Epoch': list(range(1, epochs + 1)), 'Train Loss': train_losses, 'Test Loss': test_losses})
    loss_df.to_csv("cnn_training_results.csv", index=False)

In [None]:
train_model(model, train_loader, test_loader, criterion, optimizer, epochs=50)

In [ ]:
def evaluate_model(model, test_loader):
    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            predictions.extend(outputs.cpu().numpy())  # Przenieś dane z GPU na CPU przed konwersją na numpy
            actuals.extend(targets.cpu().numpy())  # Przenieś dane z GPU na CPU przed konwersją na numpy
    return np.array(predictions), np.array(actuals)

# model evaluation
y_pred, y_true = evaluate_model(model, test_loader)

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')

# Results Summary and Comparison