# Library

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm
import os
import random

# Parameters

In [2]:
class CONFIG:
    INPUT_SEQ_LEN = 90
    OUTPUT_SEQ_LEN = 30
    N_STATIONS = 422
    N_VARS = 76
    HIDDEN_SIZE = 128
    N_LAYERS = 1
    DROPOUT = 0.4 
    N_EPOCHS = 50
    BATCH_SIZE = 128
    LEARNING_RATE = 8e-5  
    WEIGHT_DECAY = 1e-2
    TEACHER_FORCING_RATIO = 0.5
    NOISE_LEVEL = 0.015   
    
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SEED = 42
    TRAIN_PATH = '/kaggle/input/unipd-deep-learning-2025-challenge-2/train_dataset.csv'

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(CONFIG.SEED)
print(f"Using device: {CONFIG.DEVICE}")
if torch.cuda.is_available():
    print(f"Found {torch.cuda.device_count()} GPUs.")


Using device: cuda
Found 2 GPUs.


# train and validation set

In [3]:
train_df = pd.read_csv(CONFIG.TRAIN_PATH)
var_columns = [f'var{i}' for i in range(1, CONFIG.N_VARS + 1)]
scaler = StandardScaler()
train_df[var_columns] = scaler.fit_transform(train_df[var_columns])
total_timesteps = train_df['time'].max() + 1
validation_split_time = total_timesteps - (CONFIG.INPUT_SEQ_LEN + CONFIG.OUTPUT_SEQ_LEN)
train_data = train_df[train_df['time'] < validation_split_time]
val_data = train_df[train_df['time'] >= validation_split_time]

In [4]:
class WeatherDataset(Dataset):
    def __init__(self, data, input_len, output_len, var_cols, is_train=False):
        self.data = data
        self.input_len = input_len
        self.output_len = output_len
        self.var_cols = var_cols
        self.is_train = is_train
        self.samples = []
        grouped = self.data.groupby('station')
        for _, station_df in tqdm(grouped, desc="Creazione campioni"):
            station_values = station_df[self.var_cols].values
            num_timesteps = len(station_values)
            for i in range(num_timesteps - self.input_len - self.output_len + 1):
                X = station_values[i : i + self.input_len]
                y = station_values[i + self.input_len : i + self.input_len + self.output_len]
                self.samples.append((X, y))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        X, y = self.samples[idx]
        if self.is_train:
            noise = np.random.normal(0, CONFIG.NOISE_LEVEL, X.shape)
            X = X + noise
        return torch.FloatTensor(X), torch.FloatTensor(y)


In [5]:
train_dataset = WeatherDataset(train_data, CONFIG.INPUT_SEQ_LEN, CONFIG.OUTPUT_SEQ_LEN, var_columns, is_train=True)
val_dataset = WeatherDataset(val_data, CONFIG.INPUT_SEQ_LEN, CONFIG.OUTPUT_SEQ_LEN, var_columns, is_train=False)
train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
print(f"training: {len(train_dataset)}, validazione: {len(val_dataset)}")

Creazione campioni:   0%|          | 0/422 [00:00<?, ?it/s]

Creazione campioni:   0%|          | 0/422 [00:00<?, ?it/s]

training: 192432, validazione: 422


# Model

In [6]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers, dropout):
        super(Encoder, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, n_layers, dropout=dropout if n_layers > 1 else 0, batch_first=True)

    def forward(self, x):
        outputs, hidden = self.gru(x)
        return hidden

class Decoder(nn.Module):
    def __init__(self, output_size, hidden_size, n_layers, dropout):
        super(Decoder, self).__init__()
        self.gru = nn.GRU(output_size, hidden_size, n_layers, dropout=dropout if n_layers > 1 else 0, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        output, hidden = self.gru(x, hidden)
        prediction = self.fc(output)
        return prediction, hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size = src.shape[0]
        trg_len = trg.shape[1]
        trg_vocab_size = CONFIG.N_VARS
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(self.device)
        hidden = self.encoder(src)
        decoder_input = src[:, -1, :].unsqueeze(1)
        for t in range(trg_len):
            output, hidden = self.decoder(decoder_input, hidden)
            outputs[:, t, :] = output.squeeze(1)
            teacher_force = random.random() < teacher_forcing_ratio
            decoder_input = trg[:, t, :].unsqueeze(1) if teacher_force else output
        return outputs


# Traning phase

In [7]:
def train_one_epoch(model, dataloader, optimizer, criterion, teacher_forcing_ratio):
    model.train()
    total_loss = 0
    for x_batch, y_batch in tqdm(dataloader, desc="Training"):
        x_batch, y_batch = x_batch.to(CONFIG.DEVICE), y_batch.to(CONFIG.DEVICE)
        optimizer.zero_grad()
        outputs = model(x_batch, y_batch, teacher_forcing_ratio)
        loss = criterion(outputs, y_batch)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for x_batch, y_batch in tqdm(dataloader, desc="Validating"):
            x_batch, y_batch = x_batch.to(CONFIG.DEVICE), y_batch.to(CONFIG.DEVICE)
            outputs = model(x_batch, y_batch, 0)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
    return total_loss / len(dataloader)

# --- 6. Blocco di Esecuzione Principale ---
encoder = Encoder(CONFIG.N_VARS, CONFIG.HIDDEN_SIZE, CONFIG.N_LAYERS, CONFIG.DROPOUT)
decoder = Decoder(CONFIG.N_VARS, CONFIG.HIDDEN_SIZE, CONFIG.N_LAYERS, CONFIG.DROPOUT)
model = Seq2Seq(encoder, decoder, CONFIG.DEVICE).to(CONFIG.DEVICE)

if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs for training.")
    model = nn.DataParallel(model)

optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG.LEARNING_RATE, weight_decay=CONFIG.WEIGHT_DECAY)
criterion = nn.L1Loss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=5, verbose=True) 

best_val_loss = float('inf')
patience_counter = 0
patience = 10 
teacher_forcing_ratio = CONFIG.TEACHER_FORCING_RATIO

for epoch in range(1, CONFIG.N_EPOCHS + 1):
    print(f"--- Epoch {epoch}/{CONFIG.N_EPOCHS} ---")
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, teacher_forcing_ratio)
    val_loss = evaluate(model, val_loader, criterion)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
    scheduler.step(val_loss)
    if teacher_forcing_ratio > 0.05:
        teacher_forcing_ratio -= 0.01 
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print("Validation loss improved. Saving model.")
        patience_counter = 0
    else:
        patience_counter += 1
        print(f"No improvement in validation loss. Patience: {patience_counter}/{patience}")
    if patience_counter >= patience:
        print("Early stopping triggered.")
        break
print("Training finished.")

Using 2 GPUs for training.
--- Epoch 1/50 ---




Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 1: Train Loss = 0.3449, Val Loss = 0.5842
Validation loss improved. Saving model.
--- Epoch 2/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 2: Train Loss = 0.2970, Val Loss = 0.5566
Validation loss improved. Saving model.
--- Epoch 3/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 3: Train Loss = 0.2910, Val Loss = 0.5652
No improvement in validation loss. Patience: 1/10
--- Epoch 4/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 4: Train Loss = 0.2878, Val Loss = 0.5595
No improvement in validation loss. Patience: 2/10
--- Epoch 5/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 5: Train Loss = 0.2866, Val Loss = 0.5761
No improvement in validation loss. Patience: 3/10
--- Epoch 6/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 6: Train Loss = 0.2859, Val Loss = 0.5787
No improvement in validation loss. Patience: 4/10
--- Epoch 7/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 7: Train Loss = 0.2860, Val Loss = 0.5805
No improvement in validation loss. Patience: 5/10
--- Epoch 8/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 8: Train Loss = 0.2857, Val Loss = 0.5857
No improvement in validation loss. Patience: 6/10
--- Epoch 9/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 9: Train Loss = 0.2864, Val Loss = 0.5805
No improvement in validation loss. Patience: 7/10
--- Epoch 10/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 10: Train Loss = 0.2869, Val Loss = 0.5829
No improvement in validation loss. Patience: 8/10
--- Epoch 11/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 11: Train Loss = 0.2883, Val Loss = 0.5865
No improvement in validation loss. Patience: 9/10
--- Epoch 12/50 ---


Training:   0%|          | 0/1504 [00:00<?, ?it/s]

Validating:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 12: Train Loss = 0.2890, Val Loss = 0.5849
No improvement in validation loss. Patience: 10/10
Early stopping triggered.
Training finished.


# Prediction

In [8]:
encoder_pred = Encoder(CONFIG.N_VARS, CONFIG.HIDDEN_SIZE, CONFIG.N_LAYERS, CONFIG.DROPOUT)
decoder_pred = Decoder(CONFIG.N_VARS, CONFIG.HIDDEN_SIZE, CONFIG.N_LAYERS, CONFIG.DROPOUT)
prediction_model = Seq2Seq(encoder_pred, decoder_pred, CONFIG.DEVICE)

state_dict = torch.load('best_model.pth')
if isinstance(model, nn.DataParallel):
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]
        new_state_dict[name] = v
    prediction_model.load_state_dict(new_state_dict)
else:
    prediction_model.load_state_dict(state_dict)

prediction_model.to(CONFIG.DEVICE)
prediction_model.eval()

predictions = []
full_train_grouped = train_df.groupby('station')
with torch.no_grad():
    for station_id in tqdm(range(CONFIG.N_STATIONS), desc="Predizione per stazione"):
        station_data = full_train_grouped.get_group(station_id)
        input_sequence = station_data[var_columns].values[-CONFIG.INPUT_SEQ_LEN:]
        input_tensor = torch.FloatTensor(input_sequence).unsqueeze(0).to(CONFIG.DEVICE)
        placeholder_trg = torch.zeros(1, CONFIG.OUTPUT_SEQ_LEN, CONFIG.N_VARS).to(CONFIG.DEVICE)
        output = prediction_model(input_tensor, placeholder_trg, 0)
        pred_scaled = output.squeeze(0).cpu().numpy()
        pred_original_scale = scaler.inverse_transform(pred_scaled)
        for t in range(CONFIG.OUTPUT_SEQ_LEN):
            row = {'id': f'{station_id}_{t}'}
            for i, var_name in enumerate(var_columns):
                row[var_name] = pred_original_scale[t, i]
            predictions.append(row)

submission_df = pd.DataFrame(predictions)
submission_df.to_csv('submission.csv', index=False)

Predizione per stazione:   0%|          | 0/422 [00:00<?, ?it/s]