In [1]:
import pandas as pd
import os
import pickle

from model import Transformer # this is the transformer.py file
import torch
from torch import nn
import numpy as np

In [2]:
from data_processing import create_segments
from sklearn.preprocessing import StandardScaler

# --- Greenhouse-gas emissions ------------------------------------------------
# Read the emissions table and rename 'Year' to 'year' so it uses the same
# column name as the temperature table below.
all_ghg_data = pd.read_csv("data/total-ghg-emissions.csv").rename(columns={'Year': 'year'})

# Standardise the last column in place. `scaler` stays in the namespace after
# this cell.
# NOTE(review): the scaler is fitted on every row, i.e. before any
# train/validation/test split takes place.
scaler = StandardScaler()
last_column_2d = all_ghg_data.iloc[:, -1].values.reshape(-1, 1)
all_ghg_data.iloc[:, -1] = scaler.fit_transform(last_column_2d).flatten()

# --- Temperature --------------------------------------------------------------
all_temp_data = pd.read_csv("data/processed_data.csv")

# Discard every row that belongs to the most recent year in the file.
# NOTE(review): presumably that year is incomplete - confirm.
most_recent_year = all_temp_data['year'].max()
all_temp_data = all_temp_data[all_temp_data['year'] != most_recent_year]

# One row per year holding the column-wise mean; the 'Unnamed: 0' and 'month'
# columns are removed from the averaged frame.
annual_avg_temp = (
    all_temp_data
    .groupby('year')
    .mean()
    .reset_index()
    .drop(columns=['Unnamed: 0', 'month'])
)


def pad_features(df, max_features):
    """Append zero-filled columns to ``df`` until it has ``max_features`` columns.

    The frame is modified in place and the same object is returned. Added
    columns are named ``pad_feature_0``, ``pad_feature_1``, ... in order.
    When ``df`` already has ``max_features`` columns or more, nothing is added.
    """
    # The number of columns to add is fixed up front, before the frame grows.
    pad_names = [f'pad_feature_{idx}' for idx in range(max_features - df.shape[1])]
    for pad_name in pad_names:
        df[pad_name] = 0
    return df

# Drop the identifier columns BEFORE measuring widths, so the padding target
# only counts columns that are still present in the GHG frame.
all_ghg_data = all_ghg_data.drop(columns=['Entity', 'Code'])

# Bring both frames to the width of the wider one. pad_features adds nothing
# to a frame that is already wide enough.
max_features = max(all_ghg_data.shape[1], annual_avg_temp.shape[1])
all_ghg_data = pad_features(all_ghg_data, max_features)
annual_avg_temp = pad_features(annual_avg_temp, max_features)

# The two frames are segmented and paired positionally further down, so they
# must have the same number of rows and columns.
assert all_ghg_data.shape == annual_avg_temp.shape, (
    f"GHG data {all_ghg_data.shape} and temperature data {annual_avg_temp.shape} "
    "must have identical shapes"
)

print("Padded Greenhouse Gas Data: ", all_ghg_data.shape)
print("Average Annual Temperature Data: ", annual_avg_temp.shape)

Padded Greenhouse Gas Data:  (142, 14)
Average Annual Temperature Data:  (142, 14)


In [10]:
ghg_data_padded = all_ghg_data
temp_data_padded = annual_avg_temp

# Length of each input window and of each target window passed to
# create_segments, plus its `overlapping` flag.
input_years = 5
target_years = 5
overlapping = False

# Segment both data sets with identical settings.
ghg_input_segments, ghg_target_segments = create_segments(
    ghg_data_padded, input_years, target_years, overlapping
)
temp_input_segments, temp_target_segments = create_segments(
    temp_data_padded, input_years, target_years, overlapping
)

print("GHG Input Segment Shape:", ghg_input_segments[0].shape)
print("Temperature Input Segment Shape:", temp_input_segments[0].shape)
print("GHG Target Segment Shape:", ghg_target_segments[0].shape)
print("Temperature Target Segment Shape:", temp_target_segments[0].shape)


def _pair_segments(temp_segments, ghg_segments):
    """Pair segments positionally and return one stacked tensor per pair.

    Each segment has its 'year' column dropped and is converted to a float32
    tensor. Within every pair, temperature sits at index 0 and GHG at index 1
    of the new leading dimension.
    """
    def _as_tensor(segment):
        return torch.tensor(segment.drop(columns=['year']).values, dtype=torch.float32)

    return [
        torch.stack((_as_tensor(temp_part), _as_tensor(ghg_part)), dim=0)
        for temp_part, ghg_part in zip(temp_segments, ghg_segments)
    ]


input_segments_tensors = _pair_segments(temp_input_segments, ghg_input_segments)
target_segments_tensors = _pair_segments(temp_target_segments, ghg_target_segments)

# Stack the per-segment tensors along a new leading dimension.
input_tensors = torch.stack(input_segments_tensors, dim=0)
target_tensors = torch.stack(target_segments_tensors, dim=0)

print("\nCombined Input Tensor Shape:", input_tensors.shape)
print("Combined Target Tensor Shape:", target_tensors.shape)

GHG Input Segment Shape: (5, 14)
Temperature Input Segment Shape: (5, 14)
GHG Target Segment Shape: (5, 14)
Temperature Target Segment Shape: (5, 14)

Combined Input Tensor Shape: torch.Size([14, 2, 5, 13])
Combined Target Tensor Shape: torch.Size([14, 2, 5, 13])


In [11]:
from dataset import TimeSeriesDataset
from torch.utils.data import DataLoader, random_split
batch_size = 1
# Fixed seed so that train/validation/test membership is identical on every run.
SPLIT_SEED = 42

ts_dataset = TimeSeriesDataset(input_tensors, target_tensors)
train_size = int(0.6 * len(ts_dataset))  # 60% of data for training
val_size = int(0.2 * len(ts_dataset))  # 20% of data for validation
test_size = len(ts_dataset) - train_size - val_size  # Remaining for testing

# Randomly split the dataset into training, validation and test subsets.
# A dedicated seeded generator keeps the split reproducible without touching
# the global RNG state.
# NOTE(review): the split is random over segments, not chronological.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = random_split(
    ts_dataset, [train_size, val_size, test_size], generator=split_generator
)

# Create DataLoaders for the three subsets. Only the training loader shuffles;
# evaluation loaders keep a fixed order so results are repeatable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

print(f"Train Loader Length: {len(train_loader)}")
print(f"Validation Loader Length: {len(val_loader)}")
print(f"Test Loader Length: {len(test_loader)}")

input_batch, target_batch = next(iter(train_loader))

# Print the shapes
print(f'Input batch shape: {input_batch.shape}')  # e.g., torch.Size([1, 2, 5, 13])
print(f'Target batch shape: {target_batch.shape}')  # e.g., torch.Size([1, 2, 5, 13])

Train Loader Length: 8
Validation Loader Length: 2
Test Loader Length: 4
Input batch shape: torch.Size([1, 2, 5, 13])
Target batch shape: torch.Size([1, 2, 5, 13])


In [12]:

from sklearn.metrics import r2_score

def testing_metrics(logits_batch, target_batch):
    mse = torch.mean((logits_batch - target_batch) ** 2)
    rmse = torch.sqrt(mse)
    mae = torch.mean(torch.abs(logits_batch - target_batch))

    r_squared_values = []
    for t in range(logits_batch.shape[1]):  # Iterate over time steps
        logits_t = logits_batch[:, t, :].reshape(-1).detach().numpy()
        target_t = target_batch[:, t, :].reshape(-1).detach().numpy()
        r_squared_t = r2_score(target_t, logits_t)
        r_squared_values.append(r_squared_t)

    r_squared_avg = sum(r_squared_values) / len(r_squared_values)
    return rmse.item(), mae.item(), r_squared_avg

In [13]:
# Hyperparameters

# Per-series feature widths passed to the model.
d_temp = 13
d_ghg = 13
# Combined width of the two series (13 + 13 = 26).
d_data = d_temp + d_ghg

# Remaining model settings, forwarded to Transformer as-is.
# NOTE(review): their exact meaning is defined in model.py - confirm there.
d_enc = 15
d_dec = 30
ffn_hidden = 2048
num_heads = 5
drop_prob = 0.1
num_layers = 1

transformer = Transformer(
    d_enc, d_dec, d_data, d_temp, d_ghg,
    ffn_hidden, num_heads, drop_prob, num_layers,
)

In [14]:
# Custom Training Loop

def train_model(model, epochs, optimizer, criterion, save_freq = 20, print_results = True,
                train_data = None, val_data = None, save_path = 'saved_models/transformer.pt'):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: module called as ``model(input_batch, target_batch)``; its
            output is compared against the flattened target.
        epochs: number of passes over the training data.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss called as ``criterion(logits, flattened_target)``.
        save_freq: every ``save_freq``-th epoch the metrics are printed (if
            ``print_results``) and a checkpoint is written.
        print_results: set to False to silence the per-epoch report.
        train_data: iterable of ``(input_batch, target_batch)`` pairs; falls
            back to the notebook-level ``train_loader`` when None.
        val_data: same for validation; falls back to ``val_loader``.
        save_path: file the model's ``state_dict`` is written to. Missing
            parent directories are created.

    The final epoch is always checkpointed, so the file on disk holds the
    last weights regardless of ``save_freq``.

    NOTE(review): the target batch is passed to the model during validation
    as well as training - confirm that the model does not leak it into its
    predictions.
    """
    # Default to the notebook-level loaders so existing calls keep working.
    if train_data is None:
        train_data = train_loader
    if val_data is None:
        val_data = val_loader

    print("Single-Shot Model Training")
    for epoch in range(1, epochs+1):

        model.train()
        train_loss, train_rmse, train_mae, train_r_squared = _run_epoch(
            model, train_data, criterion, optimizer
        )

        model.eval()
        with torch.inference_mode():
            val_loss, val_rmse, val_mae, val_r_squared = _run_epoch(model, val_data, criterion)

        is_checkpoint_epoch = epoch % save_freq == 0

        # Print epoch stats
        if(is_checkpoint_epoch and print_results):
            print(f'Epoch {epoch}/{epochs} | Train Loss: {train_loss:.4f} | Train RMSE: {train_rmse:.4f} | Train MAE: {train_mae:.4f} | Train R^2: {train_r_squared:.4f}')
            print(f'Validation Loss: {val_loss:.4f} | Validation RMSE: {val_rmse:.4f} | Validation MAE: {val_mae:.4f} | Validation R^2: {val_r_squared:.4f}')
            print("")

        # Write a checkpoint on the save_freq cadence and after the last epoch,
        # rather than rewriting the file on every single epoch.
        if is_checkpoint_epoch or epoch == epochs:
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save(model.state_dict(), save_path)


def _run_epoch(model, loader, criterion, optimizer = None):
    """Run one pass over ``loader`` and return per-batch means of (loss, rmse, mae, r_squared).

    When ``optimizer`` is given, a gradient step is taken for each batch;
    otherwise the pass is evaluation only. Returns NaNs if ``loader`` yields
    no batches.
    """
    loss_sum = rmse_sum = mae_sum = r_squared_sum = 0.0
    num_batches = 0

    for input_batch, target_batch in loader:
        if optimizer is not None:
            optimizer.zero_grad()

        logits_batch = model(input_batch, target_batch)

        # batch_size * number features * time frame * num of subfeatures
        #   -> batch_size * time frame * (number features * num of subfeatures)
        # Sizes are taken from the tensor itself, so any batch size and
        # feature width works.
        target_batch = target_batch.permute(0, 2, 1, 3).flatten(start_dim=2)

        loss = criterion(logits_batch, target_batch)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        rmse, mae, r_squared = testing_metrics(logits_batch, target_batch)
        loss_sum += loss.item()
        rmse_sum += rmse
        mae_sum += mae
        r_squared_sum += r_squared
        num_batches += 1

    if num_batches == 0:
        # Nothing to average (e.g. drop_last discarded the only batch).
        nan = float('nan')
        return nan, nan, nan, nan

    # Average loss and metrics over the batches of this pass.
    return (
        loss_sum / num_batches,
        rmse_sum / num_batches,
        mae_sum / num_batches,
        r_squared_sum / num_batches,
    )

In [15]:
# Optimisation setup: mean-squared-error objective minimised with plain SGD.
LEARNING_RATE = 0.01
NUM_EPOCHS = 100

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(transformer.parameters(), lr=LEARNING_RATE)

# Metrics are reported every 20th epoch.
train_model(
    model=transformer,
    epochs=NUM_EPOCHS,
    optimizer=optimizer,
    criterion=criterion,
    save_freq=20,
)

Single-Shot Model Training
Epoch 20/100 | Train Loss: 0.0557 | Train RMSE: 0.2336 | Train MAE: 0.1799 | Train R^2: 0.2409
Validation Loss: 0.0914 | Validation RMSE: 0.2889 | Validation MAE: 0.2179 | Validation R^2: 0.5704

Epoch 40/100 | Train Loss: 0.0356 | Train RMSE: 0.1880 | Train MAE: 0.1455 | Train R^2: 0.4359
Validation Loss: 0.0479 | Validation RMSE: 0.2130 | Validation MAE: 0.1592 | Validation R^2: 0.7583

Epoch 60/100 | Train Loss: 0.0293 | Train RMSE: 0.1701 | Train MAE: 0.1300 | Train R^2: 0.5513
Validation Loss: 0.0377 | Validation RMSE: 0.1898 | Validation MAE: 0.1363 | Validation R^2: 0.8063

Epoch 80/100 | Train Loss: 0.0272 | Train RMSE: 0.1641 | Train MAE: 0.1227 | Train R^2: 0.5920
Validation Loss: 0.0352 | Validation RMSE: 0.1830 | Validation MAE: 0.1288 | Validation R^2: 0.8209

Epoch 100/100 | Train Loss: 0.0254 | Train RMSE: 0.1593 | Train MAE: 0.1206 | Train R^2: 0.6220
Validation Loss: 0.0339 | Validation RMSE: 0.1791 | Validation MAE: 0.1246 | Validation R^2: 