In [1]:
import pandas as pd
import numpy as np
import pickle as pkl
import datetime as datetime
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from src.utils.CreateFeatures import CreateFeatures
from src.pygcn.SiameseGNN import SiameseGNN
from src.pygcn.GraphSAGE import SiameseGNN_SAGE

import torch
import torch.nn as nn
import torch
import torch_geometric.data as data

## All Events

In [2]:
years = range(1962,2019)

train_years = [2005, 1969, 2002, 1997, 1993, 1982, 2001, 2000, 1962, 1985, 1978, 2016, 1986, 1987, 1989, 1971, 2013, 1996, 1995, 1967, 2017, 1974, 1990, 1977, 1980, 2014, 1965, 1984, 2006, 1973, 1968, 1981, 1970, 1991]
val_years = [1975, 1983, 2009, 1966, 1999, 1988, 2007, 1979, 1972, 2015, 2003]
test_years = [1963, 1964, 1976, 1992, 1994, 1998, 2004, 2008, 2010, 2011, 2012, 2018]

In [3]:
all_nodes = ['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM',
       'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD',
       'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA',
       'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN',
       'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA',
       'ECU', 'EGY', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FRA', 'FSM',
       'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC',
       'GRD', 'GRL', 'GTM', 'GUM', 'GUY', 'HKG', 'HND', 'HRV', 'HTI',
       'HUN', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA',
       'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KNA', 'KOR',
       'KWT', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LKA', 'LSO', 'LTU',
       'LUX', 'LVA', 'MAC', 'MAR', 'MDA', 'MDG', 'MDV', 'MEX', 'MHL',
       'MKD', 'MLI', 'MLT', 'MMR', 'MNE', 'MNG', 'MNP', 'MOZ', 'MRT',
       'MUS', 'MWI', 'MYS', 'NAM', 'NER', 'NGA', 'NIC', 'NLD', 'NOR',
       'NPL', 'NRU', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',
       'PNG', 'POL', 'PRT', 'PRY', 'PSE', 'PYF', 'QAT', 'ROU', 'RUS',
       'RWA', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SMR',
       'SRB', 'SSD', 'STP', 'SUR', 'SVK', 'SVN', 'SWE', 'SWZ', 'SXM',
       'SYC', 'SYR', 'TCD', 'TGO', 'THA', 'TJK', 'TKM', 'TLS', 'TON',
       'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UGA', 'UKR', 'URY', 'USA',
       'UZB', 'VCT', 'VEN', 'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ZMB',
       'ZWE']

In [4]:
#12 features
import pickle as pkl
with open("src/pygcn/train_graphs.pickle", "rb") as f:
    train_graphs = pkl.load(f)

with open("src/pygcn/val_graphs.pickle", "rb") as f:  
    val_graphs = pkl.load(f)

with open("src/pygcn/test_graphs.pickle", "rb") as f:         
    test_graphs = pkl.load(f)

In [5]:
from torch_geometric.data import DataLoader
test_loader = DataLoader(test_graphs, batch_size=4)
train_loader = DataLoader(train_graphs, batch_size=4)
val_loader = DataLoader(val_graphs, batch_size=4)



## sGNN with GCN Encoder and 3 Features

In [6]:
def check_crisis_years(year_pairs, crisis_years):
    result = []
    for pair in year_pairs:
        start, end = pair
        # Check if any crisis year is between the pair or equals the later year
        if any(start < year <= end for year in crisis_years):
            result.append(0)
        else:
            result.append(1)
    return result

In [7]:
crisis_years = [1983, 1982, 2008, 2002, 2016, 1967, 1962, 1989, 2012, 1963, 1993, 1986, 1996,1978]

def get_year_pairs(year_range):
    return [(year1, year2) for year1 in year_range for year2 in year_range if year2 >= year1]

def get_loader_pairs(dataset):
    return [(dataset[i], dataset[j]) for i in range(len(dataset)) for j in range(len(dataset)) if j >= i]

def get_graph_pairs(graphs):
    return [(graphs[i], graphs[j]) for i in range(len(graphs)) for j in range(len(graphs)) if j >= i]

train_pairs = get_year_pairs(train_years)
val_pairs = get_year_pairs(val_years)

train_y = check_crisis_years(train_pairs, crisis_years)
val_y = check_crisis_years(val_pairs, crisis_years)

train_loader_pairs = get_loader_pairs(train_loader.dataset)
val_loader_pairs = get_loader_pairs(val_loader.dataset)

In [8]:
train_graph_pairs = get_graph_pairs(train_graphs)
val_graph_pairs = get_graph_pairs(val_graphs)

train_torch_y = torch.tensor(np.array(train_y))
val_torch_y = torch.tensor(np.array(val_y))

labeled_pairs_train = list(zip(train_loader_pairs, train_y))
labeled_pairs_val = list(zip(val_loader_pairs, val_y))

flattened_train = [(a, b, c) for ((a, b), c) in labeled_pairs_train]
flattened_val  = [(a, b, c) for ((a, b), c) in labeled_pairs_val]

In [9]:
import random
positive_samples = [item for item in flattened_train if item[2] == 1]
negative_samples = [item for item in flattened_train if item[2] == 0]

# Calculate the difference in count
diff = len(negative_samples) - len(positive_samples)

# Upsample positive samples
if diff > 0:
    positive_samples_upsampled = positive_samples * (diff // len(positive_samples)) + random.sample(positive_samples, diff % len(positive_samples))
    balanced_data = negative_samples + positive_samples + positive_samples_upsampled
else:
    balanced_data = data

# Shuffle the balanced dataset
random.shuffle(balanced_data)

In [12]:
def run_model(model):
    torch.manual_seed(42)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)  # Adjust step_size and gamma as needed
    criterion = nn.BCELoss()

    for epoch in tqdm(range(10)):
        model.train()
        train_losses = []
        for data1, data2, label in balanced_data:

            optimizer.zero_grad()
            out = model(data1, data2)
            label = torch.tensor(label).view(1).float()
            loss = criterion(out.squeeze(0), label)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        scheduler.step()  # Add this line to update the learning rate

        model.eval()
        with torch.no_grad():
            val_losses = []
            correct = 0
            total = 0
            for data1, data2, label in flattened_val:
                out = model(data1, data2)
                label = torch.tensor(label).view(1).float()
                val_loss = criterion(out.squeeze(0), label)
                val_losses.append(val_loss.item())

                predictions = torch.round(out.squeeze())
                correct += (predictions == label).sum().item()
                total += 1

            val_loss = sum(val_losses) / len(val_losses)
            val_accuracy = correct / total

        print(f'Epoch: {epoch+1}, Training Loss: {sum(train_losses)/len(train_losses)}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

In [13]:
model = SiameseGNN(num_features=balanced_data[0][0].num_node_features)
run_model(model)

  0%|          | 0/10 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:22<03:22, 22.48s/it]

Epoch: 1, Training Loss: 0.7046204824254759, Validation Loss: 0.6963482688773762, Validation Accuracy: 0.21212121212121213


 20%|██        | 2/10 [00:44<02:58, 22.28s/it]

Epoch: 2, Training Loss: 0.6973265162278802, Validation Loss: 0.6801454578385209, Validation Accuracy: 0.6666666666666666


 30%|███       | 3/10 [01:06<02:34, 22.11s/it]

Epoch: 3, Training Loss: 0.6834073272074408, Validation Loss: 0.6554563384164463, Validation Accuracy: 0.696969696969697


 40%|████      | 4/10 [01:28<02:12, 22.12s/it]

Epoch: 4, Training Loss: 0.6665389199485208, Validation Loss: 0.6449837702693362, Validation Accuracy: 0.5757575757575758


 50%|█████     | 5/10 [01:49<01:48, 21.60s/it]

Epoch: 5, Training Loss: 0.6426320381273768, Validation Loss: 0.6846070709553632, Validation Accuracy: 0.5757575757575758


 60%|██████    | 6/10 [02:12<01:28, 22.04s/it]

Epoch: 6, Training Loss: 0.6157376412338564, Validation Loss: 0.6992193205338536, Validation Accuracy: 0.6060606060606061


 70%|███████   | 7/10 [02:34<01:06, 22.04s/it]

Epoch: 7, Training Loss: 0.5995126511596622, Validation Loss: 0.6645722147641759, Validation Accuracy: 0.6060606060606061


 80%|████████  | 8/10 [02:55<00:43, 21.83s/it]

Epoch: 8, Training Loss: 0.5667456304745251, Validation Loss: 0.6797616377924428, Validation Accuracy: 0.6212121212121212


 90%|█████████ | 9/10 [03:19<00:22, 22.49s/it]

Epoch: 9, Training Loss: 0.5486714685259507, Validation Loss: 0.7152796974687865, Validation Accuracy: 0.5454545454545454


100%|██████████| 10/10 [03:41<00:00, 22.15s/it]

Epoch: 10, Training Loss: 0.5102541625779303, Validation Loss: 1.0541933710602196, Validation Accuracy: 0.5454545454545454





In [10]:
model = SiameseGNN_SAGE(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:16<02:32, 16.92s/it]

Epoch: 1, Training Loss: 0.7012256620121398, Validation Loss: 0.6804764008883274, Validation Accuracy: 0.6515151515151515


 20%|██        | 2/10 [00:35<02:24, 18.04s/it]

Epoch: 2, Training Loss: 0.6770087722507369, Validation Loss: 0.6980912464134621, Validation Accuracy: 0.48484848484848486


 30%|███       | 3/10 [00:53<02:03, 17.71s/it]

Epoch: 3, Training Loss: 0.6293002815216373, Validation Loss: 0.6624012323039951, Validation Accuracy: 0.5606060606060606


 40%|████      | 4/10 [01:07<01:38, 16.36s/it]

Epoch: 4, Training Loss: 0.6013625392991665, Validation Loss: 0.7412629554217512, Validation Accuracy: 0.36363636363636365


 50%|█████     | 5/10 [01:21<01:17, 15.48s/it]

Epoch: 5, Training Loss: 0.5864994850365507, Validation Loss: 0.6933714330873706, Validation Accuracy: 0.48484848484848486


 60%|██████    | 6/10 [01:36<01:01, 15.34s/it]

Epoch: 6, Training Loss: 0.5671365443856744, Validation Loss: 0.8020840715052504, Validation Accuracy: 0.4696969696969697


 70%|███████   | 7/10 [01:50<00:44, 14.86s/it]

Epoch: 7, Training Loss: 0.5391776538753544, Validation Loss: 0.7895573675068039, Validation Accuracy: 0.5


 80%|████████  | 8/10 [02:03<00:28, 14.48s/it]

Epoch: 8, Training Loss: 0.5531026917431671, Validation Loss: 0.7773121060295538, Validation Accuracy: 0.48484848484848486


 90%|█████████ | 9/10 [02:17<00:14, 14.30s/it]

Epoch: 9, Training Loss: 0.5213070801635351, Validation Loss: 0.7524650239131667, Validation Accuracy: 0.48484848484848486


100%|██████████| 10/10 [02:32<00:00, 15.23s/it]

Epoch: 10, Training Loss: 0.47979829240350697, Validation Loss: 0.7525106672141136, Validation Accuracy: 0.5909090909090909





## sGNN with Feature Subset

In [14]:
with open("feature_dicts/filtered_features_dict.pkl", "rb") as f:
    feat_dict = pkl.load(f)

In [15]:
from torch_geometric.data import DataLoader
test_loader = DataLoader(test_graphs, batch_size=4)
train_loader = DataLoader(train_graphs, batch_size=4)
val_loader = DataLoader(val_graphs, batch_size=4)



In [16]:
def add_features(years, graphs, feat_dict, dim):

    zeros = torch.zeros(dim)

    for i in range(len(years)):
        new_x = torch.empty(0, dim)
        year = years[i]

        feat_dict_year = feat_dict[year].combined_features

        for j, country in enumerate(all_nodes):
            if j == 0:
                new_x = torch.stack([zeros])

            elif country in feat_dict_year["country_code"].values:
                tensor_before = graphs[i].x[j]
                country_row = feat_dict_year[feat_dict_year["country_code"] == country]
                country_row = country_row.drop(columns = ["prev_gdp_growth", "country_code", "current_gdp_growth"])
                row_values = country_row.values.tolist()
                row_tensor = torch.tensor(row_values)[0]
                combined_values = torch.cat((tensor_before, row_tensor))

                new_x = torch.cat((new_x, combined_values.unsqueeze(0)), dim=0)

            else:
                new_x = torch.cat((new_x, zeros.unsqueeze(0)), dim=0)

        graphs[i].x = new_x

    return graphs

In [17]:
train_graphs = add_features(train_years, train_graphs, feat_dict, 59)
val_graphs = add_features(val_years, val_graphs, feat_dict, 59)
test_graphs = add_features(test_years, test_graphs, feat_dict, 59)

In [18]:
train_pairs = get_year_pairs(train_years)
val_pairs = get_year_pairs(val_years)

train_y = check_crisis_years(train_pairs, crisis_years)
val_y = check_crisis_years(val_pairs, crisis_years)

train_loader_pairs = get_loader_pairs(train_loader.dataset)
val_loader_pairs = get_loader_pairs(val_loader.dataset)

In [19]:
train_graph_pairs = get_graph_pairs(train_graphs)
val_graph_pairs = get_graph_pairs(val_graphs)

train_torch_y = torch.tensor(np.array(train_y))
val_torch_y = torch.tensor(np.array(val_y))

labeled_pairs_train = list(zip(train_loader_pairs, train_y))
labeled_pairs_val = list(zip(val_loader_pairs, val_y))

flattened_train = [(a, b, c) for ((a, b), c) in labeled_pairs_train]
flattened_val  = [(a, b, c) for ((a, b), c) in labeled_pairs_val]

In [20]:
positive_samples = [item for item in flattened_train if item[2] == 1]
negative_samples = [item for item in flattened_train if item[2] == 0]

# Calculate the difference in count
diff = len(negative_samples) - len(positive_samples)

# Upsample positive samples
if diff > 0:
    positive_samples_upsampled = positive_samples * (diff // len(positive_samples)) + random.sample(positive_samples, diff % len(positive_samples))
    balanced_data = negative_samples + positive_samples + positive_samples_upsampled
else:
    balanced_data = data

# Shuffle the balanced dataset
random.shuffle(balanced_data)

In [21]:
model = SiameseGNN(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:19<02:53, 19.26s/it]

Epoch: 1, Training Loss: 0.7038446231717952, Validation Loss: 0.6924803627259803, Validation Accuracy: 0.6060606060606061


 20%|██        | 2/10 [00:39<02:36, 19.62s/it]

Epoch: 2, Training Loss: 0.6949782815941593, Validation Loss: 0.6901179813977444, Validation Accuracy: 0.5151515151515151


 30%|███       | 3/10 [00:59<02:19, 19.87s/it]

Epoch: 3, Training Loss: 0.6724205428106278, Validation Loss: 0.7878390424179308, Validation Accuracy: 0.3181818181818182


 40%|████      | 4/10 [01:18<01:57, 19.60s/it]

Epoch: 4, Training Loss: 0.6365163160422653, Validation Loss: 0.7015447364837835, Validation Accuracy: 0.5303030303030303


 50%|█████     | 5/10 [01:38<01:38, 19.61s/it]

Epoch: 5, Training Loss: 0.6034843487080485, Validation Loss: 0.7215547635445766, Validation Accuracy: 0.5151515151515151


 60%|██████    | 6/10 [01:57<01:18, 19.53s/it]

Epoch: 6, Training Loss: 0.5970811171649608, Validation Loss: 0.7800996714469158, Validation Accuracy: 0.3787878787878788


 70%|███████   | 7/10 [02:16<00:58, 19.49s/it]

Epoch: 7, Training Loss: 0.582973517335, Validation Loss: 0.6598481126129627, Validation Accuracy: 0.6515151515151515


 80%|████████  | 8/10 [02:35<00:38, 19.16s/it]

Epoch: 8, Training Loss: 0.5522051124613856, Validation Loss: 0.6293981331259464, Validation Accuracy: 0.6818181818181818


 90%|█████████ | 9/10 [02:54<00:19, 19.26s/it]

Epoch: 9, Training Loss: 0.5485696775428442, Validation Loss: 0.662395829617074, Validation Accuracy: 0.6818181818181818


100%|██████████| 10/10 [03:16<00:00, 19.61s/it]

Epoch: 10, Training Loss: 0.5306971845512967, Validation Loss: 0.7095640517425525, Validation Accuracy: 0.6666666666666666





In [22]:
model = SiameseGNN_SAGE(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:16<02:25, 16.17s/it]

Epoch: 1, Training Loss: 0.705428816654064, Validation Loss: 0.6745922122940873, Validation Accuracy: 0.803030303030303


 20%|██        | 2/10 [00:31<02:04, 15.61s/it]

Epoch: 2, Training Loss: 0.6771452085897471, Validation Loss: 0.746527910232544, Validation Accuracy: 0.25757575757575757


 30%|███       | 3/10 [00:45<01:45, 15.11s/it]

Epoch: 3, Training Loss: 0.6439833628561994, Validation Loss: 0.7567781769868099, Validation Accuracy: 0.3333333333333333


 40%|████      | 4/10 [01:00<01:29, 14.88s/it]

Epoch: 4, Training Loss: 0.6231136586931016, Validation Loss: 0.7366615177103968, Validation Accuracy: 0.3787878787878788


 50%|█████     | 5/10 [01:15<01:13, 14.78s/it]

Epoch: 5, Training Loss: 0.5932009554164916, Validation Loss: 0.7465092153711752, Validation Accuracy: 0.4393939393939394


 60%|██████    | 6/10 [01:29<00:58, 14.68s/it]

Epoch: 6, Training Loss: 0.579860189268783, Validation Loss: 0.7722927503513567, Validation Accuracy: 0.3484848484848485


 70%|███████   | 7/10 [01:47<00:47, 15.84s/it]

Epoch: 7, Training Loss: 0.5589611586619015, Validation Loss: 0.7351954425826217, Validation Accuracy: 0.4696969696969697


 80%|████████  | 8/10 [02:04<00:32, 16.10s/it]

Epoch: 8, Training Loss: 0.5447881424084277, Validation Loss: 0.7598140523063414, Validation Accuracy: 0.3939393939393939


 90%|█████████ | 9/10 [02:18<00:15, 15.60s/it]

Epoch: 9, Training Loss: 0.5260401090922934, Validation Loss: 0.7892862546511672, Validation Accuracy: 0.4696969696969697


100%|██████████| 10/10 [02:37<00:00, 15.77s/it]

Epoch: 10, Training Loss: 0.5277270550420236, Validation Loss: 0.7541533000202794, Validation Accuracy: 0.3939393939393939





## Random Feature Subset

In [23]:
with open("feature_dicts/random_features_dict.pkl", "rb") as f:
    feat_dict_random = pkl.load(f)

In [25]:
with open("src/pygcn/train_graphs.pickle", "rb") as f:
    train_graphs = pkl.load(f)

with open("src/pygcn/val_graphs.pickle", "rb") as f:  
    val_graphs = pkl.load(f)

with open("src/pygcn/test_graphs.pickle", "rb") as f:         
    test_graphs = pkl.load(f)

In [26]:
train_graphs = add_features(train_years, train_graphs, feat_dict_random, 434)
val_graphs = add_features(val_years, val_graphs, feat_dict_random, 434)
test_graphs = add_features(test_years, test_graphs, feat_dict_random, 434)

In [27]:
from torch_geometric.data import DataLoader
test_loader = DataLoader(test_graphs, batch_size=4)
train_loader = DataLoader(train_graphs, batch_size=4)
val_loader = DataLoader(val_graphs, batch_size=4)



In [28]:
def check_crisis_years(year_pairs, crisis_years):
    result = []
    for pair in year_pairs:
        start, end = pair
        # Check if any crisis year is between the pair or equals the later year
        if any(start < year <= end for year in crisis_years):
            result.append(0)
        else:
            result.append(1)
    return result

In [29]:
train_pairs = get_year_pairs(train_years)
val_pairs = get_year_pairs(val_years)

train_y = check_crisis_years(train_pairs, crisis_years)
val_y = check_crisis_years(val_pairs, crisis_years)

train_loader_pairs = get_loader_pairs(train_loader.dataset)
val_loader_pairs = get_loader_pairs(val_loader.dataset)

In [30]:
train_graph_pairs = get_graph_pairs(train_graphs)
val_graph_pairs = get_graph_pairs(val_graphs)

train_torch_y = torch.tensor(np.array(train_y))
val_torch_y = torch.tensor(np.array(val_y))

labeled_pairs_train = list(zip(train_loader_pairs, train_y))
labeled_pairs_val = list(zip(val_loader_pairs, val_y))

flattened_train = [(a, b, c) for ((a, b), c) in labeled_pairs_train]
flattened_val  = [(a, b, c) for ((a, b), c) in labeled_pairs_val]

In [31]:
positive_samples = [item for item in flattened_train if item[2] == 1]
negative_samples = [item for item in flattened_train if item[2] == 0]

# Calculate the difference in count
diff = len(negative_samples) - len(positive_samples)

# Upsample positive samples
if diff > 0:
    positive_samples_upsampled = positive_samples * (diff // len(positive_samples)) + random.sample(positive_samples, diff % len(positive_samples))
    balanced_data = negative_samples + positive_samples + positive_samples_upsampled
else:
    balanced_data = data

# Shuffle the balanced dataset
random.shuffle(balanced_data)

In [32]:
model = SiameseGNN(num_features=balanced_data[0][0].num_node_features)
run_model(model)

  0%|          | 0/10 [00:00<?, ?it/s]

 10%|█         | 1/10 [00:20<03:07, 20.81s/it]

Epoch: 1, Training Loss: 0.7018044337078377, Validation Loss: 0.8041994102073439, Validation Accuracy: 0.19696969696969696


 20%|██        | 2/10 [00:41<02:45, 20.71s/it]

Epoch: 2, Training Loss: 0.698131216664528, Validation Loss: 0.8112769741000552, Validation Accuracy: 0.19696969696969696


 30%|███       | 3/10 [01:03<02:28, 21.16s/it]

Epoch: 3, Training Loss: 0.6775683034901265, Validation Loss: 0.7601045948086362, Validation Accuracy: 0.18181818181818182


 40%|████      | 4/10 [01:23<02:05, 20.93s/it]

Epoch: 4, Training Loss: 0.6461101838661681, Validation Loss: 0.7332550946510199, Validation Accuracy: 0.42424242424242425


 50%|█████     | 5/10 [01:45<01:46, 21.30s/it]

Epoch: 5, Training Loss: 0.6068286326791924, Validation Loss: 0.734976280819286, Validation Accuracy: 0.45454545454545453


 60%|██████    | 6/10 [02:06<01:24, 21.09s/it]

Epoch: 6, Training Loss: 0.5861677039654283, Validation Loss: 0.6585505241245935, Validation Accuracy: 0.5454545454545454


 70%|███████   | 7/10 [02:26<01:02, 20.74s/it]

Epoch: 7, Training Loss: 0.5633899032829972, Validation Loss: 0.7210089893955173, Validation Accuracy: 0.5


 80%|████████  | 8/10 [02:46<00:41, 20.50s/it]

Epoch: 8, Training Loss: 0.5387923845850643, Validation Loss: 0.6957595527849414, Validation Accuracy: 0.5909090909090909


 90%|█████████ | 9/10 [03:09<00:21, 21.22s/it]

Epoch: 9, Training Loss: 0.5234629732318818, Validation Loss: 0.7294799059516552, Validation Accuracy: 0.4696969696969697


100%|██████████| 10/10 [03:32<00:00, 21.25s/it]

Epoch: 10, Training Loss: 0.5124196937683512, Validation Loss: 0.6465034236510595, Validation Accuracy: 0.5454545454545454





In [33]:
model = SiameseGNN_SAGE(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:29<04:27, 29.76s/it]

Epoch: 1, Training Loss: 0.7038762663540087, Validation Loss: 0.8181786871317661, Validation Accuracy: 0.19696969696969696


 20%|██        | 2/10 [00:58<03:52, 29.01s/it]

Epoch: 2, Training Loss: 0.6988503450480585, Validation Loss: 0.8089826919815757, Validation Accuracy: 0.19696969696969696


 30%|███       | 3/10 [01:20<03:00, 25.85s/it]

Epoch: 3, Training Loss: 0.6960537121077495, Validation Loss: 0.7643141547838846, Validation Accuracy: 0.2727272727272727


 40%|████      | 4/10 [01:44<02:30, 25.01s/it]

Epoch: 4, Training Loss: 0.6905097763760281, Validation Loss: 0.7489413889971647, Validation Accuracy: 0.3787878787878788


 50%|█████     | 5/10 [02:06<02:00, 24.16s/it]

Epoch: 5, Training Loss: 0.6803670633969136, Validation Loss: 0.7099146786512751, Validation Accuracy: 0.3787878787878788


 60%|██████    | 6/10 [02:28<01:33, 23.43s/it]

Epoch: 6, Training Loss: 0.6643559580948385, Validation Loss: 0.7255922693646315, Validation Accuracy: 0.3787878787878788


 70%|███████   | 7/10 [02:50<01:08, 22.97s/it]

Epoch: 7, Training Loss: 0.6362668401956536, Validation Loss: 0.6766378241979206, Validation Accuracy: 0.45454545454545453


 80%|████████  | 8/10 [03:12<00:45, 22.52s/it]

Epoch: 8, Training Loss: 0.6265352101575358, Validation Loss: 0.7241720527720948, Validation Accuracy: 0.5303030303030303


 90%|█████████ | 9/10 [03:34<00:22, 22.29s/it]

Epoch: 9, Training Loss: 0.5908057982157408, Validation Loss: 0.7568831684872169, Validation Accuracy: 0.5909090909090909


100%|██████████| 10/10 [03:55<00:00, 23.56s/it]

Epoch: 10, Training Loss: 0.5549839064240597, Validation Loss: 0.8763748495081073, Validation Accuracy: 0.5303030303030303





## All Features

In [34]:
with open("feature_dicts/features_dict.pkl", "rb") as f:
    feat_dict_all = pkl.load(f)

In [35]:
with open("src/pygcn/train_graphs.pickle", "rb") as f:
    train_graphs = pkl.load(f)

with open("src/pygcn/val_graphs.pickle", "rb") as f:  
    val_graphs = pkl.load(f)

with open("src/pygcn/test_graphs.pickle", "rb") as f:         
    test_graphs = pkl.load(f)

In [41]:
def add_features(years, graphs, feat_dict, dim):

    zeros = torch.zeros(dim)

    for i in range(len(years)):
        new_x = torch.empty(0, dim)
        year = years[i]

        feat_dict_year = feat_dict[year].combined_features

        for j, country in enumerate(all_nodes):
            if j == 0:
                new_x = torch.stack([zeros])

            elif country in feat_dict_year["country_code"].values:
                tensor_before = graphs[i].x[j]
                country_row = feat_dict_year[feat_dict_year["country_code"] == country]
                country_row = country_row.drop(columns = ["prev_gdp_growth", "country_code", "current_gdp_growth"])
                row_values = country_row.values.tolist()
                row_tensor = torch.tensor(row_values)[0]
                combined_values = torch.cat((tensor_before, row_tensor))

                new_x = torch.cat((new_x, combined_values.unsqueeze(0)), dim=0)

            else:
                new_x = torch.cat((new_x, zeros.unsqueeze(0)), dim=0)

        graphs[i].x = new_x

    return graphs

In [42]:
train_graphs = add_features(train_years, train_graphs, feat_dict_all)
val_graphs = add_features(val_years, val_graphs, feat_dict_all)
test_graphs = add_features(test_years, test_graphs, feat_dict_all)

In [43]:
from torch_geometric.data import DataLoader
test_loader = DataLoader(test_graphs, batch_size=4)
train_loader = DataLoader(train_graphs, batch_size=4)
val_loader = DataLoader(val_graphs, batch_size=4)



In [44]:
train_pairs = get_year_pairs(train_years)
val_pairs = get_year_pairs(val_years)

train_y = check_crisis_years(train_pairs, crisis_years)
val_y = check_crisis_years(val_pairs, crisis_years)

train_loader_pairs = get_loader_pairs(train_loader.dataset)
val_loader_pairs = get_loader_pairs(val_loader.dataset)

In [45]:
train_graph_pairs = get_graph_pairs(train_graphs)
val_graph_pairs = get_graph_pairs(val_graphs)

train_torch_y = torch.tensor(np.array(train_y))
val_torch_y = torch.tensor(np.array(val_y))

labeled_pairs_train = list(zip(train_loader_pairs, train_y))
labeled_pairs_val = list(zip(val_loader_pairs, val_y))

flattened_train = [(a, b, c) for ((a, b), c) in labeled_pairs_train]
flattened_val  = [(a, b, c) for ((a, b), c) in labeled_pairs_val]

In [46]:
positive_samples = [item for item in flattened_train if item[2] == 1]
negative_samples = [item for item in flattened_train if item[2] == 0]

# Calculate the difference in count
diff = len(negative_samples) - len(positive_samples)

# Upsample positive samples
if diff > 0:
    positive_samples_upsampled = positive_samples * (diff // len(positive_samples)) + random.sample(positive_samples, diff % len(positive_samples))
    balanced_data = negative_samples + positive_samples + positive_samples_upsampled
else:
    balanced_data = data

# Shuffle the balanced dataset
random.shuffle(balanced_data)

In [47]:
model = SiameseGNN(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:28<04:18, 28.70s/it]

Epoch: 1, Training Loss: 0.7038765474718217, Validation Loss: 0.6810447085987438, Validation Accuracy: 0.803030303030303


 20%|██        | 2/10 [00:53<03:31, 26.40s/it]

Epoch: 2, Training Loss: 0.6960015451466596, Validation Loss: 0.6861390462427428, Validation Accuracy: 0.6666666666666666


 30%|███       | 3/10 [01:18<02:59, 25.68s/it]

Epoch: 3, Training Loss: 0.692367503401364, Validation Loss: 0.6572099048079867, Validation Accuracy: 0.7121212121212122


 40%|████      | 4/10 [01:43<02:32, 25.36s/it]

Epoch: 4, Training Loss: 0.6717249420099324, Validation Loss: 0.6290249801946409, Validation Accuracy: 0.696969696969697


 50%|█████     | 5/10 [02:07<02:05, 25.15s/it]

Epoch: 5, Training Loss: 0.6480736955128915, Validation Loss: 0.6430482489593101, Validation Accuracy: 0.6666666666666666


 60%|██████    | 6/10 [02:32<01:40, 25.00s/it]

Epoch: 6, Training Loss: 0.6153214025305725, Validation Loss: 0.6232328715197968, Validation Accuracy: 0.6666666666666666


 70%|███████   | 7/10 [02:59<01:17, 25.70s/it]

Epoch: 7, Training Loss: 0.5915779746291873, Validation Loss: 0.5917616424461206, Validation Accuracy: 0.696969696969697


 80%|████████  | 8/10 [03:24<00:50, 25.41s/it]

Epoch: 8, Training Loss: 0.5808850291128802, Validation Loss: 0.5979167244425325, Validation Accuracy: 0.696969696969697


 90%|█████████ | 9/10 [03:49<00:25, 25.17s/it]

Epoch: 9, Training Loss: 0.567361297784827, Validation Loss: 0.6328929368067872, Validation Accuracy: 0.6212121212121212


100%|██████████| 10/10 [04:17<00:00, 25.78s/it]

Epoch: 10, Training Loss: 0.5595406232180244, Validation Loss: 0.6325685942376201, Validation Accuracy: 0.5909090909090909





In [48]:
model = SiameseGNN_SAGE(num_features=balanced_data[0][0].num_node_features)
run_model(model)

 10%|█         | 1/10 [00:53<07:58, 53.12s/it]

Epoch: 1, Training Loss: 0.6997897207562687, Validation Loss: 0.6875970408771978, Validation Accuracy: 0.803030303030303


 20%|██        | 2/10 [01:46<07:04, 53.06s/it]

Epoch: 2, Training Loss: 0.6955334810834182, Validation Loss: 0.686434802683917, Validation Accuracy: 0.7424242424242424


 30%|███       | 3/10 [02:38<06:10, 52.90s/it]

Epoch: 3, Training Loss: 0.6917367833022021, Validation Loss: 0.6708159130631071, Validation Accuracy: 0.803030303030303


 40%|████      | 4/10 [03:31<05:17, 52.95s/it]

Epoch: 4, Training Loss: 0.6810245707079217, Validation Loss: 0.6506446861859524, Validation Accuracy: 0.803030303030303


 50%|█████     | 5/10 [04:25<04:25, 53.07s/it]

Epoch: 5, Training Loss: 0.6649606728501487, Validation Loss: 0.6540512085864039, Validation Accuracy: 0.6818181818181818


 60%|██████    | 6/10 [05:18<03:32, 53.03s/it]

Epoch: 6, Training Loss: 0.6487234287530358, Validation Loss: 0.6536229751778372, Validation Accuracy: 0.48484848484848486


 70%|███████   | 7/10 [06:11<02:39, 53.01s/it]

Epoch: 7, Training Loss: 0.6490531198927301, Validation Loss: 0.6537377166025566, Validation Accuracy: 0.5909090909090909


 80%|████████  | 8/10 [07:03<01:45, 52.93s/it]

Epoch: 8, Training Loss: 0.6225556447579149, Validation Loss: 0.6484664204445753, Validation Accuracy: 0.5909090909090909


 90%|█████████ | 9/10 [07:57<00:53, 53.02s/it]

Epoch: 9, Training Loss: 0.6143977640171265, Validation Loss: 0.5539399132584081, Validation Accuracy: 0.7272727272727273


100%|██████████| 10/10 [08:49<00:00, 52.98s/it]

Epoch: 10, Training Loss: 0.5987672834107053, Validation Loss: 0.5931275310841474, Validation Accuracy: 0.6666666666666666



