In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
from collections import defaultdict
import torch
import os   
import pandas as pd
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data, InMemoryDataset
import torch.nn.functional as F
from torch_geometric.data import DataLoader
from torch.utils.data import random_split
from cap_dataset import CascadeRegression
from cap_model import GAT

In [5]:
# Show the current working directory; the relative "data/..." paths passed to
# the dataset constructors in this notebook are resolved against it.
print(os.getcwd())

/Users/rezatabrizi/dev/Repos/CAPP


In [6]:
# Both datasets are constructed with the same arguments and differ only in `task`.
fb_shared_kwargs = dict(
    root="data",
    name="facebook",
    edge_index_path="data/raw/facebook/adj.txt",
    observation=3,
)
fb_regression_ds = CascadeRegression(task="regression", **fb_shared_kwargs)
fb_classification_ds = CascadeRegression(task="classification", **fb_shared_kwargs)

In [7]:
# Sanity check: print the sample at index 1, then row 1645 of its `x` tensor.
curr_ds = fb_regression_ds
print(curr_ds[1], curr_ds[1].x[1645], sep="\n")

Data(x=[4039, 4], edge_index=[2, 176468], y=[1], cascade_name='1')
tensor([1.3868e-02, 3.9042e-06, 1.5299e-05, 1.0000e+00])


In [8]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the GAT from cap_model with the dataset's feature count as its first
# argument and move it to the selected device in one step.
model = GAT(fb_regression_ds.num_features, 64, 1, 8).to(device)

# Adam with L2 regularisation applied through weight_decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=5e-4,
)



In [9]:
# Split the regression dataset 70% / 15% / remainder into train / validation / test.
# The split is driven by a dedicated, seeded generator so that the same samples
# land in the same subset after every kernel restart; without it, validation and
# test metrics from different runs are measured on different data.
SPLIT_SEED = 42

total_size = len(fb_regression_ds)
train_size = int(0.7 * total_size)
valid_size = int(0.15 * total_size)
# The test split absorbs the rounding remainder so the three sizes always sum to total_size.
test_size = total_size - train_size - valid_size

train_dataset, valid_dataset, test_dataset = random_split(
    fb_regression_ds,
    [train_size, valid_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles. The evaluation loaders keep a fixed order
# so the reported mean of per-batch metrics is identical on every evaluation pass.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)



In [16]:
def mrse_loss(output, target):
    """Squared relative error for a single prediction.

    Returns ``((output - target) / target) ** 2`` when ``target`` is non-zero,
    and the plain squared error ``(output - target) ** 2`` when ``target`` is 0
    (where the relative error is undefined).
    """
    error = output - target
    if target == 0:
        # No meaningful relative error for a zero target: use the absolute one.
        return error ** 2
    return (error / target) ** 2

def msre_loss_batch(out, target):
    """Mean squared relative error over a batch.

    Entries whose target is non-zero contribute ``((out - target) / target) ** 2``;
    entries whose target is exactly 0 contribute the plain squared error instead.
    The per-entry values are averaged and returned as a scalar tensor.
    """
    zero_target = target == 0
    has_target = ~zero_target

    per_entry = torch.zeros_like(out)
    # Relative branch: only touches entries with a non-zero denominator.
    relative_residual = (out[has_target] - target[has_target]) / target[has_target]
    per_entry[has_target] = relative_residual.pow(2)
    # Absolute branch: fallback for zero targets.
    per_entry[zero_target] = (out[zero_target] - target[zero_target]).pow(2)

    return per_entry.mean()

def mape_loss_batch(out, target):
    """Mean absolute percentage error over a batch.

    Entries whose target is non-zero contribute ``|out - target| / |target|``;
    entries whose target is exactly 0 contribute the plain absolute error
    ``|out - target|``. The per-entry values are averaged and returned as a
    scalar tensor.

    The denominator is the magnitude of the target, so the result is always
    non-negative, including for negative targets.
    """
    nonzero_mask = (target != 0)

    loss = torch.zeros_like(out)
    # Divide by |target|, not target: a signed denominator would make the
    # "absolute" error negative whenever the target is negative.
    loss[nonzero_mask] = (
        torch.abs(out[nonzero_mask] - target[nonzero_mask])
        / torch.abs(target[nonzero_mask])
    )
    loss[~nonzero_mask] = torch.abs(out[~nonzero_mask] - target[~nonzero_mask])

    return loss.mean()

def wroperc_error(out, target, epsilon):
    """Fraction of predictions whose error is at least ``epsilon``.

    For entries with a non-zero target the error is the relative error
    ``|out - target| / |target|``; for entries whose target is exactly 0 it is
    the absolute error ``|out - target|``. Each entry scores 1.0 when its error
    is ``>= epsilon`` and 0.0 otherwise; the mean of these scores is returned
    as a scalar float tensor.
    """
    nonzero_mask = (target != 0)

    loss = torch.zeros_like(out, dtype=torch.float)
    # Use |target| as the denominator: with a signed denominator a negative
    # target yields a negative ratio that can never reach epsilon, so a bad
    # prediction would silently be counted as correct.
    relative_error = (
        torch.abs(out[nonzero_mask] - target[nonzero_mask])
        / torch.abs(target[nonzero_mask])
    )
    loss[nonzero_mask] = (relative_error >= epsilon).float()
    loss[~nonzero_mask] = (torch.abs(out[~nonzero_mask] - target[~nonzero_mask]) >= epsilon).float()

    return loss.mean()

In [17]:
def train(trainLoader, log_every=40):
    """Run one training epoch over ``trainLoader`` and return averaged metrics.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``. The model is
    optimised on ``msre_loss_batch``; ``mape_loss_batch`` and ``wroperc_error``
    (with a fixed threshold of 0.5) are computed for monitoring only.

    Args:
        trainLoader: iterable of batches; each batch must support ``.to(device)``
            and expose the targets as ``.y``.
        log_every: print the mean MRSE of the last ``log_every`` batches every
            ``log_every`` batches. Pass 0 or None to disable progress output.

    Returns:
        Tuple ``(mrse, mape, wroperc)``: each metric summed over the batches and
        divided by ``len(trainLoader)``.
    """
    model.train()
    n = len(trainLoader)

    mrse_total_loss = 0.0
    mape_total_loss = 0.0
    wroperc_total_loss = 0.0
    # Only MRSE is reported in the periodic progress line, so it is the only
    # metric that needs a running (windowed) accumulator.
    mrse_running_loss = 0.0

    for idx, data in enumerate(trainLoader):
        optimizer.zero_grad()
        data = data.to(device)
        out = model(data)

        current_MRSE = msre_loss_batch(out, data.y)
        current_MRSE.backward()
        optimizer.step()

        # Monitoring metrics never receive gradients; compute them on the
        # detached prediction without recording an autograd graph.
        with torch.no_grad():
            prediction = out.detach()
            mape_value = mape_loss_batch(prediction, data.y).item()
            wroperc_value = wroperc_error(prediction, data.y, 0.5).item()

        # Read the loss value back once and reuse it for both accumulators.
        mrse_value = current_MRSE.item()
        mrse_total_loss += mrse_value
        mrse_running_loss += mrse_value
        mape_total_loss += mape_value
        wroperc_total_loss += wroperc_value

        if log_every and (idx + 1) % log_every == 0:
            print(f"Batch {idx+1}, MRSE Loss: {mrse_running_loss/log_every:.2f}")
            mrse_running_loss = 0.0

    return mrse_total_loss / n, mape_total_loss / n, wroperc_total_loss / n


def test(testLoader):
    """Evaluate the notebook-level ``model`` on ``testLoader`` without gradients.

    For every batch the MRSE, MAPE and WroPerc (threshold 0.5) metrics are
    computed; each is summed over the batches and divided by
    ``len(testLoader)``.

    Returns:
        Tuple ``(mrse, mape, wroperc)`` of the averaged metrics.
    """
    model.eval()
    n = len(testLoader)
    # Accumulators in the order: MRSE, MAPE, WroPerc.
    totals = [0, 0.0, 0.0]

    with torch.no_grad():
        for batch in testLoader:
            batch = batch.to(device)
            pred = model(batch)

            batch_metrics = (
                msre_loss_batch(pred, batch.y),
                mape_loss_batch(pred, batch.y),
                wroperc_error(pred, batch.y, 0.5),
            )
            for slot, metric in enumerate(batch_metrics):
                totals[slot] += metric.item()

    return totals[0] / n, totals[1] / n, totals[2] / n

In [18]:
# Train for 100 epochs, evaluating on the validation loader after every epoch.
# The per-epoch training metrics get their own names (train_*) so they do not
# rebind `mrse_loss`, which is also the name of a loss function defined earlier
# in this notebook.
for epoch in range(100):
    train_mrse, train_mape, train_wroperc = train(train_loader)
    v_mrse_loss, v_mape_loss, v_wroperc_loss = test(valid_loader)
    print(f'Epoch {epoch+1}: Train Loss: {train_mrse:.4f}, Val MRSE Loss: {v_mrse_loss:.4f}, Val MAPE Loss: {v_mape_loss:.4}, Val WroPerc: {v_wroperc_loss:.4}')

# Final held-out evaluation. `test` expects a loader that yields batches, so it
# must receive `test_loader`, not the raw `test_dataset` subset.
t_mrse_loss, t_mape_loss, t_wroperc_loss = test(test_loader)
print(f'Test MRSE Loss: {t_mrse_loss:.4f}, Test MAPE Loss: {t_mape_loss:.4}, Test WroPerc: {t_wroperc_loss:.4}')

Batch 40, MRSE Loss: 13400.46
Batch 80, MRSE Loss: 0.76
Batch 120, MRSE Loss: 0.72
Batch 160, MRSE Loss: 0.74
Batch 200, MRSE Loss: 0.75
Batch 240, MRSE Loss: 0.73
Batch 280, MRSE Loss: 0.72
Batch 320, MRSE Loss: 0.72
Epoch 1: Train Loss: 1629.8815, Val MRSE Loss: 0.7848, Val MAPE Loss: 0.8804, Val WroPerc: 1.0


1. Reduce the number of heads to get better performance.
2. Run 100 epochs.
3. Use graph norm.
4. See what the CoupledGNN data looks like.
5. See if there is an issue with `y` and the output.

Lab meeting 5/04
1. Remove graph pooling and just sum the n x 1 matrix after decoding. Apply a sigmoid after decoding.
2. Download the Weibo dataset and preprocess it.
3. Remove the activation between layers; it is only needed at the end.
4. Make sure the code is right, because it is only possible to get fluctuation if the code is not right.