In [2]:
import torch
# Directory prefix from which the data_<i>.pt sample files are loaded below.
data_dir = "./graph_data_files/"

# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

True


In [3]:
# Small constant; not referenced by any cell visible here — presumably meant
# to guard a division (TODO confirm, or remove if unused).
eps = 1e-6

In [4]:
# Load dataset
import os

# Fall back to the CPU when no CUDA device is visible, so the notebook still
# runs on hosts without a GPU (behaviour is unchanged when CUDA is available).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Upper bound on the sample index; files are named data_0.pt .. data_<N-1>.pt.
data_size = 6400
dataset = []

for i in range(data_size):
    data_path = os.path.join(data_dir, f"data_{i}.pt")
    # Missing sample files are tolerated and skipped; the printed length
    # below shows how many were actually loaded.
    if not os.path.exists(data_path):
        continue
    dd = torch.load(data_path)
    dataset.append(dd)
print(len(dataset))


  from .autonotebook import tqdm as notebook_tqdm


6400


In [5]:
from torch_geometric.loader import DataLoader

# First 5400 samples are used for training, the remainder for testing.
train_dataset, test_dataset = dataset[:5400], dataset[5400:]

# Both loaders draw shuffled mini-batches of 64 graphs.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=64)

In [6]:
##  The Graph Neural Network for Task 1  ####
import torch

from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from torch.nn import BatchNorm1d


class GCN(torch.nn.Module):
    """Two-layer GCN followed by mean pooling and an MLP regression head.

    Each graph-convolution stage is GCNConv -> BatchNorm1d -> ReLU. The node
    embeddings are then averaged with ``global_mean_pool`` and passed through
    a three-layer MLP (``generator``) that ends in a single output unit.

    Args:
        hidden_channels: Width of both GCNConv layers and of the pooled
            embedding fed to the MLP head.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # The first convolution is fixed to 2 input channels per node.
        self.conv1 = GCNConv(2, hidden_channels)
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = BatchNorm1d(hidden_channels)
        # Layers are registered in this order so the Sequential indices (0..4)
        # stay stable for saved state dicts.
        head_layers = [
            Linear(hidden_channels, 32),
            torch.nn.ReLU(),
            Linear(32, 32),
            torch.nn.ReLU(),
            Linear(32, 1),
        ]
        self.generator = torch.nn.Sequential(*head_layers)

    def forward(self, x, edge_index, batch):
        """Run the network; ``batch`` is passed to ``global_mean_pool`` for pooling."""
        hidden = F.relu(self.bn1(self.conv1(x, edge_index)))
        hidden = F.relu(self.bn2(self.conv2(hidden, edge_index)))
        pooled = global_mean_pool(hidden, batch)
        return self.generator(pooled)

# Build the network with 32 hidden channels and show its layer summary.
model = GCN(hidden_channels=32)
print(model)

GCN(
  (conv1): GCNConv(2, 32)
  (bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): GCNConv(32, 32)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (generator): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [10]:
# Fresh network initialised from the previously saved checkpoint.
model = GCN(hidden_channels = 32)

# map_location remaps the checkpoint tensors onto `device`, so loading does
# not depend on the device the checkpoint was originally saved from.
model.load_state_dict(torch.load("model_bn.pth", map_location = device))
model.to(device)

# Adam optimiser over all model parameters; mean-squared-error objective.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.002)
criterion = torch.nn.MSELoss()


##################################################################################################

def train():
    """Run one pass over ``train_loader``, updating the global ``model``.

    For every mini-batch: forward pass, loss from ``criterion`` against
    ``batch.y``, backward pass, optimiser step, then gradient reset.
    """
    model.train()

    for batch in train_loader:
        batch = batch.to(device)
        prediction = model(batch.x, batch.edge_index, batch.batch).squeeze()
        batch_loss = criterion(prediction, batch.y)
        batch_loss.backward()
        optimizer.step()
        # Clear gradients so the next batch starts from zero.
        optimizer.zero_grad()

def test(loader):
    """Evaluate the global ``model`` on every batch yielded by ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y`` and supporting ``.to(device)``.

    Returns:
        tuple: ``(smape, mse)`` as tensors. ``smape`` is the average over all
        targets of ``|out - y| / (|y| + |out|)`` (a fraction, not a
        percentage); ``mse`` is the average of the per-batch ``criterion``
        value weighted by the number of targets in each batch.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.eval()
    tot_graphs = 0
    tot_MSE = 0
    tot_SMAPE = 0

    # Evaluation never calls backward(). Without no_grad the running totals
    # would keep every batch's autograd graph alive for the whole pass.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data.x, data.edge_index, data.batch)
            out = out.squeeze()

            # criterion averages over the batch; re-weight by the batch size so
            # that a smaller final batch does not skew the overall mean.
            loss = criterion(out, data.y)
            tot_MSE += loss * data.y.shape[0]

            smape = torch.sum(torch.abs(out - data.y) / (torch.abs(data.y) + torch.abs(out)))
            tot_SMAPE += smape

            tot_graphs += data.y.shape[0]

    return (tot_SMAPE / tot_graphs, tot_MSE / tot_graphs)
    
##################################################################################################
    
# Number of training epochs.
_epoch = 15

# Lowest test MSE seen so far; used to keep only the best checkpoint.
best_loss = float('inf')

for epoch in range(_epoch):
    train()
    # NOTE: the first value returned by test() is the symmetric error
    # |out - y| / (|y| + |out|), even though it is printed as "MAPE".
    (mape, mse) = test(test_loader)
    print(f"epoch = {epoch}, MAPE loss = {mape.item() * 100} %, MSE loss = {mse}")
    if mse < best_loss :
        # Record the new best. Without this update the comparison against
        # +inf is always true and a worse later epoch overwrites the
        # checkpoint.
        best_loss = float(mse)
        torch.save(model.state_dict(), 'model_bn_predict_diff.pth')


epoch = 0, MAPE loss = 68.20688843727112 %, MSE loss = 0.012361928820610046
epoch = 1, MAPE loss = 63.93135190010071 %, MSE loss = 0.010783136822283268
epoch = 2, MAPE loss = 64.41999673843384 %, MSE loss = 0.011614982038736343
epoch = 3, MAPE loss = 63.46731781959534 %, MSE loss = 0.010882344096899033
epoch = 4, MAPE loss = 67.23232865333557 %, MSE loss = 0.012490691617131233
epoch = 5, MAPE loss = 63.1655216217041 %, MSE loss = 0.01112994272261858
epoch = 6, MAPE loss = 69.60335969924927 %, MSE loss = 0.013543766923248768
epoch = 7, MAPE loss = 65.35255908966064 %, MSE loss = 0.011233385652303696
epoch = 8, MAPE loss = 67.72045493125916 %, MSE loss = 0.012265411205589771
epoch = 9, MAPE loss = 62.24052906036377 %, MSE loss = 0.0108110336586833
epoch = 10, MAPE loss = 62.89258599281311 %, MSE loss = 0.0104537233710289
epoch = 11, MAPE loss = 65.91620445251465 %, MSE loss = 0.014935337007045746
epoch = 12, MAPE loss = 62.39105463027954 %, MSE loss = 0.010864432901144028
epoch = 13, MAP

In [7]:
# Score the saved checkpoint on the CPU, one test graph at a time.
test_model = GCN(hidden_channels = 32)
# This cell never moves the model to a GPU, so map the checkpoint tensors to
# the CPU explicitly; loading then works regardless of where it was saved.
test_model.load_state_dict(torch.load("model_bn.pth", map_location = "cpu"))
test_model.eval()

# Evaluate every graph in the test split instead of assuming there are
# exactly 1000 of them.
eval_graphs = test_loader.dataset
n_eval = len(eval_graphs)

smape_sum = 0.0
hit = 0
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i in range(n_eval):
        dd = eval_graphs[i]
        tar = dd.y.item()
        res = test_model(dd.x, dd.edge_index, dd.batch)
        y_pred = res.item()
        this_smape = abs((y_pred - tar) / (abs(y_pred) + abs(tar)))
        smape_sum += this_smape
        # Count samples whose symmetric error is below 20 %.
        if this_smape < 0.2:
            hit += 1

# Mean SMAPE expressed as a percentage; max() avoids dividing by zero when the
# test split is empty.
tot = 100.0 * smape_sum / max(n_eval, 1)

print(f"average SMAPE = {tot}%")
print(f"# samples of SMAPE <= 20% = {hit}")

average SMAPE = 49.74025649864732%
# samples of SMAPE <= 20% = 327


In [9]:
# Show target, prediction and per-sample SMAPE for the first 20 test graphs.
for idx in range(20):
    sample = test_loader.dataset[idx]
    target = sample.y.item()
    prediction = test_model(sample.x, sample.edge_index, sample.batch).item()
    sample_smape = abs((prediction - target) / (abs(prediction) + abs(target)))
    print(f"target = {target}, res = {prediction}, SMAPE = {sample_smape * 100} %")

None
target = 0.013895466923713684, res = 0.10496320575475693, SMAPE = 76.61850564106017 %
