In [1]:
import pandas as pd
import numpy as np
import torch
import import_ipynb
import utils
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import plotly.graph_objects as go
import copy
import random
import os

importing Jupyter notebook from utils.ipynb


Using backend: pytorch


In [2]:
def seed(seed = 100):
    random.seed(seed)
    np.random.seed(seed) 
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    os.environ["PYTHONHASHSEED"] = str(seed)
    
class EarlyStopping:
    """Stop training early if validation loss does not improve after given patience"""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            #print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''When validation loss begin to decrease, save model'''
        #if self.verbose:
        #    print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [3]:
''' calulate loss'''
def get_mob_loss(emb, src_matrix, dst_matrix):  
    prediction_src, prediction_dst = predict_distribution(emb, emb)
    src_loss = -torch.multiply(torch.Tensor(src_matrix).to('cuda:0'), prediction_src).sum(-1).sum(-1)
    dst_loss = -torch.multiply(torch.Tensor(dst_matrix).to('cuda:0'), prediction_dst).sum(-1).sum(-1)
    return src_loss + dst_loss   

def predict_distribution(emb_src, emb_dst):
    src = pairwise_inner_product(emb_src, emb_dst).T
    p = nn.LogSoftmax(dim=1)
    prediction_src = p(src)
    dst = pairwise_inner_product(emb_dst, emb_src).T
    q = nn.LogSoftmax(dim=1)
    prediction_dst = q(dst)
    return prediction_src, prediction_dst

def pairwise_inner_product(a, b):
    n, _ = list(a.size())
    b_ = torch.unsqueeze(b, 0)
    b_ = torch.tile(b_, [n, 1, 1])
    b_ = b_.permute(1, 0, 2)
    inner_product = torch.multiply(b_, a)
    inner_product = torch.sum(inner_product, axis=-1)
    return inner_product

def get_loss(embs, data):
    mob_loss = 0
    i = 0
    for year in range(2018, 2022):
        mob_loss += get_mob_loss(embs[i*77:(i+1)*77, :], data['src_matrix{}'.format(year)], data['dst_matrix{}'.format(year)])
        i += 1
    mob_loss = mob_loss / 4
    return mob_loss

In [4]:
def main(seed_no, k, patience, args):
    device = torch.device(args['device'])
    
    # set random seed for reproducibility
    seed(seed_no)
    
    # load dataset
    data = utils.load_dataset(k=k)

    # Downstream Applications
    income = data['income']
    unemployed = data['unemployed']
    education = data['education']
    poverty = data['poverty']
    white = data['white']
    black = data['black']
    hispanic = data['hispanic']
    value = data['value']
    
    # input node feature
    feats = data['feats'].to(args['device'])

    # Heterogenous Urban Graph (HUG)
    for year in range(2018,2022):
        g_ = data['heterograph_unified{}'.format(year)]
        g_ = g_[0][0].to(args['device'])
        data['heterograph_unified{}'.format(year)] = g_
    
    # Longitudinal Heterogenous Urban Graph (l-HUG)
    g_total = data['heterograph_unified_3'][0][0].to(args['device'])
    data['heterograph_unified_3'] = g_total
    
    # metapath for urban-HIN
    meta_paths = data['meta_paths']

    from model import HUGAT_J
    model = HUGAT_J(data, meta_paths=meta_paths,
                    in_size=feats.shape[-1],
                    hidden_size = args['hidden_size'],
                    out_size=args['out_size'],
                    num_heads=args['num_heads'],
                    dropout=args['dropout'])
    model.to(args['device'])

    # params
    total_params = sum(p.numel() for p in model.parameters())
    print("total_params: {:d}".format(total_params))

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],weight_decay=args['weight_decay'])
    
    # Early stopping 
    early_stopping = EarlyStopping(patience = patience, verbose = True)

    for epoch in range(args['num_epochs']):
        optimizer.zero_grad()
        pred, _ = model(data, feats)
        loss = get_loss(pred, data)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args['grad_norm'])
        optimizer.step()
        if (epoch+1)%100 ==0:
            print("Epoch {:04d} | Loss = {:.4f}".format(epoch+1, loss))
            
        early_stopping(loss, model)
        
        if early_stopping.early_stop:
            print('Early stopping')
            break
            
    print("Epoch {:04d} | Loss = {:.4f}".format(epoch+1, loss))
    
    with torch.no_grad():
        loss = get_loss(pred, data)
    emb, beta = model.forward(data, feats)
    emb = emb.detach().cpu().numpy()
    
    print('emb size:', emb.shape)
    
    mae_income, rmse_income, r2_income = utils.predict_regression(emb, income)
    mae_unemployed, rmse_unemployed, r2_unemployed = utils.predict_regression(emb, unemployed)
    mae_education, rmse_education, r2_education = utils.predict_regression(emb, education)
    mae_poverty, rmse_poverty, r2_poverty = utils.predict_regression(emb, poverty)
    
    mae_value, rmse_value, r2_value = utils.predict_regression(emb, value)
    mae_white, rmse_white, r2_white = utils.predict_regression(emb, white)
    mae_black, rmse_black, r2_black = utils.predict_regression(emb, black)
    mae_hispanic, rmse_hispanic, r2_hispanic = utils.predict_regression(emb, hispanic)
        
    print('----------------------------------------------------Results-----------------------------------------------------\n')

    print('\n')

    print('income prediction MAE:', mae_income.round(3))
    print('income prediction RMSE:', rmse_income.round(3))
    print('income prediction R2:', r2_income.round(3))
    print('\n')

    print('unemployed prediction MAE:', mae_unemployed.round(3))
    print('unemployed prediction RMSE:', rmse_unemployed.round(3))
    print('unemployed prediction R2:', r2_unemployed.round(3))
    print('\n')

    print('education prediction MAE:', mae_education.round(3))
    print('education prediction RMSE:', rmse_education.round(3))
    print('educationg prediction R2:', r2_education.round(3))
    print('\n')

    print('poverty prediction MAE:', mae_poverty.round(3))
    print('poverty prediction RMSE:', rmse_poverty.round(3))
    print('poverty prediction R2:', r2_poverty.round(3))
    print('\n')

    print('white prediction MAE:', mae_white.round(3))
    print('white prediction RMSE:', rmse_white.round(3))
    print('white prediction R2:', r2_white.round(3))
    print('\n')

    print('black prediction MAE:', mae_black.round(3))
    print('black prediction RMSE:', rmse_black.round(3))
    print('black prediction R2:', r2_black.round(3))
    print('\n')

    print('hispanic prediction MAE:', mae_hispanic.round(3))
    print('hispanic prediction RMSE:', rmse_hispanic.round(3))
    print('hispanic prediction R2:', r2_hispanic.round(3))
    print('\n')

    print('value prediction MAE:', mae_value.round(3))
    print('value prediction RMSE:', rmse_value.round(3))
    print('value prediction R2:', r2_value.round(3))
    print('\n')
    
    return emb

In [5]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [6]:
args = {'device': 'cuda:0', 'lr':1e-3, 'weight_decay':0, 'hidden_size':64,
        'num_epochs':8000, 'dropout':0, 'num_heads':[10], 'grad_norm':2,'out_size': 64}      

torch.cuda.empty_cache()
emb = main(100, k=75, patience=100, args=args)

importing Jupyter notebook from model.ipynb
total_params: 2816320
Epoch 0100 | Loss = 501.5728
Epoch 0200 | Loss = 498.8011
Epoch 0300 | Loss = 498.2394
Epoch 0400 | Loss = 497.6246
Epoch 0500 | Loss = 497.0928
Early stopping
Epoch 0582 | Loss = 497.3006
emb size: (308, 64)
----------------------------------------------------Results-----------------------------------------------------



income prediction MAE: 9621.497
income prediction RMSE: 12620.625
income prediction R2: 0.57


unemployed prediction MAE: 2.615
unemployed prediction RMSE: 3.384
unemployed prediction R2: 0.76


education prediction MAE: 5.425
education prediction RMSE: 7.17
educationg prediction R2: 0.41


poverty prediction MAE: 4.329
poverty prediction RMSE: 5.923
poverty prediction R2: 0.686


white prediction MAE: 9.708
white prediction RMSE: 13.083
white prediction R2: 0.753


black prediction MAE: 11.417
black prediction RMSE: 15.951
black prediction R2: 0.83


hispanic prediction MAE: 12.456
hispanic prediction