In [1]:
import sys 
sys.path.append("../../../../rllm/dataloader")

import time
import argparse
import numpy as np

# import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import f1_score

# from utils import load_data

from load_data import load_data
from models import GraphSage
from utils import adj_matrix_to_list, multihop_sampling

t_total = time.time()
# Load data
data, adj, features, labels, idx_train, idx_val, idx_test = load_data('movielens-classification')
adjacency_dict = adj_matrix_to_list(adj)

In [2]:
# Training settings
# parser = argparse.ArgumentParser()
# parser.add_argument('--no-cuda', action='store_true', default=False,
#                     help='Disables CUDA training.')
# parser.add_argument('--fastmode', action='store_true', default=False,
#                     help='Validate during training pass.')
# parser.add_argument('--seed', type=int, default=42, help='Random seed.')
# parser.add_argument('--epochs', type=int, default=50,
#                     help='Number of epochs to train.')
# parser.add_argument('--lr', type=float, default=0.01,
#                     help='Initial learning rate.')
# parser.add_argument('--weight_decay', type=float, default=5e-4,
#                     help='Weight decay (L2 loss on parameters).')
# parser.add_argument('--hidden', type=list, default=[128, 18],
#                     help='Number of hidden units.')
# parser.add_argument('--dropout', type=float, default=0.5,
#                     help='Dropout rate (1 - keep probability).')

# args = parser.parse_args()
args_cuda = True
args_hidden = [64, 18]
args_lr = 0.005
args_weight_decay = 5e-4
args_epochs = 60

np.random.seed(42)
torch.manual_seed(42)
if args_cuda:
    torch.cuda.manual_seed(42)
# Model and optimizer

NUM_NEIGHBORS_LIST = [25, 10]
NUM_BATCH_PER_EPOCH = 5
batch_size = 64
model = GraphSage(input_dim=features.shape[1], hidden_dim=args_hidden,
                  num_neighbors_list=NUM_NEIGHBORS_LIST)
optimizer = optim.Adam(model.parameters(),
                       lr=args_lr, weight_decay=args_weight_decay)
# loss_func = nn.BCEWithLogitsLoss()
loss_func = F.cross_entropy
DEVICE = "cpu"

if args_cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def train(epoch):
    t = time.time()
    model.train()
    loss_lst = []
    if epoch % 5 == 0: test()
    for batch in range(NUM_BATCH_PER_EPOCH):
        optimizer.zero_grad()
        batch_src_index = idx_train[torch.randint(0, len(idx_train), (batch_size,))]
        batch_src_label = labels[batch_src_index].float().to(DEVICE)
        batch_sampling_result = multihop_sampling(batch_src_index, NUM_NEIGHBORS_LIST, adjacency_dict)
        batch_sampling_x = [features[idx].float().to(DEVICE) for idx in batch_sampling_result]
        output = model(batch_sampling_x)
        loss_train = loss_func(torch.sigmoid(output), batch_src_label)
        loss_lst.append(loss_train.detach().item())
        loss_train.backward()
        optimizer.step()
    # if not args.fastmode:
    #     # Evaluate validation set performance separately,
    #     # deactivates dropout during validation run.
    #     model.eval()
    #     output = model(features, adj)

    print('Epoch: {:04d}'.format(epoch+1),
            'loss_train: {:.4f}'.format(sum(loss_lst)/len(loss_lst)),
        #   'loss_val: {:.4f}'.format(loss_val.item()),
        #   'acc_val: {:.4f}'.format(acc_val.item()),
            'time: {:.4f}s'.format(time.time() - t))
    # print('acc_train: {:.4f}'.format(acc_train.item()/num))


def test():
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(idx_test, NUM_NEIGHBORS_LIST, adjacency_dict)
        test_x = [features[idx].float().to(DEVICE) for idx in test_sampling_result]
        test_logits = model(test_x)
        test_logits = torch.sigmoid(test_logits).cpu()
        pred = np.where(test_logits > 0.5, 1, 0)
        test_label = labels[idx_test].float().cpu()
        f1_micro_test = f1_score(test_label, pred, average="micro")
        f1_macro_test = f1_score(test_label, pred, average="macro")
        print(f"micro: {f1_micro_test}; macro: {f1_macro_test}")



# Train model
t_total = time.time()
for epoch in range(args_epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()

micro: 0.14021669853409816; macro: 0.12027760362574674
Epoch: 0001 loss_train: 4.6243 time: 2.1052s
Epoch: 0002 loss_train: 4.2922 time: 0.1563s
Epoch: 0003 loss_train: 4.4186 time: 0.1482s
Epoch: 0004 loss_train: 4.2951 time: 0.1540s
Epoch: 0005 loss_train: 4.2336 time: 0.1419s
micro: 0.34491927825261154; macro: 0.0866862588833352
Epoch: 0006 loss_train: 4.0365 time: 1.5356s
Epoch: 0007 loss_train: 4.2655 time: 0.1463s
Epoch: 0008 loss_train: 4.1566 time: 0.1471s
Epoch: 0009 loss_train: 4.1169 time: 0.1508s
Epoch: 0010 loss_train: 4.1048 time: 0.1417s
micro: 0.36170818505338076; macro: 0.09107239325297393
Epoch: 0011 loss_train: 4.1050 time: 1.6162s
Epoch: 0012 loss_train: 3.9615 time: 0.1529s
Epoch: 0013 loss_train: 4.1881 time: 0.1491s
Epoch: 0014 loss_train: 4.0347 time: 0.1476s
Epoch: 0015 loss_train: 3.9477 time: 0.1477s
micro: 0.3420436570705473; macro: 0.1036681317880227
Epoch: 0016 loss_train: 3.9941 time: 1.6117s
Epoch: 0017 loss_train: 4.1377 time: 0.1506s
Epoch: 0018 loss_t