In [1]:
import torch
import torch_geometric
from torch_geometric.data import Data, Dataset, InMemoryDataset, NeighborSampler
from torch_geometric.nn import SAGEConv, GATConv
from torch import nn
import numpy as np
from data_preprocess_gnn import construct_dataset, get_labeled_index
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, confusion_matrix, recall_score, f1_score, auc, accuracy_score, precision_score
import torch.nn.functional as F
import random
from torch.nn import init
import argparse
import easydict

  from ._conv import register_converters as _register_converters


In [2]:
class FullySupervisedGraphSageModel(nn.Module):
    """One-hop GraphSAGE node classifier.

    A single ``SAGEConv`` layer maps ``num_features`` inputs to 256 hidden
    units; a linear layer then produces 3 class scores, returned as
    log-probabilities.
    """

    def __init__(self, num_features):
        """Build the layers and Xavier-initialise their weight matrices.

        Args:
            num_features: size of each input node feature vector.
        """
        super().__init__()
        # Only one hop is used. NOTE: the original kept a second
        # SAGEConv(256, 256) layer (fed by data_flow[1]) commented out.
        self.conv_layers = nn.ModuleList([SAGEConv(num_features, 256)])
        self.classify_layer = nn.Linear(256, 3)
        # Classifier first, then the conv layer (same order as before, so the
        # random draws made by the initialiser are unchanged).
        # NOTE(review): relies on the conv layer exposing a ``weight``
        # attribute - confirm against the installed torch_geometric version.
        for weight in (self.classify_layer.weight, self.conv_layers[0].weight):
            init.xavier_uniform_(weight)

    def forward(self, x, data_flow):
        """Return per-node log-probabilities over the 3 classes.

        Args:
            x: feature matrix that is indexed by the block's ``n_id``.
            data_flow: sequence whose first element exposes ``n_id``,
                ``edge_index`` and ``size``.

        Returns:
            ``log_softmax`` (over dim 1) of the classifier scores.
        """
        block = data_flow[0]
        block_features = x[block.n_id]
        hidden = self.conv_layers[0](block_features, block.edge_index, size=block.size)
        return F.log_softmax(self.classify_layer(hidden), dim=1)


In [3]:

class FullySupervisedGATModel(nn.Module):
    """One-hop graph-attention node classifier.

    A single ``GATConv`` layer maps ``num_features`` inputs to 256 hidden
    units; a linear layer then produces 3 class scores, returned as
    log-probabilities.
    """

    def __init__(self, num_features):
        """Build the layers and Xavier-initialise their weight matrices.

        Args:
            num_features: size of each input node feature vector.
        """
        super().__init__()
        # Only one hop is used. NOTE: the original kept a second
        # SAGEConv(256, 256) layer (fed by data_flow[1]) commented out.
        self.conv_layers = nn.ModuleList([GATConv(num_features, 256)])
        self.classify_layer = nn.Linear(256, 3)
        # Classifier first, then the conv layer (same order as before, so the
        # random draws made by the initialiser are unchanged).
        # NOTE(review): relies on the conv layer exposing a ``weight``
        # attribute - confirm against the installed torch_geometric version.
        for weight in (self.classify_layer.weight, self.conv_layers[0].weight):
            init.xavier_uniform_(weight)

    def forward(self, x, data_flow):
        """Return per-node log-probabilities over the 3 classes.

        Args:
            x: feature matrix that is indexed by the block's ``n_id``.
            data_flow: sequence whose first element exposes ``n_id``,
                ``res_n_id``, ``edge_index`` and ``size``.

        Returns:
            ``log_softmax`` (over dim 1) of the classifier scores.
        """
        block = data_flow[0]
        source_features = x[block.n_id]
        # The conv layer is given a (source, target) feature pair; the target
        # rows are picked out of the source rows via ``res_n_id``.
        target_features = source_features[block.res_n_id].squeeze()
        hidden = self.conv_layers[0](
            (source_features, target_features), block.edge_index, size=block.size
        )
        return F.log_softmax(self.classify_layer(hidden), dim=1)



In [4]:
def train(loader, data, model, optimizer):
    """Run one training epoch over the nodes selected by ``data.train_mask``.

    Args:
        loader: callable that takes a node mask and yields mini-batch
            objects exposing ``n_id`` and ``batch_size``.
        data: object providing ``x``, ``y`` and ``train_mask``.
        model: module called as ``model(data.x, batch)``; its output is fed
            to ``F.nll_loss``, so it is expected to be log-probabilities.
        optimizer: optimizer stepped once per mini-batch.

    Returns:
        Sum of ``batch loss * batch size`` divided by the number of entries
        in ``data.train_mask`` that equal ``True``.
    """
    model.train()
    # Per-class loss weights: class 1 contributes nothing, class 2 counts ten
    # times as much as class 0.
    class_weights = torch.FloatTensor([1, 0 , 10])
    running_loss = 0
    for flow in loader(data.train_mask):
        optimizer.zero_grad()
        log_probs = model(data.x, flow)
        targets = data.y[flow.n_id]
        batch_loss = F.nll_loss(log_probs, targets, weight=class_weights)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item() * flow.batch_size
    num_train_nodes = data.train_mask.eq(True).sum().item()
    return running_loss / num_train_nodes

def test(loader, data, model, mask):
    model.eval()
    y_pred = []
    y_true = []
    correct = 0
    for data_flow in loader(mask):
        pred = model(data.x, data_flow).max(1)[1]
        correct += pred.eq(data.y[data_flow.n_id]).sum().item()
        y_pred.extend([1 if v == 2 else 0 for v in pred])
        y_true.extend([1 if v == 2 else 0 for v in data.y[data_flow.n_id]])
    return correct / (mask==True).sum().item(), y_pred, y_true


In [5]:

if __name__ == "__main__":
    # 5-fold stratified cross-validation of a one-hop GNN node classifier.
    # For every fold a fresh model is trained on the labelled nodes of the
    # training split and evaluated on the held-out split; mean and standard
    # deviation of accuracy / recall / precision / F-score are reported.

    # Experiment options. The argparse-based command line (--feature,
    # --user_type, --model_type, --epoch) is disabled for notebook use, so
    # the same options are hard-coded here instead.
    args = easydict.EasyDict({
        "feature": "all",
        "model_type": "sage",
        "epoch" : 201,
        "user_type": "hate"
    })
    assert args.feature in ['glove', 'all']
    assert args.user_type in ['hate', 'suspend']
    assert args.model_type in ['sage', 'gat']

    SEED = 123        # also used for the fold split below
    EVAL_EVERY = 50   # evaluate on the test split every this many epochs

    # Seed the Python, NumPy and PyTorch generators so weight initialisation
    # and batch shuffling start from a fixed state.
    # NOTE(review): whether the neighbour sampling itself draws from one of
    # these generators is not visible from this file - confirm before relying
    # on bit-exact reproducibility.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    def _binary_scores(y_true, y_pred):
        """Return (fscore, recall, precision) for the positive class (label 1)."""
        return (
            f1_score(y_true, y_pred, pos_label=1, average='binary'),
            recall_score(y_true, y_pred, pos_label=1, average='binary'),
            precision_score(y_true, y_pred, pos_label=1, average='binary'),
        )

    print("====information of experiment====")
    print("FEATURE: ", args.feature, "classification_type:", args.user_type, "MODEL:", args.model_type)
    print("====end information of experiment====")

    dataset = construct_dataset(args.feature)
    model_type = args.model_type
    hate_index, normal_index = get_labeled_index(feature_type=args.feature)

    # Stratification labels, aligned with all_index: entries from hate_index
    # get class 2, entries from normal_index get class 0.
    y_all = [2] * len(hate_index) + [0] * len(normal_index)
    all_index = np.array(list(hate_index) + list(normal_index))
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

    recall_test = []
    accuracy_test = []
    fscore_test = []
    precision_test = []

    for trail, (train_i, test_i) in enumerate(skf.split(all_index, y_all)):
        print("========begin trail {:01d}===========".format(trail))
        all_train_index = all_index[train_i]
        test_index = all_index[test_i]

        # Boolean node masks marking this fold's training / test nodes.
        data = dataset[0]
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.train_mask[all_train_index] = True
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.test_mask[test_index] = True

        loader = NeighborSampler(data, size=[25], num_hops=1, batch_size=128, shuffle=True, add_self_loops=True)

        # Fresh model and optimizer for every fold.
        if model_type == 'sage':
            model = FullySupervisedGraphSageModel(data.num_features)
        else:
            model = FullySupervisedGATModel(data.num_features)
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01)

        for epoch in range(args.epoch):
            loss = train(loader, data, model, optimizer)
            print('Trail: {:01d}, Epoch: {:02d}, Loss: {:.4f}'.format(trail, epoch, loss))
            if epoch % EVAL_EVERY == 0:
                test_acc, y_pred, y_true = test(loader, data, model, data.test_mask)
                fscore, recall, precision = _binary_scores(y_true, y_pred)
                print(confusion_matrix(y_true, y_pred))
                print("Fscore:",fscore, "Recall:", recall, "Precision:", precision, "Test:", test_acc)

        # Final evaluation of the fold. This reuses test() instead of
        # repeating its loop inline, so both evaluations share one code path.
        test_acc, y_pred, y_true = test(loader, data, model, data.test_mask)
        fscore, recall, precision = _binary_scores(y_true, y_pred)
        accuracy_test.append(test_acc)
        recall_test.append(recall)
        fscore_test.append(fscore)
        precision_test.append(precision)
        print("========end this trail==========")

    # Mean and standard deviation of each metric across the folds.
    accuracy_test = np.array(accuracy_test)
    recall_test = np.array(recall_test)
    fscore_test = np.array(fscore_test)
    precision_test = np.array(precision_test)
    print("avg Accuracy   %0.4f +-  %0.4f" % (accuracy_test.mean(), accuracy_test.std()))
    print("avg Recall    %0.4f +-  %0.4f" % (recall_test.mean(), recall_test.std()))
    print("avg Precision    %0.4f +-  %0.4f" % (precision_test.mean(), precision_test.std()))
    print("avg Fscore    %0.4f +-  %0.4f" % (fscore_test.mean(), fscore_test.std()))


====information of experiment====
FEATURE:  all classification_type: hate MODEL: sage
====end information of experiment====
Trail: 0, Epoch: 00, Loss: 0.7707
[[586 300]
 [ 13  96]]
Fscore: 0.3801980198019802 Recall: 0.8807339449541285 Precision: 0.24242424242424243 Test: 0.628140703517588
Trail: 0, Epoch: 01, Loss: 0.5453
Trail: 0, Epoch: 02, Loss: 0.5018
Trail: 0, Epoch: 03, Loss: 0.4739
Trail: 0, Epoch: 04, Loss: 0.4544
Trail: 0, Epoch: 05, Loss: 0.4495
Trail: 0, Epoch: 06, Loss: 0.4312
Trail: 0, Epoch: 07, Loss: 0.4266
Trail: 0, Epoch: 08, Loss: 0.4193
Trail: 0, Epoch: 09, Loss: 0.4141
Trail: 0, Epoch: 10, Loss: 0.4157
Trail: 0, Epoch: 11, Loss: 0.4145
Trail: 0, Epoch: 12, Loss: 0.4062
Trail: 0, Epoch: 13, Loss: 0.4025
Trail: 0, Epoch: 14, Loss: 0.4010
Trail: 0, Epoch: 15, Loss: 0.4021
Trail: 0, Epoch: 16, Loss: 0.3938
Trail: 0, Epoch: 17, Loss: 0.3979
Trail: 0, Epoch: 18, Loss: 0.3861
Trail: 0, Epoch: 19, Loss: 0.3833
Trail: 0, Epoch: 20, Loss: 0.3831
Trail: 0, Epoch: 21, Loss: 0.3

Trail: 1, Epoch: 07, Loss: 0.4287
Trail: 1, Epoch: 08, Loss: 0.4207
Trail: 1, Epoch: 09, Loss: 0.4234
Trail: 1, Epoch: 10, Loss: 0.4178
Trail: 1, Epoch: 11, Loss: 0.4036
Trail: 1, Epoch: 12, Loss: 0.4079
Trail: 1, Epoch: 13, Loss: 0.4080
Trail: 1, Epoch: 14, Loss: 0.3971
Trail: 1, Epoch: 15, Loss: 0.3956
Trail: 1, Epoch: 16, Loss: 0.3931
Trail: 1, Epoch: 17, Loss: 0.3900
Trail: 1, Epoch: 18, Loss: 0.3856
Trail: 1, Epoch: 19, Loss: 0.3834
Trail: 1, Epoch: 20, Loss: 0.3965
Trail: 1, Epoch: 21, Loss: 0.3879
Trail: 1, Epoch: 22, Loss: 0.3826
Trail: 1, Epoch: 23, Loss: 0.4011
Trail: 1, Epoch: 24, Loss: 0.3841
Trail: 1, Epoch: 25, Loss: 0.3756
Trail: 1, Epoch: 26, Loss: 0.3784
Trail: 1, Epoch: 27, Loss: 0.3709
Trail: 1, Epoch: 28, Loss: 0.3863
Trail: 1, Epoch: 29, Loss: 0.3773
Trail: 1, Epoch: 30, Loss: 0.3693
Trail: 1, Epoch: 31, Loss: 0.3669
Trail: 1, Epoch: 32, Loss: 0.3632
Trail: 1, Epoch: 33, Loss: 0.3696
Trail: 1, Epoch: 34, Loss: 0.3679
Trail: 1, Epoch: 35, Loss: 0.3647
Trail: 1, Epoc

Trail: 2, Epoch: 23, Loss: 0.3738
Trail: 2, Epoch: 24, Loss: 0.3703
Trail: 2, Epoch: 25, Loss: 0.3668
Trail: 2, Epoch: 26, Loss: 0.3710
Trail: 2, Epoch: 27, Loss: 0.3674
Trail: 2, Epoch: 28, Loss: 0.3595
Trail: 2, Epoch: 29, Loss: 0.3615
Trail: 2, Epoch: 30, Loss: 0.3575
Trail: 2, Epoch: 31, Loss: 0.3573
Trail: 2, Epoch: 32, Loss: 0.3582
Trail: 2, Epoch: 33, Loss: 0.3580
Trail: 2, Epoch: 34, Loss: 0.3514
Trail: 2, Epoch: 35, Loss: 0.3532
Trail: 2, Epoch: 36, Loss: 0.3556
Trail: 2, Epoch: 37, Loss: 0.3517
Trail: 2, Epoch: 38, Loss: 0.3529
Trail: 2, Epoch: 39, Loss: 0.3524
Trail: 2, Epoch: 40, Loss: 0.3496
Trail: 2, Epoch: 41, Loss: 0.3531
Trail: 2, Epoch: 42, Loss: 0.3622
Trail: 2, Epoch: 43, Loss: 0.3528
Trail: 2, Epoch: 44, Loss: 0.3426
Trail: 2, Epoch: 45, Loss: 0.3499
Trail: 2, Epoch: 46, Loss: 0.3453
Trail: 2, Epoch: 47, Loss: 0.3404
Trail: 2, Epoch: 48, Loss: 0.3494
Trail: 2, Epoch: 49, Loss: 0.3472
Trail: 2, Epoch: 50, Loss: 0.3445
[[750 135]
 [ 21  88]]
Fscore: 0.530120481927710

Trail: 3, Epoch: 38, Loss: 0.3397
Trail: 3, Epoch: 39, Loss: 0.3381
Trail: 3, Epoch: 40, Loss: 0.3389
Trail: 3, Epoch: 41, Loss: 0.3329
Trail: 3, Epoch: 42, Loss: 0.3379
Trail: 3, Epoch: 43, Loss: 0.3302
Trail: 3, Epoch: 44, Loss: 0.3343
Trail: 3, Epoch: 45, Loss: 0.3266
Trail: 3, Epoch: 46, Loss: 0.3286
Trail: 3, Epoch: 47, Loss: 0.3312
Trail: 3, Epoch: 48, Loss: 0.3299
Trail: 3, Epoch: 49, Loss: 0.3279
Trail: 3, Epoch: 50, Loss: 0.3282
[[668 217]
 [ 26  83]]
Fscore: 0.4058679706601467 Recall: 0.7614678899082569 Precision: 0.27666666666666667 Test: 0.755533199195171
Trail: 3, Epoch: 51, Loss: 0.3286
Trail: 3, Epoch: 52, Loss: 0.3273
Trail: 3, Epoch: 53, Loss: 0.3248
Trail: 3, Epoch: 54, Loss: 0.3342
Trail: 3, Epoch: 55, Loss: 0.3335
Trail: 3, Epoch: 56, Loss: 0.3298
Trail: 3, Epoch: 57, Loss: 0.3299
Trail: 3, Epoch: 58, Loss: 0.3292
Trail: 3, Epoch: 59, Loss: 0.3240
Trail: 3, Epoch: 60, Loss: 0.3227
Trail: 3, Epoch: 61, Loss: 0.3252
Trail: 3, Epoch: 62, Loss: 0.3322
Trail: 3, Epoch: 6

Fscore: 0.5863192182410424 Recall: 0.8333333333333334 Precision: 0.45226130653266333 Test: 0.8721047331319235
Trail: 4, Epoch: 51, Loss: 0.3637
Trail: 4, Epoch: 52, Loss: 0.3619
Trail: 4, Epoch: 53, Loss: 0.3617
Trail: 4, Epoch: 54, Loss: 0.3607
Trail: 4, Epoch: 55, Loss: 0.3663
Trail: 4, Epoch: 56, Loss: 0.3627
Trail: 4, Epoch: 57, Loss: 0.3571
Trail: 4, Epoch: 58, Loss: 0.3643
Trail: 4, Epoch: 59, Loss: 0.3593
Trail: 4, Epoch: 60, Loss: 0.3569
Trail: 4, Epoch: 61, Loss: 0.3604
Trail: 4, Epoch: 62, Loss: 0.3597
Trail: 4, Epoch: 63, Loss: 0.3622
Trail: 4, Epoch: 64, Loss: 0.3558
Trail: 4, Epoch: 65, Loss: 0.3572
Trail: 4, Epoch: 66, Loss: 0.3551
Trail: 4, Epoch: 67, Loss: 0.3529
Trail: 4, Epoch: 68, Loss: 0.3652
Trail: 4, Epoch: 69, Loss: 0.3562
Trail: 4, Epoch: 70, Loss: 0.3536
Trail: 4, Epoch: 71, Loss: 0.3583
Trail: 4, Epoch: 72, Loss: 0.3524
Trail: 4, Epoch: 73, Loss: 0.3521
Trail: 4, Epoch: 74, Loss: 0.3542
Trail: 4, Epoch: 75, Loss: 0.3508
Trail: 4, Epoch: 76, Loss: 0.3586
Trail: