In [8]:
import snap
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import pandas as pd  # To read data
from math import log

import csv
import webbrowser

import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from dgl import DGLGraph
import dgl.function as fn
from functools import partial

In [None]:
from load_datasets import load_banned
from load_datasets import load_graph
from load_datasets import get_banned_ids, banned_sub_in_G, get_random_subgraph
import random

DATASET = 'title'

# Seed for the train/test split below, so that a kernel restart reproduces the
# same TRAINING / TESTING partition (and the same random draws afterwards).
SPLIT_SEED = 42

# lists of strings, which are the names of banned subreddits to train and test on
TESTING, TRAINING = [], []

if __name__ == '__main__':
  random.seed(SPLIT_SEED)
  # load graph and maps that are used to map ids (int) to subreddit names (str)
  G, id_to_subreddit, subreddit_to_id = load_graph(DATASET)
  # load banned subreddits [str]
  banned = load_banned()
  # get banned subreddits that appear in G
  banned = banned_sub_in_G(G, id_to_subreddit, banned)
  # get list of random sample of banned subreddits to train on (3/4 of them).
  # Sampling from a sorted list keeps the draw independent of whatever order
  # (or container type) banned_sub_in_G happened to return.
  TRAINING = random.sample(sorted(banned), int(len(banned) * 3 / 4))
  # rest of subreddits are for testing; sorted so the order is deterministic
  # instead of following set iteration order.
  TESTING = sorted(set(banned) - set(TRAINING))
  # generate subgraph of G
  G_new = get_random_subgraph(G, get_banned_ids(TRAINING, subreddit_to_id))
  print('number of nodes in random subgraph: ', G_new.GetNodes())

In [5]:
from __future__ import print_function

from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2

from kegra.layers.graph import GraphConvolution
from kegra.utils import *

import time

# Define parameters
# NOTE(review): DATASET was set to 'title' in an earlier cell; this assignment
# replaces that value for every cell executed afterwards.
DATASET = 'cora'
FILTER = 'localpool'  # 'chebyshev'
MAX_DEGREE = 2  # maximum polynomial degree
SYM_NORM = True  # symmetric (True) vs. left-only (False) normalization
NB_EPOCH = 200
PATIENCE = 10  # early stopping patience

# Get data
# load_data / get_splits are pulled in by `from kegra.utils import *`.
# presumably X = node features, A = adjacency, y = labels — TODO confirm against kegra.utils
X, A, y = load_data(dataset=DATASET)
y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)

# Normalize X
# Divides every row of X by that row's sum, in place.
# NOTE(review): a row whose sum is 0 would divide by zero — confirm none exist.
X /= X.sum(1).reshape(-1, 1)

# Choose the graph filter. Each branch defines three names used further down:
#   support - number of adjacency-like inputs fed to every GraphConvolution layer
#   graph   - list of arrays passed to model.fit / model.predict
#   G       - list of sparse Keras Input placeholders, one per support
# NOTE(review): `G` is also the name an earlier cell gives to the loaded graph;
# running this cell rebinds it to a list of Keras inputs.
if FILTER == 'localpool':
    """ Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016) """
    print('Using local pooling filters...')
    A_ = preprocess_adj(A, SYM_NORM)
    support = 1
    graph = [X, A_]
    G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)]

elif FILTER == 'chebyshev':
    """ Chebyshev polynomial basis filters (Defferard et al., NIPS 2016)  """
    print('Using Chebyshev polynomial basis filters...')
    L = normalized_laplacian(A, SYM_NORM)
    L_scaled = rescale_laplacian(L)
    T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)
    # One input per polynomial term: degrees 0..MAX_DEGREE.
    support = MAX_DEGREE + 1
    graph = [X]+T_k
    G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(support)]

else:
    # Any other FILTER value is a configuration error.
    raise Exception('Invalid filter type.')

# Feature input: one vector of length X.shape[1] per sample.
X_in = Input(shape=(X.shape[1],))

# Define model architecture
# NOTE: We pass arguments for graph convolutional layers as a list of tensors.
# This is somewhat hacky, more elegant options would require rewriting the Layer base class.
# Stack: Dropout(0.5) -> GraphConvolution(relu, L2-regularised) -> Dropout(0.5)
#        -> GraphConvolution(softmax) with y.shape[1] as its first argument.
# presumably the first GraphConvolution argument is the output width — TODO confirm in kegra
H = Dropout(0.5)(X_in)
H = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-4))([H]+G)
H = Dropout(0.5)(H)
Y = GraphConvolution(y.shape[1], support, activation='softmax')([H]+G)

# Compile model
# Inputs are the feature placeholder followed by every support placeholder in G,
# matching the order of the `graph` list that is fed to fit/predict.
model = Model(inputs=[X_in]+G, outputs=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

# Early-stopping bookkeeping:
#   wait          - consecutive epochs without a new best validation loss
#   preds         - predictions from the most recent epoch (reused for testing)
#   best_val_loss - lowest validation loss seen so far
wait, preds, best_val_loss = 0, None, 99999

# Train one epoch at a time so validation loss can be checked after each pass.
for epoch in range(1, NB_EPOCH+1):

    # Wall-clock start of this epoch
    t = time.time()

    # One full-batch update; train_mask is passed as the per-sample weight so
    # that nodes without labels are masked out of the loss.
    model.fit(
        graph,
        y_train,
        sample_weight=train_mask,
        batch_size=A.shape[0],
        epochs=1,
        shuffle=False,
        verbose=0,
    )

    # Score every node in a single batch
    preds = model.predict(graph, batch_size=A.shape[0])

    # Index 0 of each result is the training split, index 1 the validation split
    train_val_loss, train_val_acc = evaluate_preds(
        preds, [y_train, y_val], [idx_train, idx_val]
    )
    print("Epoch: {:04d}".format(epoch),
          "train_loss= {:.4f}".format(train_val_loss[0]),
          "train_acc= {:.4f}".format(train_val_acc[0]),
          "val_loss= {:.4f}".format(train_val_loss[1]),
          "val_acc= {:.4f}".format(train_val_acc[1]),
          "time= {:.4f}".format(time.time() - t))

    # Early stopping: reset the counter on a new best validation loss; otherwise
    # stop once PATIENCE non-improving epochs have already been counted.
    if train_val_loss[1] < best_val_loss:
        best_val_loss = train_val_loss[1]
        wait = 0
    elif wait >= PATIENCE:
        print('Epoch {}: early stopping'.format(epoch))
        break
    else:
        wait += 1

# Testing — uses the predictions produced by the last epoch that ran
test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
print("Test set results:",
      "loss= {:.4f}".format(test_loss[0]),
      "accuracy= {:.4f}".format(test_acc[0]))

In [14]:
# Edge list of the title-hyperlink graph. Column names are supplied here, so
# pandas reads the first line of the file as data rather than as a header.
edges = pd.read_csv(
    'title-data/snap-redditHyperlinks-title.csv',
    names=["source", "destination"],
)

# Subreddit-name -> id table, loaded the same way.
convertName2Id = pd.read_csv(
    'title-data/snap-subreddit-ids-title.csv',
    names=["name", "id"],
)

In [13]:
# Cleaned table of banned subreddits; the file's first line is used as the header.
# NOTE(review): an earlier cell binds `banned` to the result of load_banned();
# after this cell the name refers to a DataFrame instead.
banned = pd.read_csv(filepath_or_buffer='gcntestdata-cleaned.csv')
# Preview the first rows as the cell output.
banned.head()

Unnamed: 0,Name
0,Beatingwomen
1,Braincels
2,CreepShots
3,CringeAnarchy
4,DarkNetMarkets


In [15]:
def makeGraph(edges, N):
    """
    Build an undirected SNAP graph with nodes 0..N-1 from an edge table.

    :param - edges: pandas DataFrame with 'source' and 'destination' columns;
                    every value is converted with int() and used as a node id
    :param - N: number of nodes; node ids 0..N-1 are created before any edge
                is added (presumably every endpoint must be < N for AddEdge
                to succeed - confirm against the snap.py documentation)

    return type: snap.PUNGraph
    """
    G1 = snap.TUNGraph.New()
    for i in range(N):
        G1.AddNode(i)
    # Iterate the two columns directly instead of DataFrame.iterrows():
    # iterrows() builds a Series for every row, which is slow on large edge
    # lists and coerces each row to a single dtype.
    for a, b in zip(edges['source'], edges['destination']):
        G1.AddEdge(int(a), int(b))
    Graph = snap.ConvertGraph(snap.PUNGraph, G1)
    return Graph


In [16]:
# makeGraph's second argument is the number of NODES (it creates ids 0..N-1),
# not the number of edge rows. Size the graph from the largest endpoint id so
# every edge endpoint exists and no surplus isolated nodes are created.
edge_endpoints = edges[["source", "destination"]].astype(int)
G = makeGraph(edges, int(edge_endpoints.values.max()) + 1 if len(edges) else 0)

In [None]:
# load graph data
# NOTE(review): this cell reads `data` (labels, edge_type, edge_norm), but the
# only statement that would define it is the commented-out load_data call
# below; no visible cell assigns `data`, so a fresh kernel would presumably
# fail here with NameError — confirm where `data` should come from.
import numpy as np
# data = load_data(dataset='aifb')
# G_new, TRAINING and TESTING are assigned by the earlier cell that builds the
# train/test split.
num_nodes = G_new.GetNodes()
# NOTE(review): num_rels is set to the edge count of G_new — confirm this is
# the intended meaning of "rels".
num_rels = G_new.GetEdges()
num_classes = 2
labels = data.labels
train_idx = TRAINING
test =  TESTING
# split training and validation set
# The first fifth of TRAINING becomes validation, the remainder stays training.
# NOTE(review): the split cell documents TRAINING as a list of subreddit name
# strings, so these "idx" lists would hold names, not integer indices — verify.
val_idx = TRAINING[:len(TRAINING) // 5]
train_idx = train_idx[len(TRAINING) // 5:]

# edge type and normalization factor
edge_type = torch.from_numpy(data.edge_type)
# unsqueeze(1) inserts a size-1 dimension at position 1 of the edge_norm tensor
edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)

# Flatten labels to a 1-D tensor
labels = torch.from_numpy(labels).view(-1)