In [1]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv, GATConv
import torch.nn.functional as F 
import numpy as np 
import pickle
import time

In [2]:
torch.manual_seed(1234)

<torch._C.Generator at 0x7fc6bf592e90>

In [3]:
class DataLoad():
    def __init__(self,file,feature=False):
        with open(file+'.train','rb') as fp:
            self.n_class,train_mask = pickle.load(fp)
        self.train_mask = torch.tensor(train_mask,dtype=torch.bool)
        self.n_node = len(self.train_mask)
        with open(file+'.labels','r+') as fp:
            lines = fp.readlines()
        self.y = []
        for each in lines:
            info = each.split(' ')
            label = info[1]
            self.y.append(int(label))
        self.y = torch.tensor(self.y,dtype=torch.long)
        if feature == False:
            self.x = np.zeros((self.n_node,self.n_node))
            for i in range(self.n_node):
                self.x[i][i] = 1
            self.x = torch.tensor(self.x,dtype=torch.float)
            self.n_feature = self.n_node
        else:
            with open(file+'.feat','rb') as fp:
                self.x = torch.tensor(pickle.load(fp),dtype=torch.float)
            self.n_feature = len(self.x[0])
            
        self.test = []
        for i in range(self.n_node):
            if self.train_mask[i] == False:
                self.test.append(i)
    
    def AppendDummy(self,m):
        to_add = torch.tensor([False for _ in range(m)],dtype=torch.bool)
        self.train_mask = torch.cat((self.train_mask,to_add))
        temp = []
        for _ in range(m):
            temp.append([0 for _ in range(self.n_feature)])
        temp = torch.tensor(temp,dtype=torch.float)
        self.x = torch.cat((self.x,temp))
        to_add = torch.tensor([-1 for _ in range(m)],dtype=torch.long)
        self.y = torch.cat((self.y,to_add))

In [4]:
def EdgeLoader(file):
    with open(file,'r+') as fp:
        lines = fp.readlines()
    ret = []
    for each in lines:
        info = each.split(' ')
        src = int(info[0])
        dst = int(info[1])
        ret.append([src,dst])
    ret = np.array(ret)
    return torch.tensor(ret.T,dtype=torch.int)

In [5]:
class GCN(torch.nn.Module):
    def __init__(self,DataLoader):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(DataLoader.n_feature, int(DataLoader.n_feature/4))
        self.conv2 = GCNConv(int(DataLoader.n_feature/4), int(DataLoader.n_feature/8))
        self.classifier = Linear(int(DataLoader.n_feature/8), DataLoader.n_class)

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = F.relu(h)
        h = self.conv2(h, edge_index)
        h = F.relu(h)  
        out = self.classifier(h)
        return out


In [6]:
def TrainAllModel(file):
    loader = DataLoad(file)
    suffix = ['.mtx','_relwire.txt','_swing.txt','_er.txt','_away.txt']
    criterion = torch.nn.CrossEntropyLoss()
    for each in suffix:
        edge_file = file + each
        out_file = edge_file.split('.')[0] + '.pt'
        model = GCN(loader)
        edge_index = EdgeLoader(edge_file)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
        start = time.time()
        for _ in range(50):
            optimizer.zero_grad()  # Clear gradients.
            out = model(loader.x, edge_index)  # Perform a single forward pass.
            loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            torch.save(model.state_dict(), out_file)  
        print("Running time for " + edge_file + ":" + str(time.time()-start))  

In [7]:
def EvaluateModel(loader,edge_file):
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.')[0] + '.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [8]:
def TestAllModel(file):
    loader = DataLoad(file)
    suffix = ['.mtx','_relwire.txt','_swing.txt','_er.txt','_away.txt',"_laser"]
    for each in suffix:
        edge_file = file + each
        score = EvaluateModel(loader,edge_file)
        print('The accuracy of model in graph ' + edge_file + ' is ' + str(score))

In [9]:
def TrainAndTest(file):
    TrainAllModel(file)
    TestAllModel(file)

In [20]:
def TrainOneModel(file,suf):
    loader = DataLoad(file)
    suffix = [suf]
    criterion = torch.nn.CrossEntropyLoss()
    for each in suffix:
        edge_file = file + each
        out_file = edge_file.split('.')[0] + '.pt'
        model = GCN(loader)
        edge_index = EdgeLoader(edge_file)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
        start = time.time()
        for _ in range(50):
            optimizer.zero_grad()  # Clear gradients.
            out = model(loader.x, edge_index)  # Perform a single forward pass.
            loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            torch.save(model.state_dict(), out_file)  
        print("Running time for " + edge_file + ":" + str(time.time()-start))  

def TestOneModel(file,suf):
    loader = DataLoad(file)
    suffix = [suf]
    for each in suffix:
        edge_file = file + each
        score = EvaluateModel(loader,edge_file)
        print('The accuracy of model in graph ' + edge_file + ' is ' + str(score))

def TrainAndTestOne(file,suf):
    TrainOneModel(file,suf)
    TestOneModel(file,suf)

In [17]:
TrainAndTest('book')

The accuracy of model in graph book.mtx is 0.8157894736842105
The accuracy of model in graph book_relwire.txt is 0.6447368421052632
The accuracy of model in graph book_swing.txt is 0.8289473684210527
The accuracy of model in graph book_er.txt is 0.8289473684210527
The accuracy of model in graph book_away.txt is 0.8157894736842105


In [18]:
TrainAndTest('football')

The accuracy of model in graph football.mtx is 0.7058823529411765
The accuracy of model in graph football_relwire.txt is 0.7647058823529411
The accuracy of model in graph football_swing.txt is 0.8
The accuracy of model in graph football_er.txt is 0.6352941176470588
The accuracy of model in graph football_away.txt is 0.8823529411764706


In [32]:
TrainAllModel('book')

Running time for book.mtx:0.8253555297851562
Running time for book_relwire.txt:0.8472199440002441
Running time for book_swing.txt:0.759289026260376
Running time for book_er.txt:0.7153055667877197
Running time for book_away.txt:0.6540465354919434


In [31]:
TrainAllModel('karate')

Running time for karate.mtx:0.6257894039154053
Running time for karate_relwire.txt:0.5923221111297607
Running time for karate_swing.txt:0.5691390037536621
Running time for karate_er.txt:0.5876691341400146
Running time for karate_away.txt:0.6073927879333496


In [30]:
TrainAllModel('football')

Running time for football.mtx:0.7631630897521973
Running time for football_relwire.txt:0.8175332546234131
Running time for football_swing.txt:0.7633411884307861
Running time for football_er.txt:0.9730856418609619
Running time for football_away.txt:0.7156040668487549


In [28]:
TrainAllModel('cora')

Running time for cora_away.txt:20.58921480178833
Running time for cora_er.txt:20.527925729751587


In [26]:
TrainAllModel('citeseer')

Running time for citeseer_away.txt:31.778164386749268
Running time for citeseer_er.txt:31.160691261291504


In [34]:
files = ['./MotifER/book_5_erds_0.2.txt','./MotifER/football_5_erds_0.2.txt','./MotifER/cora_5_erds_0.2.txt','./MotifER/karate_5_erds_0.2.txt','./MotifER/citeseer_5_erds_0.2.txt']
criterion = torch.nn.CrossEntropyLoss()
for each in files:
    edge_file = each
    file = each.split('_')[0].split('/')[-1]
    loader = DataLoad(file)
    #out_file = edge_file.split('.')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    start = time.time()
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        #torch.save(model.state_dict(), out_file)  
    print("Running time for " + edge_file + ": " + str(time.time()-start))  

Running time for ./MotifER/book_5_erds_0.2.txt: 0.34540653228759766
Running time for ./MotifER/football_5_erds_0.2.txt: 0.36682677268981934
Running time for ./MotifER/cora_5_erds_0.2.txt: 11.02383041381836
Running time for ./MotifER/karate_5_erds_0.2.txt: 0.28610944747924805
Running time for ./MotifER/citeseer_5_erds_0.2.txt: 23.095571756362915


In [35]:
files = ['./MotifGap/book_5_gap.txt','./MotifGap/football_5_gap.txt','./MotifGap/cora_5_gap.txt','./MotifGap/karate_5_gap.txt','./MotifGap/citeseer_5_gap.txt']
criterion = torch.nn.CrossEntropyLoss()
for each in files:
    edge_file = each
    file = each.split('_')[0].split('/')[-1]
    loader = DataLoad(file)
    #out_file = edge_file.split('.')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    start = time.time()
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        #torch.save(model.state_dict(), out_file)  
    print("Running time for " + edge_file + ": " + str(time.time()-start))  

Running time for ./MotifGap/book_5_gap.txt: 0.3669302463531494
Running time for ./MotifGap/football_5_gap.txt: 0.40163445472717285
Running time for ./MotifGap/cora_5_gap.txt: 20.29318881034851
Running time for ./MotifGap/karate_5_gap.txt: 0.2596139907836914
Running time for ./MotifGap/citeseer_5_gap.txt: 19.373897552490234


In [36]:
files = ['./MotifClique/book_5_clique.txt','./MotifClique/football_5_clique.txt','./MotifClique/cora_5_clique.txt','./MotifClique/karate_5_clique.txt','./MotifClique/citeseer_5_clique.txt']
criterion = torch.nn.CrossEntropyLoss()
for each in files:
    edge_file = each
    file = each.split('_')[0].split('/')[-1]
    loader = DataLoad(file)
    #out_file = edge_file.split('.')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    start = time.time()
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        #torch.save(model.state_dict(), out_file)  
    print("Running time for " + edge_file + ": " + str(time.time()-start))  

Running time for ./MotifClique/book_5_clique.txt: 0.4438934326171875
Running time for ./MotifClique/football_5_clique.txt: 0.5026073455810547
Running time for ./MotifClique/cora_5_clique.txt: 37.29936480522156
Running time for ./MotifClique/karate_5_clique.txt: 0.24621248245239258
Running time for ./MotifClique/citeseer_5_clique.txt: 25.63544726371765


In [10]:
import glob 

In [11]:
def EvaluateModel(loader,edge_file):
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.tx')[0] + '.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score
    

# Motif ER Test

In [12]:
all_motifs = glob.glob('MotifER/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

In [15]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifER/book_3_erds_0.2.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_3_erds_0.4.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_3_erds_0.6.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_3_erds_0.8.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_4_erds_0.2.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_4_erds_0.4.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_4_erds_0.6.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_4_erds_0.8.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_5_erds_0.2.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_5_erds_0.4.txt is 0.8026315789473685
The accuracy of model in graph MotifER/book_5_erds_0.6.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_5_erds_0.8.txt is 0.8157894736842105
The accuracy of model in gra

In [12]:
from tqdm import tqdm

In [15]:
all_motifs = glob.glob('MotifClique/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:00<00:00, 49.98it/s]
100%|██████████| 20/20 [00:00<00:00, 123.84it/s]
100%|██████████| 20/20 [00:00<00:00, 125.06it/s]
100%|██████████| 20/20 [00:00<00:00, 114.75it/s]
100%|██████████| 20/20 [00:00<00:00, 123.68it/s]
100%|██████████| 20/20 [00:00<00:00, 106.61it/s]
100%|██████████| 20/20 [00:11<00:00,  1.81it/s]
100%|██████████| 20/20 [00:08<00:00,  2.39it/s]
100%|██████████| 20/20 [00:09<00:00,  2.15it/s]
100%|██████████| 20/20 [00:10<00:00,  1.99it/s]
100%|██████████| 20/20 [00:08<00:00,  2.34it/s]
100%|██████████| 20/20 [00:04<00:00,  4.31it/s]
100%|██████████| 20/20 [00:04<00:00,  4.18it/s]
100%|██████████| 20/20 [00:04<00:00,  4.51it/s]
100%|██████████| 20/20 [00:02<00:00,  7.20it/s]
100%|██████████| 20/20 [00:02<00:00,  8.89it/s]
100%|██████████| 20/20 [00:00<00:00, 130.51it/s]
100%|██████████| 20/20 [00:00<00:00, 113.60it/s]
100%|██████████| 20/20 [00:00<00:00, 96.38it/s]
100%|██████████| 20/20 [00:00<00:00, 108.58it/s]
100%|██████████| 20/20 [00:00<00

The accuracy of model in graph MotifClique/book_3_clique.txt is 0.8157894736842105
The accuracy of model in graph MotifClique/book_4_clique.txt is 0.8289473684210527
The accuracy of model in graph MotifClique/book_5_clique.txt is 0.40789473684210525
The accuracy of model in graph MotifClique/book_6_clique.txt is 0.868421052631579
The accuracy of model in graph MotifClique/book_7_clique.txt is 0.8421052631578947
The accuracy of model in graph MotifClique/book_8_clique.txt is 0.8552631578947368
The accuracy of model in graph MotifClique/citeseer_3_clique.txt is 0.7211497211497212
The accuracy of model in graph MotifClique/citeseer_4_clique.txt is 0.7044187044187045
The accuracy of model in graph MotifClique/citeseer_5_clique.txt is 0.6954096954096954
The accuracy of model in graph MotifClique/citeseer_6_clique.txt is 0.7018447018447018
The accuracy of model in graph MotifClique/citeseer_7_clique.txt is 0.6924066924066924
The accuracy of model in graph MotifClique/cora_3_clique.txt is 0.5

In [15]:
all_motifs = glob.glob('MotifGap/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(50)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:00<00:00, 116.36it/s]
100%|██████████| 50/50 [00:00<00:00, 116.31it/s]
100%|██████████| 50/50 [00:00<00:00, 116.30it/s]
100%|██████████| 50/50 [00:00<00:00, 117.52it/s]
100%|██████████| 50/50 [00:00<00:00, 118.19it/s]
100%|██████████| 50/50 [00:00<00:00, 119.68it/s]
100%|██████████| 50/50 [00:23<00:00,  2.16it/s]
100%|██████████| 50/50 [00:18<00:00,  2.73it/s]
100%|██████████| 50/50 [00:19<00:00,  2.55it/s]
100%|██████████| 50/50 [00:20<00:00,  2.49it/s]
100%|██████████| 50/50 [00:18<00:00,  2.73it/s]
100%|██████████| 50/50 [00:07<00:00,  6.94it/s]
100%|██████████| 50/50 [00:03<00:00, 15.06it/s]
100%|██████████| 50/50 [00:04<00:00, 10.28it/s]
100%|██████████| 50/50 [00:03<00:00, 13.76it/s]
100%|██████████| 50/50 [00:03<00:00, 14.56it/s]
100%|██████████| 50/50 [00:00<00:00, 93.91it/s]
100%|██████████| 50/50 [00:00<00:00, 92.21it/s] 
100%|██████████| 50/50 [00:00<00:00, 95.67it/s]
100%|██████████| 50/50 [00:00<00:00, 96.29it/s]
100%|██████████| 50/50 [00:00<00:

The accuracy of model in graph MotifGap/book_3_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifGap/book_4_gap.txt is 0.8157894736842105
The accuracy of model in graph MotifGap/book_5_gap.txt is 0.8552631578947368
The accuracy of model in graph MotifGap/book_6_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifGap/book_7_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifGap/book_8_gap.txt is 0.8421052631578947
The accuracy of model in graph MotifGap/citeseer_3_gap.txt is 0.6885456885456885
The accuracy of model in graph MotifGap/citeseer_4_gap.txt is 0.6735306735306735
The accuracy of model in graph MotifGap/citeseer_5_gap.txt is 0.682968682968683
The accuracy of model in graph MotifGap/citeseer_6_gap.txt is 0.6640926640926641
The accuracy of model in graph MotifGap/citeseer_7_gap.txt is 0.6803946803946804
The accuracy of model in graph MotifGap/cora_3_gap.txt is 0.8224446786090621
The accuracy of model in graph MotifGap/cora_4_gap.txt is

In [17]:
all_motifs = glob.glob('MotifEE/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(50)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:00<00:00, 117.42it/s]
100%|██████████| 50/50 [00:00<00:00, 118.53it/s]
100%|██████████| 50/50 [00:00<00:00, 119.35it/s]
100%|██████████| 50/50 [00:00<00:00, 120.60it/s]
100%|██████████| 50/50 [00:00<00:00, 118.44it/s]
100%|██████████| 50/50 [00:00<00:00, 119.24it/s]
100%|██████████| 50/50 [00:18<00:00,  2.71it/s]
100%|██████████| 50/50 [00:18<00:00,  2.75it/s]
100%|██████████| 50/50 [00:18<00:00,  2.75it/s]
100%|██████████| 50/50 [00:18<00:00,  2.77it/s]
100%|██████████| 50/50 [00:18<00:00,  2.78it/s]
100%|██████████| 50/50 [00:03<00:00, 15.00it/s]
100%|██████████| 50/50 [00:03<00:00, 15.15it/s]
100%|██████████| 50/50 [00:03<00:00, 15.56it/s]
100%|██████████| 50/50 [00:03<00:00, 15.20it/s]
100%|██████████| 50/50 [00:03<00:00, 15.02it/s]
100%|██████████| 50/50 [00:00<00:00, 98.41it/s] 
100%|██████████| 50/50 [00:00<00:00, 88.40it/s]
100%|██████████| 50/50 [00:00<00:00, 103.76it/s]
100%|██████████| 50/50 [00:00<00:00, 101.91it/s]
100%|██████████| 50/50 [00:00<0

The accuracy of model in graph MotifEE/book_3_gap.txt is 0.8421052631578947
The accuracy of model in graph MotifEE/book_4_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifEE/book_5_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifEE/book_6_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifEE/book_7_gap.txt is 0.8421052631578947
The accuracy of model in graph MotifEE/book_8_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifEE/citeseer_3_gap.txt is 0.6855426855426855
The accuracy of model in graph MotifEE/citeseer_4_gap.txt is 0.6851136851136851
The accuracy of model in graph MotifEE/citeseer_5_gap.txt is 0.6731016731016731
The accuracy of model in graph MotifEE/citeseer_6_gap.txt is 0.6645216645216645
The accuracy of model in graph MotifEE/citeseer_7_gap.txt is 0.6735306735306735
The accuracy of model in graph MotifEE/cora_3_gap.txt is 0.8356164383561644
The accuracy of model in graph MotifEE/cora_4_gap.txt is 0.831401475

# Motif Orbit Test

In [17]:
def GetMax(file):
    with open(file,'r+') as fp:
        lines = fp.readlines()
    ret = []
    for each in lines:
        info = each.split(' ')
        src = int(info[0])
        dst = int(info[1])
        ret.append(src)
        ret.append(dst)
    ret = max(ret)
    return ret

In [19]:
all_motifs = glob.glob('MotifOrbit/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)  

In [21]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifOrbit/book_3_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_4_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_5_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_6_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_7_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_8_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/football_3_orbit.txt is 0.4823529411764706
The accuracy of model in graph MotifOrbit/football_4_orbit.txt is 0.5764705882352941
The accuracy of model in graph MotifOrbit/football_5_orbit.txt is 0.43529411764705883
The accuracy of model in graph MotifOrbit/football_6_orbit.txt is 0.5294117647058824
The accuracy of model in graph MotifOrbit/football_7_orbit.txt is 0.5882352941176471
The accuracy of model in graph MotifOrbit/football_8_orbit.txt is 0.5529411764705883
The

# Additional Task

In [16]:
def TrainAllModel(file):
    loader = DataLoad(file,True)
    suffix = ['.mtx','_away.txt','_er.txt']
    criterion = torch.nn.CrossEntropyLoss()
    for each in suffix:
        edge_file = file + each
        out_file = edge_file.split('.')[0] + '.pt'
        model = GCN(loader)
        edge_index = EdgeLoader(edge_file)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
        for _ in range(20):
            optimizer.zero_grad()  # Clear gradients.
            out = model(loader.x, edge_index)  # Perform a single forward pass.
            loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            torch.save(model.state_dict(), out_file)    

In [17]:
def TestAllModel(file):
    loader = DataLoad(file,True)
    suffix = ['.mtx','_away.txt','_er.txt']
    for each in suffix:
        edge_file = file + each
        score = EvaluateModel(loader,edge_file)
        print('The accuracy of model in graph ' + edge_file + ' is ' + str(score))

In [18]:
def EvaluateModel(loader,edge_file):
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.')[0] + '.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [19]:
def TrainAndTest(file):
    TrainAllModel(file)
    TestAllModel(file)

In [20]:
TrainAndTest('cora')

The accuracy of model in graph cora.mtx is 0.7038988408851422
The accuracy of model in graph cora_away.txt is 0.702845100105374
The accuracy of model in graph cora_er.txt is 0.6886195995785037


In [20]:
TrainAndTest('citeseer')

The accuracy of model in graph citeseer.mtx is 0.7284427284427284
The accuracy of model in graph citeseer_away.txt is 0.7168597168597168
The accuracy of model in graph citeseer_er.txt is 0.7185757185757186


# Additional Motif Test

In [14]:
from tqdm import tqdm
all_motifs = glob.glob('MotifGraph/c*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    out_file = each.split('.tx')[0] + '_gcn.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

100%|██████████| 20/20 [01:07<00:00,  3.38s/it]
100%|██████████| 20/20 [01:07<00:00,  3.37s/it]
100%|██████████| 20/20 [01:08<00:00,  3.44s/it]
100%|██████████| 20/20 [01:25<00:00,  4.30s/it]
100%|██████████| 20/20 [01:06<00:00,  3.33s/it]
100%|██████████| 20/20 [01:05<00:00,  3.26s/it]
100%|██████████| 20/20 [01:09<00:00,  3.45s/it]
100%|██████████| 20/20 [01:01<00:00,  3.08s/it]
100%|██████████| 20/20 [01:04<00:00,  3.23s/it]
100%|██████████| 20/20 [01:07<00:00,  3.39s/it]
100%|██████████| 20/20 [01:09<00:00,  3.45s/it]
100%|██████████| 20/20 [01:10<00:00,  3.50s/it]
100%|██████████| 20/20 [01:08<00:00,  3.43s/it]
100%|██████████| 20/20 [01:09<00:00,  3.50s/it]
100%|██████████| 20/20 [01:10<00:00,  3.52s/it]
100%|██████████| 20/20 [01:22<00:00,  4.14s/it]
100%|██████████| 20/20 [01:07<00:00,  3.38s/it]
100%|██████████| 20/20 [01:09<00:00,  3.50s/it]
100%|██████████| 20/20 [01:08<00:00,  3.44s/it]
100%|██████████| 20/20 [01:09<00:00,  3.46s/it]
100%|██████████| 20/20 [01:01<00:00,  3.

In [16]:
from tqdm import tqdm
def EvaluateModel(loader,edge_file):
    model = GCN(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.tx')[0] + '_gcn.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [16]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifGraph/citeseer_3_motif_0.2.txt is 0.7044187044187045
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.4.txt is 0.7211497211497212
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.6.txt is 0.7035607035607035
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.8.txt is 0.7254397254397255
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.2.txt is 0.6954096954096954
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.4.txt is 0.6928356928356928
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.6.txt is 0.7001287001287001
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.8.txt is 0.6958386958386958
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.2.txt is 0.6495066495066495
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.4.txt is 0.6628056628056628
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.6.txt is 0.6615186615186616
The accuracy of model

In [17]:
all_motifs = glob.glob('MotifER/c*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    out_file = each.split('.tx')[0] + '_gcn.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

100%|██████████| 20/20 [01:07<00:00,  3.39s/it]
100%|██████████| 20/20 [01:07<00:00,  3.38s/it]
100%|██████████| 20/20 [01:07<00:00,  3.35s/it]
100%|██████████| 20/20 [01:10<00:00,  3.51s/it]
100%|██████████| 20/20 [01:08<00:00,  3.44s/it]
100%|██████████| 20/20 [01:07<00:00,  3.40s/it]
100%|██████████| 20/20 [01:06<00:00,  3.35s/it]
100%|██████████| 20/20 [01:09<00:00,  3.48s/it]
100%|██████████| 20/20 [01:11<00:00,  3.55s/it]
100%|██████████| 20/20 [01:00<00:00,  3.05s/it]
100%|██████████| 20/20 [01:03<00:00,  3.18s/it]
100%|██████████| 20/20 [01:04<00:00,  3.20s/it]
100%|██████████| 20/20 [01:07<00:00,  3.35s/it]
100%|██████████| 20/20 [01:03<00:00,  3.18s/it]
100%|██████████| 20/20 [01:04<00:00,  3.21s/it]
100%|██████████| 20/20 [01:07<00:00,  3.37s/it]
100%|██████████| 20/20 [01:06<00:00,  3.32s/it]
100%|██████████| 20/20 [01:09<00:00,  3.49s/it]
100%|██████████| 20/20 [01:08<00:00,  3.43s/it]
100%|██████████| 20/20 [01:10<00:00,  3.52s/it]
100%|██████████| 20/20 [01:08<00:00,  3.

The accuracy of model in graph MotifER/citeseer_3_erds_0.2.txt is 0.7138567138567139
The accuracy of model in graph MotifER/citeseer_3_erds_0.4.txt is 0.7125697125697126
The accuracy of model in graph MotifER/citeseer_3_erds_0.6.txt is 0.716001716001716
The accuracy of model in graph MotifER/citeseer_3_erds_0.8.txt is 0.7198627198627199
The accuracy of model in graph MotifER/citeseer_4_erds_0.2.txt is 0.6902616902616903
The accuracy of model in graph MotifER/citeseer_4_erds_0.4.txt is 0.6795366795366795
The accuracy of model in graph MotifER/citeseer_4_erds_0.6.txt is 0.7005577005577005
The accuracy of model in graph MotifER/citeseer_4_erds_0.8.txt is 0.6833976833976834
The accuracy of model in graph MotifER/citeseer_5_erds_0.2.txt is 0.6924066924066924
The accuracy of model in graph MotifER/cora_3_erds_0.2.txt is 0.6765015806111696
The accuracy of model in graph MotifER/citeseer_5_erds_0.4.txt is 0.6872586872586872
The accuracy of model in graph MotifER/citeseer_5_erds_0.6.txt is 0.70

In [None]:
all_motifs = glob.glob('MotifOrbit/cora*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gcn.pt'
    model = GCN(loader)
    edge_index = EdgeLoader(each)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)  
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))


In [None]:
def GetMax(file):
    with open(file,'r+') as fp:
        lines = fp.readlines()
    ret = []
    for each in lines:
        info = each.split(' ')
        src = int(info[0])
        dst = int(info[1])
        ret.append(src)
        ret.append(dst)
    ret = max(ret)
    return ret