In [1]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GATConv
import torch.nn.functional as F 
import numpy as np 
import pickle

In [2]:
torch.manual_seed(1234)

<torch._C.Generator at 0x7f806476de50>

In [3]:
class DataLoad():
    def __init__(self,file,feature=False):
        with open(file+'.train','rb') as fp:
            self.n_class,train_mask = pickle.load(fp)
        self.train_mask = torch.tensor(train_mask,dtype=torch.bool)
        self.n_node = len(self.train_mask)
        with open(file+'.labels','r+') as fp:
            lines = fp.readlines()
        self.y = []
        for each in lines:
            info = each.split(' ')
            label = info[1]
            self.y.append(int(label))
        self.y = torch.tensor(self.y,dtype=torch.long)
        if feature == False:
            self.x = np.zeros((self.n_node,self.n_node))
            for i in range(self.n_node):
                self.x[i][i] = 1
            self.x = torch.tensor(self.x,dtype=torch.float)
            self.n_feature = self.n_node
        else:
            with open(file+'.feat','rb') as fp:
                self.x = torch.tensor(pickle.load(fp),dtype=torch.float)
            self.n_feature = len(self.x[0])
            
        self.test = []
        for i in range(self.n_node):
            if self.train_mask[i] == False:
                self.test.append(i)
    
    def AppendDummy(self,m):
        to_add = torch.tensor([False for _ in range(m)],dtype=torch.bool)
        self.train_mask = torch.cat((self.train_mask,to_add))
        temp = []
        for _ in range(m):
            temp.append([0 for _ in range(self.n_feature)])
        temp = torch.tensor(temp,dtype=torch.float)
        self.x = torch.cat((self.x,temp))
        to_add = torch.tensor([-1 for _ in range(m)],dtype=torch.long)
        self.y = torch.cat((self.y,to_add))

In [4]:
def EdgeLoader(file):
    with open(file,'r+') as fp:
        lines = fp.readlines()
    ret = []
    for each in lines:
        info = each.split(' ')
        src = int(info[0])
        dst = int(info[1])
        ret.append([src,dst])
    ret = np.array(ret)
    return torch.tensor(ret.T,dtype=torch.int)

In [5]:
class GAT(torch.nn.Module):
    def __init__(self,DataLoader):
        super(GAT, self).__init__()
        self.conv1 = GATConv(DataLoader.n_feature, int(DataLoader.n_feature/8),heads=4,dropout=0.1)
        self.conv2 = GATConv(int(DataLoader.n_feature/8)*4, int(DataLoader.n_feature/8),heads=1,dropout=0.1)
        self.classifier = Linear(int(DataLoader.n_feature/8), DataLoader.n_class)

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = F.relu(h)
        h = self.conv2(h, edge_index)
        h = F.relu(h)  
        out = self.classifier(h)
        return out


In [6]:
def TrainAllModel(file):
    loader = DataLoad(file)
    suffix = ['.mtx','_relwire.txt','_swing.txt','_er.txt','_away.txt']
    criterion = torch.nn.CrossEntropyLoss()
    for each in suffix:
        edge_file = file + each
        out_file = edge_file.split('.')[0] + '_gat.pt'
        model = GAT(loader)
        edge_index = EdgeLoader(edge_file)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
        for _ in range(50):
            optimizer.zero_grad()  # Clear gradients.
            out = model(loader.x, edge_index)  # Perform a single forward pass.
            loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            torch.save(model.state_dict(), out_file)    

In [7]:
def EvaluateModel(loader,edge_file):
    model = GAT(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.')[0] + '_gat.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score
    

In [8]:
def TestAllModel(file):
    loader = DataLoad(file)
    suffix = ['.mtx','_relwire.txt','_swing.txt','_er.txt','_away.txt']
    for each in suffix:
        edge_file = file + each
        score = EvaluateModel(loader,edge_file)
        print('The accuracy of model in graph ' + edge_file + ' is ' + str(score))

In [9]:
def TrainAndTest(file):
    TrainAllModel(file)
    TestAllModel(file)

In [17]:
TrainAndTest('book')

The accuracy of model in graph book.mtx is 0.8157894736842105
The accuracy of model in graph book_relwire.txt is 0.6842105263157895
The accuracy of model in graph book_swing.txt is 0.8026315789473685
The accuracy of model in graph book_er.txt is 0.8289473684210527
The accuracy of model in graph book_away.txt is 0.8289473684210527


In [18]:
TrainAndTest('football')

The accuracy of model in graph football.mtx is 0.8
The accuracy of model in graph football_relwire.txt is 0.8470588235294118
The accuracy of model in graph football_swing.txt is 0.8588235294117647
The accuracy of model in graph football_er.txt is 0.8588235294117647
The accuracy of model in graph football_away.txt is 0.7294117647058823


In [19]:
TrainAndTest('karate')

The accuracy of model in graph karate.mtx is 0.96
The accuracy of model in graph karate_relwire.txt is 0.52
The accuracy of model in graph karate_swing.txt is 0.96
The accuracy of model in graph karate_er.txt is 1.0
The accuracy of model in graph karate_away.txt is 0.72


In [9]:
import glob 

In [23]:
all_motifs = glob.glob('MotifGraph/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
    

In [10]:
def EvaluateModel(loader,edge_file):
    model = GAT(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.tx')[0] + '_gat.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [25]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifGraph/book_3_motif_0.3.txt is 0.8157894736842105
The accuracy of model in graph MotifGraph/book_3_motif_0.5.txt is 0.8421052631578947
The accuracy of model in graph MotifGraph/book_3_motif_0.7.txt is 0.8289473684210527
The accuracy of model in graph MotifGraph/book_3_motif_0.9.txt is 0.8421052631578947
The accuracy of model in graph MotifGraph/book_4_motif_0.3.txt is 0.8289473684210527
The accuracy of model in graph MotifGraph/book_4_motif_0.5.txt is 0.8026315789473685
The accuracy of model in graph MotifGraph/book_4_motif_0.7.txt is 0.7894736842105263
The accuracy of model in graph MotifGraph/book_4_motif_0.9.txt is 0.8552631578947368
The accuracy of model in graph MotifGraph/book_5_motif_0.3.txt is 0.8421052631578947
The accuracy of model in graph MotifGraph/book_5_motif_0.5.txt is 0.7894736842105263
The accuracy of model in graph MotifGraph/book_5_motif_0.7.txt is 0.7763157894736842
The accuracy of model in graph MotifGraph/book_5_motif_0.9.txt is

# Motif ER Test

In [27]:
all_motifs = glob.glob('MotifER/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

In [28]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifER/book_3_erds_0.2.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_3_erds_0.4.txt is 0.7894736842105263
The accuracy of model in graph MotifER/book_3_erds_0.6.txt is 0.8026315789473685
The accuracy of model in graph MotifER/book_3_erds_0.8.txt is 0.8026315789473685
The accuracy of model in graph MotifER/book_4_erds_0.2.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_4_erds_0.4.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_4_erds_0.6.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_4_erds_0.8.txt is 0.8421052631578947
The accuracy of model in graph MotifER/book_5_erds_0.2.txt is 0.868421052631579
The accuracy of model in graph MotifER/book_5_erds_0.4.txt is 0.8157894736842105
The accuracy of model in graph MotifER/book_5_erds_0.6.txt is 0.8289473684210527
The accuracy of model in graph MotifER/book_5_erds_0.8.txt is 0.7894736842105263
The accuracy of model in grap

In [13]:
from tqdm import tqdm
all_motifs = glob.glob('MotifClique/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(50)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:00<00:00, 67.59it/s]
100%|██████████| 50/50 [00:00<00:00, 77.45it/s]
100%|██████████| 50/50 [00:00<00:00, 66.20it/s]
100%|██████████| 50/50 [00:00<00:00, 73.44it/s]
100%|██████████| 50/50 [00:00<00:00, 69.91it/s]
100%|██████████| 50/50 [00:00<00:00, 77.08it/s]
100%|██████████| 50/50 [00:53<00:00,  1.07s/it]
100%|██████████| 50/50 [00:37<00:00,  1.35it/s]
100%|██████████| 50/50 [00:46<00:00,  1.08it/s]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s]
100%|██████████| 50/50 [00:41<00:00,  1.22it/s]
100%|██████████| 50/50 [00:23<00:00,  2.09it/s]
100%|██████████| 50/50 [00:22<00:00,  2.21it/s]
100%|██████████| 50/50 [00:22<00:00,  2.19it/s]
100%|██████████| 50/50 [00:13<00:00,  3.84it/s]
100%|██████████| 50/50 [00:12<00:00,  3.98it/s]
100%|██████████| 50/50 [00:00<00:00, 59.48it/s]
100%|██████████| 50/50 [00:00<00:00, 67.95it/s]
100%|██████████| 50/50 [00:00<00:00, 63.76it/s]
100%|██████████| 50/50 [00:00<00:00, 63.02it/s]
100%|██████████| 50/50 [00:00<00:00, 77.

The accuracy of model in graph MotifClique/book_3_clique.txt is 0.8552631578947368
The accuracy of model in graph MotifClique/book_4_clique.txt is 0.8552631578947368
The accuracy of model in graph MotifClique/book_5_clique.txt is 0.8289473684210527
The accuracy of model in graph MotifClique/book_6_clique.txt is 0.7763157894736842
The accuracy of model in graph MotifClique/book_7_clique.txt is 0.8289473684210527
The accuracy of model in graph MotifClique/book_8_clique.txt is 0.8157894736842105
The accuracy of model in graph MotifClique/citeseer_3_clique.txt is 0.694980694980695
The accuracy of model in graph MotifClique/citeseer_4_clique.txt is 0.6791076791076791
The accuracy of model in graph MotifClique/citeseer_5_clique.txt is 0.6731016731016731
The accuracy of model in graph MotifClique/citeseer_6_clique.txt is 0.6773916773916774
The accuracy of model in graph MotifClique/citeseer_7_clique.txt is 0.6640926640926641
The accuracy of model in graph MotifClique/cora_3_clique.txt is 0.81

In [14]:
from tqdm import tqdm
all_motifs = glob.glob('MotifEE/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(50)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

100%|██████████| 50/50 [00:00<00:00, 73.35it/s]
100%|██████████| 50/50 [00:00<00:00, 56.79it/s]
100%|██████████| 50/50 [00:00<00:00, 79.54it/s]
100%|██████████| 50/50 [00:00<00:00, 70.64it/s]
100%|██████████| 50/50 [00:00<00:00, 69.91it/s]
100%|██████████| 50/50 [00:00<00:00, 64.88it/s]
100%|██████████| 50/50 [00:37<00:00,  1.35it/s]
100%|██████████| 50/50 [00:36<00:00,  1.39it/s]
100%|██████████| 50/50 [00:36<00:00,  1.36it/s]
100%|██████████| 50/50 [00:36<00:00,  1.38it/s]
100%|██████████| 50/50 [00:37<00:00,  1.35it/s]
100%|██████████| 50/50 [00:06<00:00,  7.66it/s]
100%|██████████| 50/50 [00:06<00:00,  7.50it/s]
100%|██████████| 50/50 [00:06<00:00,  7.88it/s]
100%|██████████| 50/50 [00:06<00:00,  7.91it/s]
100%|██████████| 50/50 [00:06<00:00,  7.46it/s]
100%|██████████| 50/50 [00:00<00:00, 78.31it/s]
100%|██████████| 50/50 [00:00<00:00, 79.51it/s]
100%|██████████| 50/50 [00:00<00:00, 75.67it/s]
100%|██████████| 50/50 [00:00<00:00, 81.28it/s]
100%|██████████| 50/50 [00:00<00:00, 76.

The accuracy of model in graph MotifEE/book_3_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifEE/book_4_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifEE/book_5_gap.txt is 0.8289473684210527
The accuracy of model in graph MotifEE/book_6_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifEE/book_7_gap.txt is 0.8157894736842105
The accuracy of model in graph MotifEE/book_8_gap.txt is 0.7894736842105263
The accuracy of model in graph MotifEE/citeseer_3_gap.txt is 0.6752466752466753
The accuracy of model in graph MotifEE/citeseer_4_gap.txt is 0.6735306735306735
The accuracy of model in graph MotifEE/citeseer_5_gap.txt is 0.6705276705276705
The accuracy of model in graph MotifEE/citeseer_6_gap.txt is 0.6662376662376662
The accuracy of model in graph MotifEE/citeseer_7_gap.txt is 0.6636636636636637
The accuracy of model in graph MotifEE/cora_3_gap.txt is 0.821917808219178
The accuracy of model in graph MotifEE/cora_4_gap.txt is 0.8087460484

In [11]:
from tqdm import tqdm
all_motifs = glob.glob('MotifGap/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    if "c" in file:
        loader = DataLoad(file,True)
    else:
        loader = DataLoad(file)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

100%|██████████| 20/20 [00:00<00:00, 67.92it/s]
100%|██████████| 20/20 [00:00<00:00, 96.10it/s]
100%|██████████| 20/20 [00:00<00:00, 73.07it/s]
100%|██████████| 20/20 [00:00<00:00, 86.74it/s]
100%|██████████| 20/20 [00:00<00:00, 98.54it/s]
100%|██████████| 20/20 [00:00<00:00, 82.88it/s]
100%|██████████| 20/20 [00:15<00:00,  1.32it/s]
100%|██████████| 20/20 [00:14<00:00,  1.42it/s]
100%|██████████| 20/20 [00:14<00:00,  1.37it/s]
100%|██████████| 20/20 [00:15<00:00,  1.29it/s]
100%|██████████| 20/20 [00:14<00:00,  1.39it/s]
100%|██████████| 20/20 [00:02<00:00,  7.92it/s]
100%|██████████| 20/20 [00:02<00:00,  7.93it/s]
100%|██████████| 20/20 [00:05<00:00,  3.93it/s]
100%|██████████| 20/20 [00:03<00:00,  5.43it/s]
100%|██████████| 20/20 [00:02<00:00,  6.76it/s]
100%|██████████| 20/20 [00:00<00:00, 81.51it/s]
100%|██████████| 20/20 [00:00<00:00, 69.30it/s]
100%|██████████| 20/20 [00:00<00:00, 73.02it/s]
100%|██████████| 20/20 [00:00<00:00, 67.02it/s]
100%|██████████| 20/20 [00:00<00:00, 69.

The accuracy of model in graph MotifGap/book_3_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifGap/book_4_gap.txt is 0.7894736842105263
The accuracy of model in graph MotifGap/book_5_gap.txt is 0.8552631578947368
The accuracy of model in graph MotifGap/book_6_gap.txt is 0.8421052631578947
The accuracy of model in graph MotifGap/book_7_gap.txt is 0.8157894736842105
The accuracy of model in graph MotifGap/book_8_gap.txt is 0.8026315789473685
The accuracy of model in graph MotifGap/citeseer_3_gap.txt is 0.6791076791076791
The accuracy of model in graph MotifGap/citeseer_4_gap.txt is 0.6778206778206778
The accuracy of model in graph MotifGap/citeseer_5_gap.txt is 0.6752466752466753
The accuracy of model in graph MotifGap/citeseer_6_gap.txt is 0.6675246675246675
The accuracy of model in graph MotifGap/citeseer_7_gap.txt is 0.6816816816816816
The accuracy of model in graph MotifGap/cora_3_gap.txt is 0.7629083245521602
The accuracy of model in graph MotifGap/cora_4_gap.txt i

# Test Motif Orbit

In [21]:
def GetMax(file):
    with open(file,'r+') as fp:
        lines = fp.readlines()
    ret = []
    for each in lines:
        info = each.split(' ')
        src = int(info[0])
        dst = int(info[1])
        ret.append(src)
        ret.append(dst)
    ret = max(ret)
    return ret

In [31]:
all_motifs = glob.glob('MotifOrbit/*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in range(50):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)  

In [32]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifOrbit/book_3_orbit.txt is 0.8289473684210527
The accuracy of model in graph MotifOrbit/book_4_orbit.txt is 0.8421052631578947
The accuracy of model in graph MotifOrbit/book_5_orbit.txt is 0.8026315789473685
The accuracy of model in graph MotifOrbit/book_6_orbit.txt is 0.7894736842105263
The accuracy of model in graph MotifOrbit/book_7_orbit.txt is 0.8289473684210527
The accuracy of model in graph MotifOrbit/book_8_orbit.txt is 0.7763157894736842
The accuracy of model in graph MotifOrbit/football_3_orbit.txt is 0.6352941176470588
The accuracy of model in graph MotifOrbit/football_4_orbit.txt is 0.8235294117647058
The accuracy of model in graph MotifOrbit/football_5_orbit.txt is 0.7176470588235294
The accuracy of model in graph MotifOrbit/football_6_orbit.txt is 0.6941176470588235
The accuracy of model in graph MotifOrbit/football_7_orbit.txt is 0.5529411764705883
The accuracy of model in graph MotifOrbit/football_8_orbit.txt is 0.788235294117647
The a

# Additional Tasks

In [12]:
from tqdm import tqdm
def TrainAllModel(file):
    loader = DataLoad(file,True)
    suffix = ['.mtx','_away.txt','_er.txt']
    criterion = torch.nn.CrossEntropyLoss()
    for each in suffix:
        edge_file = file + each
        out_file = edge_file.split('.')[0] + '.pt'
        model = GAT(loader)
        edge_index = EdgeLoader(edge_file)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
        for _ in tqdm(range(20)):
            optimizer.zero_grad()  # Clear gradients.
            out = model(loader.x, edge_index)  # Perform a single forward pass.
            loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            torch.save(model.state_dict(), out_file)    

In [13]:
def TestAllModel(file):
    loader = DataLoad(file,True)
    suffix = ['.mtx','_away.txt','_er.txt']
    for each in suffix:
        edge_file = file + each
        score = EvaluateModel(loader,edge_file)
        print('The accuracy of model in graph ' + edge_file + ' is ' + str(score))

In [14]:
def EvaluateModel(loader,edge_file):
    model = GAT(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.')[0] + '.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [15]:
def TrainAndTest(file):
    TrainAllModel(file)
    TestAllModel(file)

In [16]:
TrainAndTest('cora')

100%|██████████| 20/20 [00:03<00:00,  5.97it/s]
100%|██████████| 20/20 [00:03<00:00,  6.04it/s]
100%|██████████| 20/20 [00:02<00:00,  6.67it/s]


The accuracy of model in graph cora.mtx is 0.7523709167544784
The accuracy of model in graph cora_away.txt is 0.7413066385669126
The accuracy of model in graph cora_er.txt is 0.7586933614330874


In [16]:
TrainAndTest('citeseer')

100%|██████████| 20/20 [00:15<00:00,  1.31it/s]
100%|██████████| 20/20 [00:15<00:00,  1.31it/s]
100%|██████████| 20/20 [00:14<00:00,  1.36it/s]


The accuracy of model in graph citeseer.mtx is 0.7202917202917203
The accuracy of model in graph citeseer_away.txt is 0.7078507078507078
The accuracy of model in graph citeseer_er.txt is 0.7168597168597168


# Additional Motif Test

In [17]:
from tqdm import tqdm

In [19]:
all_motifs = glob.glob('MotifGraph/c*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

100%|██████████| 20/20 [01:59<00:00,  5.99s/it]
100%|██████████| 20/20 [01:58<00:00,  5.92s/it]
100%|██████████| 20/20 [01:58<00:00,  5.95s/it]
100%|██████████| 20/20 [02:27<00:00,  7.36s/it]
100%|██████████| 20/20 [01:57<00:00,  5.87s/it]
100%|██████████| 20/20 [01:56<00:00,  5.84s/it]
100%|██████████| 20/20 [01:56<00:00,  5.80s/it]
100%|██████████| 20/20 [01:53<00:00,  5.66s/it]
100%|██████████| 20/20 [01:57<00:00,  5.88s/it]
100%|██████████| 20/20 [02:00<00:00,  6.01s/it]
100%|██████████| 20/20 [01:57<00:00,  5.89s/it]
100%|██████████| 20/20 [01:59<00:00,  5.99s/it]
100%|██████████| 20/20 [01:55<00:00,  5.78s/it]
100%|██████████| 20/20 [01:55<00:00,  5.75s/it]
100%|██████████| 20/20 [02:06<00:00,  6.32s/it]
100%|██████████| 20/20 [02:27<00:00,  7.37s/it]
100%|██████████| 20/20 [01:55<00:00,  5.78s/it]
100%|██████████| 20/20 [01:56<00:00,  5.84s/it]
100%|██████████| 20/20 [01:57<00:00,  5.86s/it]
100%|██████████| 20/20 [01:58<00:00,  5.91s/it]
100%|██████████| 20/20 [01:38<00:00,  4.

In [17]:
def EvaluateModel(loader,edge_file):
    model = GAT(loader)
    edge_index = EdgeLoader(edge_file)
    model_name = edge_file.split('.tx')[0] + '_gat.pt'
    model.load_state_dict(torch.load(model_name))
    out = model(loader.x,edge_index).detach().numpy()
    total = len(loader.test)
    correct = 0
    for each in loader.test:
        idx = np.argmax(out[each])
        if idx == loader.y[each]:
            correct += 1
    score = correct/total 
    return score

In [22]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifGraph/citeseer_3_motif_0.2.txt is 0.6696696696696697
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.4.txt is 0.694980694980695
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.6.txt is 0.6984126984126984
The accuracy of model in graph MotifGraph/citeseer_3_motif_0.8.txt is 0.7065637065637066
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.2.txt is 0.676962676962677
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.4.txt is 0.6675246675246675
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.6.txt is 0.6825396825396826
The accuracy of model in graph MotifGraph/citeseer_4_motif_0.8.txt is 0.6872586872586872
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.2.txt is 0.6366366366366366
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.4.txt is 0.6550836550836551
The accuracy of model in graph MotifGraph/citeseer_5_motif_0.6.txt is 0.6640926640926641
The accuracy of model i

In [18]:
all_motifs = glob.glob('MotifER/c*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)    

100%|██████████| 20/20 [01:58<00:00,  5.91s/it]
100%|██████████| 20/20 [01:53<00:00,  5.68s/it]
100%|██████████| 20/20 [01:56<00:00,  5.82s/it]
100%|██████████| 20/20 [01:57<00:00,  5.87s/it]
100%|██████████| 20/20 [01:51<00:00,  5.58s/it]
100%|██████████| 20/20 [01:51<00:00,  5.60s/it]
100%|██████████| 20/20 [01:57<00:00,  5.87s/it]
100%|██████████| 20/20 [01:57<00:00,  5.85s/it]
100%|██████████| 20/20 [02:03<00:00,  6.16s/it]
100%|██████████| 20/20 [01:37<00:00,  4.86s/it]
100%|██████████| 20/20 [01:57<00:00,  5.89s/it]
100%|██████████| 20/20 [01:53<00:00,  5.67s/it]
100%|██████████| 20/20 [01:52<00:00,  5.62s/it]
100%|██████████| 20/20 [01:53<00:00,  5.69s/it]
100%|██████████| 20/20 [01:54<00:00,  5.74s/it]
100%|██████████| 20/20 [01:57<00:00,  5.89s/it]
100%|██████████| 20/20 [01:52<00:00,  5.63s/it]
100%|██████████| 20/20 [01:54<00:00,  5.71s/it]
100%|██████████| 20/20 [01:56<00:00,  5.83s/it]
100%|██████████| 20/20 [01:56<00:00,  5.85s/it]
100%|██████████| 20/20 [01:55<00:00,  5.

In [19]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))

The accuracy of model in graph MotifER/citeseer_3_erds_0.2.txt is 0.6924066924066924
The accuracy of model in graph MotifER/citeseer_3_erds_0.4.txt is 0.7074217074217074
The accuracy of model in graph MotifER/citeseer_3_erds_0.6.txt is 0.6966966966966966
The accuracy of model in graph MotifER/citeseer_3_erds_0.8.txt is 0.7104247104247104
The accuracy of model in graph MotifER/citeseer_4_erds_0.2.txt is 0.6786786786786787
The accuracy of model in graph MotifER/citeseer_4_erds_0.4.txt is 0.6838266838266839
The accuracy of model in graph MotifER/citeseer_4_erds_0.6.txt is 0.6731016731016731
The accuracy of model in graph MotifER/citeseer_4_erds_0.8.txt is 0.6846846846846847
The accuracy of model in graph MotifER/citeseer_5_erds_0.2.txt is 0.6799656799656799
The accuracy of model in graph MotifER/cora_3_erds_0.2.txt is 0.7386722866174921
The accuracy of model in graph MotifER/citeseer_5_erds_0.4.txt is 0.6855426855426855
The accuracy of model in graph MotifER/citeseer_5_erds_0.6.txt is 0.6

In [23]:
all_motifs = glob.glob('MotifOrbit/c*.txt')
criterion = torch.nn.CrossEntropyLoss()
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    out_file = each.split('.tx')[0] + '_gat.pt'
    model = GAT(loader)
    edge_index = EdgeLoader(each)
    m = GetMax(each) #Now we have the number of total nodes
    m = m + 1 - loader.n_node #This is the number of motifs
    loader.AppendDummy(m)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for _ in tqdm(range(20)):
        optimizer.zero_grad()  # Clear gradients.
        out = model(loader.x, edge_index)  # Perform a single forward pass.
        loss = criterion(out[loader.train_mask], loader.y[loader.train_mask])  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        torch.save(model.state_dict(), out_file)  

KeyboardInterrupt: 

In [None]:
for each in all_motifs:
    file = each.split('/')[1].split('_')[0]
    loader = DataLoad(file,True)
    score = EvaluateModel(loader,each)
    print('The accuracy of model in graph ' + each + ' is ' + str(score))