In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import torch
from torch_geometric.data import Data
from torch_geometric.utils import to_networkx
import torch_geometric.utils as tgu
import torch.nn as nn
import random
import time
import math
import matplotlib.pyplot as plt
import json
from DrBC_model import DrBC
from scipy.stats import kendalltau
from networkit.centrality import KadabraBetweenness
from networkit.graph import Graph

In [2]:
def evaluate(pred, label, N):
    """Compute the top-N% accuracy between predicted and reference scores.

    The ``ceil(len(label) * N / 100)`` highest-scoring positions are selected
    from ``pred`` and from ``label``; the accuracy is the fraction of selected
    positions the two selections have in common.

    Args:
        pred: tensor of predicted scores, one per node. Flattened before use,
            so both ``(n,)`` and ``(n, 1)`` layouts are accepted.
        label: tensor of reference scores with the same number of elements.
        N: percentage (0-100) of nodes to keep in each selection.

    Returns:
        float in ``[0, 1]``. Returns ``0.0`` when the selection is empty
        (``N <= 0`` or no labels) instead of dividing by zero.
    """
    pred = pred.reshape(-1)
    label = label.reshape(-1)

    len_ = label.shape[0]
    # Clamp so that N > 100 cannot request more entries than exist.
    N_ = min(math.ceil(len_*N/100), len_)
    if N_ <= 0:
        return 0.0

    top_pred = torch.topk(pred, N_).indices.tolist()
    # A set gives O(1) membership tests; topk indices are unique, so the
    # overlap count equals the size of the intersection.
    top_label = set(torch.topk(label, N_).indices.tolist())
    count = sum(1 for idx in top_pred if idx in top_label)
    return count / N_

In [6]:
# Hyper-parameters forwarded to DrBC; 'num_nodes' is filled in per scale below.
train_config ={'EMBEDDING_SIZE' : 128,
               'node_default_dim' : 3,  # [degree, 1, 1]
                'encoder_layer' : 5,
                'num_nodes' : None,
                'batch' : 1,
                'episode' : 100,}

# Location and number of the synthetic test graphs ({j}.txt / {j}_score.txt).
TEST_DATA_DIR = 'hw1_data/Synthetic/5000'
NUM_TEST_GRAPHS = 30


def _mean4(values):
    """Return the arithmetic mean of ``values`` rounded to 4 decimals."""
    return round(sum(values)/len(values),4)


# Write the prediction results to a txt file.
with open('output.txt', 'w') as f:

    scale = [500]#,300,500,800,1000]

    for scale_size in scale:
        # Per-scale metric accumulators (one entry per test graph).
        top1 = []
        top5 = []
        top10 = []
        K_tau = []
        kadabra_score = []
        kadabra_score5 = []
        kadabra_score10 = []
        kadabra_K_tau = []
        kadabra_train_time =[]
        pred_time = []

        train_config['num_nodes']= scale_size

        f.write('scale size = '+str(scale_size)+"\n")
        f.write(json.dumps(train_config))
        f.write("\n")
        f.write("\n")

        # train
        start = time.perf_counter()
        Drbc = DrBC(train_config['EMBEDDING_SIZE'],
                    train_config['node_default_dim'],
                    train_config['encoder_layer'],
                    train_config['batch'])
        pred,loss_list = Drbc.train(train_config['episode'],train_config['num_nodes'])
        end = time.perf_counter()
        f.write('running time = '+str(end-start)+"\n")
        f.write("\n")
        torch.save(Drbc,f'Drbc{train_config["num_nodes"]}.pt')

        # loss plot: explicit figure/axes, closed afterwards so no empty
        # figure is left behind in the notebook output.
        fig, ax = plt.subplots()
        ax.plot(range(train_config['episode']), loss_list)
        ax.set_xlabel("epoch",fontsize='13')
        ax.set_ylabel("loss",fontsize='13')
        ax.grid()
        fig.savefig(f'scale{scale_size}_loss.png')
        plt.close(fig)

        # predict
        for j in range(NUM_TEST_GRAPHS):

            f.write('test on data '+str(j)+"\n")
            f.write("\n")
            df = pd.read_csv(f'{TEST_DATA_DIR}/{j}.txt', sep="\t", header=None)
            # Convert the edge list once; it is reused for the tensor and for
            # building the networkit graph below.
            edges = df.to_numpy()
            edge_index = torch.tensor(edges.T, dtype=torch.long)
            # NOTE(review): counts distinct ids that appear in an edge, so it
            # assumes node ids are contiguous from 0 with no isolated nodes --
            # confirm against the data set.
            n_nodes = torch.unique(edge_index).shape[0]
            graph = Data(edge_index=edge_index,num_nodes=n_nodes)
            score = pd.read_csv(f'{TEST_DATA_DIR}/{j}_score.txt', sep="\t", header=None)
            score = torch.tensor(score[1].values)

            start = time.perf_counter()
            # The second return value is not used here; discarding it keeps
            # the training loss_list intact.
            pred, _ = Drbc.predict(graph,score)
            end = time.perf_counter()
            pred_time.append(end-start)

            acc = evaluate(pred, score, 1)
            top1.append(acc)
            f.write(f'Top-1 Accuracy on the test data {j} is : ' + str(acc)+"\n")

            acc = evaluate(pred, score, 5)
            top5.append(acc)
            f.write(f'Top-5 Accuracy on the test data {j} is : ' + str(acc)+"\n")

            acc = evaluate(pred, score, 10)
            top10.append(acc)
            f.write(f'Top-10 Accuracy on the test data {j} is : ' + str(acc)+"\n")

            # Kendall tau is a rank correlation of the score vectors
            # themselves; kendalltau derives the ordering internally, so the
            # raw scores are passed rather than argsort() index permutations.
            score_np = score.numpy()
            tau, p_value  = kendalltau(pred.detach().cpu().numpy(), score_np)
            K_tau.append(tau)
            f.write('Kendall tau distance is : '+str(round(tau,4))+"\n")

            # Build the same graph in networkit for the KADABRA baseline.
            gra = Graph()
            gra.addNodes(n_nodes)
            for u, v in edges[:, :2]:
                gra.addEdge(int(u), int(v))

            start = time.perf_counter()
            kadabra = KadabraBetweenness(gra,err = 0.01, delta = 0.1, k = 0,).run().scores()
            end = time.perf_counter()
            kadabra_train_time.append(end-start)

            kadabra_tensor = torch.tensor(kadabra)
            kadabra_score.append(evaluate(kadabra_tensor, score, 1))
            kadabra_score5.append(evaluate(kadabra_tensor, score, 5))
            kadabra_score10.append(evaluate(kadabra_tensor, score, 10))

            # Same metric as for DrBC: scores against scores.
            tau, p_value  = kendalltau(np.array(kadabra), score_np)
            kadabra_K_tau.append(tau)

        # Per-scale summary of the DrBC metrics.
        top1_mean = _mean4(top1)
        f.write('mean of top1 accuracy is : '+str(top1_mean)+"\n")
        top5_mean = _mean4(top5)
        f.write('mean of top5 accuracy is : '+str(top5_mean)+"\n")
        top10_mean = _mean4(top10)
        f.write('mean of top10 accuracy is : '+str(top10_mean)+"\n")
        K_tau_mean = _mean4(K_tau)
        f.write('mean of tau accuracy is : '+str(K_tau_mean)+"\n")

        # Per-scale summary of the KADABRA baseline.
        kadabra_score_mean = _mean4(kadabra_score)
        f.write('kadabra mean of top1 accuracy is : '+str(kadabra_score_mean)+"\n")
        kadabra_score5_mean = _mean4(kadabra_score5)
        f.write('kadabra mean of top5 accuracy is : '+str(kadabra_score5_mean)+"\n")
        kadabra_score10_mean = _mean4(kadabra_score10)
        f.write('kadabra mean of top10 accuracy is : '+str(kadabra_score10_mean)+"\n")
        kadabra_K_tau_mean = _mean4(kadabra_K_tau)
        # Prefixed so it cannot be confused with the DrBC tau line above.
        f.write('kadabra mean of tau accuracy is : '+str(kadabra_K_tau_mean)+"\n")

        avg_kadabra_train_time = _mean4(kadabra_train_time)
        f.write('average kadabra train time = '+str(avg_kadabra_train_time)+"\n")

        average_time = _mean4(pred_time)
        f.write('average predict time = '+str(average_time)+"\n")
        f.write("\n")

<Figure size 640x480 with 0 Axes>