In [None]:
from function import *
import numpy as np
import urllib.request
import zipfile
import io
import networkx as nx
import numpy as np

# dataset download: fetch the zip archive from `url` and unpack it into the
# current working directory.
url = "http://www-personal.umich.edu/~mejn/netdata/polbooks.zip"

# The context managers close the HTTP response and the archive handle even if
# reading or extraction raises.  The archive is bound to its own name so that
# the imported `zipfile` module is not overwritten; overwriting it makes a
# second run of this cell fail, because `zipfile.ZipFile` would then be looked
# up on a ZipFile instance instead of on the module.
with urllib.request.urlopen(url) as response:
    archive_bytes = io.BytesIO(response.read())
with zipfile.ZipFile(archive_bytes) as archive:
    archive.extractall()

# obtain political book graph from the GML file extracted above
G = nx.read_gml('polbooks.gml')

# exponential base weight (forwarded to fn.Optimization further below)
exp_factor = 1

# prior information ratio (forwarded to fn.Initialization further below)
BP = 0.02

# From G we build a simplified version of the graph G, its Simplices, and the
# target values (Label).
# Label is in scalar form: for example, "0" means the corresponding node
# belongs to the first community, "1" the second community, and so on.
# Label_mat is in vector (one-hot) form: if a node belongs to the first
# community, its label_mat is [1,0,0] when n_classes = 3.
G, Simplices, Classes, Label, Label_mat = fn.Reconstruction(G)

n_classes = len(Classes)

# Report how many simplices were collected for each dimension; the number of
# dimensions is taken from the node count of the first entry in the last
# group of Simplices.
n_dimensions = len(Simplices[-1][0])
for dim in range(n_dimensions):
    n_found = len(Simplices[dim])
    print('The number of', dim, '-simplex:', '(', 'n_node:', dim + 1, ')', n_found)

# x_known consists of known nodes information, one [index, value] row per entry.
# For example, x_known = [[3,1],[4,0],[5,0],[153,0],[154,1],[155,0],[303,0],[304,0],[305,1]] implies
# node 2 belongs to the first community, node 52 belongs to the second community, and
# node 102 belongs to the third community
# (presumably the first column holds flat indices into x_init, n_classes
# consecutive entries per node -- confirm against fn.Initialization).
# x_init and pred1 are the one-hot type and scalar type classification results, respectively,
# obtained by the equilibrium measure (EM) method
x_init, pred1, x_known = fn.Initialization(G, Classes, BP)

# In the Optimization function below, we need to distinguish between known information and
# trained information (obtained from the EM method), so we remove the known information here.
# A single np.delete call drops every known index at once: unlike deleting one
# index at a time, the result does not depend on the order of x_known's rows,
# and the array is reallocated only once.  np.delete returns a new array, so
# x_init itself is left untouched.
x_init_revised = np.delete(x_init, x_known[:, 0].astype(int))

# HOI switches the higher order interactions on or off:
#   HOI = 0 -> the algorithm only uses pairwise interactions between nodes,
#   HOI = 1 -> the algorithm uses higher order interactions as well as
#              pairwise interactions.
def _optimize(simplices, use_hoi):
    """Call fn.Optimization on `simplices` with the inputs shared by every run."""
    return fn.Optimization(x_init_revised, x_known, simplices, n_classes, use_hoi, exp_factor)


# up to 1-simplex (pairwise interactions only)
HOI = 0
Simplices2 = Simplices[:2]
# resultK, predK correspond to the prediction in vector and scalar form, respectively.
result2, pred2 = _optimize(Simplices2, HOI)

# every remaining run includes higher order interactions
HOI = 1

# up to 2-simplex
Simplices3 = Simplices[:3]
result3, pred3 = _optimize(Simplices3, HOI)

# up to 3-simplex
Simplices4 = Simplices[:4]
result4, pred4 = _optimize(Simplices4, HOI)

# up to 4-simplex
Simplices5 = Simplices[:5]
result5, pred5 = _optimize(Simplices5, HOI)

# up to 5-simplex
Simplices6 = Simplices[:6]
result6, pred6 = _optimize(Simplices6, HOI)

# pre, rec, f1s, acc indicate precision, recall, f1-score, accuracy, respectively.
def _evaluate(pred):
    """Return the confusion matrix of `pred` against Label and the metrics derived from it."""
    matrix = confusion_matrix(Label, pred, n_classes)
    return matrix, precision_recall_f1_accuracy(matrix)


# index "1": result obtained by the EM method
conf_matrix1, (pre1, rec1, f1s1, acc1) = _evaluate(pred1)

# index "2": result of the objective function when only pairwise relations are considered
conf_matrix2, (pre2, rec2, f1s2, acc2) = _evaluate(pred2)

# index "3": result of the objective function when simplices up to 2-simplices are considered
conf_matrix3, (pre3, rec3, f1s3, acc3) = _evaluate(pred3)

# index "4": result of the objective function when simplices up to 3-simplices are considered
conf_matrix4, (pre4, rec4, f1s4, acc4) = _evaluate(pred4)

# index "5": result of the objective function when simplices up to 4-simplices are considered
conf_matrix5, (pre5, rec5, f1s5, acc5) = _evaluate(pred5)

# index "6": result of the objective function when all simplices are considered
conf_matrix6, (pre6, rec6, f1s6, acc6) = _evaluate(pred6)