In [1]:
import linkpred
from linkpred.evaluation import Pair
import networkx as nx
import random
import itertools
from operator import itemgetter
import pickle
import matplotlib.pyplot as plt

In [2]:
path = "../DataSet FFF/"

In [3]:
path_graph = "../DataSet FFF/Graph_data/"

In [4]:
saved_path = "../DataSet FFF/LinkPrediction_task/1057/"

In [5]:
G = nx.read_graphml(path_graph+"Real_Network.graphml")

In [6]:
nx.info(G)

'Name: \nType: Graph\nNumber of nodes: 8786\nNumber of edges: 34505\nAverage degree:   7.8545'

# 1) Set preparation

In [7]:
degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree')
sorted_degree = sorted(degree_dict.items(), key = itemgetter(1), reverse = True)

In [8]:
sample_nodes = []
for i in sorted_degree:
    if i[1] > 30:
        sample_nodes.append(i[0])
print(len(sample_nodes))

192


In [9]:
new_graph = G.subgraph(sample_nodes)

print(len(new_graph.nodes()))
print(len(new_graph.edges()))

training_edge_len = len(new_graph.edges()) - int(len(new_graph.edges())/100 * 20)
test_edge_len = len(new_graph.edges()) - training_edge_len

192
2258


In [10]:
print(training_edge_len)
print(test_edge_len)

1807
451


In [11]:
training_edges = random.sample(new_graph.edges, training_edge_len)

In [12]:
training_edges

[('climatecrisis', 'carbon'),
 ('sustainability', ' cleanenergy'),
 ('hamburg', 'fridaysforfuture'),
 (' climatechangeisreal', 'savecongorainforest'),
 ('climate', 'covid19'),
 ('berlin', ' neustartklima'),
 (' digitalstrike', 'covid_19'),
 (' covid19', 'science'),
 ('neubauer', 'klima'),
 ('fridayfeeling', 'food'),
 ('climatejustice', 'facetheclimateemergency'),
 ('globalclimatestrike', 'peoplenotprofit'),
 ('energy', 'actonclimate'),
 ('endcoal', 'carbon'),
 ('merkel', 'klimakrise'),
 ('climatechange', ' nomoreemptypromises'),
 (' renewableenergy', 'planet'),
 ('globalclimatestrike', 'climatestrikes'),
 ('climateaction', 'endcoal'),
 ('demvoice1', 'renewables'),
 ('beautiful', 'nature'),
 ('huelgamundialporelclima', 'schoolstrike4climate'),
 ('fridays4future', 'klimanotstand'),
 (' digitalstrike', 'climatecrisis'),
 ('climateaction', 'tiredearth'),
 (' gretathunberg', 'fridaymorning'),
 (' storage', 'green'),
 ('globalclimatestrike', 'climatejustice'),
 ('mindthegap', 'climateactionn

2) Create training and test set

In [13]:
training = new_graph.edge_subgraph(training_edges)

In [14]:
print(nx.info(training))

Name: 
Type: Graph
Number of nodes: 189
Number of edges: 1807
Average degree:  19.1217


In [15]:
test = new_graph.copy()
test.remove_edges_from(training.edges())

In [16]:
print(nx.info(test))

Name: 
Type: Graph
Number of nodes: 192
Number of edges: 451
Average degree:   4.6979


4) Create node set

In [17]:
nodes = list(new_graph.nodes())
nodes.extend(list(test.nodes()))

5) Compute pair test set and universe set

In [18]:
test = [Pair(i) for i in test.edges()]
universe = set([Pair(i) for i in itertools.product(nodes, nodes) if i[0]!=i[1]])

AssertionError: Predicted link (climatestrike, climatestrike) is a self-loop!

In [19]:
print(len(universe))

NameError: name 'universe' is not defined

6) Storage file

In [None]:
universe

In [None]:
with open(saved_path+'universe_1057.pickle', 'wb') as f:
    pickle.dump(universe, f)
with open(saved_path+'test_1057.pickle', 'wb') as f:
    pickle.dump(test, f) 
nx.write_graphml(training, saved_path+"training_1057.graphml")

# 2) Prediction

1) Read data

In [None]:
with open(saved_path+'universe_1057.pickle', 'rb') as f:
    universe = pickle.load(f)
with open(saved_path+'test_1057.pickle', 'rb') as f:
    test = pickle.load(f)
training = nx.read_graphml(saved_path+"training_1057.graphml")

2) Predictions

In [None]:
cn = linkpred.predictors.CommonNeighbours(training, excluded=training.edges())
cn_results = cn.predict()

In [None]:
aa = linkpred.predictors.AdamicAdar(training, excluded=training.edges())
aa_results = aa.predict()

In [None]:
jc = linkpred.predictors.Jaccard(training, excluded=training.edges())
jc_results = jc.predict()

In [None]:
kz = linkpred.predictors.Katz(training, excluded=training.edges())
kz_results = kz.predict()

In [None]:
simrank = linkpred.predictors.SimRank(training, excluded=training.edges())
simrank_results = simrank.predict()

3) Saved predictions

In [None]:
res = []
for i, j in zip(cn_results, cn_results.values()):
    res.append([Pair(i), j])
with open(saved_path+'cn_results_1057.pickle', 'wb') as f:
    pickle.dump(res, f)

In [None]:
res = []
for i, j in zip(aa_results, aa_results.values()):
    res.append([Pair(i), j])
with open(saved_path+'aa_results_1057.pickle', 'wb') as f:
    pickle.dump(res, f)

In [None]:
res = []
for i, j in zip(jc_results, jc_results.values()):
    res.append([Pair(i), j])
with open(saved_path+'jc_results_1057.pickle', 'wb') as f:
    pickle.dump(res, f)

In [None]:
res = []
for i, j in zip(kz_results, kz_results.values()):
    res.append([Pair(i), j])
with open(saved_path+'kz_results_1057.pickle', 'wb') as f:
    pickle.dump(res, f)

In [None]:
res = []
for i, j in zip(simrank_results, simrank_results.values()):
    res.append([Pair(i), j])
with open(saved_path+'simrank_results.pickle', 'wb') as f:
    pickle.dump(res, f)

3) Evaluation

In [None]:
cn_evaluation = linkpred.evaluation.EvaluationSheet(cn_results, test, universe)

In [None]:
with open(saved_path+'cn_evaluation_1057.pickle', 'wb') as f:
    pickle.dump(cn_evaluation, f) 

In [None]:
aa_evaluation = linkpred.evaluation.EvaluationSheet(aa_results, test, universe)

In [None]:
with open(saved_path+'aa_evaluation_1057.pickle', 'wb') as f:
    pickle.dump(aa_evaluation, f) 

In [None]:
jc_evaluation = linkpred.evaluation.EvaluationSheet(jc_results, test, universe)

In [None]:
with open(saved_path+'jc_evaluation_1057.pickle', 'wb') as f:
    pickle.dump(jc_evaluation, f) 

In [None]:
kz_evaluation = linkpred.evaluation.EvaluationSheet(kz_results, test, universe)

In [None]:
with open(saved_path+'kz_evaluation_1057.pickle', 'wb') as f:
    pickle.dump(kz_evaluation, f) 

In [None]:
simrank_evaluation = linkpred.evaluation.EvaluationSheet(simrank_results, test, universe)

In [None]:
with open(saved_path+'simrank_evaluation_1057.pickle', 'wb') as f:
    pickle.dump(simrank_evaluation, f) 

# 3) Result Analysis

In [None]:
with open(saved_path+'aa_evaluation_1057.pickle', 'rb') as f:
    aa_evaluation = pickle.load(f)
with open(saved_path+'aa_results_1057.pickle', 'rb') as f:
    aa_results = pickle.load(f)
    
with open(saved_path+'cn_evaluation_1057.pickle', 'rb') as f:
    cn_evaluation = pickle.load(f)
with open(saved_path+'cn_results_1057.pickle', 'rb') as f:
    cn_results = pickle.load(f)

with open(saved_path+'jc_evaluation_1057.pickle', 'rb') as f:
    jc_evaluation = pickle.load(f)
with open(saved_path+'jc_results_1057.pickle', 'rb') as f:
    jc_results = pickle.load(f)

with open(saved_path+'kz_evaluation_1057.pickle', 'rb') as f:
    kz_evaluation = pickle.load(f)
with open(saved_path+'kz_results_1057.pickle', 'rb') as f:
    kz_results = pickle.load(f)

with open(saved_path+'simrank_evaluation_1057.pickle', 'rb') as f:
    sm_evaluation = pickle.load(f)
with open(saved_path+'simrank_results_1057.pickle', 'rb') as f:
    sm_results = pickle.load(f)

In [None]:
def print_res(res):
    res=sorted(res,key=lambda x: x[1],reverse=True)
    for i in res[:10]:
        print(i)

In [None]:
print_res(aa_results)
print("")
print("")
print("")
print_res(cn_results)
print("")
print("")
print("")
print_res(jc_results)
print("")
print("")
print("")
print_res(kz_results)
print("")
print("")
print("")
print_res(sm_results)

In [None]:
plt.plot(sm_evaluation.fallout(), sm_evaluation.recall(), label="SimRank")

plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(cn_evaluation.fallout(), cn_evaluation.recall(), label="Common Neighbors")
plt.plot(aa_evaluation.fallout(), aa_evaluation.recall(), label="Adamic Adar")
plt.plot(jc_evaluation.fallout(), jc_evaluation.recall(), label="Jaccard")
plt.plot(kz_evaluation.fallout(), kz_evaluation.recall(), label="Katz")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(cn_evaluation.fallout(), cn_evaluation.recall(), label="Common Neighbors")
plt.plot(aa_evaluation.fallout(), aa_evaluation.recall(), label="Adamic Adar")
plt.plot(jc_evaluation.fallout(), jc_evaluation.recall(), label="Jaccard")
plt.plot(kz_evaluation.fallout(), kz_evaluation.recall(), label="Katz")
plt.plot(sm_evaluation.fallout(), sm_evaluation.recall(), label="SimRank")


plt.xlabel("FPR")
plt.ylabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(jc_evaluation.fallout(), jc_evaluation.recall(), label="Jaccard")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(aa_evaluation.fallout(), aa_evaluation.recall(), label="Adamic Adar")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(cn_evaluation.fallout(), cn_evaluation.recall(), label="Common Neighbors")
plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(kz_evaluation.fallout(), kz_evaluation.recall(), label="Katz")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
sm_evaluation.data 

In [None]:
plt.plot(sm_evaluation.fallout(), sm_evaluation.recall(), label="SimRank")
plt.plot(aa_evaluation.fallout(), aa_evaluation.recall(), label="Adamic Adar")
plt.plot(jc_evaluation.fallout(), jc_evaluation.recall(), label="Jaccard")
plt.plot(kz_evaluation.fallout(), kz_evaluation.recall(), label="Katz")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
plt.plot(cn_evaluation.fallout(), cn_evaluation.recall(), label="Common Neighbors")
plt.plot(sm_evaluation.fallout(), sm_evaluation.recall(), label="SimRank")
plt.plot(jc_evaluation.fallout(), jc_evaluation.recall(), label="Jaccard")
plt.plot(kz_evaluation.fallout(), kz_evaluation.recall(), label="Katz")


plt.xlabel("FPR")
plt.xlabel("TPR")
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import auc

print("Area Under Roc Curve (AUROC)")
print(f"Common Neigh.: \t {auc(cn_evaluation.fallout(), cn_evaluation.recall())}")
print(f"Adamic Adar: \t {auc(aa_evaluation.fallout(), aa_evaluation.recall())}")
print(f"Jaccard: \t {auc(jc_evaluation.fallout(), jc_evaluation.recall())}")
print(f"SimRank: \t {auc(sm_evaluation.fallout(), sm_evaluation.recall())}")
print(f"Katz:\t \t {auc(kz_evaluation.fallout(), kz_evaluation.recall())}")

In [None]:
preds = list(nx.preferential_attachment(training,universe))
pref_results=[]
for u, v, p in preds:
    #print('(%s, %s) -> %d' % (u, v, p))
    pref_results.append([u,v,p])
pref_results=sorted(pref_results,key=lambda x: x[2],reverse=True)
pref_results

In [None]:
def intersect(result_1, result_2, dim, name):
    print(f"{name}")
    int_1_2=[]
    count=0
    for item_1 in result_1[:300]:
        for item_2 in result_2[:300]:
            if item_1[0]==item_2[0]:
                print('intersezione:',item_1[0])
                int_1_2.append(item_1[0])
                count+=1
    print(count)
    return int_1_2

In [None]:
def intersection(lst1, lst2): 
    lst3 = [value for value in lst1 if value in lst2] 
    return lst3 

In [None]:
aa_cn = intersect(aa_results, cn_results, 300, "AA vs CN")
aa_jc = intersect(aa_results, jc_results, 300, "AA vs JC")
aa_kz = intersect(aa_results, kz_results, 300, "AA vs KZ")
aa_sm = intersect(aa_results, sm_results, 300, "AA vs SM")


In [None]:
cn_jc = intersect(cn_results, jc_results, 300, "CN vs JC")
cn_kz = intersect(cn_results, kz_results, 300, "CN vs KZ")
cn_sm = intersect(cn_results, sm_results, 300, "CN vs SM")

In [None]:
jc_kz = intersect(jc_results, kz_results, 300, "JC vs KZ")
jc_sm = intersect(jc_results, sm_results, 300, "JC vs SM")

In [None]:
kz_sm = intersect(kz_results, sm_results, 300, "KZ vs SM")

In [None]:
# All intersection
int_m=intersection(cn_jc,kz_sm)
int_final=intersection(int_m,aa_sm)

In [None]:
len(int_final)

In [None]:
int_final