In [16]:
import pandas as pd
import networkx as nx
from seaborn.external.docscrape import header

# Known edges: a header-less, comma-separated file with one node-id pair per
# line. The two positional columns are labelled right after parsing.
positive_edges = pd.read_csv(
    '../../data/edgelist.txt', sep=',', header=None, index_col=False
).set_axis(['source', 'target'], axis=1)

# Pairs to be scored; parsed with the same header-less two-column settings.
predict_edges = pd.read_csv(
    '../../data/test.txt', sep=',', header=None, index_col=False
).set_axis(['source', 'target'], axis=1)

# Negative training pairs. This file is read with its own header row, so the
# two endpoint columns are selected by name and anything else is dropped.
negative_edges = pd.read_csv(
    '../../data/training/negative/non_existing.csv'
).loc[:, ['source', 'target']]

# Graph built from the known edges only (networkx's default graph type).
G = nx.from_pandas_edgelist(positive_edges, source='source', target='target')

In [17]:
# PageRank score for every node of G (damping factor 0.85).
pagerank = nx.pagerank(G, alpha=0.85)

# Core rank of a node = sum of the PageRank scores of its direct neighbours.
# sum() over an empty neighbourhood is already 0, so nodes without neighbours
# need no special case.
corerank = {
    node: sum(pagerank[neighbor] for neighbor in G.neighbors(node))
    for node in G.nodes()
}

# Tabular form of the scores: one row per node (paper_id, corerank).
corerank_df = pd.DataFrame(corerank.items(), columns=['paper_id', 'corerank'])

# Build the paper_id -> corerank lookup once instead of re-indexing the frame
# for every feature column.
corerank_by_paper = corerank_df.set_index('paper_id')['corerank']

# Attach the core rank of both endpoints to each edge table.
# NOTE(review): ids that are not nodes of G map to NaN here -- confirm that
# downstream code handles (or imputes) missing values.
# NOTE(review): G is built from positive_edges, so features of positive pairs
# are computed on a graph that already contains that edge -- confirm this is
# intended for training.
for edges in (positive_edges, negative_edges, predict_edges):
    for endpoint in ('source', 'target'):
        edges[f'{endpoint}_corerank'] = edges[endpoint].map(corerank_by_paper)

negative_edges.head()

Unnamed: 0,source,target,source_corerank,target_corerank
0,25323,54708,0.001452,0.000343
1,72184,108982,0.000253,1.1e-05
2,113386,4507,3.9e-05,0.002507
3,11191,128219,0.001338,1.5e-05
4,28126,85955,0.000428,0.002456


In [18]:


# Write each feature-augmented edge table to its CSV file, omitting the
# DataFrame index column.
for frame, destination in (
    (positive_edges, '../../data/training/positive/positive_edges_corerank.csv'),
    (negative_edges, '../../data/training/negative/negative_edges_corerank.csv'),
    (predict_edges, '../../data/training/predict/predict_edges_corerank.csv'),
):
    frame.to_csv(destination, index=False)