In [9]:
from config import Config
import networkx as nx
import pickle
import json
import pandas as pd

# Directed graph whose nodes get an 'impact_score' attribute from get_node_score.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point Config.GRAPH_BIN_PATH at a file this project produced itself.
with open(Config.GRAPH_BIN_PATH, 'rb') as infile:
    g: nx.DiGraph = pickle.load(infile)

# Mapping whose keys select which papers are scored.
# JSON is read as UTF-8 explicitly instead of relying on the platform default.
with open(Config.REDUCED_JSON_PATH, 'r', encoding='utf-8') as infile:
    papers: dict = json.load(infile)

# Per-paper metadata; the 'discipline', 'authors' and 'title' fields are read
# further down in this notebook.
with open(Config.UNARXIVE_REDUCED_JSON_PATH, 'r', encoding='utf-8') as infile:
    unarxive: dict = json.load(infile)

In [10]:
def pairwise_impact(id_a: str, id_b: str) -> float:
    """Return the weight applied to the link between two papers.

    Both ids are looked up in the module-level ``unarxive`` mapping. The
    weight starts at 1.0 and is adjusted as follows:

    * if the two ``'discipline'`` values differ, it is multiplied by 1.5;
    * if the two ``'authors'`` collections share ``k > 0`` entries, it is
      multiplied by ``1 - k / max(len(authors_a), len(authors_b))``.

    Raises:
        KeyError: if an id is not in ``unarxive`` or a record lacks a field.
    """
    meta_a, meta_b = unarxive[id_a], unarxive[id_b]

    # Cross-discipline pairs are boosted.
    weight = 1.5 if meta_a['discipline'] != meta_b['discipline'] else 1.0

    # Overlapping author lists damp the weight, down to 0 when the larger
    # author list is fully contained in the overlap.
    authors_a, authors_b = meta_a['authors'], meta_b['authors']
    shared = len(set(authors_a) & set(authors_b))
    if shared > 0:
        larger_team = max(len(authors_a), len(authors_b))
        weight = weight * (1 - (shared / larger_team))

    return weight

In [11]:
def get_node_score(id: str) -> float:
    """Return the impact score of node ``id`` in the module-level graph ``g``.

    The score is defined as::

        score(n) = 1.0 + sum(score(m) * pairwise_impact(n, m)
                             for each out-edge (n, m) of n)

    Every score computed along the way is cached in
    ``g.nodes[n]['impact_score']``, and a cached value is returned as-is.

    The traversal is an iterative post-order walk with an explicit stack, so
    long chains of edges do not hit Python's recursion limit.

    Args:
        id: Node identifier; must be a node of ``g``.

    Returns:
        The (possibly cached) score of ``id``.

    Raises:
        ValueError: if a cycle is reachable from ``id``; the score is not
            defined in that case.
        KeyError: propagated from ``g.nodes`` or ``pairwise_impact`` for
            unknown ids.
    """
    if 'impact_score' in g.nodes[id]:
        return g.nodes[id]['impact_score']

    stack = [id]
    # Nodes whose successors have been pushed but which are not scored yet,
    # i.e. the ancestors of whatever is currently on top of the stack.
    expanded = set()

    while stack:
        node = stack[-1]
        if 'impact_score' in g.nodes[node]:
            # Reached again through another parent after it was scored.
            stack.pop()
            continue

        successors = [citing for _, citing in g.out_edges(node)]
        pending = [s for s in successors if 'impact_score' not in g.nodes[s]]

        if pending:
            expanded.add(node)
            looped = [s for s in pending if s in expanded]
            if looped:
                raise ValueError(
                    f"cycle detected while scoring {id!r}: "
                    f"{node!r} links back to {looped[0]!r}"
                )
            # Reversed so the first out-edge is processed first.
            stack.extend(reversed(pending))
            continue

        # All successors are scored: accumulate in out-edge order.
        score = 1.0
        for citing in successors:
            score += g.nodes[citing]['impact_score'] * pairwise_impact(node, citing)

        g.nodes[node]['impact_score'] = score
        stack.pop()

    return g.nodes[id]['impact_score']

In [12]:
# Only papers that are both keys of `papers` and nodes of the graph are scored.
eligible_papers = set(papers).intersection(g.nodes)

# Called for its side effect: get_node_score caches 'impact_score' on each node.
# The loop variable is not named `id`, so the builtin id() stays usable in
# later cells.
for paper_id in eligible_papers:
    get_node_score(paper_id)

In [13]:
# Cached score of every eligible paper, keyed by paper id.
scores_dict = {
    paper_id: g.nodes[paper_id]['impact_score'] for paper_id in eligible_papers
}

# Built in one chain (no in-place mutation) so re-running the cell always
# produces the same frame: indexed by id, highest impact first, with
# `impact_pct` expressing each score as a percentage of the largest one.
scores = (
    pd.DataFrame(list(scores_dict.items()), columns=['id', 'impact'])
    .set_index('id')
    .sort_values('impact', ascending=False)
    .assign(impact_pct=lambda df: df['impact'] / df['impact'].max() * 100)
)

In [16]:
# First rows of `scores` (head() default), as a new frame with each paper's
# title looked up in `unarxive` by index value.
most_impactful = scores.head().assign(
    title=lambda top: [unarxive[paper_id]['title'] for paper_id in top.index]
)
most_impactful

Unnamed: 0_level_0,impact,impact_pct,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1806.00425,4774825.0,100.0,Observation of Higgs boson production in assoc...
1807.02512,525980.3,11.0157,Transversal Modes and Higgs Bosons in Electrow...
1808.04403,302137.4,6.327718,CoDEx: Wilson coefficient calculator connectin...
2012.02779,301875.1,6.322223,"Top, Higgs, Diboson and Electroweak Fit to the..."
1809.0352,201363.8,4.217197,Probing top-quark couplings indirectly at Higg...
