In [None]:
import json
from py2neo import Graph, Node, Relationship

# TODO: get authentication working. The server currently runs with
# NEO4J_AUTH=none, so no credentials are supplied here.
# Alternative endpoint kept for reference:
#   graph = Graph('bolt://localhost:7687', bolt=True)
graph = Graph("bolt://neo4j:7687")

# Destructive -- uncomment only to delete everything stored in the graph:
# graph.delete_all()

# Read the store's primitive counts and report them; this doubles as a quick
# check that the connection actually works.
n_nodes, n_relationships = (
    graph.database.primitive_counts[counter]
    for counter in ('NumberOfNodeIdsInUse', 'NumberOfRelationshipIdsInUse')
)
print(
    "Connected to graph database with {:,} nodes and {:,} relationships!".format(
        n_nodes, n_relationships
    )
)

In [None]:
import os
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# For a series of cutoff years, run PageRank over the patents published before
# the cutoff, then save (and display) a histogram of the natural log of the
# resulting scores as Figures/<year>.png.

FIGURE_DIR = 'Figures'   # output directory for the per-year PNG files
HIST_BINS = 200          # number of histogram bins
# [xmin, xmax, ymin, ymax]; fixed so the figures for all years share one scale.
AXIS_LIMITS = [-2, 4, 0, 250000]
# Text style of the year label drawn inside each figure.
font = {'family': 'sans-serif',
        'color': 'grey',
        'weight': 'normal',
        'size': 60,
        }

# Query template. `{}` is filled with the cutoff year by str.format(); the
# doubled braces become the literal braces of the Cypher config map.
# NOTE: the node filter uses a strict `<` because `<` is much faster than `<=`.
PAGERANK_QUERY = """
    CALL algo.pageRank.stream(
         'MATCH (p:Patent) WHERE p.pub_date < "{}-01-01" AND p.pub_date <> "" RETURN id(p) as id'
        ,'MATCH (p1:Patent)-[:CITES]->(p2:Patent) RETURN id(p1) as source, id(p2) as target'
        ,{{graph:'cypher', iterations:20, write:false}})
    YIELD node, score
    WITH * 
    ORDER BY score DESC
    RETURN 
        node.number AS number, 
        score;
    """

start_time = time.time()
start_year, end_year, step = 1940, 2020, 2

# savefig() does not create missing directories, so make sure the target exists.
os.makedirs(FIGURE_DIR, exist_ok=True)

for year in range(start_year, end_year + 1, step):

    print("Running PageRank on patents from < {}...".format(year), end=" ")
    query_start_time = time.time()
    query = PAGERANK_QUERY.format(year)
    df = graph.run(query).to_data_frame()
    df['year'] = year
    query_end_time = time.time()
    print("Done ({:.2f} minutes).".format((query_end_time - query_start_time) / 60))

    # A cutoff that matches no patents yields an empty frame; skip it instead of
    # failing on the missing 'score' values and aborting the remaining years.
    if df.empty:
        print("No PageRank scores returned for year {}; skipping figure.".format(year))
        continue

    # Drop every entry equal to the minimum score, then take the natural log.
    # NOTE(review): the minimum is presumably the baseline score shared by all
    # uncited patents, which would otherwise dominate the histogram -- confirm.
    scores = np.asarray(df['score'], dtype=float)
    score_list_wo_lowest_log = np.log(scores[scores != scores.min()])

    # Draw the histogram on an explicit Axes with a white background.
    fig, ax = plt.subplots()
    ax.set_facecolor((1, 1, 1))
    ax.axis(AXIS_LIMITS)
    ax.hist(score_list_wo_lowest_log, bins=HIST_BINS)
    ax.set_xlabel('Impact Metric')
    ax.set_ylabel('Count')
    ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
    ax.text(1, 25000, str(year), fontdict=font)

    # Save before showing so the file never depends on how the active backend
    # treats a figure after show(); then close it explicitly so figures do not
    # pile up across iterations.
    fig.savefig(os.path.join(FIGURE_DIR, '{}.png'.format(year)))
    plt.show()
    plt.close(fig)
    print("Done the figure of year " + str(year))


end_time = time.time()
print("Finished all calculations in {:.2f} minutes.".format((end_time - start_time) / 60))