# Part B

## Setup

In [9]:
from py2neo import Graph, ClientError

In [10]:
#set connection variables
PORT = "7687" #database running on this port for bolt connections
USER = "neo4j" #standard user
PASSWORD = "publication-graph" #db password

In [11]:
#connect to database
try:
    graph = Graph('bolt://localhost:'+PORT, auth=(USER, PASSWORD))
    print('SUCCESS: Connected to the Neo4j Database.')
except Exception as e:
    print('ERROR: Could not connect to the Neo4j Database. See console for details.')
    raise SystemExit(e)

SUCCESS: Connected to the Neo4j Database.


In [12]:
# query helper function
def run_query(query:str):
    try:
        return graph.run(query)
    except ClientError as e:
        print(e.message)

## 3 Most cited papers of each conference

Information: if a paper has no citations it won't be shown. That means that for some conferences where there is only one paper with citations there will only be one paper in the result

In [None]:
most_cited_papers = """
Match (paper:Paper)<-[:CITES]-(citing_paper:Paper), (paper)-[:PUBLISHED_IN]->(:ConferenceEdition)<-[:HOLDS]-(conference:Conference)
with conference, paper, count(citing_paper) as no_citations  order by conference.conference_name, count(citing_paper) desc

with conference, collect(paper.title) as papers
with conference, papers[0..3] as most_cited

return conference.conference_name as conference, most_cited
"""

In [34]:
cursor = run_query(most_cited_papers)
df = cursor.to_data_frame()

In [35]:
df.head(10)

Unnamed: 0,conference,most_cited
0,ADG,[Maximizing the Sum of the Distances between F...
1,AISTATS,"[Markov Topic Models., Noise-contrastive estim..."
2,ANT/MobiWIS,"[Distributed Usage Control., Multi-hop Interfe..."
3,APC 25,"[On Combining Probability and Nondeterminism.,..."
4,ARSPA@IJCAR,[Preface.]
5,AVoCS,[Isabelle Theories for Machine Words.]
6,Advanced Database Systems,"[Amalgame: A Tool for Creating Interoperating,..."
7,BMC@CAV,[A satisfiability-based approach to abstractio...
8,Biometrics Technology,[Biometric Template Security based on Watermar...
9,Bytecode@ETAPS,"[Bytecode Rewriting in Tom., Kleene Algebra an..."


## Community

In [24]:
community_query = """
match (c:Conference)-[:HOLDS]->(ce:ConferenceEdition)<-[:PUBLISHED_IN]-(p:Paper)<-[:AUTHOR_OF]-(r:Researcher)
with c, r, count(distinct ce) as no_editions
where no_editions >= 4
with c, collect(r.name) as community
return c.conference_name, community, size(community) as community_size
"""

In [25]:
cursor = run_query(community_query)
df = cursor.to_data_frame()

In [26]:
df.head(10)

Unnamed: 0,c.conference_name,community,community_size
0,AISTATS,"[Bert Huang, Zoubin Ghahramani, Jennifer G. Dy...",25
1,MFPS,"[Sam Staton, Alexandra Silva 0001, Bart Jacobs...",29
2,COCV@ETAPS,"[Wolf Zimmermann, Jens Knoop, Sabine Glesner, ...",4
3,LSFA,"[Alejandro Díaz-Caro, Carlos Olarte, Marcelo F...",13
4,EXPRESS,"[Uwe Nestmann, Anna Ingólfsdóttir, Luca Aceto,...",9
5,FESCA@ETAPS,"[Ralf H. Reussner, Iman Poernomo]",2
6,CMCS,"[Jirí Adámek, Bart Jacobs 0001, Alexander Kurz...",23
7,WRLA,"[Manuel Clavel, José Meseguer, Carolyn L. Talc...",16
8,PASM,"[Nigel Thomas, William J. Knottenbelt, Marco G...",3
9,LDTA@ETAPS,"[Mark van den Brand, Görel Hedin, Jurgen J. Vi...",3


## Impact Factor

In [40]:
impact_factor = """
//Impact factor (citations(year x) of papers published in last two year)/(publications in last two years)

Match (:Paper)-[jc:JOURNAL_CITATION]->(journal:Journal)
where jc.year_cited = <YEAR> AND (jc.publication_year_cited_paper=<YEAR-2> Or jc.publication_year_cited_paper=<YEAR-1>)
with journal, count(jc) as no_citations

match (:Paper)<-[jp:JOURNAL_PUBLICATION]-(journal)
where jp.year = <YEAR-2> or jp.year = <YEAR-1>

with journal, no_citations, count(jp) as no_publications
return journal.journal_name, no_citations, no_publications, (tofloat(no_citations) / no_publications) as impact_factor
order by impact_factor desc

"""

In [41]:
def get_impact_factor(year:int):
    q = impact_factor.replace("<YEAR>", str(year)).replace("<YEAR-1>", str(year-1)).replace("<YEAR-2>", str(year-2))

    print(run_query(q))

In [44]:
get_impact_factor(2016)

 journal.journal_name         | no_citations | no_publications |      impact_factor 
------------------------------|--------------|-----------------|--------------------
 IEEE Trans. Hum. Mach. Syst. |          985 |             145 |  6.793103448275862 
 Comput. Chem. Eng.           |          429 |             472 | 0.9088983050847458 



## H-index

In [46]:
h_index = """
Match (a:Researcher)-[ao:AUTHOR_OF]->(paper:Paper)
optional match (paper)<-[ci:CITES]-(citing_paper:Paper)

With  a, paper, count(citing_paper) as no_cit
With a, collect([a,paper,no_cit]) as papers

unwind range(0, size(papers)-1) as ind

//where no_cit > no_pub

with a, papers[ind][0] as auth, papers[ind][1] as pap,ind,papers[ind][2] as no_cit, case papers[ind][2] >= ind +1 when true then 1 else 0 end as in_count
//where  no_cit >= ind +1
//return a.name, pap.title, ind + 1 , no_cit, in_count
order by ind desc, no_cit
return a.name, sum( in_count) as h_index
order by h_index desc

"""

In [47]:
cursor = run_query(h_index)
df = cursor.to_data_frame()

In [49]:
df.head(10)

Unnamed: 0,a.name,h_index
0,Andrew B. Whinston,8
1,Clyde W. Holsapple,5
2,Jay F. Nunamaker Jr.,3
3,Pierre-Etienne Moreau,3
4,Ronald M. Lee,3
5,Peter Wegner,3
6,Patrick Lincoln,3
7,Dennis G. Kafura,3
8,Peter de Jong,3
9,Bruce Anderson,2
