In [None]:
## HELPER FUNCTIONS ##

def author_hindex(value):
    // Per-author h-index over the papers published in the Year whose `year` equals `value`.
    // Returns one row per (author, year): result = [author node, year node, h-index].
    MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year {year: value})
    // OPTIONAL so that uncited papers contribute a 0 instead of removing the row
    OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
    // Aggregate per paper first; grouping only by (a, y) would merge all papers into one count
    WITH a, y, q, count(p) AS citations
    // Citation counts per author, largest first
    WITH a, y, reverse(apoc.coll.sort(collect(citations))) AS ordered_citations
    // With counts sorted descending, the h-index is the number of 0-based positions i
    // whose count exceeds i (the paper at rank i + 1 has at least i + 1 citations)
    WITH a, y, size([i IN range(0, size(ordered_citations) - 1) WHERE ordered_citations[i] > i]) AS hindex
    RETURN [a, y, hindex] AS result
    
def mean_citations_year(value):
    // Mean number of citations per paper for authored papers published in year `value`.
    // Returns a single row with one column, `mean` (null when no paper matches).
    MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year {year: value})
    // OPTIONAL so that uncited papers count as 0 in the mean
    OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
    // One count per paper; DISTINCT because a co-authored paper appears once per author
    WITH q, count(DISTINCT p) AS citations
    // avg() returns a float; dividing two row counts over the same rows always gave 1
    RETURN avg(citations) AS mean
    
def papers_published_year(value):
    // Counts (author, paper) rows for papers published in year `value`.
    // A paper with several authors is counted once per author.
    // The result column is the unaliased `count(q)`.
    MATCH (q:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year = value
    MATCH (q)<-[:AUTHORED]-(a:Author)
    RETURN count(q)
    
def venue_hindex(v, value):
    // h-index of venue `v` over its papers published in the Year whose `year` equals `value`.
    // Returns result = [venue node, year node, h-index]; no row if the venue has no paper that year.
    MATCH (v:Venue)<-[:PUBLISHED_IN]-(q:Quanta)-[:PUBLISHED_IN]->(y:Year {year: value})
    // OPTIONAL so that uncited papers contribute a 0 instead of removing the row
    OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
    // Aggregate per paper first; grouping only by (v, y) would merge all papers into one count
    WITH v, y, q, count(p) AS citations
    // Citation counts for the venue, largest first
    WITH v, y, reverse(apoc.coll.sort(collect(citations))) AS ordered_citations
    // With counts sorted descending, the h-index is the number of 0-based positions i
    // whose count exceeds i (the paper at rank i + 1 has at least i + 1 citations)
    WITH v, y, size([i IN range(0, size(ordered_citations) - 1) WHERE ordered_citations[i] > i]) AS hindex
    RETURN [v, y, hindex] AS result
    
def venue_paper_citations(v, value):
    // Average citations per cited paper of venue `v`, counting only citing papers
    // published in or before year `value`. Returns one column, `venue_avg`.
    // Only papers with at least one such citation enter the denominator.
    // PUBLISHED_IN points from Quanta to Year, as in the other patterns in this file.
    MATCH (v:Venue)<-[:PUBLISHED_IN]-(q:Quanta)<-[:CITES]-(:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year <= value
    // Rows are one per citation, so the paper count must be DISTINCT
    WITH count(*) AS citations, count(DISTINCT q) AS cited_papers
    // Guard the empty case and force float division
    RETURN CASE WHEN cited_papers = 0 THEN 0.0 ELSE toFloat(citations) / cited_papers END AS venue_avg
    
def venue_papers(v, value):
    // Number of papers in venue `v` published in or before year `value`.
    // Returns one column, `venue_papers`.
    // PUBLISHED_IN points from Quanta to Year, as in the other patterns in this file.
    MATCH (v:Venue)<-[:PUBLISHED_IN]-(q:Quanta)-[:PUBLISHED_IN]->(y:Year)
    WHERE y.year <= value
    // A query cannot end on WITH; the count has to be returned
    RETURN count(q) AS venue_papers
    
def venue_papers_max(v):
    // Largest citation count of any single paper published in venue `v`.
    // Returns one column, `max` (null when no paper of the venue is cited).
    MATCH (v:Venue)<-[:PUBLISHED_IN]-(cited:Quanta)<-[:CITES]-(q:Quanta)
    // Count citing papers per cited paper, then take the largest count
    WITH cited, count(q) AS citations
    RETURN max(citations) AS max

In [None]:
########## CALCULATING METRICS ##########
# AUTHOR HINDEX

// h-index of every author over all of their papers; one row per author.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)
// OPTIONAL so that uncited papers contribute a 0 instead of removing the row
OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
// Aggregate per paper first; grouping only by author would merge all papers into one count
WITH a, q, count(p) AS citations
// Citation counts per author, largest first
WITH a, reverse(apoc.coll.sort(collect(citations))) AS ordered_citations
// With counts sorted descending, the h-index is the number of 0-based positions i
// whose count exceeds i (the paper at rank i + 1 has at least i + 1 citations)
RETURN a, size([i IN range(0, size(ordered_citations) - 1) WHERE ordered_citations[i] > i]) AS hindex

# AUTHOR HINDEX DELTA

// Change in each author's h-index between year `value` and year `value + 1`.
// YIELD can only name an output field, so the result list is unpacked in WITH.
CALL author_hindex(value) YIELD result AS old_result
WITH old_result[0] AS a, old_result[2] AS hindex
CALL author_hindex(value + 1) YIELD result AS new_result
// Pair each author with their own second row; without this every author pair is combined.
// Authors with papers in only one of the two years produce no row.
WITH a, hindex, new_result[0] AS next_author, new_result[2] AS second_hindex
WHERE next_author = a
// New minus old, the same direction as the other delta queries in this file
RETURN a, second_hindex - hindex AS hindex_delta

# AUTHOR CITATION COUNT

// Total citations received by each author's papers; one row per author.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)
// OPTIONAL so that authors without any citation are reported with 0
OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
// Returning `a` groups the count per author instead of over the whole graph
RETURN a, count(p) AS citation_count

# KEY CITATION COUNT

(need additional paper)

# AUTHOR CITATIONS DELTA

// Citations received by each author's papers that were published in or before 2018.
// NOTE(review): this is a baseline count, not yet a difference; presumably it is meant
// to be subtracted from the author's total citation count. Confirm the intended delta.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year)
WHERE y.year <= 2018
// OPTIONAL so that authors without any citation are reported with 0
OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
// Returning `a` groups the count per author instead of over the whole graph
RETURN a, count(p) AS citations_of_papers_through_2018

# AUTHOR KEY CITATIONS DELTA

(need additional paper)

# AUTHOR MEAN CITATIONS PER PAPER

// Mean citations per paper for each author; one row per author.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)
// OPTIONAL so that uncited papers count as 0 in the mean
OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
// One citation count per (author, paper)
WITH a, q, count(p) AS citations
// avg() returns a float; dividing two row counts over the same rows always gave 1
RETURN a, avg(citations) AS mean_citations_per_paper

# AUTHOR MEAN CITATION PER PAPER DELTA

// Change in mean citations per paper between year `value` and year `value + 1`.
CALL mean_citations_year(value) YIELD mean AS old_mean
CALL mean_citations_year(value + 1) YIELD mean AS new_mean
// Name the result column so that consumers do not depend on the expression text
RETURN new_mean - old_mean AS mean_citations_delta

# AUTHOR MEAN CITATIONS PER YEAR

// Citations per year for each author, measured from their first publication year up to 2019.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year)
// OPTIONAL so that authors without any citation are reported with 0
OPTIONAL MATCH (p:Quanta)-[:CITES]->(q)
// min() over the year property; the Year nodes themselves cannot be ordered numerically
WITH a, count(p) AS citations, min(y.year) AS start_year
// toFloat avoids integer division; a span of zero or fewer years yields null rather than dividing by zero
RETURN a,
       CASE WHEN start_year < 2019 THEN toFloat(citations) / (2019 - start_year) ELSE null END AS mean_citations_per_year

# AUTHOR PAPERS

// Number of papers written by each author; one row per author.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)
// Returning `a` groups the count per author instead of over the whole graph
RETURN a, count(DISTINCT q) AS papers

# AUTHOR PAPERS DELTA

// Change in the number of published papers between year `value` and year `value + 1`.
// papers_published_year returns an unaliased `count(q)` column, not `mean`,
// so that column is yielded by its literal (backtick-quoted) name.
CALL papers_published_year(value) YIELD `count(q)` AS old_count
CALL papers_published_year(value + 1) YIELD `count(q)` AS new_count
RETURN new_count - old_count AS papers_delta

# AUTHOR MEAN CITATION RANK 
# rank of author (between 0 and 1) among all other authors in terms of mean citations per year

(can be implemented after every author has their mean citations per year calculated)

# AUTHOR UNWEIGHTED PAGERANK (by coauthorship)

(already have base code)

# AUTHOR WEIGHTED PAGERANK (by coauthorship)

(already have base code)

# AUTHOR AGE

// Earliest publication year of each author, reported as `author_age`.
// NOTE(review): this is the first publication year, not a number of years; confirm
// whether it should be subtracted from a reference year.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year)
// min() over the year property, grouped per author
RETURN a, min(y.year) AS author_age

# AUTHOR RECENT NUM COAUTHORS

// Number of distinct coauthors on each author's papers published in 2018 or later.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)-[:PUBLISHED_IN]->(y:Year)
// The year is a property of the Year node
WHERE y.year >= 2018
// OPTIONAL so that authors with only single-author recent papers are reported with 0
OPTIONAL MATCH (b:Author)-[:AUTHORED]->(q)
// Cypher's inequality operator is <>
WHERE b.name <> a.name
// DISTINCT so that a coauthor shared across several papers is counted once
RETURN a, count(DISTINCT b) AS recent_coauthors

# AUTHOR MAX SINGLE PAPER CITATIONS

// Citation count of each author's most cited paper.
// Authors with no cited paper produce no row.
MATCH (a:Author)-[:AUTHORED]->(q:Quanta)<-[:CITES]-(p:Quanta)
// Count citing papers per (author, paper), then keep the largest count per author
WITH a, q, count(p) AS citations
RETURN a, max(citations) AS max

# VENUE HINDEX

// [mean, min, max] h-index (for year `value`) of the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue) so that a venue is not weighted by the author's paper count
WITH DISTINCT a, v
// YIELD can only name an output field; the h-index is element 2 of the result list
CALL venue_hindex(v, value) YIELD result
WITH a, result[2] AS hindex
RETURN a, [avg(hindex), min(hindex), max(hindex)] AS venue_hindex

# VENUE HINDEX DELTA

// [mean, min, max] change in venue h-index between year `value` and year `value + 1`,
// over the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue) so that a venue is not weighted by the author's paper count
WITH DISTINCT a, v
// Both calls run for the same `v` on each row, so old and new values are already paired.
// A venue without a result in either year produces no row.
CALL venue_hindex(v, value) YIELD result AS old_result
CALL venue_hindex(v, value + 1) YIELD result AS new_result
WITH a, new_result[2] - old_result[2] AS difference
RETURN a, [avg(difference), min(difference), max(difference)] AS venue_hindex_delta

# VENUE CITATIONS

// [mean, min, max] total citation count of the venues each author has published in.
// Venues whose papers have no citations produce no row.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue); matching the venue's papers in the same pattern would
// repeat every citation once per paper the author has in that venue
WITH DISTINCT a, v
MATCH (v)<-[:PUBLISHED_IN]-(:Quanta)<-[:CITES]-(q:Quanta)
// Citations per venue first, then summary statistics per author
WITH a, v, count(q) AS venue_citations
RETURN a, [avg(venue_citations), min(venue_citations), max(venue_citations)] AS venue_citations


# VENUE CITATIONS DELTA

// [mean, min, max] change in average citations per venue paper between year `value`
// and year `value + 1`, over the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue) so that a venue is not weighted by the author's paper count
WITH DISTINCT a, v
// YIELD can only name an output field; both calls run for the same `v`,
// so old and new values are already paired on each row
CALL venue_paper_citations(v, value) YIELD venue_avg AS old_avg
CALL venue_paper_citations(v, value + 1) YIELD venue_avg AS new_avg
WITH a, new_avg - old_avg AS difference
RETURN a, [avg(difference), min(difference), max(difference)] AS venue_citations_delta

# VENUE PAPERS

// [mean, min, max] number of papers in the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue); matching the venue's papers in the same pattern would
// repeat them per author paper and leave out the author's own paper on each path
WITH DISTINCT a, v
MATCH (v)<-[:PUBLISHED_IN]-(q:Quanta)
// Papers per venue first, then summary statistics per author
WITH a, v, count(q) AS venue_papers
RETURN a, [avg(venue_papers), min(venue_papers), max(venue_papers)] AS venue_papers

#VENUE PAPERS DELTA

// [mean, min, max] change in venue paper count between year `value` and year `value + 1`,
// over the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue) so that a venue is not weighted by the author's paper count
WITH DISTINCT a, v
// The paper count comes from venue_papers (field `venue_papers`), not venue_paper_citations.
// Both calls run for the same `v`, so old and new values are already paired on each row.
CALL venue_papers(v, value) YIELD venue_papers AS old_count
CALL venue_papers(v, value + 1) YIELD venue_papers AS new_count
WITH a, new_count - old_count AS difference
RETURN a, [avg(difference), min(difference), max(difference)] AS venue_papers_delta

# VENUE RANK 
# venue rank {mean, min, max}: ranks (between 0 and 1) of the venues in which the author has published, determined by mean number of citations per paper

(can be implemented after every author has their mean citations per year calculated)

# VENUE MAX SINGLE PAPER CITATIONS

// [mean, min, max] of the most-cited-paper citation count of the venues each author has published in.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// One row per (author, venue) so that a venue is not weighted by the author's paper count
WITH DISTINCT a, v
// The helper defined in this file is venue_papers_max; YIELD can only name its output field
CALL venue_papers_max(v) YIELD max AS venue_max
RETURN a, [avg(venue_max), min(venue_max), max(venue_max)] AS venue_max_citations

# TOTAL NUMBER OF VENUES

// Number of distinct venues each author has published in; one row per author.
MATCH (a:Author)-[:AUTHORED]->(:Quanta)-[:PUBLISHED_IN]->(v:Venue)
// Returning `a` groups the count per author; DISTINCT collapses repeated venues
RETURN a, count(DISTINCT v) AS venue_count
