In [1]:
%%capture
import sys
!{sys.executable} -m pip install py2neo;
import py2neo
import pandas as pd

In [2]:
# Graph handle pointing at the HTTP endpoint http://neo4j:7474; the query
# cells below send their Cypher through this object's `graph.run(...)`.
graph = py2neo.Graph('http://neo4j:7474')

def yield_results(cursor):
    """Lazily yield each remaining record of ``cursor`` as a plain dict.

    Args:
        cursor: Object exposing ``next()``, which returns the next record
            (an object with a ``data()`` method) and raises
            ``StopIteration`` once the results are exhausted.

    Yields:
        The value of ``record.data()`` for every record, in cursor order.

    Raises:
        Any exception other than ``StopIteration`` raised by ``cursor.next()``
        or ``record.data()`` is propagated to the caller.
    """
    while True:
        try:
            record = cursor.next()
        except StopIteration:
            # Exhaustion is the only condition that ends the stream; other
            # errors propagate rather than being mistaken for "no more rows".
            return
        yield record.data()

# New York Times Comments
The data contains information about the comments made on articles published in The New York Times in January-May 2017 and January-April 2018.

See: https://www.kaggle.com/aashita/nyt-comments/home

## Data Summary
### Record Counts

In [3]:
# One scalar Cypher count per row of the summary table, keyed by row label.
count_queries = {
    'Articles': "MATCH (n:ARTICLE) RETURN count(*)",
    'Users': "MATCH (n:USER) RETURN count(*)",
    'Comments': "MATCH ()-[r:COMMENTED]->() RETURN count(*)",
}

# Each query yields a single record; its only field is read under the key 'count(*)'.
record_counts = [
    graph.run(query).next().data()['count(*)']
    for query in count_queries.values()
]

pd.DataFrame(record_counts, index=list(count_queries), columns=['Counts'])

Unnamed: 0,Counts
Articles,9298
Users,301682
Comments,1723935


### Which articles have the highest degree (i.e., most comments)?

In [4]:
# Rank articles by their number of incoming COMMENTED relationships, keep the
# five largest, then re-match those five to return each node with its count.
cur = graph.run("""MATCH (a:ARTICLE)
WITH a, SIZE(()-[:COMMENTED]->(a)) as cmtCnt
ORDER BY cmtCnt DESC LIMIT 5
MATCH p=()-[:COMMENTED]->(a)
RETURN a, count(p)""")

# Flatten each record into one row: the article node's properties plus its count.
top_article_rows = [
    {**res['a'], 'Comments': res['count(p)']} for res in yield_results(cur)
]

(
    pd.DataFrame(top_article_rows)
    .sort_values('Comments', ascending=False)
    .set_index('articleID')
    # Show the URL as a link that opens in a new tab (well-formed href attribute).
    .style.format({'webURL': lambda x: f'<a target="_blank" href="{x}">Link</a>'})
)

Unnamed: 0_level_0,Comments,byline,headline,newDesk,partition,pubDate,webURL
articleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
58b0894195d0e0247463875e,4996,By JULIE HIRSCHFELD DAVIS and MICHAEL M. GRYNBAUM,Trump Intensifies Criticism of F.B.I. and Journalists,National,155571,2017-02-24 19:27:53,Link
5912391b7c459f24986de9ab,4184,By MICHAEL D. SHEAR and MATT APUZZO,Trump Fires Comey Amid Russia Inquiry,National,177172,2017-05-09 21:48:03,Link
58ebb1437c459f24986d96ed,4014,By DANIEL VICTOR and MATT STEVENS,"Man Is Dragged From a Full Jet, Stirring a Furor",Business,177172,2017-04-10 16:22:22,Link
591a524d7c459f24986dfc28,3791,By DAVID BROOKS,When a Child Is Leading The World,OpEd,177172,2017-05-16 01:13:44,Link
5930616f7c459f24986e2e41,3709,By MICHAEL D. SHEAR,Trump Abandoning Global Climate Accord,Foreign,177172,2017-06-01 18:48:08,Link


### Which users have the highest degree (i.e., most comments)?

In [5]:
# Rank users by their number of outgoing COMMENTED relationships, keep the
# five largest, then re-match those five to return each node with its count.
top_users_query = """MATCH (u:USER)
WITH u, SIZE((u)-[:COMMENTED]->()) as cmtCnt
ORDER BY cmtCnt DESC LIMIT 5
MATCH p=(u)-[:COMMENTED]->()
RETURN u, count(p)"""
cur = graph.run(top_users_query)

# One row per record: the user node's properties merged with its comment count.
top_user_rows = []
for res in yield_results(cur):
    top_user_rows.append({**res['u'], 'Comments': res['count(p)']})

(
    pd.DataFrame(top_user_rows)
    .sort_values('Comments', ascending=False)
    .set_index('userID')
)

Unnamed: 0_level_0,Comments,partition,userDisplayName,userLocation
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
61986282.0,1571,155571,Phyliss Dalmatian,"Wichita, Kansas"
17374907.0,1235,155571,Blackmamba,Il
47123844.0,1232,155571,Richard Luettgen,New Jersey
47112177.0,1174,155571,manfred m,Bolivia
37475504.0,1157,155571,John Doe,Johnstown


In [6]:
# List every registered procedure whose name starts with "algo", with its
# signature and description.
cur = graph.run("""CALL dbms.procedures() YIELD name, signature, description
WHERE name starts with "algo"
RETURN name, signature, description""")

# Python 3 generators have no `.next()` method, so consume the generator with
# list() to display all rows as a list of dicts.
list(yield_results(cur))

[{'name': 'algo.allShortestPaths.stream',
  'signature': 'algo.allShortestPaths.stream(propertyName :: STRING?, config = {} :: MAP?) :: (sourceNodeId :: INTEGER?, targetNodeId :: INTEGER?, distance :: FLOAT?)',
  'description': "CALL algo.allShortestPaths.stream(weightProperty:String{nodeQuery:'labelName', relationshipQuery:'relationshipName', defaultValue:1.0, concurrency:4}) YIELD sourceNodeId, targetNodeId, distance - yields a stream of {sourceNodeId, targetNodeId, distance}"},
 {'name': 'algo.articleRank',
  'signature': 'algo.articleRank(label =  :: STRING?, relationship =  :: STRING?, config = {} :: MAP?) :: (nodes :: INTEGER?, iterations :: INTEGER?, loadMillis :: INTEGER?, computeMillis :: INTEGER?, writeMillis :: INTEGER?, dampingFactor :: FLOAT?, write :: BOOLEAN?, writeProperty :: STRING?)',
  'description': "CALL algo.articleRank(label:String, relationship:String, {iterations:5, dampingFactor:0.85, weightProperty: null, write: true, writeProperty:'articlerank', concurrency:

In [None]:
# List every registered procedure whose name starts with "apoc", with its
# signature and description.
cur = graph.run("""CALL dbms.procedures() YIELD name, signature, description
WHERE name starts with "apoc"
RETURN name, signature, description""")

# Python 3 generators have no `.next()` method, so consume the generator with
# list() to display all rows as a list of dicts.
list(yield_results(cur))