In [2]:
import json
import bz2

import pandas as pd
from neo4j import GraphDatabase

In [3]:
def createClaim(causal_relation):
    cause = causal_relation['causal_relation']['cause']['concept']
    effect = causal_relation['causal_relation']['effect']['concept']
    
    claimQuery =  "MERGE (c:CausalConcept {concept: \"" + json.dumps(cause)[1:-1] + "\"})\n"
    claimQuery += "MERGE (e:CausalConcept {concept: \"" + json.dumps(effect)[1:-1] + "\"})\n"
    claimQuery += "CREATE (claim:Claim {type: \"mayCause\"})\n"
    claimQuery += "CREATE (claim) -[:cause]-> (c)\n"
    claimQuery += "CREATE (claim) -[:effect]-> (e)\n\n"
    claimQuery += "RETURN id(claim)"
    
    return claimQuery

In [4]:
def createSourceQueries(claim_id, causal_relation):
    cause = causal_relation['causal_relation']['cause']['concept']
    effect = causal_relation['causal_relation']['effect']['concept']
    
    claimQuery =  "MATCH (claim)\n"
    claimQuery += "WHERE id(claim)=" + str(claim_id) + "\n"
    
    sourceQueries = []
    for i in range(len(causal_relation['sources'])):
        sourceID = "s" + str(i)
        query = "CREATE (claim) -[:claimedIn]-> (" + sourceID + ":Source)\n"
        query += createSource(sourceID, causal_relation['sources'][i])
        sourceQueries.append(claimQuery + query)
    
    return sourceQueries

In [5]:
def createSource(sourceID, source):
    query = "SET " + sourceID + ".type=\"" + source['type'] + "\"\n"
    query += "SET " + sourceID + "=$" + sourceID + "\n" 
    return query

In [6]:
def createParameters(causal_relation):
    parameters = []
    for i in range(len(causal_relation['sources'])):
        paramID = "s" + str(i)
        source = causal_relation['sources'][i]
        parameter = {}
        parameter[paramID] = {k:v for (k,v) in source['payload'].items() if type(v) is str}
        if 'sentence' in source['payload']:
            parameter[paramID].update({'sentence': source['payload']['sentence']})
        parameters.append(parameter)
    return parameters

In [11]:
def loadCauseNetIntoNeo4j(uri, user, password, graph_path):
    for line in bz2.BZ2File(graph_path):
        causal_relation = json.loads(line)
        
        # create Claim
        createClaimQuery = createClaim(causal_relation)
        driver = GraphDatabase.driver(uri, auth=(user, password),encrypted = False)
        with driver.session() as session:
            result = session.run(createClaimQuery)
            for record in result:
                claim_id = record["id(claim)"]
        driver.close()
        
        # Create sources (important: don't do this all in once [too long queries])
        sourceQueries = createSourceQueries(claim_id, causal_relation)
        parameters = createParameters(causal_relation)
        
        assert len(sourceQueries) == len(parameters)
        for i in range(len(sourceQueries)):
            driver = GraphDatabase.driver(uri, auth=(user, password),encrypted = False)
            with driver.session() as session:
                session.run(sourceQueries[i], parameters=parameters[i])
            driver.close()

In [13]:
# docker run --publish=7474:7474 --publish=7687:7687 --user="$(id -u):$(id -g)" neo4j:4.0

uri = "bolt://127.0.0.1:7687"
user = "neo4j"
password = "testing"
graph_path = "./data/causenet-precision.jsonl.bz2"

loadCauseNetIntoNeo4j(uri, user, password, graph_path)

StopIteration: 

In [16]:
data = [i for i in open('data/causenet-precision.jsonl')]

In [17]:
import pandas as pd
causal = pd.DataFrame(data)
causal

Unnamed: 0,0
0,"{""causal_relation"": {""cause"": {""concept"": ""acc..."
1,"{""causal_relation"": {""cause"": {""concept"": ""dis..."
2,"{""causal_relation"": {""cause"": {""concept"": ""pne..."
3,"{""causal_relation"": {""cause"": {""concept"": ""can..."
4,"{""causal_relation"": {""cause"": {""concept"": ""hea..."
...,...
197801,"{""causal_relation"": {""cause"": {""concept"": ""aut..."
197802,"{""causal_relation"": {""cause"": {""concept"": ""aut..."
197803,"{""causal_relation"": {""cause"": {""concept"": ""aut..."
197804,"{""causal_relation"": {""cause"": {""concept"": ""aut..."


In [41]:
causal = []
concepts = []


for line in open('./data/causenet-precision.jsonl', 'r'):
    data = json.loads(line)
    causal.append(json.loads(line))
    concepts.append(data["causal_relation"]["cause"]["concept"])


In [44]:
df = pd.DataFrame(causal)
df

Unnamed: 0,causal_relation,sources,support
0,"{'cause': {'concept': 'accident'}, 'effect': {...","[{'type': 'wikipedia_sentence', 'payload': {'w...",38
1,"{'cause': {'concept': 'disease'}, 'effect': {'...","[{'type': 'wikipedia_sentence', 'payload': {'w...",37
2,"{'cause': {'concept': 'pneumonia'}, 'effect': ...","[{'type': 'wikipedia_sentence', 'payload': {'w...",37
3,"{'cause': {'concept': 'cancer'}, 'effect': {'c...","[{'type': 'wikipedia_sentence', 'payload': {'w...",36
4,"{'cause': {'concept': 'heart_attack'}, 'effect...","[{'type': 'wikipedia_sentence', 'payload': {'w...",36
...,...,...,...
197801,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197802,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197803,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197804,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0


In [39]:
test = set()

for i in concepts:
    test.add(i)


51863

In [34]:
concepts

['accident',
 'disease',
 'pneumonia',
 'cancer',
 'heart_attack',
 'illness',
 'stroke',
 'complications',
 'infection',
 'injuries',
 'heart_failure',
 'negligence',
 'violence',
 'injury',
 'accident',
 'drought',
 'injury',
 'shock',
 'cardiac_arrest',
 'disease',
 'heart_disease',
 'dehydration',
 'malnutrition',
 'stress',
 'smoking',
 'sin',
 'negligence',
 'war',
 'fall',
 'fire',
 'death',
 'stress',
 'stress',
 'depression',
 'old_age',
 'paralysis',
 'diabetes',
 'smoking',
 'anxiety',
 'endometriosis',
 'diarrhea',
 'assault',
 'suffocation',
 'aids',
 'respiratory_failure',
 'obesity',
 'bacteria',
 'stress',
 'accidents',
 'accident',
 'negligence',
 'exposure',
 'tuberculosis',
 'overdose',
 'infection',
 'stress',
 'diseases',
 'heavy_rains',
 'stress',
 'fire',
 'suicide',
 'starvation',
 'disease',
 'accidents',
 'disease',
 'infections',
 'stress',
 'accidents',
 'inflammation',
 'injury',
 'infection',
 'bacteria',
 'condition',
 'bacteria',
 'heavy_rain',
 'hypothe

In [19]:
causal = pd.DataFrame(causal)

In [20]:
causal

Unnamed: 0,causal_relation,sources,support
0,"{'cause': {'concept': 'accident'}, 'effect': {...","[{'type': 'wikipedia_sentence', 'payload': {'w...",38
1,"{'cause': {'concept': 'disease'}, 'effect': {'...","[{'type': 'wikipedia_sentence', 'payload': {'w...",37
2,"{'cause': {'concept': 'pneumonia'}, 'effect': ...","[{'type': 'wikipedia_sentence', 'payload': {'w...",37
3,"{'cause': {'concept': 'cancer'}, 'effect': {'c...","[{'type': 'wikipedia_sentence', 'payload': {'w...",36
4,"{'cause': {'concept': 'heart_attack'}, 'effect...","[{'type': 'wikipedia_sentence', 'payload': {'w...",36
...,...,...,...
197801,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197802,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197803,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
197804,{'cause': {'concept': 'autoimmune_gfap_astrocy...,"[{'type': 'wikipedia_list', 'payload': {'wikip...",0
