In [3]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
Installing collected packages: neo4j
Successfully installed neo4j-5.28.1


In [9]:
from elasticsearch import Elasticsearch
import pandas as pd
from collections import Counter
from itertools import product
from itertools import combinations

es = Elasticsearch("http://localhost:9200")
ES_INDEX = "efc_articles_1"

# Fetch all docs (or scroll for >10000)
def fetch_all_articles(index):
    docs = []
    resp = es.search(index=index, size=1000, query={"match_all": {}})
    docs.extend(hit["_source"] for hit in resp["hits"]["hits"])
    return docs

articles = fetch_all_articles(ES_INDEX)

In [12]:
from neo4j import GraphDatabase
from collections import Counter
from itertools import combinations

# Replace with your own data
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

def create_person(tx, name, count):
    tx.run("MERGE (p:Person {name: $name}) "
           "ON CREATE SET p.count = $count", name=name, count=count)

def create_org(tx, name, count):
    tx.run("MERGE (o:Organization {name: $name}) "
           "ON CREATE SET o.count = $count", name=name, count=count)

def create_mentions(tx, p1, p2):
    tx.run("""
        MATCH (a:Person {name: $p1}), (b:Person {name: $p2})
        MERGE (a)-[r:MENTIONS]-(b)
        ON CREATE SET r.count = 1
        ON MATCH SET r.count = r.count + 1
    """, p1=p1, p2=p2)

def create_association(tx, person, org):
    tx.run("""
        MATCH (p:Person {name: $person}), (o:Organization {name: $org})
        MERGE (p)-[r:ASSOCIATED_WITH]->(o)
        ON CREATE SET r.count = 1
        ON MATCH SET r.count = r.count + 1
    """, person=person, org=org)

# Build counters
person_counter = Counter()
org_counter = Counter()

def clean_people(people_list):
    return [p for p in people_list if not p.startswith("@") and len(p) > 1]

for doc in articles:
    if "entities" in doc and "people" in doc["entities"]:
        doc["entities"]["people"] = clean_people(doc["entities"]["people"])
    ents = doc.get("entities", {})
    people = ents.get("people", [])
    orgs = ents.get("orgs", [])
    person_counter.update(people)
    org_counter.update(orgs)

# Load data
with driver.session() as session:
    # Create nodes
    for name, count in person_counter.items():
        session.write_transaction(create_person, name, count)
    for name, count in org_counter.items():
        session.write_transaction(create_org, name, count)

    # Create relationships
    for doc in articles:
        people = list(set(doc.get("entities", {}).get("people", [])))
        orgs = list(set(doc.get("entities", {}).get("orgs", [])))

        for p1, p2 in combinations(people, 2):
            session.write_transaction(create_mentions, p1, p2)

        for p in people:
            for o in orgs:
                session.write_transaction(create_association, p, o)

  session.write_transaction(create_person, name, count)
  session.write_transaction(create_org, name, count)
  session.write_transaction(create_mentions, p1, p2)
  session.write_transaction(create_association, p, o)
