## 1. Get data from ACLED

This cell downloads all ACLED events recorded for Mexico in 2025 and saves them to a CSV file.

In [6]:
import requests, csv, io, os

# Download every ACLED event for COUNTRY/YEAR, page by page, into one CSV file.
API      = "https://api.acleddata.com/acled/read.csv"
# Credentials are read from the environment so they never end up stored in the
# notebook. Any key that was ever saved in a notebook should be rotated.
EMAIL    = os.environ.get("ACLED_EMAIL")
API_KEY  = os.environ.get("ACLED_API_KEY")
if not EMAIL or not API_KEY:
    raise RuntimeError(
        "Set the ACLED_EMAIL and ACLED_API_KEY environment variables before running this cell."
    )
COUNTRY  = "Mexico"
YEAR     = 2025
LIMIT    = 5000              # maximum allowed
outfile  = f"acled_{COUNTRY.lower()}_{YEAR}.csv"

page = 1

with open(outfile, "w", newline='', encoding="utf-8") as fh_out:
    writer = csv.writer(fh_out)

    while True:
        params = {
            "email": EMAIL,
            "key"  : API_KEY,
            "country": COUNTRY,
            "year"   : YEAR,
            "limit"  : LIMIT,
            "page"   : page,
        }
        resp = requests.get(API, params=params, timeout=60)
        resp.raise_for_status()

        # Requests assumes ISO-8859-1 for text/* responses that declare no charset,
        # which would garble accented characters; assume UTF-8 in that case.
        # NOTE(review): presumes the API serves UTF-8 — confirm against the API docs.
        if "charset" not in resp.headers.get("Content-Type", "").lower():
            resp.encoding = "utf-8"

        # newline="" keeps line breaks embedded in quoted fields untouched.
        rows = list(csv.reader(io.StringIO(resp.text, newline="")))
        header, data = rows[:1], rows[1:]   # both empty if the body is empty

        if page == 1:
            writer.writerows(header)        # header is written only once
        writer.writerows(data)

        print(f"Page {page}: {len(data)} rows")
        if len(data) < LIMIT:               # short (or empty) page → last page
            break
        page += 1

print(f"✅ Finished. CSV saved as {outfile}")

Page 1: 5000 rows
Page 2: 2124 rows
✅ Finished. CSV saved as acled_mexico_2025.csv


## 2. Create Knowledge Graph in Neo4j

In [None]:
from neo4j import GraphDatabase

# Wipe the local Neo4j database: delete all nodes/relationships and drop every constraint.
driver = GraphDatabase.driver("bolt://localhost:7687",
                              auth=("neo4j","password"))
try:
    with driver.session() as s:
        # consume() forces the statement to finish and surfaces any server error here.
        s.run("MATCH (n) DETACH DELETE n").consume()

        # Read all constraint names first so the SHOW result is fully fetched
        # before further statements are issued on the same session.
        constraint_names = [row["name"] for row in s.run("SHOW CONSTRAINTS")]
        for name in constraint_names:
            # Backtick-quote the identifier (doubling embedded backticks) so names
            # with special characters cannot break the statement.
            quoted = name.replace("`", "``")
            s.run(f"DROP CONSTRAINT `{quoted}`").consume()
    print("Neo4j is now empty.")
finally:
    # Always release the connection pool, even when a statement fails.
    driver.close()

In [5]:
# ============================================================
#  ACLED CSV  ➜  Neo4j loader  (Year–Month–Day hierarchy)
# ============================================================

import pandas as pd, numpy as np
from tqdm.auto import tqdm
from neo4j import GraphDatabase

# ---------- loader settings ---------------------------------
CSV_FILE  = "acled_mexico_2025.csv"   # file to load
BOLT_URI  = "bolt://localhost:7687"   # Neo4j Bolt endpoint
USER      = "neo4j"
PWD       = "password"
BATCH     = 500                       # events sent per Cypher statement

# ---------- dataframe prep ----------------------------------
# Parse event_date, then derive integer year / month / day columns and a
# YYYYMMDD integer key from it.
df = (
    pd.read_csv(CSV_FILE, low_memory=False)
      .assign(event_date=lambda frame: pd.to_datetime(frame["event_date"]))
      .assign(
          year=lambda frame: frame["event_date"].dt.year.astype(int),
          month=lambda frame: frame["event_date"].dt.month.astype(int),
          day=lambda frame: frame["event_date"].dt.day.astype(int),
          date_int=lambda frame: frame["event_date"].dt.strftime("%Y%m%d").astype(int),
      )
)

def nz(v):
    """Return ``v`` as a whitespace-stripped string, or ``None`` if pandas considers it missing."""
    if pd.isna(v):
        return None
    text = str(v)
    return text.strip()

def to_dict(r):
    """Turn one ``itertuples()`` row of ``df`` into the parameter map for a Cypher batch row.

    Missing fatalities become 0, missing notes become "", missing coordinates
    become None, and the text fields are passed through ``nz``.
    """
    fatalities = r.fatalities
    latitude, longitude = r.latitude, r.longitude

    return {
        "id":         r.event_id_cnty,
        "date_str":   r.event_date.strftime("%Y-%m-%d"),
        "year":       int(r.year),
        "month":      int(r.month),
        "day":        int(r.day),
        "date_int":   int(r.date_int),
        "fatalities": int(fatalities) if not pd.isna(fatalities) else 0,
        "notes":      nz(r.notes) or "",
        "admin1":     nz(r.admin1),
        "lat":        float(latitude) if not pd.isna(latitude) else None,
        "lon":        float(longitude) if not pd.isna(longitude) else None,
        "etype":      nz(r.event_type),
        "actor1":     nz(r.actor1),
        "inter1":     nz(r.inter1),
        "actor2":     nz(r.actor2),
        "inter2":     nz(r.inter2),
    }

# Load the prepared dataframe into Neo4j: constraints, dimension nodes, then events in batches.
driver = GraphDatabase.driver(BOLT_URI, auth=(USER, PWD))
try:
    with driver.session() as s:

        # -- constraints -------------------------------------------------------
        # consume() makes each statement finish (and raise on failure) before moving on.
        for ddl in (
            "CREATE CONSTRAINT IF NOT EXISTS FOR (e:Event)      REQUIRE e.id IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (a:Actor)      REQUIRE a.name IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (g:ActorGroup) REQUIRE g.name IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (t:EventType)  REQUIRE t.code IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (s:State)      REQUIRE s.admin1 IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (y:Year)       REQUIRE y.value IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (m:Month) REQUIRE (m.year,m.value) IS UNIQUE",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (d:Day)   REQUIRE d.date_int IS UNIQUE",
        ):
            s.run(ddl).consume()

        # -- static dimension nodes ------------------------------------------
        # Dimension keys are normalised the same way to_dict()/nz() normalise the
        # event rows (stripped strings), so the lookups in the batch query match.

        # One row per state. Only the first coordinates seen for a state are
        # stored (ON CREATE SET), so de-duplicating on admin1 alone gives the
        # same nodes without one round-trip per distinct event location.
        states = (
            df[["admin1", "latitude", "longitude"]]
            .dropna(subset=["admin1"])
            .assign(admin1=lambda frame: frame["admin1"].astype(str).str.strip())
            .drop_duplicates(subset="admin1")
        )
        state_rows = [
            dict(
                a=r.admin1,
                # NaN would be stored as a float NaN; send null instead.
                lat=None if pd.isna(r.latitude) else float(r.latitude),
                lon=None if pd.isna(r.longitude) else float(r.longitude),
            )
            for r in states.itertuples(index=False)
        ]
        s.run("UNWIND $rows AS row "
              "MERGE (st:State {admin1:row.a}) "
              "ON CREATE SET st.lat=row.lat, st.lon=row.lon",
              rows=state_rows).consume()

        event_types = sorted({nz(v) for v in df["event_type"].dropna().unique()})
        s.run("UNWIND $codes AS c MERGE (:EventType {code:c})",
              codes=event_types).consume()

        # Collect group names column by column so each value is stringified from
        # the same dtype that to_dict() sees for that column.
        group_names = sorted({
            nz(v)
            for col in ("inter1", "inter2")
            for v in df[col].dropna().unique()
        } - {""})
        s.run("UNWIND $names AS n MERGE (:ActorGroup {name:n})",
              names=group_names).consume()

        # -- batch insert -----------------------------------------------------
        cypher = """
        UNWIND $rows AS row
        // Time hierarchy -----------------------------------------------------
        MERGE (y:Year  {value:row.year})
        MERGE (m:Month {year:row.year, value:row.month})
        MERGE (y)-[:HAS_MONTH]->(m)
        MERGE (d:Day   {year:row.year, month:row.month, value:row.day, date_int:row.date_int})
        MERGE (m)-[:HAS_DAY]->(d)

        // Event --------------------------------------------------------------
        MERGE (e:Event {id:row.id})
          ON CREATE SET
            e.date       = date(row.date_str),
            e.year       = row.year,
            e.month      = row.month,
            e.day        = row.day,
            e.date_int   = row.date_int,
            e.fatalities = row.fatalities,
            e.notes      = row.notes,
            e.lat        = row.lat,
            e.lon        = row.lon

        // Link to time nodes
        MERGE (e)-[:IN_YEAR ]->(y)
        MERGE (e)-[:ON_MONTH]->(m)
        MERGE (e)-[:ON_DAY  ]->(d)

        WITH e, row                                         // separator #1

        // Spatial ------------------------------------------------------------
        // OPTIONAL MATCH keeps the row alive when no State matches (e.g. null
        // admin1); a plain MATCH would drop the row and skip everything below.
        OPTIONAL MATCH (s:State {admin1:row.admin1})
        FOREACH (_ IN CASE WHEN s IS NOT NULL THEN [1] ELSE [] END |
          MERGE (e)-[:IN_STATE]->(s)
        )

        WITH e, row                                         // separator #2

        // EventType ----------------------------------------------------------
        OPTIONAL MATCH (t:EventType {code:row.etype})
        FOREACH (_ IN CASE WHEN t IS NOT NULL THEN [1] ELSE [] END |
          MERGE (e)-[:TYPE]->(t)
        )

        WITH e, row                                         // separator #3

        // Actors -------------------------------------------------------------
        FOREACH (_ IN CASE WHEN row.actor1 IS NOT NULL THEN [1] ELSE [] END |
          MERGE (a1:Actor {name:row.actor1})
          MERGE (e)-[:INVOLVES {role:'actor1'}]->(a1)
          FOREACH (_ IN CASE WHEN row.inter1 IS NOT NULL THEN [1] ELSE [] END |
            MERGE (g1:ActorGroup {name:row.inter1})
            MERGE (a1)-[:BELONGS_TO]->(g1)
          )
        )
        FOREACH (_ IN CASE WHEN row.actor2 IS NOT NULL THEN [1] ELSE [] END |
          MERGE (a2:Actor {name:row.actor2})
          MERGE (e)-[:INVOLVES {role:'actor2'}]->(a2)
          FOREACH (_ IN CASE WHEN row.inter2 IS NOT NULL THEN [1] ELSE [] END |
            MERGE (g2:ActorGroup {name:row.inter2})
            MERGE (a2)-[:BELONGS_TO]->(g2)
          )
        )
        """

        for start in tqdm(range(0, len(df), BATCH), desc="Loading events"):
            batch = [to_dict(r) for r in df.iloc[start:start+BATCH].itertuples()]
            # consume() so a failing batch raises here rather than going unnoticed.
            s.run(cypher, rows=batch).consume()

    print("✅ Graph loaded successfully")
finally:
    # Release the connection pool even if a statement failed.
    driver.close()

Loading events: 100%|██████████| 14/14 [00:02<00:00,  6.11it/s]

✅ Graph loaded successfully



