# Imports

In [None]:
import csv
import ast
import os

# File Paths

In [None]:
# Input files: the source CSVs that the conversion functions in this file read.
# Entities.csv is read for its "id", "type" and "properties" columns;
# Relations.csv for its "start_id", "end_id" and "relation" columns.
ENTITIES_FILE = "./Entities.csv"
RELATIONS_FILE = "./Relations.csv"

# Output folder for Neo4j bulk CSVs (one file per entity type / relation type).
OUTPUT_DIR = "./CsvForNeo4j"
# Create the folder up front; exist_ok=True keeps a re-run from failing when
# the directory already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Entity conversion: one CSV file per entity type


In [None]:
def convert_entities():
    """Split the entities CSV into one Neo4j node CSV per entity type.

    Reads ``ENTITIES_FILE``, which must provide the columns ``id``, ``type``
    and ``properties`` (a Python-literal dict stored as text), and writes
    ``<OUTPUT_DIR>/<type>.csv`` for every distinct type. Each output file has
    the header ``:ID, <property keys...>, :LABEL``; a property an entity does
    not have is written as an empty cell.

    Property columns appear in first-seen order, so the output is identical
    from run to run for the same input.

    A ``properties`` cell that cannot be parsed, or that parses to something
    other than a dict, is treated as "no properties" (best effort) rather
    than aborting the conversion.

    Raises:
        OSError: if the input file cannot be read or an output file cannot
            be written.
        KeyError: if a required column is missing from the input file.
    """
    # newline="" lets the csv module handle line endings inside quoted cells;
    # UTF-8 is set explicitly so results do not depend on the platform locale.
    with open(ENTITIES_FILE, "r", newline="", encoding="utf-8") as f:
        entities = list(csv.DictReader(f))

    # Group (id, properties) pairs by entity type, keeping input order.
    grouped = {}
    for row in entities:
        eid = row["id"]
        etype = row["type"]
        props_str = row["properties"]

        try:
            props = ast.literal_eval(props_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # The documented failure modes of literal_eval on malformed text.
            props = {}
        if not isinstance(props, dict):
            # A valid literal that is not a mapping (list, number, ...) has
            # no keys to turn into columns.
            props = {}

        grouped.setdefault(etype, []).append((eid, props))

    # Write one CSV per entity type.
    for etype, rows in grouped.items():
        # dict.fromkeys de-duplicates while preserving first-seen order,
        # unlike a set whose iteration order can differ between runs.
        columns = list(dict.fromkeys(k for _, props in rows for k in props))

        filename = os.path.join(OUTPUT_DIR, f"{etype}.csv")
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)

            # Header: :ID + all property keys + :LABEL
            writer.writerow([":ID", *columns, ":LABEL"])
            writer.writerows(
                [eid, *(props.get(k, "") for k in columns), etype]
                for eid, props in rows
            )

# Relation conversion: one CSV file per relation type

In [None]:
def convert_relations():
    """Split the relations CSV into one Neo4j relationship CSV per type.

    Reads ``RELATIONS_FILE``, which must provide the columns ``start_id``,
    ``end_id`` and ``relation``, and writes ``<OUTPUT_DIR>/<relation>.csv``
    for every distinct relation value. Each output file has the header
    ``:START_ID, :END_ID, :TYPE`` followed by one row per relation, in input
    order.

    Raises:
        OSError: if the input file cannot be read or an output file cannot
            be written.
        KeyError: if a required column is missing from the input file.
    """
    # newline="" lets the csv module handle line endings inside quoted cells;
    # UTF-8 is set explicitly so results do not depend on the platform locale.
    with open(RELATIONS_FILE, "r", newline="", encoding="utf-8") as f:
        relations = list(csv.DictReader(f))

    # Group (start, end) pairs by relation type, keeping input order.
    grouped = {}
    for row in relations:
        start_id = row["start_id"]
        end_id = row["end_id"]
        rel = row["relation"]
        grouped.setdefault(rel, []).append((start_id, end_id))

    # Write one CSV per relationship type.
    for rel, rows in grouped.items():
        filename = os.path.join(OUTPUT_DIR, f"{rel}.csv")
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            # Header: :START_ID,:END_ID,:TYPE
            writer.writerow([":START_ID", ":END_ID", ":TYPE"])
            writer.writerows([start_id, end_id, rel] for start_id, end_id in rows)

# Final Conversion

In [None]:
def run_conversion():
    """Run the entity conversion, then the relation conversion.

    Prints a progress line before each stage and a completion message naming
    ``OUTPUT_DIR`` once both stages have finished.
    """
    stages = (
        ("Converting entities...", convert_entities),
        ("Converting relations...", convert_relations),
    )
    for announcement, stage in stages:
        print(announcement)
        stage()
    print(f"✅ Conversion complete. Files are in {OUTPUT_DIR}/")

# Run the whole conversion immediately when this cell/script executes.
run_conversion()


Converting entities...
Converting relations...
✅ Conversion complete. Files are in /content/drive/MyDrive/PRKG/EmailKG/CsvForNeo4j_v2/
