In [None]:
import pandas as pd

# Number of columns in the DataFrame pandas builds from the company-details JSON
# (presumably one column per ticker, since the file is keyed by ticker — TODO confirm).
pd.read_json('/home/al/projects/idx-bei/data/companyDetailsByKodeEmiten.json').shape[1]

In [None]:
# `json` is imported here because, on a fresh kernel run top-to-bottom, this cell
# executes before the later cell that imports it (which made this cell fail with NameError).
import json

# Load the raw company-details file.
with open("/home/al/projects/idx-bei/data/companyDetailsByKodeEmiten.json", "r", encoding="utf-8") as f:
    # The top level is a JSON object, not a list: a later cell calls `.items()` on
    # this same file and iterates it as (ticker, stock_data) pairs.
    stocks = json.load(f)


In [None]:
# Sanity-check the shape of the loaded data.
# The file's top level is a dict (a later cell calls `.items()` on it), so indexing
# with `stocks[0]` would raise KeyError; look at the first value instead.
print(type(stocks))                              # expected: <class 'dict'>
print(type(next(iter(stocks.values()), None)))   # expected: <class 'dict'> (NoneType if the file is empty)


In [9]:
import re

def clean_indonesian_name(name):
    """Normalize a personal name so that differently decorated spellings compare equal.

    The name is lower-cased, punctuation is stripped, and known titles, academic
    degrees, honorifics and single-letter initials are dropped.

    Parameters
    ----------
    name : str
        Raw name, e.g. ``"Dr. M. Arsjad Rasjid PM BSC"``.

    Returns
    -------
    str
        The remaining tokens joined by single spaces, e.g. ``"arsjad rasjid"``.
        An empty string is returned when no token survives.
    """
    # Define known titles, degrees, and honorifics (add more as needed).
    # Entries are written without dots because punctuation is stripped before the
    # lookup ("S.H." becomes "sh", "M.Si." becomes "msi").
    noise_tokens = {
        'dr', 'drs', 'h', 'ir', 'prof', 'kh', 'hj', 'hrh', 'mr', 'mrs', 'ms',  # prefixes
        'sh', 'mh', 'phd', 'spd', 'mpd', 'se', 'mm', 'msi', 'skom', 'st', 'mt', 'mkom', 'pm', 'bsc'  # suffixes
    }

    name = name.lower()
    name = re.sub(r'[^\w\s]', '', name)  # remove punctuation
    tokens = name.split()

    # Remove known titles and single-letter fragments (initials).
    # The length test is `> 1`, not `> 2`: the stricter test also discarded every
    # two-letter name part (e.g. "lo" in "Lo Kheng Hong"), so such people could never
    # be found by their full name. Two-letter titles are still removed because they
    # are listed explicitly in `noise_tokens`.
    tokens = [t for t in tokens if t not in noise_tokens and len(t) > 1]

    return ' '.join(tokens)

# Example usage
names = [
    "Dr. M. Arsjad Rasjid PM BSC",
    "m arsjad rasjid pm",
    "MR. Joko Widodo, S.H., M.H.",
    "Ir. Sri Mulyani Indrawati, M.Si.",
    "Mrs. Megawati S. Soekarnoputri",
    "Hj. Tri Rismaharini"
]

# Call the cleaner defined in this cell. The previous name, `clean_insider_name`,
# is not defined in the visible notebook and raised NameError on a fresh kernel.
cleaned = [clean_indonesian_name(n) for n in names]
for original, c in zip(names, cleaned):
    print(f"{original}  -->  {c}")


Dr. M. Arsjad Rasjid PM BSC  -->  arsjad rasjid
m arsjad rasjid pm  -->  arsjad rasjid
MR. Joko Widodo, S.H., M.H.  -->  joko widodo
Ir. Sri Mulyani Indrawati, M.Si.  -->  sri mulyani indrawati
Mrs. Megawati S. Soekarnoputri  -->  megawati soekarnoputri
Hj. Tri Rismaharini  -->  tri rismaharini


In [4]:
import json
import os

from neo4j import GraphDatabase

# Neo4j connection config.
# Each setting can be overridden with an environment variable of the same name, so
# real credentials do not have to be saved in the notebook. The fallbacks are the
# values that were previously hard-coded, so an unconfigured environment behaves as before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

In [11]:
# Read the company-details file and rebuild its top-level object as a plain dict
# (the ingestion loop later iterates it as ticker / stock_data pairs).
with open("/home/al/projects/idx-bei/data/companyDetailsByKodeEmiten.json", encoding="utf-8") as json_file:
    stocks = dict(json.load(json_file).items())

# Open the Neo4j driver that the ingestion cell uses.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)

def ingest_stock(tx, stock):
    """Write one company and the people/entities attached to it into Neo4j.

    Parameters
    ----------
    tx
        Object exposing ``run(query, **params)``; called once for the company and
        once for every director, commissioner and shareholder.
    stock : dict
        One company record. ``stock['Profiles'][0]`` must provide ``KodeEmiten``,
        ``NamaEmiten``, ``Industri``, ``Sektor`` and ``Website``. The optional lists
        ``Direktur``, ``Komisaris`` and ``PemegangSaham`` hold dicts with at least
        ``Nama`` (directors and commissioners also need ``Jabatan``).

    Raises
    ------
    KeyError
        If a required key listed above is missing.
    """
    profile = stock['Profiles'][0]
    kode = profile["KodeEmiten"]

    # Company node, keyed by ticker code. Note that `c.name` receives the ticker
    # code while the full company name is stored in `c.companyName`.
    company_query = """
        MERGE (c:Company {kode: $kode})
        SET c.name = $kode, c.industry = $industry, c.sector = $sector, c.website = $website, c.companyName = $name
    """
    tx.run(
        company_query,
        kode=kode,
        name=profile["NamaEmiten"],
        industry=profile["Industri"],
        sector=profile["Sektor"],
        website=profile["Website"],
    )

    # Directors: Insider nodes are keyed by the cleaned name.
    director_query = """
            MERGE (d:Insider {name: $name})
            WITH d
            MATCH (c:Company {kode: $kode})
            MERGE (d)-[:DIRECTOR_OF {jabatan: $jabatan, afiliasi: $afiliasi}]->(c)
        """
    for director in stock.get("Direktur", []):
        tx.run(
            director_query,
            name=clean_indonesian_name(director["Nama"]),
            jabatan=director["Jabatan"],
            afiliasi=director.get("Afiliasi", False),
            kode=kode,
        )

    # Commissioners
    commissioner_query = """
            MERGE (k:Insider {name: $name})
            WITH k
            MATCH (c:Company {kode: $kode})
            MERGE (k)-[:COMMISSIONER_OF {jabatan: $jabatan, independen: $independen}]->(c)
        """
    for commissioner in stock.get("Komisaris", []):
        tx.run(
            commissioner_query,
            name=clean_indonesian_name(commissioner["Nama"]),
            jabatan=commissioner["Jabatan"],
            independen=commissioner.get("Independen", False),
            kode=kode,
        )

    # Shareholders: every field except the name is optional and is passed as None when absent.
    shareholder_query = """
            MERGE (s:Insider {name: $name})
            WITH s
            MATCH (c:Company {kode: $kode})
            MERGE (s)-[:OWNS {jumlah: $jumlah, kategori: $kategori, pengendali: $pengendali, persentase: $persentase}]->(c)
        """
    for holder in stock.get("PemegangSaham", []):
        tx.run(
            shareholder_query,
            jumlah=holder.get("Jumlah"),
            kategori=holder.get("Kategori"),
            name=clean_indonesian_name(holder["Nama"]),
            pengendali=holder.get("Pengendali"),
            persentase=holder.get("Persentase"),
            kode=kode,
        )

# Ingest all stocks, calling `ingest_stock` through `session.execute_write` once per company.
# try/finally guarantees the driver is closed even if one of the ingests raises;
# previously a failure left the driver (and its connections) open.
try:
    with driver.session() as session:
        for stock_data in stocks.values():
            session.execute_write(ingest_stock, stock_data)
    print("Ingestion complete.")
finally:
    driver.close()


Ingestion complete.


In [None]:
// Companies linked (any relationship type, either direction) to an insider
// whose lower-cased name contains 'lo kheng hong'.
MATCH (i:Insider)--(c:Company)
WHERE toLower(i.name) CONTAINS 'lo kheng hong'
RETURN c, i

In [10]:
# Cypher command to delete all data: matches every node and removes it together
# with its relationships.
cypher_query = "MATCH (n) DETACH DELETE n"

def delete_all_data():
    """Delete every node and relationship in the configured Neo4j database.

    Opens its own driver from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD, runs
    `cypher_query`, and always closes the driver, even if the query fails.
    Prints "All data deleted." only after the query has completed.
    """
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        with driver.session() as session:
            # consume() waits for the query to finish, so a server-side failure is
            # raised here instead of being followed by the success message.
            session.run(cypher_query).consume()
    finally:
        driver.close()
    print("All data deleted.")

# WARNING: destructive. This runs "MATCH (n) DETACH DELETE n", i.e. it removes every
# node and relationship in the connected database. The cell sits after the ingestion
# cell, so a top-to-bottom run deletes what was just ingested; the execution counts
# (In [10] here, In [11] for ingestion) show it was originally run before ingesting.
delete_all_data()

All data deleted.
