# 📊 Amazon XBRL to Amazon Neptune GraphDB Pipeline

In [None]:
!pip install rdflib boto3 gremlinpython arelle --quiet


In [None]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF
from arelle import Cntlr, ModelXbrl

# Convert the facts of one XBRL instance document into RDF triples (one
# resource per fact) and save the graph as RDF/XML in output.rdf.
XBRL_INSTANCE = "amzn-20241231_htm.xml"

EX = Namespace("http://example.org/xbrl/")
g = Graph()
g.bind("ex", EX)

# "logToPrint" is Arelle's special log target that prints messages instead of
# writing them to a log file.
cntlr = Cntlr.Cntlr(logFileName="logToPrint")

# ModelXbrl.load takes a ModelManager as its first argument; the controller
# owns one, so pass that rather than the controller itself.
model_xbrl = ModelXbrl.load(cntlr.modelManager, XBRL_INSTANCE)

# Arelle reports load problems through its log rather than by raising, and a
# failed load leaves no model document behind. Stop here instead of writing an
# empty graph and announcing success.
if model_xbrl.modelDocument is None:
    raise RuntimeError(f"Arelle could not load XBRL instance {XBRL_INSTANCE!r}")

for fact in model_xbrl.facts:
    context_id = fact.contextID
    if context_id is None:
        # No contextRef on this fact (e.g. a tuple): there is nothing to key
        # the resource on, and str(None) would yield a ".../None/..." URI.
        continue

    concept_name = str(fact.qname.localName)

    # NOTE(review): the URI is built only from context and concept, so facts
    # that share both end up as the same resource and their triples merge.
    uri = URIRef(EX[str(context_id) + "/" + concept_name])

    g.add((uri, RDF.type, EX.XBRLFact))
    g.add((uri, EX.contextRef, Literal(context_id)))
    g.add((uri, EX.concept, Literal(fact.qname.localName)))
    g.add((uri, EX.value, Literal(fact.value)))
    if fact.unitID:
        # Only facts that reference a unit carry a unit ID.
        g.add((uri, EX.unit, Literal(fact.unitID)))

g.serialize("output.rdf", format="xml")
print("✅ RDF triples saved to output.rdf")


In [None]:
# Open a Gremlin traversal source against Amazon Neptune and insert one sample
# "Fact" vertex.

# NOTE: this import rebinds the name `Graph`, which the RDF cell imported from
# rdflib. It is no longer used below and is kept only so that any other cell
# relying on it keeps working.
from gremlin_python.structure.graph import Graph
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.process.anonymous_traversal import traversal

NEPTUNE_ENDPOINT = "wss://<your-neptune-endpoint>:8182/gremlin"

# Keep a handle on the connection so it can be released with
# `neptune_conn.close()` once the notebook is done with Neptune; creating it
# inline left no way to close the underlying websocket.
neptune_conn = DriverRemoteConnection(NEPTUNE_ENDPOINT, "g")

# `traversal().with_remote(...)` is the current spelling of the deprecated
# `Graph().traversal().withRemote(...)`.
gsql = traversal().with_remote(neptune_conn)
print("🔗 Connected to Amazon Neptune")

# `.next()` submits the traversal and returns the created vertex, which the
# notebook displays as the cell result.
(
    gsql.add_v("Fact")
    .property("concept", "Revenue")
    .property("year", "2024")
    .property("value", "999000")
    .next()
)


In [None]:
# Add a small revenue series (one FinancialMetric node per year) to the RDF
# graph `g`, chain consecutive years with ex:nextYear, and export the whole
# graph as Turtle.
revenue_years = ("2022", "2023", "2024")

for label in revenue_years:
    metric_node = URIRef(EX[f"Revenue_{label}"])
    amount = 900000 + 5000 * int(label)
    g.add((metric_node, RDF.type, EX.FinancialMetric))
    g.add((metric_node, EX.value, Literal(amount)))
    # The year is stored as the original string label, not as a number.
    g.add((metric_node, EX.year, Literal(label)))

# Link each year to its successor: 2022 -> 2023 -> 2024.
for earlier, later in zip(revenue_years, revenue_years[1:]):
    g.add((
        URIRef(EX[f"Revenue_{earlier}"]),
        EX.nextYear,
        URIRef(EX[f"Revenue_{later}"]),
    ))

g.serialize("temporal_linkbase.ttl", format="turtle")
print("🎯 Exported TTL file: temporal_linkbase.ttl")
