In [60]:
import os
import json
from dotenv import load_dotenv
import pandas as pd
from tqdm.notebook import tqdm

# Load key/value pairs from a local .env file into the process environment;
# the database cells below read their connection settings from os.environ.
load_dotenv()

True

In [61]:
# establish a connection to the MongoDB database
from pymongo import MongoClient

# connect to your Atlas cluster
# The connection string is read from the MONGODB_URI environment variable;
# a missing variable raises KeyError on this line.
# NOTE(review): `client` is not referenced by the other cells visible here —
# confirm it is still needed before keeping this connection open.
client = MongoClient(os.environ["MONGODB_URI"])

In [62]:
# establish a connection to the PostgreSQL database
import psycopg2 as pg

# Map each pg.connect() keyword to the environment variable that supplies it.
PG_ENV_VARS = {
    "dbname": "POSTGRES_DB",
    "user": "POSTGRES_USER",
    "password": "POSTGRES_PASSWORD",
    "host": "POSTGRES_HOST",
}

# Resolve the settings in the order listed above (a missing variable raises
# KeyError) and open the connection with them.
conn = pg.connect(
    **{keyword: os.environ[variable] for keyword, variable in PG_ENV_VARS.items()}
)


# RDFLib

## Outline

 * Get data from the PostgreSQL DB
 * Populate graph with triples


In [63]:
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF, RDFS, XSD, FOAF, OWL
import regex as re


In [64]:
# create an empty graph to collect the triples
g = Graph()

# base IRIs, kept as plain strings: `fdo` prefixes ontology terms (classes and
# properties), `fdr` prefixes individual resources
fdo, fdr = "http://fandom.com/ontology/", "http://fandom.com/resource/"

# bind both prefixes to the graph
for prefix, base_iri in (("fdo", fdo), ("fdr", fdr)):
    g.bind(prefix, base_iri)


In [65]:
# get data from postgres
#
# The query joins each cleaned property to its entity and to the page the
# entity was extracted from (all three tables are joined on object_id), and
# aliases the columns to the names the graph-building loop reads.
FANDOM_QUERY = """SELECT
            p.game_name,
            p.game_url,
            p.page_url,
            p.page_name,

            fe.object_id as entity_id,
            fe.name as entity_name,
            fe.class as entity_class,
            fe.subclass as entity_subclass,
            fe.description as entity_description,

            fp.id as property_id,
            fp.property_name as property_name,
            fp.description as property_description,
            fp.target_entity as target_entity,
            fp.target_class as target_class

    FROM fandom_properties_clean fp join fandom_entities_clean fe on fp.object_id = fe.object_id
    JOIN fandom_pages p on p.object_id = fe.object_id
    ;"""

# pandas warns ("only supports SQLAlchemy connectable ...") when it is handed a
# raw psycopg2 connection, so the query is run through the DB-API cursor
# directly and the frame is built from the fetched records. Column names come
# from the cursor description; coerce_float=True mirrors read_sql_query's
# default. The `with` block closes the cursor even if the query fails.
with conn.cursor() as cur:
    cur.execute(FANDOM_QUERY)
    df = pd.DataFrame.from_records(
        cur.fetchall(),
        columns=[column[0] for column in cur.description],
        coerce_float=True,
    )

  df = pd.read_sql_query(


In [66]:
# preview the first rows of the query result to sanity-check the columns
df.head()

Unnamed: 0,game_name,game_url,page_url,page_name,entity_id,entity_name,entity_class,entity_subclass,entity_description,property_id,property_name,property_description,target_entity,target_class
0,bravelydefault,https://bravelydefault.fandom.com/,https://bravelydefault.fandom.com/wiki/Adam_Ho...,Adam_Holograd,67139f9e8f64cb721b2f3eec,Adam Holograd,Character,LordCommander,Adam Holograd is a character from the game Bra...,1,hasHome,The home location of the character.,Holograd,Location
1,bravelydefault,https://bravelydefault.fandom.com/,https://bravelydefault.fandom.com/wiki/Adam_Ho...,Adam_Holograd,67139f9e8f64cb721b2f3eec,Adam Holograd,Character,LordCommander,Adam Holograd is a character from the game Bra...,2,affiliatedWith,The faction or group the character is associat...,Holograd Empire,Faction
2,bravelydefault,https://bravelydefault.fandom.com/,https://bravelydefault.fandom.com/wiki/Adam_Ho...,Adam_Holograd,67139f9e8f64cb721b2f3eec,Adam Holograd,Character,LordCommander,Adam Holograd is a character from the game Bra...,3,hasOccupation,The professional role or title held by the cha...,Lord Commander of the Holograd Empire,Occupation
3,bravelydefault,https://bravelydefault.fandom.com/,https://bravelydefault.fandom.com/wiki/Adam_Ho...,Adam_Holograd,67139f9e8f64cb721b2f3eec,Adam Holograd,Character,LordCommander,Adam Holograd is a character from the game Bra...,4,hasRace,The racial identity of the character.,Human,Race
4,bravelydefault,https://bravelydefault.fandom.com/,https://bravelydefault.fandom.com/wiki/Adam_Ho...,Adam_Holograd,67139f9e8f64cb721b2f3eec,Adam Holograd,Character,LordCommander,Adam Holograd is a character from the game Bra...,5,hasGender,The gender identity of the character.,Male,Gender


In [67]:
# Populate the graph from the dataframe. Every row carries the source entity's
# own attributes plus one property/target pair, so each row contributes the
# entity triples (the graph de-duplicates repeats) and one property edge.

# Characters outside this set are dropped when building IRI local names.
# Compiled once instead of being re-parsed on every call in the loop.
_NON_IRI_CHARS = re.compile('[^A-Za-z0-9_]+')


def _local_name(value):
    """Return `value` with every character outside [A-Za-z0-9_] removed.

    Non-string input (e.g. a NULL column surfaced as None/NaN) yields '' so
    callers can test the result for truthiness instead of raising mid-loop.
    """
    return _NON_IRI_CHARS.sub('', value) if isinstance(value, str) else ''


def _add_text(graph, subject, predicate, text):
    """Attach `text` to `subject` as an English literal; skip non-string values."""
    if isinstance(text, str):
        graph.add((subject, predicate, Literal(text, lang="en")))


# NOTE(review): page_name keeps its underscores ("Adam_Holograd") while
# target_entity has its spaces stripped ("Holograd Empire" -> "HolgradEmpire"
# style), so a target may not resolve to the IRI of its own page — confirm
# whether spaces in target names should map to "_" instead.
for row in tqdm(df.to_dict(orient="records")):
    entity_name = _local_name(row["page_name"])
    if not entity_name:
        # no usable local name: the IRI would collapse onto the bare namespace
        continue
    entity = URIRef(fdr + entity_name)

    # entity class and (optional) subclass; both are sanitized the same way as
    # resource names so an unexpected character cannot produce an invalid IRI
    entity_class = _local_name(row["entity_class"])
    if entity_class:
        class_ref = URIRef(fdo + entity_class)
        g.add((entity, RDF.type, class_ref))

        entity_subclass = _local_name(row["entity_subclass"])
        if entity_subclass:
            g.add((URIRef(fdo + entity_subclass), RDFS.subClassOf, class_ref))

    # human-readable name and description of the entity
    _add_text(g, entity, RDFS.label, row["entity_name"])
    _add_text(g, entity, RDFS.comment, row["entity_description"])

    # a property edge needs both a property name and a target
    property_name = _local_name(row["property_name"])
    target_name = _local_name(row["target_entity"])
    if not property_name or not target_name:
        continue

    if row["target_class"] == "Class" and 'type' in row["property_name"].lower():
        # "type"-like properties pointing at a class become an extra rdf:type
        g.add((entity, RDF.type, URIRef(fdo + target_name)))
        continue

    # `prop`, not `property`: the latter would shadow the builtin for every
    # cell executed after this one
    prop = URIRef(fdo + property_name)
    target = URIRef(fdr + target_name)

    # declare the property and document it
    g.add((prop, RDF.type, OWL.ObjectProperty))
    _add_text(g, prop, RDFS.comment, row["property_description"])

    # link the entity to the target
    g.add((entity, prop, target))

    # type and label the target resource
    target_class = _local_name(row["target_class"])
    if target_class:
        g.add((target, RDF.type, URIRef(fdo + target_class)))
    _add_text(g, target, RDFS.label, row["target_entity"])


  0%|          | 0/66246 [00:00<?, ?it/s]

In [68]:
# write the populated graph to disk in Turtle syntax
g.serialize(destination="fandom.ttl", format="turtle")

<Graph identifier=Nba7ce0e3b7fb4e738cec19e36657bdc5 (<class 'rdflib.graph.Graph'>)>