In [29]:
import pandas as pd
from rdflib import Graph, Literal, RDF, URIRef, Namespace
from rdflib.namespace import XSD, RDFS, OWL
import urllib.parse
import re

In [30]:
# Location of the character dataset, relative to the notebook's working directory.
csv_file = './game-of-thrones/character-predictions.csv'

In [31]:
# Read the character table from disk and report how many rows were loaded.
df = pd.read_csv(csv_file)
print(f"Loaded {len(df)} rows from {csv_file}")

# NOTE(review): this list is not referenced by any later cell visible here,
# and several entries do not appear among the columns shown in the preview — confirm intent.
columns_to_check = [
    'name',
    'house',
    'isAlive',
    'isNoble',
    'isMarried',
    'isRoyal',
    'isFemale',
    'isMale',
    'isBastard',
]

# Preview the first five rows (head() defaults to n=5).
df.head()

Loaded 1946 rows from ./game-of-thrones/character-predictions.csv


Unnamed: 0,S.No,actual,pred,alive,plod,name,title,male,culture,dateOfBirth,...,isAliveHeir,isAliveSpouse,isMarried,isNoble,age,numDeadRelations,boolDeadRelations,isPopular,popularity,isAlive
0,1,0,0,0.054,0.946,Viserys II Targaryen,,1,,,...,0.0,,0,0,,11,1,1,0.605351,0
1,2,1,0,0.387,0.613,Walder Frey,Lord of the Crossing,1,Rivermen,208.0,...,,1.0,1,1,97.0,1,1,1,0.896321,1
2,3,1,0,0.493,0.507,Addison Hill,Ser,1,,,...,,,0,1,,0,0,0,0.267559,1
3,4,0,0,0.076,0.924,Aemma Arryn,Queen,0,,82.0,...,,0.0,1,1,23.0,0,0,0,0.183946,0
4,5,1,1,0.617,0.383,Sylva Santagar,Greenstone,0,Dornish,276.0,...,,1.0,1,1,29.0,0,0,0,0.043478,1


In [32]:
# Namespace under which every class, property and individual of this notebook is minted.
GOT = Namespace("http://www.semanticweb.org/gameofthrones/ontology#")

# Empty in-memory graph that will receive the generated triples.
g = Graph()

# Register prefixes so the serialized output can use got: / owl: instead of full URIs.
g.bind("got", GOT)
g.bind("owl", OWL)
  
def clean_uri(text):
    """Turn a raw CSV cell into a URL-safe local name for an ontology URI.

    Spaces and opening parentheses become underscores; single/double quotes,
    periods and closing parentheses are dropped; runs of underscores are
    collapsed into one; the result is percent-encoded with
    ``urllib.parse.quote`` so non-ASCII characters stay valid in a URI.

    Args:
        text: Raw cell value (a string, NaN, None, ...).

    Returns:
        The cleaned identifier as a string, or None when the value is missing,
        blank, the literal string "nan" (ignoring case and surrounding
        whitespace), or reduces to nothing after cleaning (e.g. "..."), so
        callers never build a URI with an empty local name.
    """
    if pd.isna(text):
        return None

    raw = str(text).strip()
    if raw == '' or raw.lower() == 'nan':
        return None

    clean_text = (
        raw.replace(" ", "_")
        .replace("'", "")
        .replace('"', "")
        .replace(".", "")
        .replace("(", "_")
        .replace(")", "")
    )
    # Adjacent replacements (e.g. " (") can leave several underscores in a row.
    clean_text = re.sub(r'_+', '_', clean_text)

    # Punctuation-only input collapses to an empty string; report it as missing
    # rather than returning '' (which would address the namespace itself).
    if not clean_text:
        return None

    return urllib.parse.quote(clean_text)

# Status message confirming that this cell ran to completion.
print("Graph initialized and helper function ready.")

Graph initialized and helper function ready.


In [33]:
# Transformation: turn every CSV row into ABox triples inside `g`.

# In Game of Thrones, bastards receive regional surnames reflecting their birthplace:
# Snow (North), Sand (Dorne), Rivers (Riverlands), Stone (Vale), Flowers (Reach),
# Hill (Westerlands), Pyke (Iron Islands), Storm (Stormlands), Waters (Crownlands).
# Defined once here instead of being rebuilt on every loop iteration.
bastard_surnames = ['Snow', 'Sand', 'Rivers', 'Stone', 'Flowers', 'Hill', 'Pyke', 'Storm', 'Waters']


def _entity_uri(raw_value):
    """Return the GOT URI for ``raw_value``, or None when no identifier can be built.

    ``clean_uri`` gives back None (or an empty string) for missing or blank
    cells; indexing the namespace with such a value would not name a real
    individual, so callers skip the relationship when this returns None.
    """
    local_id = clean_uri(raw_value)
    return GOT[local_id] if local_id else None


count = 0
for index, row in df.iterrows():
    name_raw = row['name']

    # Create the URI: http://.../ontology#Jon_Snow
    # Rows without a usable name cannot be identified and are skipped.
    hero_uri = _entity_uri(name_raw)
    if hero_uri is None:
        continue

    # Define type: Jon Snow is a Person
    g.add((hero_uri, RDF.type, GOT.Person))

    # Gender (Column 'male': 1=Male, 0=Female); any value other than 1 is typed Female.
    if row['male'] == 1:
        g.add((hero_uri, RDF.type, GOT.Male))
    else:
        g.add((hero_uri, RDF.type, GOT.Female))

    # Knight logic
    # NOTE(review): this is a case-sensitive substring test, so any title merely
    # containing "Ser" also matches — confirm that is acceptable for the dataset.
    title_text = str(row['title']) if not pd.isna(row['title']) else ""
    if 'Ser' in title_text:
        g.add((hero_uri, RDF.type, GOT.Knight))

    # Royalty logic (King/Queen)
    # NOTE(review): queens are typed as GOT.King, and the substring test matches
    # every title containing "King" or "Queen" — verify against the ontology's TBox.
    if 'King' in title_text or 'Queen' in title_text:
        g.add((hero_uri, RDF.type, GOT.King))

    # Bastard detection by surname; split() (not split(' ')) so trailing or
    # repeated whitespace in the name cannot yield an empty "surname".
    surname = str(name_raw).split()[-1]
    if surname in bastard_surnames:
        g.add((hero_uri, RDF.type, GOT.Bastard))

    # --- DATA PROPERTIES ---

    # data property name
    g.add((hero_uri, GOT.hasName, Literal(str(name_raw), datatype=XSD.string)))

    # data property isAlive; a missing value is skipped because bool(NaN) is True
    # and would silently assert that the character is alive.
    if 'isAlive' in row and not pd.isna(row['isAlive']):
        is_alive = bool(row['isAlive'])
        g.add((hero_uri, GOT.isAlive, Literal(is_alive, datatype=XSD.boolean)))

    # Popularity
    if 'popularity' in row and not pd.isna(row['popularity']):
        pop = float(row['popularity'])
        g.add((hero_uri, GOT.popularityScore, Literal(pop, datatype=XSD.float)))

    if 'age' in row and not pd.isna(row['age']):
        g.add((hero_uri, GOT.age, Literal(int(row['age']), datatype=XSD.integer)))

    # --- OBJECT PROPERTIES (RELATIONSHIPS) ---
    # Each target URI is resolved through _entity_uri, which covers NaN cells as
    # well as blank/unusable strings, so no triple is created for them.

    # --- House Relationship ---
    house_uri = _entity_uri(row['house'])
    if house_uri is not None:
        g.add((house_uri, RDF.type, GOT.House))
        g.add((hero_uri, GOT.belongsToHouse, house_uri))

    # --- Family Relationships ---

    # Father (Column 'father')
    father_uri = _entity_uri(row['father'])
    if father_uri is not None:
        g.add((father_uri, RDF.type, GOT.Person))
        g.add((father_uri, RDF.type, GOT.Male))
        g.add((hero_uri, GOT.hasFather, father_uri))

    # Mother (Column 'mother')
    mother_uri = _entity_uri(row['mother'])
    if mother_uri is not None:
        g.add((mother_uri, RDF.type, GOT.Person))
        g.add((mother_uri, RDF.type, GOT.Female))
        g.add((hero_uri, GOT.hasMother, mother_uri))

    # Spouse (Column 'spouse')
    spouse_uri = _entity_uri(row['spouse'])
    if spouse_uri is not None:
        g.add((spouse_uri, RDF.type, GOT.Person))
        g.add((hero_uri, GOT.hasSpouse, spouse_uri))

    count += 1

print(f"Conversion complete. Processed {count} characters.")
print(f"Total Triples generated: {len(g)}")

Conversion complete. Processed 1945 characters.
Total Triples generated: 12908


In [34]:
# Persist the populated graph to disk in Turtle syntax.
output_file = 'got_abox.ttl'
g.serialize(format='turtle', destination=output_file)

# Two-line confirmation, emitted with a single print call.
print(
    f"SUCCESS! Ontology saved to: {output_file}",
    "You can now open this file in Protégé.",
    sep="\n",
)

SUCCESS! Ontology saved to: got_abox.ttl
You can now open this file in Protégé.
