# Notebook for Ontology Population

In [20]:
# import packages
!pip install rdflib
import rdflib
from rdflib.namespace import RDF, RDFS, XSD
from rdflib import Graph, Literal, RDF, URIRef
import pandas as pd



In [21]:
# Working graph for the whole notebook: start empty, then read the pre-defined
# ontology file into it so that the instance data added below lands on top of it.
g = Graph()
g.parse(source='./ontology/WebDeDatos_IMDb.ttl')

<Graph identifier=Nb6eb9054ed6b49359a5b8dee10d8fabd (<class 'rdflib.graph.Graph'>)>

In [22]:
# Read every pre-processed CSV export into its own DataFrame.
def _read_table(csv_path):
  """Load one comma-separated file (first row is the header) and blank out missing cells.

  Missing values are replaced by "" so that the population loops further down
  can detect an absent value with a plain `!= ""` comparison.
  """
  table = pd.read_csv(csv_path, sep=',', header=0)
  return table.fillna("")


countries = _read_table('./data/sql_processed/countries.csv')
genres = _read_table('./data/sql_processed/genres.csv')
languages = _read_table('./data/sql_processed/languages.csv')
movies = _read_table('./data/sql_processed/movies.csv')
names = _read_table('./data/sql_processed/names.csv')
ratings = _read_table('./data/sql_processed/ratings.csv')
mapping_title_names = _read_table('./data/sql_processed/mapping_title_name.csv')

In [23]:
# import prefixes we will use
# IMDB is the namespace from which every class/property term below is built
# (IMDB.Person, IMDB.genre, ...); rdflib forms each term by appending the
# attribute name to the string given here.
# NOTE(review): this string is a relative file path with no trailing '#' or '/',
# so IMDB.Person expands to './ontology/WebDeDatos_IMDb.ttlPerson'. Confirm that
# this matches the IRIs declared inside WebDeDatos_IMDb.ttl; if the ontology uses
# an absolute base IRI, that IRI (including its separator) probably belongs here.
IMDB = rdflib.Namespace('./ontology/WebDeDatos_IMDb.ttl')

## Add persons to ontology

In [24]:
# Literal-valued person attributes, one entry per column of `names`:
#   (column name, ontology property, XSD datatype or None).
# A datatype of None is the same as calling Literal(value) without a datatype.
# The height entry reads the `height` column; storing the person's name under
# IMDB.height (as an xsd:float) was a copy-paste slip.
# NOTE(review): both date columns are tagged xsd:dateTime; if the CSV holds plain
# dates (YYYY-MM-DD), xsd:date may be the intended datatype - confirm.
person_attributes = [
  ("height", IMDB.height, XSD.float),
  ("date_of_birth", IMDB.date_of_birth, XSD.dateTime),
  ("city_of_birth", IMDB.city_of_birth, None),
  ("country_of_birth", IMDB.country_of_birth, None),
  ("date_of_death", IMDB.date_of_death, XSD.dateTime),
  ("city_of_death", IMDB.city_of_death, None),
  ("country_of_death", IMDB.country_of_death, None),
  ("spouses", IMDB.spouses, None),
  ("divorces", IMDB.divorces, None),
  ("children", IMDB.children, None),
]

for i in range(len(names.imdb_name_id)):
  # the IMDb name id is the local part of the person's IRI
  person = URIRef(f"http://example.org/{names.imdb_name_id[i]}")

  # every row becomes an individual of class Person
  g.add((person, RDF.type, IMDB.Person))

  # the name is stored twice: as rdfs:label and as the ontology's own name property
  person_name = names.name[i]
  if person_name != "":
    g.add((person, RDFS.label, Literal(person_name)))
    g.add((person, IMDB.name, Literal(person_name)))

  # remaining attributes: emit a triple only when the cell is not blank
  # (missing values were replaced by "" when the CSV was loaded)
  for column, predicate, datatype in person_attributes:
    value = names[column][i]
    if value != "":
      g.add((person, predicate, Literal(value, datatype=datatype)))



## Add movies to ontology

In [25]:
# Literal-valued movie attributes, one entry per column of `movies`:
#   (column name, ontology property, XSD datatype or None).
# A datatype of None is the same as calling Literal(value) without a datatype.
movie_attributes = (
  ("year", IMDB.yearPublished, XSD.int),
  ("date_published", IMDB.date_published, XSD.dateTime),
  ("duration", IMDB.duration, XSD.float),
  ("production_company", IMDB.productionCompany, None),
  ("description", IMDB.description, None),
)

for i in range(len(movies.imdb_title_id)):
  # the IMDb title id is the local part of the movie's IRI
  movie = URIRef(f"http://example.org/{movies.imdb_title_id[i]}")

  # every row becomes an individual of class Movie
  g.add((movie, RDF.type, IMDB.Movie))

  # the title is stored twice: as rdfs:label and as the ontology's title property
  movie_title = movies.title[i]
  if movie_title != "":
    g.add((movie, RDFS.label, Literal(movie_title)))
    g.add((movie, IMDB.title, Literal(movie_title)))

  # remaining attributes: emit a triple only when the cell is not blank
  for column, predicate, datatype in movie_attributes:
    value = movies[column][i]
    if value != "":
      g.add((movie, predicate, Literal(value, datatype=datatype)))



## Add ratings

In [26]:
# Numeric rating attributes, one entry per column of `ratings`:
#   (column name, ontology property, XSD datatype).
rating_attributes = (
  ("mean_vote", IMDB.meanRating, XSD.float),
  ("median_vote", IMDB.medianRating, XSD.float),
  ("total_votes", IMDB.numberOfRatings, XSD.int),
)

for i in range(len(ratings.imdb_title_id)):
  title_id = ratings.imdb_title_id[i]

  # the rating node's IRI is the movie IRI with an "_imdbRating" suffix
  rating = URIRef(f"http://example.org/{title_id}_imdbRating")
  rated_movie = URIRef(f"http://example.org/{title_id}")

  # type the rating node, then link the movie to it
  # NOTE(review): IMDB.IMDbRating is used both as the class of the rating node
  # and as the predicate linking movie -> rating; confirm the ontology really
  # defines one term for both roles.
  g.add((rating, RDF.type, IMDB.IMDbRating))
  g.add((rated_movie, IMDB.IMDbRating, rating))

  # mean, median and vote count: emit a triple only when the cell is not blank
  for column, predicate, datatype in rating_attributes:
    value = ratings[column][i]
    if value != "":
      g.add((rating, predicate, Literal(value, datatype=datatype)))

## Add genres, languages and countries to movies

In [27]:
# genres

# Genre label as written in the CSV -> local name of the matching IMDB term.
# Most labels are used verbatim; the three hyphenated ones drop the hyphen.
genre_local_names = {
  label: label
  for label in (
    "Romance", "Drama", "Biography", "Crime", "History", "Fantasy",
    "Adventure", "War", "Mystery", "Horror", "Western", "Comedy",
    "Family", "Action", "Thriller", "Sport", "Musical", "Music",
    "Animation", "Adult", "Documentary", "News",
  )
}
genre_local_names["Sci-Fi"] = "SciFi"
genre_local_names["Film-Noir"] = "FilmNoir"
genre_local_names["Reality-TV"] = "RealityTV"

for i in range(len(genres.imdb_title_id)):
  movie = URIRef(f"http://example.org/{genres.imdb_title_id[i]}")

  # link the movie to its genre term; labels outside the table (including
  # blank cells) are silently skipped
  local_name = genre_local_names.get(genres.genre[i])
  if local_name is not None:
    g.add((movie, IMDB.genre, IMDB[local_name]))

In [28]:
# languages

# One row per (movie, language) pair: attach each language to its movie as a
# plain literal.
for i in range(len(languages.imdb_title_id)):
  movie = URIRef(f"http://example.org/{languages.imdb_title_id[i]}")

  # add language; blank cells (missing values become "" on load) are skipped
  # so no empty literal ends up in the graph
  language = languages.language[i]
  if language != "":
    g.add((movie, IMDB.language, Literal(language)))


In [29]:
# countries

# One row per (movie, country) pair: attach each country to its movie as a
# plain literal.
for i in range(len(countries.imdb_title_id)):
  movie = URIRef(f"http://example.org/{countries.imdb_title_id[i]}")

  # add country; blank cells (missing values become "" on load) are skipped
  # so no empty literal ends up in the graph
  country = countries.country[i]
  if country != "":
    g.add((movie, IMDB.country, Literal(country)))


## Add persons' roles in movies

In [32]:
# Credit category from the CSV -> (role class the person is typed with,
# property linking the person to the movie). "actress" and "actor" share the
# same class and property.
role_terms = {
  "actress": (IMDB.Actor, IMDB.actsIn),
  "actor": (IMDB.Actor, IMDB.actsIn),
  "director": (IMDB.Director, IMDB.directed),
  "producer": (IMDB.Producer, IMDB.produced),
  "composer": (IMDB.Composer, IMDB.composed),
  "cinematographer": (IMDB.Cinematographer, IMDB.cinematographed),
  "writer": (IMDB.Writer, IMDB.wrote),
  "production_designer": (IMDB.ProductionDesigner, IMDB.visuallyDesigned),
  "editor": (IMDB.Editor, IMDB.edited),
}

for i in range(len(mapping_title_names.imdb_title_id)):
  movie = URIRef(f"http://example.org/{mapping_title_names.imdb_title_id[i]}")
  person = URIRef(f"http://example.org/{mapping_title_names.imdb_name_id[i]}")

  # blank or unrecognised categories produce no triples
  role = role_terms.get(mapping_title_names.category[i])
  if role is None:
    continue

  # type the person with the role class and link them to the movie
  role_class, role_property = role
  g.add((person, RDF.type, role_class))
  g.add((person, role_property, movie))

## Export the ontology

In [33]:
# Write the populated graph (ontology plus all added individuals) to disk,
# serialized as RDF/XML.
g.serialize(
  destination='./ontology/WebDeDatos_IMDb_final.owl',
  format='xml',
)

<Graph identifier=Nb6eb9054ed6b49359a5b8dee10d8fabd (<class 'rdflib.graph.Graph'>)>