### Importing Libraries

In [3]:
# !pip install rdflib
# !pip install pandas
# !pip install numpy

In [4]:
# import modules 
from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib import Namespace
from rdflib.namespace import OWL, RDF, RDFS, FOAF, XSD
from rdflib import BNode

import pandas as pd
import numpy as np

### Loading Dataset

In [5]:
# Load the per-character attribute table; the file is semicolon-separated.
df_raw = pd.read_csv(
    "marvel_characters_info_with_external_resource.csv",
    sep=";",
)
df_raw

Unnamed: 0,ID,Name,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased
0,0,A-Bomb,good,Male,yellow,Human,No Hair,Marvel Comics,203.0,441.0,Rick_Jones_(character),Alive,No
1,1,Abe Sapien,good,Male,blue,Icthyo Sapien,No Hair,Dark Horse Comics,191.0,65.0,,,
2,2,Abin Sur,good,Male,blue,Ungaran,No Hair,DC Comics,185.0,90.0,,,
3,3,Abomination,bad,Male,green,Human,No Hair,Marvel Comics,203.0,441.0,Abomination_(character),Alive,Yes
4,5,Absorbing Man,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
626,727,Yellow Claw,bad,Male,blue,Human,No Hair,Marvel Comics,188.0,95.0,Yellow_Claw_(character),Deceased,No
627,728,Yellowjacket,good,Male,blue,Human,Blond,Marvel Comics,183.0,83.0,Yellowjacket_(comics),Alive,No
628,731,Yoda,good,Male,brown,Yoda's species,White,George Lucas,66.0,17.0,,,
629,732,Zatanna,good,Female,blue,Human,Black,DC Comics,170.0,57.0,,,


In [6]:
# Restrict the table to rows whose Publisher is exactly "Marvel Comics".
df_marvel = df_raw.loc[df_raw['Publisher'].eq("Marvel Comics")]
df_marvel

Unnamed: 0,ID,Name,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased
0,0,A-Bomb,good,Male,yellow,Human,No Hair,Marvel Comics,203.0,441.0,Rick_Jones_(character),Alive,No
3,3,Abomination,bad,Male,green,Human,No Hair,Marvel Comics,203.0,441.0,Abomination_(character),Alive,Yes
4,5,Absorbing Man,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No
7,8,Agent 13,good,Female,blue,Human,Blond,Marvel Comics,173.0,61.0,Sharon_Carter,Alive,No
8,9,Agent Bob,good,Male,brown,Human,Brown,Marvel Comics,178.0,81.0,"Bob,_Agent_of_Hydra",Alive,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
623,724,Wyatt Wingfoot,good,Male,brown,Human,Black,Marvel Comics,196.0,117.0,Wyatt_Wingfoot,Alive,No
624,725,X-23,good,Female,green,Mutant,Black,Marvel Comics,155.0,50.0,X-23,Alive,No
625,726,X-Man,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No
626,727,Yellow Claw,bad,Male,blue,Human,No Hair,Marvel Comics,188.0,95.0,Yellow_Claw_(character),Deceased,No


In [7]:
# Load the lookup table of character IDs and names (semicolon-separated).
df_char = pd.read_csv("characters.csv", sep=";")
df_char

Unnamed: 0,characterID,name
0,1009220,Captain America
1,1010740,Winter Soldier
2,1009471,Nick Fury
3,1009552,S.H.I.E.L.D.
4,1009228,Sharon Carter
...,...,...
1165,1011395,Talon (Fraternity of Raptors)
1166,1011196,Captain Flint
1167,1009397,Lava-Man
1168,1011113,Blue Blade


In [8]:
# Load the comicID <-> characterID mapping, i.e. which character appears
# in which comic (semicolon-separated).
df_char_to_com = pd.read_csv("charactersToComics.csv", sep=";")

df_char_to_com

Unnamed: 0,comicID,characterID
0,16232,1009220
1,16248,1009220
2,21486,1011109
3,58634,1010808
4,16241,1009220
...,...,...
22245,45824,1009536
22246,46509,1009664
22247,46047,1009189
22248,46210,1009368


In [9]:
# Attach every comic appearance to its character by joining on characterID.
# The join moves characterID into the index, so it is copied back into a
# regular column afterwards.
df_name_com_char = (
    df_char.set_index('characterID')
    .join(df_char_to_com.set_index('characterID'))
    .assign(characterID=lambda joined: joined.index)
)
df_name_com_char

Unnamed: 0_level_0,name,comicID,characterID
characterID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1009144,A.I.M.,65466.0,1009144
1009144,A.I.M.,43944.0,1009144
1009144,A.I.M.,65357.0,1009144
1009144,A.I.M.,64790.0,1009144
1009144,A.I.M.,36737.0,1009144
...,...,...,...
1017583,Devil Dinosaur (Devil Dinosaur),59351.0,1017583
1017583,Devil Dinosaur (Devil Dinosaur),66917.0,1017583
1017583,Devil Dinosaur (Devil Dinosaur),59350.0,1017583
1017583,Devil Dinosaur (Devil Dinosaur),56399.0,1017583


In [10]:
# Join the Marvel attribute table with the comic appearances on the character
# name. Characters without a match get NaN in comicID / characterID; those are
# replaced by the sentinel 0 and both ID columns are stored as integers.
#
# The fill is done by assigning the result back to the column instead of
# `df_join[col].fillna(0, inplace=True)`: that chained in-place form operates
# on a temporary object under pandas Copy-on-Write, leaving the NaN values in
# df_join and making the following integer cast fail.
df_join = (
    df_marvel.set_index('Name')
    .join(df_name_com_char.set_index('name'))
    .assign(
        comicID=lambda joined: joined['comicID'].fillna(0).astype(int),
        characterID=lambda joined: joined['characterID'].fillna(0).astype(int),
    )
)
df_join

Unnamed: 0_level_0,ID,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased,comicID,characterID
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
A-Bomb,0,good,Male,yellow,Human,No Hair,Marvel Comics,203.0,441.0,Rick_Jones_(character),Alive,No,0,0
Abomination,3,bad,Male,green,Human,No Hair,Marvel Comics,203.0,441.0,Abomination_(character),Alive,Yes,0,0
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,43507,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36484,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36479,1009148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18232,1009725
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18221,1009725
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18175,1009725
Yellow Claw,727,bad,Male,blue,Human,No Hair,Marvel Comics,188.0,95.0,Yellow_Claw_(character),Deceased,No,0,1009736


In [11]:
# Discard rows carrying the comicID sentinel 0, i.e. keep only rows that
# reference an actual comic.
df_join_wo_0 = df_join.loc[df_join['comicID'].ne(0)]
df_join_wo_0

Unnamed: 0_level_0,ID,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased,comicID,characterID
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,43507,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36484,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36479,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36480,1009148
Absorbing Man,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,41433,1009148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18250,1009725
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18243,1009725
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18232,1009725
X-Man,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18221,1009725


In [12]:
# Load the comic metadata (read with the default comma separator, unlike the
# other input files) and store the issue number as an integer.
df_com = pd.read_csv("comics.csv").astype({'issueNumber': int})
df_com

Unnamed: 0,comicID,title,issueNumber,description
0,16232,Cap Transport (2005) #12,12,
1,16248,Cap Transport (2005) #9,9,
2,4990,Halo Preview (2006),0,
3,21486,Ultimate X-Men (Spanish Language Edition) (200...,9,
4,58634,A Year of Marvels: The Incredible (2016) #5,5,It’s Halloween in the Marvel U! What does that...
...,...,...,...,...
41222,47542,Kick-Ass 3 (2013) #1 (Ferry Variant),1,Kick-Ass and Hit-Girl&rsquo;s blockbuster retu...
41223,46766,X-Factor (2005) #257,257,<ul><li>The end begins here.</li><li>THE END O...
41224,45951,Cable and X-Force (2012) #9,9,Guest starring the Uncanny Avengers!\n- Hope g...
41225,46750,Wolverine: Sabretooth Reborn (Hardcover),0,Superstars Jeph Loeb and Simone Bianchi's tita...


In [13]:
# Combine the character/comic rows with the character names and the comic
# metadata, matching on comicID. Both merge inputs carry a characterID column,
# so pandas suffixes them with _x / _y: the _y copy is dropped and the _x copy
# gets its plain name back.
df = (
    df_join_wo_0
    .merge(df_name_com_char, on='comicID')
    .merge(df_com, on='comicID')
    .drop(columns=['characterID_y'])
    .rename(columns={'characterID_x': 'characterID'})
)
df

Unnamed: 0,ID,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased,comicID,characterID,name,title,issueNumber,description
0,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,43507,1009148,Absorbing Man,A+X (2012) #8,8,SPIDER-WOMAN & KITTY PRYDE (with Lockheed in t...
1,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,43507,1009148,Absorbing Man,A+X (2012) #8,8,SPIDER-WOMAN & KITTY PRYDE (with Lockheed in t...
2,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36484,1009148,Absorbing Man,Avengers Academy (2010) #19,19,FEAR ITSELF tie-in! The students of Avengers A...
3,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36479,1009148,Absorbing Man,Avengers Academy (2010) #18,18,FEAR ITSELF tie-in! The young heroes struggle ...
4,5,bad,Male,blue,Human,No Hair,Marvel Comics,193.0,122.0,Absorbing_Man,Alive,No,36480,1009148,Absorbing Man,Avengers Academy (2010) #17,17,FEAR ITSELF tie-in! Trapped in the Infinite Ma...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12814,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18250,1009725,X-Man,X-Man (1995) #8,8,
12815,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18243,1009725,X-Man,X-Man (1995) #7,7,
12816,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18232,1009725,X-Man,X-Man (1995) #6,6,
12817,726,good,Male,blue,Mutant,Brown,Marvel Comics,175.0,61.0,Nate_Grey,Alive,No,18221,1009725,X-Man,X-Man (1995) #5,5,


### Data Cleaning

In [14]:
# Replace every missing value with the string "None", so that e.g. a comic
# without a description ends up with the description "None".
df1 = df.mask(df.isna(), 'None')
df = df1


# In the Weight column the "None" placeholder is turned into 0 instead.
df['Weight'] = df['Weight'].replace(to_replace='None', value=0)

In [15]:
# Store the height and weight values as integers.
for _measure in ('Height', 'Weight'):
    df[_measure] = df[_measure].astype('int')

In [16]:
# Replace every space in the comic titles with an underscore (a plain,
# non-regex substitution is sufficient for a single literal character).
df['title'] = df['title'].str.replace(' ', '_', regex=False)

In [17]:
# Keep only the first row seen for each comic title.
df = df.loc[~df.duplicated(subset="title", keep="first")]
df

Unnamed: 0,ID,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased,comicID,characterID,name,title,issueNumber,description
0,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,43507,1009148,Absorbing Man,A+X_(2012)_#8,8,SPIDER-WOMAN & KITTY PRYDE (with Lockheed in t...
2,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,36484,1009148,Absorbing Man,Avengers_Academy_(2010)_#19,19,FEAR ITSELF tie-in! The students of Avengers A...
3,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,36479,1009148,Absorbing Man,Avengers_Academy_(2010)_#18,18,FEAR ITSELF tie-in! The young heroes struggle ...
4,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,36480,1009148,Absorbing Man,Avengers_Academy_(2010)_#17,17,FEAR ITSELF tie-in! Trapped in the Infinite Ma...
5,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,41433,1009148,Absorbing Man,Fear_Itself_(2010)_#2_(3rd_Printing_Variant),2,The Mighty Thor--imprisoned by his own father!...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12814,726,good,Male,blue,Mutant,Brown,Marvel Comics,175,61,Nate_Grey,Alive,No,18250,1009725,X-Man,X-Man_(1995)_#8,8,
12815,726,good,Male,blue,Mutant,Brown,Marvel Comics,175,61,Nate_Grey,Alive,No,18243,1009725,X-Man,X-Man_(1995)_#7,7,
12816,726,good,Male,blue,Mutant,Brown,Marvel Comics,175,61,Nate_Grey,Alive,No,18232,1009725,X-Man,X-Man_(1995)_#6,6,
12817,726,good,Male,blue,Mutant,Brown,Marvel Comics,175,61,Nate_Grey,Alive,No,18221,1009725,X-Man,X-Man_(1995)_#5,5,


In [18]:
# Collapse the table to one row per character name: all of that character's
# comic titles are concatenated into one ", "-separated string, and likewise
# for the descriptions.
_comma_join = ', '.join
df_filtered = (
    df.groupby(['name'])
    .agg({'title': _comma_join, 'description': _comma_join})
    .reset_index()
)
df_filtered

Unnamed: 0,name,title,description
0,Absorbing Man,"A+X_(2012)_#8, Avengers_Academy_(2010)_#19, Av...",SPIDER-WOMAN & KITTY PRYDE (with Lockheed in t...
1,Agent Zero,Weapon_X:_Days_of_Future_Now_(Trade_Paperback)...,Weapon X is back! The War of the Programs is o...
2,Annihilus,Fantastic_Four_(1998)_#587_(3rd_Printing_Varia...,"One of the four will fall!, The conclusion to ..."
3,Apocalypse,"Uncanny_Avengers_(2012)_#8, Uncanny_Avengers_(...","<ul><li>Xavier is dead, now begins the age of ..."
4,Arachne,"Herc_(2010)_#8, Amazing_Spider-Man_(1999)_#637...",SPIDER-ISLAND TIE IN Hercules finds himself in...
...,...,...,...
152,Wolfsbane,"X-Factor_(2005)_#224, X-Factor_(2005)_#222, X-...","""HARD LABOR"" Conclusion! About to give birth a..."
153,Wolverine,"Wolverine:_Season_One_(Hardcover), Wolverine_&...",Discovered as a feral mutant prowling the wild...
154,Wonder Man,Wonder_Man:_My_Fair_Super_Hero_(Trade_Paperbac...,Meet Lady Killer -- the dangerous assassin who...
155,X-23,All-New_Wolverine_Vol._6:_Old_Woman_Laura_(Tra...,"Collects All-New Wolverine #31-35. The past, p..."


In [19]:
# Swap the per-comic title/description columns for the aggregated per-character
# strings: drop the old columns, join the remaining attributes onto
# df_filtered by name, restore `name` as a regular column, and keep a single
# row per character.
df = (
    df_filtered.set_index('name')
    .join(df.drop(columns=["title", "description"]).set_index('name'))
    .assign(name=lambda joined: joined.index)
    .drop_duplicates(subset="name")
)
df

Unnamed: 0_level_0,title,description,ID,Alignment,Gender,EyeColor,Species,HairColor,Publisher,Height,Weight,ExternalResource,LivingStatus,FormerlyDeceased,comicID,characterID,issueNumber,name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Absorbing Man,"A+X_(2012)_#8, Avengers_Academy_(2010)_#19, Av...",SPIDER-WOMAN & KITTY PRYDE (with Lockheed in t...,5,bad,Male,blue,Human,No Hair,Marvel Comics,193,122,Absorbing_Man,Alive,No,43507,1009148,8,Absorbing Man
Agent Zero,Weapon_X:_Days_of_Future_Now_(Trade_Paperback)...,Weapon X is back! The War of the Programs is o...,10,good,Male,brown,Human,black,Marvel Comics,191,104,David_North_(character),Alive,No,3357,1009150,0,Agent Zero
Annihilus,Fantastic_Four_(1998)_#587_(3rd_Printing_Varia...,"One of the four will fall!, The conclusion to ...",28,bad,Male,green,Arthrosian,No Hair,Marvel Comics,180,90,Annihilus,Alive,No,39304,1009154,587,Annihilus
Apocalypse,"Uncanny_Avengers_(2012)_#8, Uncanny_Avengers_(...","<ul><li>Xavier is dead, now begins the age of ...",34,bad,Male,red,Mutant,Black,Marvel Comics,213,135,Apocalypse_(character),Alive,No,43332,1009156,8,Apocalypse
Arachne,"Herc_(2010)_#8, Amazing_Spider-Man_(1999)_#637...",SPIDER-ISLAND TIE IN Hercules finds himself in...,38,good,Female,blue,Human,Blond,Marvel Comics,175,63,Julia_Carpenter,Alive,No,29773,1010773,8,Arachne
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wolfsbane,"X-Factor_(2005)_#224, X-Factor_(2005)_#222, X-...","""HARD LABOR"" Conclusion! About to give birth a...",718,good,Female,green,Mutant,Auburn,Marvel Comics,366,473,Wolfsbane_(character),Alive,Yes,35274,1009717,224,Wolfsbane
Wolverine,"Wolverine:_Season_One_(Hardcover), Wolverine_&...",Discovered as a feral mutant prowling the wild...,719,good,Male,blue,Mutant,Black,Marvel Comics,160,135,Wolverine_(character),Alive,Yes,46751,1009718,0,Wolverine
Wonder Man,Wonder_Man:_My_Fair_Super_Hero_(Trade_Paperbac...,Meet Lady Killer -- the dangerous assassin who...,721,good,Male,red,Mutant,Black,Marvel Comics,188,171,Wonder_Man,Alive,No,16017,1009719,0,Wonder Man
X-23,All-New_Wolverine_Vol._6:_Old_Woman_Laura_(Tra...,"Collects All-New Wolverine #31-35. The past, p...",725,good,Female,green,Mutant,Black,Marvel Comics,155,50,X-23,Alive,No,67622,1009722,0,X-23


In [20]:
# Demo: a BNode created without an argument receives a generated identifier.
b_node = BNode()
# n3() renders the blank node in N3 notation ("_:" followed by the identifier).
b_node.n3()

'_:Naf98fb39810043f598c1a34460d49108'

In [21]:
# Demo: a BNode created with an explicit identifier keeps that identifier.
node1 = BNode("node1")
# n3() renders the blank node in N3 notation ("_:" followed by the identifier).
node1.n3()

'_:node1'

## Ontology Creation

In [22]:
# Start from an empty RDF graph that will hold the ontology.
g = Graph()

# Namespaces used by the ontology: our own vocabulary, the RDF/RDFS/OWL
# vocabularies, and the DBpedia ontology / resource / property namespaces.
comic = Namespace("http://comicCharacters.com/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
dbo = Namespace("http://dbpedia.org/ontology/")
dbr = Namespace("http://dbpedia.org/resource/")
dbp = Namespace("http://dbpedia.org/property/")

# Register a prefix for each namespace so the serialization uses prefixed names.
for _prefix, _namespace in (
    ("comic", comic),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("owl", owl),
    ("dbo", dbo),
    ("dbr", dbr),
    ("dbp", dbp),
):
    g.bind(_prefix, _namespace)

# One fresh blank node per name; these serve as the subjects of the
# owl:Restriction descriptions attached to the classes further down.
(
    res_char_align,
    res_eye_color,
    res_gender,
    res_hair_color,
    res_height,
    res_name,
    res_weight,
    res_characterID,
    res_living_status,
    res_formely_dec,
    res_species,
    res_comicID,
    res_publisher,
    res_issue,
    res_has_publisher,
    res_description,
    res_title,
    res_hasSpecies,
) = (BNode() for _ in range(18))

# IRIs of the four classes defined by this ontology.
character_class = comic.Character
publisher_class = comic.Publisher
species_class = comic.Species
comic_class = comic.Comic

def _add_exactly_one_restriction(owner_class, restriction_node, on_property,
                                 filler, filler_predicate=OWL.onDataRange):
    """Make `owner_class` a subclass of an "exactly one" restriction.

    Adds the triples of a qualified exact-cardinality restriction to the
    notebook-level graph `g`:

        owner_class       rdfs:subClassOf          restriction_node
        restriction_node  rdf:type                 owl:Restriction
        restriction_node  owl:onProperty           on_property
        restriction_node  owl:qualifiedCardinality "1"^^xsd:nonNegativeInteger
        restriction_node  filler_predicate         filler

    Parameters:
        owner_class: class that has to satisfy the restriction.
        restriction_node: blank node used as the restriction's subject.
        on_property: property whose number of values is constrained.
        filler: datatype (for data properties) or class (for object
            properties) the single value must belong to.
        filler_predicate: OWL.onDataRange for data properties (default),
            OWL.onClass for object properties.
    """
    g.add((owner_class, RDFS.subClassOf, restriction_node))
    g.add((restriction_node, RDF.type, OWL.Restriction))
    g.add((restriction_node, OWL.onProperty, on_property))
    # The cardinality value must be an xsd:nonNegativeInteger literal, and a
    # restriction that names a filler (onDataRange / onClass) has to use
    # owl:qualifiedCardinality rather than the unqualified owl:cardinality.
    g.add((restriction_node, OWL.qualifiedCardinality,
           Literal(1, datatype=XSD.nonNegativeInteger)))
    g.add((restriction_node, filler_predicate, filler))


# Data-property restrictions: (owner class, blank node, property, datatype).
for _owner, _node, _prop, _datatype in (
    (character_class, res_char_align, comic.hasAlignment, XSD.string),
    (character_class, res_eye_color, comic.hasEyeColor, XSD.string),
    (character_class, res_gender, comic.hasGender, XSD.string),
    (character_class, res_hair_color, comic.hasHairColor, XSD.string),
    (character_class, res_height, comic.hasHeight, XSD.nonNegativeInteger),
    (character_class, res_weight, comic.hasWeight, XSD.nonNegativeInteger),
    (character_class, res_characterID, comic.hasCharacterID, XSD.nonNegativeInteger),
    (character_class, res_living_status, comic.hasLivingStatus, XSD.string),
    (character_class, res_formely_dec, comic.hasFormerlyDeceased, XSD.string),
    (publisher_class, res_publisher, comic.publisherName, XSD.string),
    (comic_class, res_comicID, comic.hasComicID, XSD.nonNegativeInteger),
    (comic_class, res_description, comic.hasDescription, XSD.string),
    (comic_class, res_title, comic.hasTitle, XSD.string),
):
    _add_exactly_one_restriction(_owner, _node, _prop, _datatype)

# Object-property restrictions: hasSpecies and hasPublisher are declared as
# owl:ObjectProperty with class ranges, so their filler is a class (owl:onClass),
# not a datatype.
_add_exactly_one_restriction(character_class, res_hasSpecies, comic.hasSpecies,
                             comic.Species, filler_predicate=OWL.onClass)
_add_exactly_one_restriction(comic_class, res_has_publisher, comic.hasPublisher,
                             comic.Publisher, filler_predicate=OWL.onClass)


# Align our Character and Comic classes with their DBpedia counterparts.
g.add((character_class, RDFS.subClassOf, dbo.ComicsCharacter))
g.add((comic_class, RDFS.subClassOf, dbo.Comic))

# Declare each class as an owl:Class and give it an English label plus a comment.
for _cls, _label, _comment in (
    (character_class, "Character", "A class defining the character."),
    (comic_class, "Comic", "A class defining the comics."),
    (publisher_class, "Publisher", "A class defining the publisher."),
    (species_class, "Species", "A class defining the species of the character."),
):
    g.add((_cls, RDF.type, owl.Class))
    g.add((_cls, RDFS.label, Literal(_label, lang="en")))
    g.add((_cls, RDFS.comment, Literal(_comment)))

# --- Properties that are declared as owl:ObjectProperty below ---
has_publisher_property = comic.hasPublisher
appearsIn_property = comic.appearsIn
has_characters_property = comic.hasCharacters
has_species_property = comic.hasSpecies

# --- Properties that are declared as owl:DatatypeProperty below ---
# Character attributes
has_name_property = dbp.characterName
has_alignment_property = comic.hasAlignment
has_gender_property = comic.hasGender
has_eye_color_property = comic.hasEyeColor
has_hair_color_property = comic.hasHairColor
has_height_property = comic.hasHeight
has_weight_property = comic.hasWeight
has_LivingStatus_property = comic.hasLivingStatus
has_FormelyDeceased_property = comic.hasFormerlyDeceased
hasCharacterID_property = comic.hasCharacterID
# Species and publisher names
species_name = comic.speciesName
publisher_name = comic.publisherName
# Comic attributes
hasComicID_property = comic.hasComicID
has_title_property = comic.hasTitle
Description = comic.hasDescription
issueNumber = comic.hasIssueNumber

# Properties whose values are resources.
for _prop in (
    has_publisher_property,
    appearsIn_property,
    has_characters_property,
    has_species_property,
):
    g.add((_prop, RDF.type, owl.ObjectProperty))

# Properties whose values are literals.
for _prop in (
    hasCharacterID_property,
    has_LivingStatus_property,
    has_FormelyDeceased_property,
    has_alignment_property,
    has_eye_color_property,
    has_gender_property,
    has_hair_color_property,
    has_height_property,
    has_name_property,
    has_weight_property,
    species_name,
    publisher_name,
    has_title_property,
    Description,
    hasComicID_property,
    issueNumber,
):
    g.add((_prop, RDF.type, owl.DatatypeProperty))

# Functional properties: a subject has at most one value for each of these.
for _prop in (
    has_alignment_property,
    has_eye_color_property,
    has_gender_property,
    has_hair_color_property,
    has_height_property,
    has_weight_property,
    has_species_property,
    has_LivingStatus_property,
    has_FormelyDeceased_property,
    hasCharacterID_property,
    species_name,
    publisher_name,
    has_title_property,
    Description,
    hasComicID_property,
    has_publisher_property,
    issueNumber,
):
    g.add((_prop, RDF.type, owl.FunctionalProperty))

# Inverse-functional properties: the two ID properties identify their subject.
# NOTE(review): both are datatype properties; OWL 2 DL only allows object
# properties to be inverse functional (owl:hasKey is the DL alternative) -
# confirm the intended profile.
for _prop in (hasComicID_property, hasCharacterID_property):
    g.add((_prop, RDF.type, owl.InverseFunctionalProperty))

# Set the rdfs:domain of every property, grouped by the class it describes.

# Properties describing a character.
for _prop in (
    has_alignment_property,
    has_eye_color_property,
    has_gender_property,
    has_hair_color_property,
    has_height_property,
    has_weight_property,
    has_species_property,
    has_LivingStatus_property,
    has_FormelyDeceased_property,
    hasCharacterID_property,
    appearsIn_property,
):
    g.add((_prop, RDFS.domain, character_class))

# The species name describes a species.
g.add((species_name, RDFS.domain, species_class))

# The publisher name describes a publisher. It used to be given the Species
# class as domain, which would type every publisher with a name as a species.
g.add((publisher_name, RDFS.domain, publisher_class))

# Properties describing a comic.
for _prop in (
    has_title_property,
    Description,
    hasComicID_property,
    has_publisher_property,
    issueNumber,
):
    g.add((_prop, RDFS.domain, comic_class))


# Set the rdfs:range of every property.

# Datatype properties with string values. speciesName and publisherName are
# declared as owl:DatatypeProperty and hold the name as a literal, so their
# range is a datatype; giving them the Species / Publisher class as range
# (as before) contradicts that declaration.
for _prop in (
    has_alignment_property,
    has_eye_color_property,
    has_gender_property,
    has_hair_color_property,
    has_title_property,
    Description,
    has_LivingStatus_property,
    has_FormelyDeceased_property,
    species_name,
    publisher_name,
):
    g.add((_prop, RDFS.range, XSD.string))

# Datatype properties with non-negative integer values.
for _prop in (
    has_height_property,
    has_weight_property,
    hasComicID_property,
    hasCharacterID_property,
    issueNumber,
):
    g.add((_prop, RDFS.range, XSD.nonNegativeInteger))

# Object properties: the range is the class of the linked resource.
g.add((has_species_property, RDFS.range, species_class))
g.add((has_publisher_property, RDFS.range, publisher_class))
g.add((appearsIn_property, RDFS.range, comic_class))



# Attach an English rdfs:label to each property.
for _prop, _label in (
    (has_alignment_property, "alignment"),
    (has_eye_color_property, "eye color"),
    (has_gender_property, "gender"),
    (has_hair_color_property, "hair color"),
    (has_height_property, "height"),
    (has_weight_property, "weight"),
    (has_title_property, "comic title"),
    (has_publisher_property, "has publisher"),
    (has_species_property, "species"),
    (Description, "description"),
    (has_LivingStatus_property, "living status"),
    (has_FormelyDeceased_property, "formely deceased"),
    (hasComicID_property, "comic ID"),
    (hasCharacterID_property, "character ID"),
    (species_name, "species name"),
    (publisher_name, "publisher name"),
    (appearsIn_property, "appears in"),
    (issueNumber, "issue number"),
):
    g.add((_prop, RDFS.label, Literal(_label, lang="en")))


# Attach a human-readable rdfs:comment to each property.
for _prop, _comment in (
    (has_alignment_property, "The alignment of the character."),
    (has_eye_color_property, "The eye color of the character."),
    (has_gender_property, "The gender of the character."),
    (has_hair_color_property, "The hair color of the character."),
    (has_height_property, "The height of the character in cm."),
    (has_weight_property, "The weight of the character in kg."),
    (has_title_property, "The title of the comic."),
    (has_publisher_property, "The publisher of the comic."),
    (has_species_property, "The species of the character."),
    # hasDescription has the Comic class as domain, so the comment has to talk
    # about the comic (it previously said "character").
    (Description, "The description of the comic."),
    (has_LivingStatus_property, "The living status of the character."),
    (has_FormelyDeceased_property, "If character was formely deceased."),
    (hasComicID_property, "The ID of the comic."),
    (hasCharacterID_property, "The ID of the character."),
    (species_name, "The name of the species."),
    (publisher_name, "The name of the publisher."),
    (appearsIn_property, "The comics the character appears in."),
    (issueNumber, "The issue number of the comic."),
):
    g.add((_prop, RDFS.comment, Literal(_comment)))





# hasCharacters is declared as the inverse of appearsIn.
g.add((has_characters_property, OWL.inverseOf, appearsIn_property))

# Write the ontology to disk in Turtle syntax.
g.serialize(destination="comicChar.owl", format="turtle")

<Graph identifier=Nc47f1de8a489440a898a8a6286163608 (<class 'rdflib.graph.Graph'>)>

### Mapping Dataset

In [23]:
def _as_int_if_integral(value):
    """Return ``int(value)`` for whole-number floats, otherwise ``value`` unchanged.

    An xsd:int literal needs an integer lexical form, but a float such as
    203.0 is written out as "203.0". NaN, infinities, fractional floats and
    every non-float value are passed through untouched, so this helper never
    makes a row fail that did not fail before.
    """
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


def createTriples():
    """Map the character/comic table onto the ontology and save it as Turtle.

    Steps, in order:
      1. parse the ontology from 'comicChar.owl' and bind the prefixes,
      2. add the Marvel publisher resource,
      3. add one resource per entry of the species list,
      4. for every row of the module-level DataFrame ``df``, add the comics
         named in the row and the character resource with its attributes,
      5. add the ``knows`` / ``believes`` properties and a reified
         "Spider-Man believes (Absorbing_Man knows Iron_Man)" statement,
      6. serialize the graph to 'mappedOntology.ttl'.

    Relies on names defined in earlier cells: ``df``, the namespaces
    ``comic``, ``rdf``, ``rdfs``, ``owl``, ``dbo``, ``dbp``, ``dbr`` and the
    class ``character_class``. Columns of ``df`` are addressed by position.

    Returns:
        None. The result is the file written to disk.
    """
    base_uri = 'http://comicCharacters.com/'

    # create graph, parse the ontology file and register the prefixes
    g = Graph()
    g.parse("comicChar.owl", format="ttl")
    g.bind("comic", comic)
    g.bind("rdf", rdf)
    g.bind("rdfs", rdfs)
    g.bind("owl", owl)
    g.bind("dbo", dbo)
    g.bind("dbp", dbp)

    # list with all the available species
    species_list = ["Alien", "Animal", "Arthrosian", "Asgardian", "Atlantean", "Cyborg", "Deity", "Demon", "Duckworldian","Eternal","Faltine hybrid", "Flora Colossus", "Human", "Inhuman", "Kakarantharaian", "Korbinite", "Lumphomoid", "Mutant", "Neyaphem", "Robot", "Sakaaran Shadow People", "Skrull", "Strontian", "Symbiote", "Vampire", "Xandarian", "Zen-Whoberian"]

    # publisher resource
    Marvel = URIRef(base_uri + 'Marvel_Comics')
    g.add((Marvel, RDF.type, comic.Publisher))
    g.add((Marvel, comic.publisherName, Literal("Marvel Comics", lang="en")))
    g.add((Marvel, RDFS.label, Literal("Marvel Comics", lang="en")))
    g.add((Marvel, RDFS.comment, Literal("A rescource describing Marvel comics.")))
    g.add((Marvel, OWL.sameAs, dbr.Marvel_Comics))

    # instance of owl:AllDifferent that collects the species resources
    all_different = URIRef(base_uri + "allDifferent")
    g.add((all_different, RDF.type, OWL.AllDifferent))

    # iterate over the items in the species list to create the resources
    for species in species_list:
        species_uri = comic[species.replace(' ', '_')]
        g.add((species_uri, RDF.type, comic.Species))
        g.add((species_uri, comic.SpeciesName, Literal(species, datatype=XSD.string)))
        # Build the label from the plain string: wrapping the xsd:string
        # literal in a language-tagged one yields a literal that carries both
        # a datatype and a language, which RDF does not allow.
        g.add((species_uri, RDFS.label, Literal(species, lang="en")))
        g.add((species_uri, RDFS.comment, Literal(f"A rescource describing the species {species}.")))

        # NOTE(review): OWL expects owl:distinctMembers to point at an
        # rdf:List; each species is attached directly here. Left as is because
        # later queries may rely on this shape - confirm before changing.
        g.add((all_different, OWL.distinctMembers, species_uri))

    # iterate over dataframe to create resource for every character
    for i in range(len(df)):
        # ---- read the row (positional columns) ----
        title_list = df.iloc[i, 0].split(", ")
        description_list = df.iloc[i, 1].split(", ")
        alignment = df.iloc[i, 3]
        gender = df.iloc[i, 4]
        eye_color = df.iloc[i, 5]
        species_uri = comic[df.iloc[i, 6].replace(' ', '_')]
        hair_color = df.iloc[i, 7]
        height = df.iloc[i, 9]
        weight = df.iloc[i, 10]
        same_as_uri = URIRef("http://dbpedia.org/resource/" + df.iloc[i, 11])
        living_status = df.iloc[i, 12]
        formerly_deceased = df.iloc[i, 13]
        comic_id = df.iloc[i, 14]
        character_id = df.iloc[i, 15]
        issue = df.iloc[i, 16]
        name = df.iloc[i, 17]

        # ---- literals ----
        # Whole-number floats are turned into ints first so the xsd:int
        # literals get a valid lexical form ("203" rather than "203.0").
        issue_l = Literal(_as_int_if_integral(issue), datatype=XSD.int)
        name_l = Literal(name, datatype=XSD.string)
        alignment_l = Literal(alignment, datatype=XSD.string)
        gender_l = Literal(gender, datatype=XSD.string)
        eye_color_l = Literal(eye_color, datatype=XSD.string)
        hair_color_l = Literal(hair_color, datatype=XSD.string)
        height_l = Literal(_as_int_if_integral(height), datatype=XSD.int)
        weight_l = Literal(_as_int_if_integral(weight), datatype=XSD.int)
        living_status_l = Literal(living_status, datatype=XSD.string)
        formerly_deceased_l = Literal(formerly_deceased, datatype=XSD.string)
        comic_id_l = Literal(_as_int_if_integral(comic_id), datatype=XSD.int)
        character_id_l = Literal(_as_int_if_integral(character_id), datatype=XSD.int)

        # ---- character URI ----
        # One URI is used for every statement about the character, including
        # appearsIn, so that all of them describe the same resource.
        name_slug = name.replace(' ', '_')
        character_uri = comic[name_slug]
        # the comment text uses the underscore form of the name
        comment_l = Literal("A rescource representing " + name_slug, datatype=XSD.string)

        # ---- comics the character appears in ----
        # NOTE(review): assumes description_list has one entry per title; a
        # description that itself contains ", " would shift the pairing or
        # raise IndexError - verify against the data.
        for index, raw_title in enumerate(title_list):
            # spaces are replaced because they are not allowed in an IRI
            title_uri = URIRef(base_uri + raw_title.replace(' ', '_') + "_MV")
            g.add((character_uri, comic.appearsIn, title_uri))

            # human-readable title: underscores back to spaces, double spaces collapsed
            clean_title = raw_title.replace('_', ' ').replace('  ', ' ')

            g.add((title_uri, RDF.type, comic.Comic))
            g.add((title_uri, RDFS.label, Literal(raw_title)))
            g.add((title_uri, comic.hasTitle, Literal(clean_title, datatype=XSD.string)))
            g.add((title_uri, RDFS.comment, Literal(f"The resource of the comic {clean_title}", datatype=XSD.string)))
            g.add((title_uri, comic.hasDescription, Literal(description_list[index], datatype=XSD.string)))
            g.add((title_uri, comic.comicID, comic_id_l))
            g.add((title_uri, comic.hasPublisher, Marvel))
            g.add((title_uri, comic.issueNumber, issue_l))

        # ---- statements for the character ----
        g.add((character_uri, RDF.type, character_class))
        g.add((character_uri, dbp.characterName, name_l))
        g.add((character_uri, comic.hasAlignment, alignment_l))
        g.add((character_uri, comic.hasGender, gender_l))
        g.add((character_uri, comic.hasEyeColor, eye_color_l))
        g.add((character_uri, comic.hasHairColor, hair_color_l))
        g.add((character_uri, comic.hasHeight, height_l))
        g.add((character_uri, comic.hasWeight, weight_l))
        g.add((character_uri, comic.hasSpecies, species_uri))
        g.add((character_uri, comic.hasLivingStatus, living_status_l))
        g.add((character_uri, comic.FormelyDeceased, formerly_deceased_l))
        g.add((character_uri, RDFS.comment, comment_l))
        g.add((character_uri, OWL.sameAs, same_as_uri))
        g.add((character_uri, comic.characterID, character_id_l))

    # ---- knows / believes properties ----
    # rdflib's built-in OWL namespace is used for the schema terms; the OWL
    # class is owl:SymmetricProperty (capital S).
    knows_property = comic.knows
    g.add((knows_property, RDF.type, OWL.ObjectProperty))
    g.add((knows_property, RDFS.domain, character_class))
    g.add((knows_property, RDFS.range, character_class))
    g.add((knows_property, RDF.type, OWL.SymmetricProperty))
    g.add((knows_property, RDFS.label, Literal("knows", lang="en")))
    g.add((knows_property, RDFS.comment, Literal("A character knows another character.")))

    believes_property = comic.believes
    g.add((believes_property, RDF.type, OWL.ObjectProperty))
    g.add((believes_property, RDFS.domain, character_class))
    g.add((believes_property, RDFS.label, Literal("believes", lang="en")))
    g.add((believes_property, RDFS.comment, Literal("A character believes something.")))

    # ---- reified statement ----
    # outer statement: Spider-Man believes <inner statement>
    reified_statement = comic.reifiedStatement
    inner_statement = BNode()
    g.add((reified_statement, RDF.type, RDF.Statement))
    g.add((reified_statement, RDF.subject, URIRef(base_uri + "Spider-Man")))
    g.add((reified_statement, RDF.predicate, believes_property))
    g.add((reified_statement, RDF.object, inner_statement))

    # inner statement: Absorbing_Man knows Iron_Man; typed rdf:Statement like
    # the outer one so both reifications are described the same way
    g.add((inner_statement, RDF.type, RDF.Statement))
    g.add((inner_statement, RDF.subject, URIRef(base_uri + "Absorbing_Man")))
    g.add((inner_statement, RDF.predicate, knows_property))
    g.add((inner_statement, RDF.object, URIRef(base_uri + "Iron_Man")))

    print("\nSaving graph to 'mappedOntology.ttl':\n\n")
    g.serialize(destination="mappedOntology.ttl", format="ttl")

# Run the mapping; this writes 'mappedOntology.ttl' relative to the current working directory.
createTriples()


Saving graph to 'mappedOntology.ttl':


