In [1]:
# Import definitions
import pwiki
import pwiki.wiki
import mwparserfromhell
import re
import os

from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, XSD, OWL


In [2]:
# piwiki configuration

bulbabot = pwiki.wiki.Wiki(api_endpoint="https://bulbapedia.bulbagarden.net/w/api.php")

In [None]:
# piwiki basic tests
print(bulbabot.exists("Bulbasaur"))

bulbasaur_test = bulbabot.parse(title="Bulbasaur")
print( bulbasaur_test)

In [None]:
#Bulbasaur page content test
bulbasaur_test2 = bulbabot.page_text(title="Bulbasaur (Pokémon)")
bulbasaur_test2

In [25]:
# FUNCTION DEFINITION : pokémon infobox text extraction
def extract_infobox(content):
    match = re.search(r"{{Pokémon Infobox.*?}}", content, re.DOTALL)
    if match:
        return match.group(0)
    return None


def extract_region_infobox(content):
    match = re.search(r"{{RegionInfobox.*?}}", content, re.DOTALL)
    if match:
        return match.group(0)
    return None

def extract_character_infobox(content):
    match = re.search(r"{{Character Infobox.*?}}", content, re.DOTALL)
    if match:
        return match.group(0)
    return None

def extract_move_infobox(content):
    match = re.search(r"{{MoveInfobox.*?}}", content, re.DOTALL)
    if match:
        return match.group(0)
    return None

def extract_location_infobox(content):
    match = re.search(r"{{Infobox location.*?}}", content, re.DOTALL)
    if match:
        return match.group(0)
    return None

In [None]:
# infobox extration test
infobox = extract_infobox(bulbasaur_test2)
infobox

In [4]:
# FUNCTION DEFINITION : parsing of the infobox text 
def parse_infobox(infobox):
    properties = {}
    lines = infobox.split("\n")
    for line in lines:
        if line.startswith("|"):
            key_value = line[1:].split("=", 1)  # Remove leading `|` and split at `=`
            if len(key_value) == 2:
                key, value = key_value
                new_key = key.strip()
                new_value = value.strip().replace("[[","").replace("]]","").replace("{{","").replace("}}","")
                new_value = re.sub(r'<!--.*?-->', '', new_value, flags=re.DOTALL)
                if new_key == "size":
                    new_value = new_value.replace("px", "")
                if not("|" in new_value):
                    properties[new_key] = new_value
    return properties

In [None]:
# Infobox text parsing test 
properties = parse_infobox(infobox)
properties

In [5]:
# Vocabulary Graph 

#path 
vocabulary_path = f'C:/Users/HP/Documents/07. EMSE/Master DSC/WS/Project-BulbapediaKG/BulbapediaKG1/Vocabulary/'

vocabulary_graph = Graph()

for file_name in os.listdir(vocabulary_path):
    if file_name.endswith('.ttl'):
        vocabulary_graph.parse(os.path.join(vocabulary_path,file_name), format="ttl")
        print(file_name)


property_mappings = {}

# Iterate over the graph to get class properties and their types
for subject, predicate, obj in vocabulary_graph:
    if predicate == RDF.type and obj == RDFS.Class:
        # Class found, check for properties of this class
        class_uri = subject
        for s, p, o in vocabulary_graph.triples((None, RDFS.domain, class_uri)):
            if p == RDFS.domain and o == class_uri:
                # Now we know this property is for our class
                property_uri = s
                # Check for range (type) of the property
                range_type = None
                values_list = []

                for _, p, o in vocabulary_graph.triples((property_uri, RDFS.range, None)):
                    range_type = o

                values_list.append(range_type)
                match range_type:
                    case XSD.string:
                        values_list.append('Literal')
                    case XSD.integer:
                        values_list.append('Integer')
                    case XSD.date:
                        values_list.append('Date')
                    case XSD.decimal:
                        values_list.append('Decimal')
                    case _:
                        values_list.append('URIRef')

                property_mappings[str(property_uri)] = values_list
                    

property_mappings


infoboxCharacter.ttl
infoboxLocation.ttl
infoboxMove.ttl
infoboxPokemon.ttl
infoboxRegion.ttl


{'http://example.org/move/n': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'),
  'Integer'],
 'http://schema.org/name': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'),
  'Literal'],
 'http://example.org/move/jname': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'),
  'Literal'],
 'http://example.org/move/jtrans': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'),
  'Literal'],
 'http://example.org/move/jtranslit': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'),
  'Literal'],
 'http://example.org/move/gameimage': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#anyURI'),
  'URIRef'],
 'http://example.org/move/gameimage2': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#anyURI'),
  'URIRef'],
 'http://example.org/move/gameimagewidth': [rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'),
  'Integer'],
 'http://example.org/move/type': [rdflib.term.URIRef('http://example.org/move/type'),
  'URI

In [31]:
# FUNCTION DEFINITION : mapping of pokemon infobox properties to existing graph
def map_infobox_to_rdf(name_space, graph, property_mappings, properties):
    
    #POKEMON = Namespace("http://example.org/pokemon/")
    name_space = name_space
    nstype = str(name_space)
    nstype = nstype.rstrip('/').split('/')[-1]
    RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    
    resoucre_uri = URIRef(name_space[properties.get("name", "Unknown").replace(" ", "_")])
    if properties.get("name", "Unknown").replace(" ", "_") == "Unkown":
        resoucre_uri = URIRef(name_space[properties.get("translated_name", "Unknown").replace(" ", "_")])
    graph.add((resoucre_uri, RDF.type, URIRef(name_space + nstype)))

    for propertyUri, propertyRestriction in property_mappings.items():
        if name_space in propertyUri:
            property_name = propertyUri[propertyUri.rfind('/') + 1:]
            if property_name in properties:
                match propertyRestriction[1]:
                    case "Literal":
                        graph.add((resoucre_uri, URIRef(propertyUri), Literal(properties[property_name])))
                    case "Integer":
                        if isinstance(int(properties[property_name]), int):
                            graph.add((resoucre_uri, URIRef(propertyUri), Literal(int(properties[property_name]))))
                    case "Date": 
                        graph.add((resoucre_uri, URIRef(propertyUri), Literal(properties[property_name], datatype=XSD.date)))
                    case "Decimal": 
                        graph.add((resoucre_uri, URIRef(propertyUri), Literal(properties[property_name], datatype=XSD.decimal)))
                    case _:
                        uriString = properties[property_name].replace(" ","_")
                        if str(name_space) in str(propertyRestriction[0]):
                            graph.add((resoucre_uri, URIRef(propertyUri), URIRef(propertyRestriction[0] +"/" +uriString)))
                        else:
                            graph.add((resoucre_uri, URIRef(propertyUri), URIRef(uriString)))
                            graph.add((URIRef(uriString), RDF.type ,propertyRestriction[0]))


    return graph


In [None]:
pokemon_infobox_graph = Graph()
POKEMON = Namespace("http://example.org/pokemon/")
pokemon_infobox_graph = map_infobox_to_rdf(POKEMON, pokemon_infobox_graph, property_mappings, properties=properties) 
print(pokemon_infobox_graph.serialize(format="turtle"))

In [41]:
# Get the list of pokemons
poke_list = bulbabot.page_text("List_of_Pokémon_by_National_Pokédex_number")
poke_list


'{{shortcut|3|Ndex|Olddex|Natdex}}\nThis is a list of Pokémon in the order dictated by the [[National Pokédex]], meaning that Pokémon from the [[Kanto]] region will appear first, followed by those from [[Johto]], [[Hoenn]], [[Sinnoh]], [[Unova]], [[Kalos]], [[Alola]], [[Galar]], [[Hisui]], and [[Paldea]]. As of the release of the [[Mochi Mayhem]] epilogue of [[The Indigo Disk]] expansion for {{g|Scarlet and Violet}}, there are 1025 Pokémon in total. Each region\'s set of Pokémon starts with its own set of [[first partner Pokémon]] and their [[evolution]]s, going in order of {{t|Grass}}, {{t|Fire}}, {{t|Water}}; the only exception is Unova, which begins with {{p|Victini}}, who is then followed by the first partner Pokémon. \n\nThe first 151 entries in this Pokédex also serve as [[List of Pokémon by Kanto Pokédex number|Kanto\'s]] [[regional Pokédex]]. In [[Generation II]], this Pokédex order was known as the "Old Pokédex", with a [[List of Pokémon by New Pokédex number|new order]] that 

In [42]:
# FUNTION DEFINITION : get pokemon list
def extract_third_substrings(input_string):
    # Define the regular expression pattern
    # pattern = r"\{\{ndex\|\d+\|([^\|]+)\|[^\|]+\|[^\|]+\}\}"
    # pattern = r"\{\{ndex\|\d+\|([^\|]+)(?:\|[^\|]+)*\}\}"
    pattern = r"\{\{ndex\|\d+\|([^\|]+)\|?.*?\}\}"
    
    # Find all matches of the pattern
    matches = re.findall(pattern, input_string)
    
    # Return the list of third substrings
    return matches

In [43]:
poke_list = extract_third_substrings(poke_list)
poke_list = [name + " (Pokémon)" for name in poke_list]
poke_list

['Bulbasaur (Pokémon)',
 'Ivysaur (Pokémon)',
 'Venusaur (Pokémon)',
 'Charmander (Pokémon)',
 'Charmeleon (Pokémon)',
 'Charizard (Pokémon)',
 'Squirtle (Pokémon)',
 'Wartortle (Pokémon)',
 'Blastoise (Pokémon)',
 'Caterpie (Pokémon)',
 'Metapod (Pokémon)',
 'Butterfree (Pokémon)',
 'Weedle (Pokémon)',
 'Kakuna (Pokémon)',
 'Beedrill (Pokémon)',
 'Pidgey (Pokémon)',
 'Pidgeotto (Pokémon)',
 'Pidgeot (Pokémon)',
 'Rattata (Pokémon)',
 'Raticate (Pokémon)',
 'Spearow (Pokémon)',
 'Fearow (Pokémon)',
 'Ekans (Pokémon)',
 'Arbok (Pokémon)',
 'Pikachu (Pokémon)',
 'Raichu (Pokémon)',
 'Sandshrew (Pokémon)',
 'Sandslash (Pokémon)',
 'Nidoran♀ (Pokémon)',
 'Nidorina (Pokémon)',
 'Nidoqueen (Pokémon)',
 'Nidoran♂ (Pokémon)',
 'Nidorino (Pokémon)',
 'Nidoking (Pokémon)',
 'Clefairy (Pokémon)',
 'Clefable (Pokémon)',
 'Vulpix (Pokémon)',
 'Ninetales (Pokémon)',
 'Jigglypuff (Pokémon)',
 'Wigglytuff (Pokémon)',
 'Zubat (Pokémon)',
 'Golbat (Pokémon)',
 'Oddish (Pokémon)',
 'Gloom (Pokémon)',
 'V

In [44]:
POKEMON = Namespace("http://example.org/pokemon/")
intial_graph = Graph()
final_graph = Graph()

for pokemon in poke_list:
    try:
        # Step 1: Fetch the page text
        result1 = bulbabot.page_text(title=pokemon)
        
        # Step 2: Extract the infobox
        result2 = extract_infobox(result1)
        
        # Step 3: Parse the infobox
        result3 = parse_infobox(result2)
        
        # Step 4: Map the parsed infobox to RDF
        final_graph = map_infobox_to_rdf(POKEMON, intial_graph, property_mappings, result3)
        
        # Log or handle the final result (optional)
        # print(f"Successfully processed {pokemon}")
    except Exception as e:
        # Handle any errors that occur in the pipeline
        print(f"Error processing {pokemon}: {e}")

In [46]:
# Save or print the created graph
graph_path = "GeneratedGraphs/"
print(final_graph.serialize(format="turtle"))
with open(os.path.join(graph_path, "poke_graph.ttl"), "w", encoding="utf-8") as file:
    file.write(final_graph.serialize(format="turtle"))

@prefix ns1: <http://example.org/pokemon/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ns1:Abomasnow a ns1:pokemon ;
    ns1:abilityd <http://example.org/pokemon/ability/Soundproof> ;
    ns1:category <http://example.org/pokemon/category/Frost_Tree> ;
    ns1:color <White> ;
    ns1:friendship 70 ;
    ns1:height-m 2.2 ;
    ns1:jname "ユキノオー" ;
    ns1:jtranslit "Yukino'ō" ;
    ns1:name "Abomasnow" ;
    ns1:ndex 460 ;
    ns1:tmname "Yukinooh" ;
    ns1:type1 <http://example.org/pokemon/type/Grass> ;
    ns1:type2 <http://example.org/pokemon/type/Ice> ;
    ns1:weight-kg 135.5 .

ns1:Abra a ns1:pokemon ;
    ns1:ability1 <http://example.org/pokemon/ability/Synchronize> ;
    ns1:ability2 <http://example.org/pokemon/ability/Inner_Focus> ;
    ns1:abilityd <http://example.org/pokemon/ability/Magic_Guard> ;
    ns1:category <http://example.org/pokemon/category/Psi> ;
    ns1:color <Brown> ;
    ns1:friendship 70 ;
    ns1:height-m 0.9 ;
    ns1:jname "ケーシィ" ;
    ns1:jtranslit

In [7]:
Regions_wiht_infobox = [
    "Kanto",
    "Johto",
    "Hoenn",
    "Orre",
    "Sevii Islands",
    "Orange Islands",
    "Pokémon Island",
    "White City",
    "Fiore",
    "Sinnoh",
    "Pokétopia",
    "Pokémon world (Mystery Dungeon)",
    "Mintale Town",
    "Almia",
    "Trading Card Game Islands",
    "Oblivia",
    "Unova",
    "PokéPark (game)",
    "Ransei",
    "Decolore Islands",
    "Kalos",
    "Ferrum",
    "Alola",
    "Carmonte Island",
    "Ryme City",
    "Tumblecube Island",
    "Galar",
    "Pasio",
    "Isle of Armor",
    "Crown Tundra",
    "Lental",
    "Hisui",
    "Aeos Island",
    "Paldea",
    "Kitakami",
    "Blueberry Academy"
]

In [8]:
final_region_graph = Graph()
intial_region_graph = Graph()
REGION = Namespace("http://example.org/region/")
for region in Regions_wiht_infobox:
    try:
        # Step 1: Fetch the page text
        result1 = bulbabot.page_text(title=region)
        
        # Step 2: Extract the infobox
        result2 = extract_region_infobox(result1)
        
        # Step 3: Parse the infobox
        result3 = parse_infobox(result2)
        
        # Step 4: Map the parsed infobox to RDF
        # final_region_graph = map_region_infobox_to_rdf(intial_region_graph, property_mappings, result3)}
        final_region_graph = map_infobox_to_rdf(REGION, intial_region_graph, property_mappings, result3)
        
        # Log or handle the final result (optional)
        # print(f"Successfully processed {region}")
    except Exception as e:
        # Handle any errors that occur in the pipeline
        print(f"Error processing {region}: {e}")
        print(e)

In [37]:
# Save or print the created graph
print(final_region_graph.serialize(format="turtle"))
with open(os.path.join(graph_path,"region_graph.ttl"), "w", encoding="utf-8") as file:
    file.write(final_region_graph.serialize(format="turtle"))

@prefix ns1: <http://example.org/region/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ns1:Aeos_Island a ns1:region ;
    ns1:caption "Aeos Island" ;
    ns1:image <UNITE_Aeos_Island.png> ;
    ns1:introduction "Pokémon UNITE" ;
    ns1:jname "エオス島" ;
    ns1:professor <Professor_Phorus> ;
    ns1:regioncolor <Orange> ;
    ns1:size 250 ;
    ns1:tmname "Aeos Island" .

ns1:Almia a ns1:region ;
    ns1:image <Almia.png> ;
    ns1:introduction "Pokémon Ranger: Shadows of Almia" ;
    ns1:jname "アルミア" ;
    ns1:professor <Professor_Hastings> ;
    ns1:regioncolor <SoA> ;
    ns1:size 200 ;
    ns1:tmname "Almia" ;
    ns1:villain "Team Dim Sun" .

ns1:Alola a ns1:region ;
    ns1:image <Alola_USUM_artwork.png> ;
    ns1:jname "アローラ" ;
    ns1:regioncolor <alola> ;
    ns1:size 300 ;
    ns1:tmname "Alola" .

ns1:Blueberry_Academy a ns1:region ;
    ns1:image <Blueberry_Academy_entrance.png> ;
    ns1:jname "ブルーベリー学園" ;
    ns1:regioncolor <Blueberry> ;
    ns1:size 300 ;
    ns1

In [14]:
with open('RessourcesList\character_list.txt', 'r') as file:
    character_list = [line.strip() for line in file]

print(character_list)

['Tracey Sketchit', 'Max', 'Misty', 'Steven Stone', 'Norman', 'Ritchie', 'Giovanni', 'Erika', 'Wes', 'Red (game)', 'Jessie', 'James', 'Lt. Surge', 'Sabrina', 'Blaine', 'Lorelei', 'Lance', 'Nurse Joy', 'Name Rater', 'Todd Snap', 'Leaf (game)', 'Ethan (game)', 'Kris (game)', 'May (game)', 'Anabel', 'Peanut', 'Michael', 'Solana', 'Fantina', 'Lunick', 'Dawn (game)', 'Mark (TCG GB)', 'Mint (TCG GB)', 'Lucy Fleetfoot', 'Mai', 'Marnie', 'PokÃ©mon Center lady', 'Kellyn', 'Kate (Ranger)', 'Koga', 'Ash Ketchum', 'Brock', 'Kanata (CCP)', 'Primo', 'Cynthia', 'Lyra (game)', 'May (anime)', 'Carob', 'Day-Care Couple', 'Dawn (anime)', 'Ben (Ranger)', 'Summer (Ranger)', 'Misty (anime)', 'Brock (anime)', 'Professor Juniper', 'Hilda (game)', 'Hilbert (game)', 'Cheren', 'Bianca', 'N', 'Fennel', 'Iris (anime)', 'Cilan', 'Lenora', 'Cress', 'Chili', 'Ghetsis', 'Iris', 'Clay', 'Drayden', 'Shadow Triad', 'Amanita', 'Giallo', 'Cedric Juniper', 'Hawes', 'Ingo', 'Emmet', 'Charles', 'Anthea and Concordia', 'Gorm',

In [12]:
CHARACTER = Namespace("http://example.org/character/")
intial_character_graph = Graph()
final_character_graph = Graph()

for character in character_list:
    try:
        # Step 1: Fetch the page text
        result1 = bulbabot.page_text(title=character)
        
        # Step 2: Extract the infobox
        result2 = extract_character_infobox(result1)
        
        # Step 3: Parse the infobox
        result3 = parse_infobox(result2)
        
        # Step 4: Map the parsed infobox to RDF
        final_character_graph = map_infobox_to_rdf(CHARACTER, intial_character_graph, property_mappings, result3)
        
        # Log or handle the final result (optional)
        print(f"Successfully processed {character}")
    except Exception as e:
        # Handle any errors that occur in the pipeline
        print(f"Error processing {character}: {e}")

Successfully processed Tracey Sketchit
Successfully processed Max
Successfully processed Misty
Successfully processed Steven Stone
Successfully processed Norman
Successfully processed Ritchie
Successfully processed Giovanni
Successfully processed Erika
Successfully processed Wes
Successfully processed Red (game)
Error processing Jessie: invalid literal for int() with base 10: 'Unconfirmed<ref name="age"/> '
Error processing James: invalid literal for int() with base 10: 'Unconfirmed<ref name="age"/>'
Successfully processed Lt. Surge
Successfully processed Sabrina
Successfully processed Blaine
Successfully processed Lorelei
Successfully processed Lance
Successfully processed Nurse Joy
Successfully processed Name Rater
Successfully processed Todd Snap
Successfully processed Leaf (game)
Successfully processed Ethan (game)
Successfully processed Kris (game)
Successfully processed May (game)
Successfully processed Anabel
Successfully processed Peanut
Successfully processed Michael
Successfu

In [39]:
# Save or print the created graph
print(final_character_graph.serialize(format="turtle"))
with open(os.path.join(graph_path,"character_graph.ttl"), "w", encoding="utf-8") as file:
    file.write(final_character_graph.serialize(format="turtle"))

@prefix ns1: <http://example.org/character/> .

ns1:Aliquis a ns1:character ;
    ns1:bordercolor <D87257> ;
    ns1:caption "Aliquis" ;
    ns1:color <4B5293> ;
    ns1:corecolor <F1CB90> ;
    ns1:eyes <Blue> ;
    ns1:gender <Male> ;
    ns1:hair <Dark_blue_(Yellow_highlights)> ;
    ns1:hometown <Unknown> ;
    ns1:image "Aliquis PW.png" ;
    ns1:jname "アリキス" ;
    ns1:region <Paldea> ;
    ns1:tmname "Aliquis" .

ns1:Anabel a ns1:character ;
    ns1:bordercolor <4a474a> ;
    ns1:caption "Concept art from Pokémon Sun and Moon" ;
    ns1:color <c0b2dd> ;
    ns1:corecolor <e5cff9> ;
    ns1:gender <Female> ;
    ns1:image "Anabel SM concept art.png" ;
    ns1:jname "リラ" ;
    ns1:tmname "Lila" .

ns1:Anthea_and_Concordia a ns1:character ;
    ns1:bordercolor <f1c8b4> ;
    ns1:color <fffeba> ;
    ns1:corecolor <f1c8b4> ;
    ns1:image "Black 2 White 2 Anthea and Concordia.png" ;
    ns1:jname "愛の女神と平和の女神" ;
    ns1:tmname "<br />Goddess of Love and Goddess of Peace" .

ns1:Dianth

In [18]:
with open('RessourcesList\move_list.txt', 'r') as file:
    move_list = [line.strip() for line in file]

print(move_list)

['Hidden Power (move)', 'Volt Tackle (move)', 'Earthquake (move)', 'Surf (move)', 'Toxic (move)', 'Double Team (move)', 'Rest (move)', 'Double-Edge (move)', 'Swagger (move)', 'Protect (move)', 'Endure (move)', 'Defense Curl (move)', 'Rain Dance (move)', 'Rock Smash (move)', 'Thunder (move)', 'Strength (move)', 'Pound (move)', 'Karate Chop (move)', 'Double Slap (move)', 'Comet Punch (move)', 'Mega Punch (move)', 'Pay Day (move)', 'Fire Punch (move)', 'Ice Punch (move)', 'Thunder Punch (move)', 'Scratch (move)', 'Vise Grip (move)', 'Guillotine (move)', 'Razor Wind (move)', 'Swords Dance (move)', 'Cut (move)', 'Gust (move)', 'Wing Attack (move)', 'Whirlwind (move)', 'Fly (move)', 'Bind (move)', 'Slam (move)', 'Vine Whip (move)', 'Stomp (move)', 'Double Kick (move)', 'Mega Kick (move)', 'Jump Kick (move)', 'Rolling Kick (move)', 'Sand Attack (move)', 'Headbutt (move)', 'Horn Attack (move)', 'Fury Attack (move)', 'Horn Drill (move)', 'Tackle (move)', 'Body Slam (move)', 'Wrap (move)', 'Take

In [23]:
MOVE = Namespace("http://example.org/move/")
intial_move_graph = Graph()
final_move_graph = Graph()

for individual_move in move_list:
    try:
        # Step 1: Fetch the page text
        result1 = bulbabot.page_text(title=individual_move)
        
        # Step 2: Extract the infobox
        result2 = extract_move_infobox(result1)
        
        # Step 3: Parse the infobox
        result3 = parse_infobox(result2)
        
        # Step 4: Map the parsed infobox to RDF
        final_move_graph = map_infobox_to_rdf(MOVE, intial_move_graph, property_mappings, result3)
        
        # Log or handle the final result (optional)
        print(f"Successfully processed {individual_move}")
    except Exception as e:
        # Handle any errors that occur in the pipeline
        print(f"Error processing {individual_move}: {e}")

Successfully processed Hidden Power (move)
Successfully processed Volt Tackle (move)
Successfully processed Earthquake (move)
Successfully processed Surf (move)
Error processing Toxic (move): invalid literal for int() with base 10: '—'
Error processing Double Team (move): invalid literal for int() with base 10: '—'
Successfully processed Rest (move)
Error processing Double-Edge (move): invalid literal for int() with base 10: '300px'
Error processing Swagger (move): invalid literal for int() with base 10: '—'
Error processing Protect (move): invalid literal for int() with base 10: '—'
Error processing Endure (move): invalid literal for int() with base 10: '—'
Error processing Defense Curl (move): invalid literal for int() with base 10: '—'
Error processing Rain Dance (move): invalid literal for int() with base 10: '—'
Successfully processed Rock Smash (move)
Successfully processed Thunder (move)
Successfully processed Strength (move)
Successfully processed Pound (move)
Error processing 

In [38]:
# Save or print the created graph
print(final_move_graph.serialize(format="turtle"))
with open(os.path.join(graph_path,"move_graph.ttl"), "w", encoding="utf-8") as file:
    file.write(final_move_graph.serialize(format="turtle"))

@prefix ns1: <http://example.org/move/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ns1:Absorb a ns1:move ;
    ns1:damagecategory <http://example.org/move/damagecategory/Special> ;
    ns1:gameimage <Absorb_IX.png> ;
    ns1:gameimage2 <Absorb_IX_2.png> ;
    ns1:gameimagewidth 300 ;
    ns1:jname "すいとる" ;
    ns1:jtrans "Absorb" ;
    ns1:jtranslit "Suitoru" ;
    ns1:n 71 ;
    ns1:type <http://example.org/move/type/Grass> .

ns1:Acid a ns1:move ;
    ns1:accuracy 100 ;
    ns1:appeal 1 ;
    ns1:appeal6 3 ;
    ns1:appealsc 3 ;
    ns1:basepp 30 ;
    ns1:category "Smart" ;
    ns1:category6 "Clever" ;
    ns1:cdesc "Badly startles the Pokémon in front." ;
    ns1:cdesc6 "Brings down the energy of any Pokémon that have already used a move this turn." ;
    ns1:damagecategory <http://example.org/move/damagecategory/Special> ;
    ns1:gameimage <Acid_IX.png> ;
    ns1:gameimage2 <Acid_IX_2.png> ;
    ns1:gameimagewidth 300 ;
    ns1:gen "I" ;
    ns1:jam 4 ;
    ns1:jam6 0 

In [26]:
with open('RessourcesList\location_list.txt', 'r') as file:
    location_list = [line.strip() for line in file]

print(location_list)

['Tohjo Falls', 'Realgam Tower', 'Viridian Forest', 'Outskirt Stand', 'Petalburg Woods', 'The Under Subway', 'Relic Cave', 'Mt. Battle', 'Cipher Lab', 'One Island', 'Two Island', 'Three Island', 'Silph Co.', 'Five Island', 'Six Island', 'Devon Corporation', 'Bell Tower', 'Pewter Museum of Science', 'Celadon Department Store', 'Goldenrod Department Store', 'Seven Island', 'Mt. Moon', 'Rock Tunnel', 'PokÃ©mon Tower', 'Kanto Power Plant', 'PokÃ©mon Mansion (Kanto)', 'Victory Road (Kanto)', 'Sprout Tower', 'Cerulean Cave', 'Trainer Hill', 'Ilex Forest', 'TV Mauville', 'Faraway Island', 'Southern Island', 'Pattern Bush', 'Whirl Islands', 'Mt. Chimney', 'Meteor Falls', 'Altering Cave', 'Rusturf Tunnel', 'S.S. Tidal', 'Icefall Cave', 'Tanoby Key', 'Orre Colosseum', 'Deep Colosseum', 'Pyrite Colosseum', 'ONBS', 'Citadark Isle', 'Shoal Cave', 'Granite Cave', 'Scorched Slab', 'Seafloor Cavern', 'Sealed Chamber', 'Mt. Pyre', 'Sky Pillar', 'Island Cave', 'Desert Ruins', 'Ancient Tomb', 'Desert Und

In [32]:
LOCATION = Namespace("http://example.org/location/")
intial_location_graph = Graph()
final_location_graph = Graph()

for individual_location in location_list:
    try:
        # Step 1: Fetch the page text
        result1 = bulbabot.page_text(title=individual_location)
        
        # Step 2: Extract the infobox
        result2 = extract_location_infobox(result1)
        
        # Step 3: Parse the infobox
        result3 = parse_infobox(result2)
        
        # Step 4: Map the parsed infobox to RDF
        final_location_graph = map_infobox_to_rdf(LOCATION, intial_location_graph, property_mappings, result3)
        
        # Log or handle the final result (optional)
        print(f"Successfully processed {individual_location}")
    except Exception as e:
        # Handle any errors that occur in the pipeline
        print(f"Error processing {individual_location}: {e}")

Successfully processed Tohjo Falls
Successfully processed Realgam Tower
Successfully processed Viridian Forest
Successfully processed Outskirt Stand
Successfully processed Petalburg Woods
Successfully processed The Under Subway
Successfully processed Relic Cave
Successfully processed Mt. Battle
Successfully processed Cipher Lab
Successfully processed One Island
Successfully processed Two Island
Successfully processed Three Island
Successfully processed Silph Co.
Successfully processed Five Island
Successfully processed Six Island
Successfully processed Devon Corporation
Successfully processed Bell Tower
Successfully processed Pewter Museum of Science
Successfully processed Celadon Department Store
Successfully processed Goldenrod Department Store
Successfully processed Seven Island
Successfully processed Mt. Moon
Successfully processed Rock Tunnel
Error processing PokÃ©mon Tower: 'NoneType' object has no attribute 'split'
Successfully processed Kanto Power Plant
Error processing PokÃ©m

KeyboardInterrupt: 

In [33]:
# Save or print the created graph
print(final_location_graph.serialize(format="turtle"))
# with open(os.path.join(graph_path,"location_graph.ttl"), "w", encoding="utf-8") as file:
#     file.write(final_location_graph.serialize(format="turtle"))

@prefix ns1: <http://example.org/location/> .

ns1:Unknown a ns1:location ;
    ns1:slogan "Full Selection of Pokémon Goods!" .




In [73]:
# def map_infobox_to_rdf(properties):
#     # Define RDF namespaces
#     POKEMON = Namespace("http://example.org/pokemon/")
#     RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

#     # Initialize RDF graph
#     g = Graph()

#     # Add Pokémon URI
#     pokemon_uri = URIRef(POKEMON[properties.get("name", "Unknown").replace(" ", "_")])
#     g.add((pokemon_uri, RDF.type, POKEMON.Pokémon))

#     # Map general properties
#     mappings = {
#         "name": POKEMON.name,
#         "jname": POKEMON.japaneseName,
#         "tmname": POKEMON.trademarkedJapaneseName,
#         "jtranslit": POKEMON.japaneseTransliteration,
#         "category": POKEMON.category,
#         "ndex": POKEMON.nationalDexNumber,
#         "forme": POKEMON.numberOfForms,
#         "type1": POKEMON.primaryType,
#         "type2": POKEMON.secondaryType,
#         "ability1": POKEMON.ability1,
#         "ability2": POKEMON.ability2,
#         "abilityd": POKEMON.hiddenAbility,
#         "height-m": POKEMON.heightInMeters,
#         "height-ftin": POKEMON.heightInFeetInches,
#         "weight-kg": POKEMON.weightInKilograms,
#         "weight-lbs": POKEMON.weightInPounds,
#         "catchrate": POKEMON.catchRate,
#         "gendercode": POKEMON.genderRatioCode,
#         "egggroup1": POKEMON.eggGroup1,
#         "egggroup2": POKEMON.eggGroup2,
#         "color": POKEMON.color,
#         "friendship": POKEMON.baseFriendship,
#         "generation": POKEMON.generationIntroduced,
#         "expyield": POKEMON.baseExperienceYield,
#         "evtotal": POKEMON.evTotal,
#         "evhp": POKEMON.evYieldHP,
#         "evat": POKEMON.evYieldAttack,
#         "evde": POKEMON.evYieldDefense,
#         "evsa": POKEMON.evYieldSpecialAttack,
#         "evsd": POKEMON.evYieldSpecialDefense,
#         "evsp": POKEMON.evYieldSpeed
#     }

#     # Map form-specific properties dynamically
#     for i in range(2, 7):  # Forms 2 through 6
#         form_suffix = f"form{i}"
#         mappings.update({
#             f"{form_suffix}": POKEMON[f"form{i}Name"],
#             f"{form_suffix}type1": POKEMON[f"form{i}PrimaryType"],
#             f"{form_suffix}type2": POKEMON[f"form{i}SecondaryType"],
#             f"height-m{i}": POKEMON[f"form{i}HeightInMeters"],
#             f"height-ftin{i}": POKEMON[f"form{i}HeightInFeetInches"],
#             f"weight-kg{i}": POKEMON[f"form{i}WeightInKilograms"],
#             f"weight-lbs{i}": POKEMON[f"form{i}WeightInPounds"],
#         })

#     # Generate RDF triples for properties that exist in the input
#     for key, predicate in mappings.items():
#         if key in properties and properties[key]:
#             g.add((pokemon_uri, predicate, Literal(properties[key])))

#     return g

In [None]:
# grap = map_infobox_to_rdf(properties)
# print(grap.serialize(format="turtle"))

In [100]:
# # FUNCTION DEFINITION : mapping of character infobox properties to existing graph
# def map_character_infobox_to_rdf(graph, property_mappings, properties):
#     CHARACTER = Namespace("http://example.org/character/")
#     RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

#     character_uri = URIRef(CHARACTER[properties.get("name", "Unknown").replace(" ", "_")])
#     graph.add((character_uri, RDF.type, CHARACTER.Character))

#     mappings = {
#         "color": CHARACTER.color,
#         "bordercolor": CHARACTER.borderColor,
#         "corecolor": CHARACTER.coreColor,
#         "name": CHARACTER.name,
#         "jname": CHARACTER.japaneseName,
#         "tmname": CHARACTER.romanizedJapaneseName,
#         "jtranslit": CHARACTER.japaneseTransliteration,
#         "text": CHARACTER.textColor,
#         "sloganline": CHARACTER.sloganLine,
#         "image": CHARACTER.image,
#         "size": CHARACTER.imageSize,
#         "caption": CHARACTER.imageCaption,
#         "age": CHARACTER.age,
#         "birthday": CHARACTER.birthday,
#         "gender": CHARACTER.gender,
#         "height": CHARACTER.height,
#         "eyes": CHARACTER.eyeColor,
#         "hair": CHARACTER.hairColor,
#         "hometown": CHARACTER.hometown,
#         "region": CHARACTER.region,
#         "relatives": CHARACTER.relatives,
#     }

#     for key, predicate in mappings.items():
#         if key in properties and properties[key]:
#             property_type = property_mappings.get(str(URIRef(CHARACTER + key)))
#             match property_type:
#                 case "Literal":
#                     graph.add((character_uri, predicate, Literal(properties[key])))
#                 case "Integer":
#                     if isinstance(int(properties[key]), int):
#                         graph.add((character_uri, predicate, Literal(int(properties[key]))))
#                 case _:
#                     print(property_type)
#                     uriString = properties[key].replace(" ","_")
#                     if not("<" in uriString) and not(">" in uriString):
#                         graph.add((character_uri, predicate, URIRef(CHARACTER + uriString)))

#     return graph

In [90]:
# # FUNCTION DEFINITION : mapping of region infobox properties to existing graph
# def map_region_infobox_to_rdf(graphModify, property_mappings, properties):

#     REGION = Namespace("http://example.org/region/")
#     RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

#     region_uri = URIRef(REGION[properties.get("name", "Unknown").replace(" ", "_")])
#     graphModify.add((region_uri, RDF.type, REGION.Region))

#     mappings = {
#         "regioncolor": REGION.colorTemplate,
#         "name": REGION.name,
#         "jname": REGION.japaneseName,
#         "tmname": REGION.romanizedJapaneseName,
#         "region": REGION.isRegion,
#         "image": REGION.image,
#         "size": REGION.imageSize,
#         "caption": REGION.imageCaption,
#         "introduction": REGION.introductionMedia,
#         "professor": REGION.professor,
#         "firstpartner": REGION.firstPartnerPokemon,
#         "villain": REGION.villainousOrganization,
#         "league": REGION.league,
#         "location": REGION.leagueLocation,
#         "pokedext": REGION.pokedex,
#         "series": REGION.animationSeries,
#         "season": REGION.animationSeason,
#         "generation": REGION.generation,
#         "games": REGION.games,
#         "manga": REGION.mangaChapter,
#     }
    
#     for key, predicate in mappings.items():
#         if key in properties and properties[key]:
#             property_type = property_mappings.get(str(URIRef(REGION + key)))
#             match property_type:
#                 case "Literal":
#                     graphModify.add((region_uri, predicate, Literal(properties[key])))
#                 case "Integer":
#                     if isinstance(int(properties[key]), int):
#                         graphModify.add((region_uri, predicate, Literal(int(properties[key]))))
#                 case _:
#                     uriString = properties[key].replace(" ","_")
#                     graphModify.add((region_uri, predicate, URIRef(REGION + uriString)))

#     return graphModify