In [2]:
from rdflib import Graph, Literal, RDF, URIRef, Namespace, RDFS, OWL
from rdflib.namespace import XSD, RDFS
import pandas as pd 
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#### Step 1: Define ontology 

I chose to have only one class for all players, https://dbpedia.org/page/Football_player 

The case can be made that each position can be a different class. This is possible because dbpedia does indeed have different classes for each position. I chose not to do this because:

(1) Many statistics and properties are shared between both types of players (club, matches played, starts, etc.)

(2) For my use case, which is predicting which players will be at the same club at a given year, the type of player shouldn't make a difference to the predictions.  

(3) Based on the way I have cleaned the data, there can be no inconsistencies. That is, outfield players cannot have goalkeeper statistics and vice versa.  

(4) There are only 64 goalkeepers in this dataset, so it feels like unnecessary effort to define a seperate class just for them.

Cons:

(1) Different players share the same super class. Which means there is a compromise in the semantics, since I chose to ignore subclasses that exist.

Note: queries can be made to find goalkeepers or other positions, it would just be querying the property BestPosition of each player, instead of type of node itself. If I was just building a KG I would have to be more stringent while defining my main, sub classes. 

In [3]:
# Define namespaces 

sofifa = Namespace("https://sofifa.com")
dbpediaclass = Namespace("https://dbpedia.org/page")
wikidata = Namespace("https://www.wikidata.org/wiki/")
sportsschema= Namespace("https://sportschema.org/ontologies/soccer")
dbpedia = Namespace("https://dbpedia.org/property")

g = Graph()
g.bind("sofifa", sofifa)
g.bind("dbpediaclass", dbpediaclass)
g.bind("wikidata", wikidata)
g.bind("sportsschema", sportsschema)
g.bind("dbpedia", dbpedia)

In [4]:
# Add main class 

player_uri = sofifa['player']
g.add((player_uri, RDF.type, dbpediaclass['Football_player']))

# Add player properties that don't change in 2 years (name, height, etc.)

constant_literals = {'Property:P1477':'name', 'Property:P27':'nationality', 'Property:P413':'position',
                     'Property:P8006':'preferredfoot'}

for key, value in constant_literals.items():
    g.add((URIRef(wikidata[key]), RDF.type, RDF.Property))
    g.add((URIRef(wikidata[key]), RDFS.domain, player_uri))
    g.add((URIRef(wikidata[key]), RDFS.range, RDFS.Literal))
    g.add((URIRef(wikidata[key]), RDFS.label, Literal(value)))

g.add((URIRef(wikidata['Property:P2048']), RDF.type, RDF.Property))
g.add((URIRef(wikidata['Property:P2048']), RDFS.domain, player_uri))
g.add((URIRef(wikidata['Property:P2048']), RDFS.range, XSD.float))
g.add((URIRef(wikidata['Property:P2048']), RDFS.label, Literal('height')))
      
g.add((URIRef(wikidata['Q21821348']), RDF.type, RDF.Property))
g.add((URIRef(wikidata['Q21821348']), RDFS.domain, player_uri))
g.add((URIRef(wikidata['Q21821348']), RDFS.range, XSD.integer))
g.add((URIRef(wikidata['Q21821348']), RDFS.label, Literal('birthyear')))


<Graph identifier=N3ed9689a32a14fe680bac6099f33df0b (<class 'rdflib.graph.Graph'>)>

In [5]:
# Add fifa properties that change every year 

dbpedia_properties = [ "fts", "IntReputation", "Potential", "attackingworkrate", "contractuntil",
                       "defending", "defensiveworkrate", "dribbling", "gkdiving", "gkhandling",
                       "gkkicking", "gkpositioning", "gkreflexes", "matchesplayed", "minutes",
                       "overallRating", "pace", "passing", "physicality", "shooting", "skillmoves",
                       "starts", "value", "club" ]

for prop in dbpedia_properties:
    g.add((URIRef(dbpedia[prop]), RDF.type, RDF.Property))
    g.add((URIRef(dbpedia[prop]), RDFS.domain, player_uri))
    g.add((URIRef(dbpedia[prop]), RDFS.range, RDF.Seq))

In [6]:
# Add statistics properties that change every year

sportsschema_properties = [ "aerialsWon", "assistsTotal", "clearancesSuccessful", "foulsCommited",
                            "foulsSuffered", "goalsTotal", "interceptions", "passesCompleteLong",
                            "passesCompletePercentage", "shotsOnGoalTotal", "touches" ]

for prop in sportsschema_properties:
    g.add((URIRef(sportsschema[prop]), RDF.type, RDF.Property))
    g.add((URIRef(sportsschema[prop]), RDFS.domain, player_uri))
    g.add((URIRef(sportsschema[prop]), RDFS.range, RDF.Seq))

In [7]:
# Define a local namespace & make all properties and classes equivalent to make querying easier
fb = Namespace("https://footballerontology.com/")
g.bind("fb", fb)

# Link main player class
g.add((sofifa.player, OWL.equivalentClass, fb.player))

# Link properties 
for prop in dbpedia_properties:
    g.add((URIRef(dbpedia[prop]), OWL.equivalentProperty, URIRef(fb[prop])))

for prop in sportsschema_properties:
    g.add((URIRef(sportsschema[prop]), OWL.equivalentProperty, URIRef(fb[prop])))

for key, value in constant_literals.items():
    g.add((URIRef(wikidata[key]), OWL.equivalentProperty, URIRef(fb[value])))

g.add((URIRef(wikidata['Property:P2048']), OWL.equivalentProperty, URIRef(fb['height'])))
g.add((URIRef(wikidata['Q21821348']), OWL.equivalentProperty, URIRef(fb['birthyear'])))

<Graph identifier=N3ed9689a32a14fe680bac6099f33df0b (<class 'rdflib.graph.Graph'>)>

In [8]:
g.serialize(destination="../kg/schema.ttl", format='ttl')

<Graph identifier=N3ed9689a32a14fe680bac6099f33df0b (<class 'rdflib.graph.Graph'>)>

#### Step 2: Populate KG 

In [9]:
g = Graph()

fb = Namespace("https://footballerontology.com/")
g.bind("fb", fb)

data_21 = pd.read_csv("../data/cleaned/Data 21.csv")

# Add properties that do not change in a year
for index, row in data_21.iterrows():
    player_uri = URIRef(fb[f"player/{row[0]}"])
    g.add((player_uri, RDF.type, fb.player))
    g.add((player_uri, fb.name, Literal(row[1])))
    g.add((player_uri, fb.height, Literal(row[2], datatype=XSD.float)))
    g.add((player_uri, fb.nationality, Literal(row[3])))
    g.add((player_uri, fb.position, Literal(row[6]))) 
    g.add((player_uri, fb.preferredfoot, Literal(row[11])))
    g.add((player_uri, fb.birthyear, Literal(row[26], datatype=XSD.integer)))

In [10]:
fb = Namespace("https://footballerontology.com/")
g.bind("fb", fb)

# Iterate through both files and make a rdf seq of the values
for index, row in data_21.iterrows():
    player_uri = URIRef(fb[f"player/{row[0]}"])
    player_id = row[0]
    
    # Create RDF sequences for properties that change every year
    # Define the sequence URIs
    fts_seq_uri = URIRef(fb[f"ftsSeq/{player_id}"])
    intrep_seq_uri = URIRef(fb[f"intrepSeq/{player_id}"])
    potential_seq_uri = URIRef(fb[f"potentialSeq/{player_id}"])
    matchesplayed_seq_uri = URIRef(fb[f"matchesplayedSeq/{player_id}"])
    starts_seq_uri = URIRef(fb[f"startsSeq/{player_id}"])
    value_seq_uri = URIRef(fb[f"valueSeq/{player_id}"])
    minutes_seq_uri = URIRef(fb[f"minutesSeq/{player_id}"])
    overallrating_seq_uri = URIRef(fb[f"overallratingSeq/{player_id}"])
    contractuntil_seq_uri = URIRef(fb[f"contractuntilSeq/{player_id}"])
    club_seq_uri = URIRef(fb[f"clubSeq/{player_id}"])
    
    # Add the sequence type
    g.add((fts_seq_uri, RDF.type, RDF.Seq))
    g.add((intrep_seq_uri, RDF.type, RDF.Seq))
    g.add((potential_seq_uri, RDF.type, RDF.Seq))
    g.add((matchesplayed_seq_uri, RDF.type, RDF.Seq))
    g.add((starts_seq_uri, RDF.type, RDF.Seq))
    g.add((value_seq_uri, RDF.type, RDF.Seq))
    g.add((minutes_seq_uri, RDF.type, RDF.Seq))
    g.add((overallrating_seq_uri, RDF.type, RDF.Seq))
    g.add((contractuntil_seq_uri, RDF.type, RDF.Seq))
    g.add((club_seq_uri, RDF.type, RDF.Seq))

    if row['BestPosition'] == 'GK':
        # Create goalkeeper-specific sequences
        gkdiving_seq_uri = URIRef(fb[f"gkdivingSeq/{player_id}"])
        gkhandling_seq_uri = URIRef(fb[f"gkhandlingSeq/{player_id}"])
        gkkicking_seq_uri = URIRef(fb[f"gkkickingSeq/{player_id}"])
        gkpositioning_seq_uri = URIRef(fb[f"gkpositioningSeq/{player_id}"])
        gkreflexes_seq_uri = URIRef(fb[f"gkreflexesSeq/{player_id}"])
        touches_seq_uri = URIRef(fb[f"touchesSeq/{player_id}"])
        foulscommitted_seq_uri = URIRef(fb[f"foulscommittedSeq/{player_id}"])
        foulssuffered_seq_uri = URIRef(fb[f"foulssufferedSeq/{player_id}"])
        clearancessuccessful_seq_uri = URIRef(fb[f"clearancessuccessfulSeq/{player_id}"])
        aerialswon_seq_uri = URIRef(fb[f"aerialswonSeq/{player_id}"])
        passescompletelong_seq_uri = URIRef(fb[f"passescompletelongSeq/{player_id}"])
        passescompletepercentage_seq_uri = URIRef(fb[f"passescompletepercentageSeq/{player_id}"])
        passing_seq_uri = URIRef(fb[f"passingSeq/{player_id}"])
        physicality_seq_uri = URIRef(fb[f"physicalitySeq/{player_id}"])
        
        # Add the sequence type
        g.add((gkdiving_seq_uri, RDF.type, RDF.Seq))
        g.add((gkhandling_seq_uri, RDF.type, RDF.Seq))
        g.add((gkkicking_seq_uri, RDF.type, RDF.Seq))
        g.add((gkpositioning_seq_uri, RDF.type, RDF.Seq))
        g.add((gkreflexes_seq_uri, RDF.type, RDF.Seq))
        g.add((touches_seq_uri, RDF.type, RDF.Seq))
        g.add((foulscommitted_seq_uri, RDF.type, RDF.Seq))
        g.add((foulssuffered_seq_uri, RDF.type, RDF.Seq))
        g.add((clearancessuccessful_seq_uri, RDF.type, RDF.Seq))
        g.add((aerialswon_seq_uri, RDF.type, RDF.Seq))
        g.add((passescompletelong_seq_uri, RDF.type, RDF.Seq))
        g.add((passescompletepercentage_seq_uri, RDF.type, RDF.Seq))
        g.add((passing_seq_uri, RDF.type, RDF.Seq))
        g.add((physicality_seq_uri, RDF.type, RDF.Seq))
    else:
        # Create outfield player-specific sequences
        defending_seq_uri = URIRef(fb[f"defendingSeq/{player_id}"])
        attackingworkrate_seq_uri = URIRef(fb[f"attackingworkrateSeq/{player_id}"])
        defensiveworkrate_seq_uri = URIRef(fb[f"defensiveworkrateSeq/{player_id}"])
        dribbling_seq_uri = URIRef(fb[f"dribblingSeq/{player_id}"])
        pace_seq_uri = URIRef(fb[f"paceSeq/{player_id}"])
        passing_seq_uri = URIRef(fb[f"passingSeq/{player_id}"])
        physicality_seq_uri = URIRef(fb[f"physicalitySeq/{player_id}"])
        shooting_seq_uri = URIRef(fb[f"shootingSeq/{player_id}"])
        skillmoves_seq_uri = URIRef(fb[f"skillmovesSeq/{player_id}"])
        goalstotal_seq_uri = URIRef(fb[f"goalstotalSeq/{player_id}"])
        assiststotal_seq_uri = URIRef(fb[f"assiststotalSeq/{player_id}"])
        shotsongoaltotal_seq_uri = URIRef(fb[f"shotsongoaltotalSeq/{player_id}"])
        touches_seq_uri = URIRef(fb[f"touchesSeq/{player_id}"])
        foulscommitted_seq_uri = URIRef(fb[f"foulscommittedSeq/{player_id}"])
        foulssuffered_seq_uri = URIRef(fb[f"foulssufferedSeq/{player_id}"])
        interceptions_seq_uri = URIRef(fb[f"interceptionsSeq/{player_id}"])
        clearancessuccessful_seq_uri = URIRef(fb[f"clearancessuccessfulSeq/{player_id}"])
        aerialswon_seq_uri = URIRef(fb[f"aerialswonSeq/{player_id}"])
        passescompletelong_seq_uri = URIRef(fb[f"passescompletelongSeq/{player_id}"])
        passescompletepercentage_seq_uri = URIRef(fb[f"passescompletepercentageSeq/{player_id}"])
        
        # Add the sequence type
        g.add((defending_seq_uri, RDF.type, RDF.Seq))
        g.add((attackingworkrate_seq_uri, RDF.type, RDF.Seq))
        g.add((defensiveworkrate_seq_uri, RDF.type, RDF.Seq))
        g.add((dribbling_seq_uri, RDF.type, RDF.Seq))
        g.add((pace_seq_uri, RDF.type, RDF.Seq))
        g.add((passing_seq_uri, RDF.type, RDF.Seq))
        g.add((physicality_seq_uri, RDF.type, RDF.Seq))
        g.add((shooting_seq_uri, RDF.type, RDF.Seq))
        g.add((skillmoves_seq_uri, RDF.type, RDF.Seq))
        g.add((goalstotal_seq_uri, RDF.type, RDF.Seq))
        g.add((assiststotal_seq_uri, RDF.type, RDF.Seq))
        g.add((shotsongoaltotal_seq_uri, RDF.type, RDF.Seq))
        g.add((touches_seq_uri, RDF.type, RDF.Seq))
        g.add((foulscommitted_seq_uri, RDF.type, RDF.Seq))
        g.add((foulssuffered_seq_uri, RDF.type, RDF.Seq))
        g.add((interceptions_seq_uri, RDF.type, RDF.Seq))
        g.add((clearancessuccessful_seq_uri, RDF.type, RDF.Seq))
        g.add((aerialswon_seq_uri, RDF.type, RDF.Seq))
        g.add((passescompletelong_seq_uri, RDF.type, RDF.Seq))
        g.add((passescompletepercentage_seq_uri, RDF.type, RDF.Seq))
    
    # Add values to sequences - 2021 data
    # In RDF.Seq, the first item is at position _1 (not _0)
    g.add((fts_seq_uri, RDF._1, Literal(row[30], datatype=XSD.float)))
    g.add((intrep_seq_uri, RDF._1, Literal(row[10], datatype=XSD.integer)))
    g.add((potential_seq_uri, RDF._1, Literal(row[5], datatype=XSD.integer)))
    g.add((matchesplayed_seq_uri, RDF._1, Literal(row[27], datatype=XSD.integer)))
    g.add((starts_seq_uri, RDF._1, Literal(row[28], datatype=XSD.integer)))
    g.add((value_seq_uri, RDF._1, Literal(row[8], datatype=XSD.float)))
    g.add((minutes_seq_uri, RDF._1, Literal(row[29], datatype=XSD.integer)))
    g.add((overallrating_seq_uri, RDF._1, Literal(row[4], datatype=XSD.integer)))
    g.add((contractuntil_seq_uri, RDF._1, Literal(row[9], datatype=XSD.float)))
    g.add((club_seq_uri, RDF._1, Literal(row[7])))

    if row['BestPosition'] == 'GK':
        g.add((gkdiving_seq_uri, RDF._1, Literal(row[21], datatype=XSD.integer)))
        g.add((gkhandling_seq_uri, RDF._1, Literal(row[22], datatype=XSD.integer)))
        g.add((gkkicking_seq_uri, RDF._1, Literal(row[23], datatype=XSD.integer)))
        g.add((gkpositioning_seq_uri, RDF._1, Literal(row[24], datatype=XSD.integer)))
        g.add((gkreflexes_seq_uri, RDF._1, Literal(row[25], datatype=XSD.integer)))
        g.add((touches_seq_uri, RDF._1, Literal(row[37], datatype=XSD.float)))
        g.add((foulscommitted_seq_uri, RDF._1, Literal(row[39], datatype=XSD.float)))
        g.add((foulssuffered_seq_uri, RDF._1, Literal(row[40], datatype=XSD.float)))
        g.add((clearancessuccessful_seq_uri, RDF._1, Literal(row[35], datatype=XSD.float)))
        g.add((aerialswon_seq_uri, RDF._1, Literal(row[36], datatype=XSD.float)))
        g.add((passescompletelong_seq_uri, RDF._1, Literal(row[38], datatype=XSD.float)))
        g.add((passescompletepercentage_seq_uri, RDF._1, Literal(row[33], datatype=XSD.float)))
        g.add((passing_seq_uri, RDF._1, Literal(row[17], datatype=XSD.integer)))
        g.add((physicality_seq_uri, RDF._1, Literal(row[20], datatype=XSD.integer)))
    else:
        g.add((defending_seq_uri, RDF._1, Literal(row[19], datatype=XSD.integer)))
        g.add((attackingworkrate_seq_uri, RDF._1, Literal(row[13])))
        g.add((defensiveworkrate_seq_uri, RDF._1, Literal(row[14])))
        g.add((dribbling_seq_uri, RDF._1, Literal(row[18], datatype=XSD.integer)))
        g.add((pace_seq_uri, RDF._1, Literal(row[15], datatype=XSD.integer)))
        g.add((passing_seq_uri, RDF._1, Literal(row[17], datatype=XSD.integer)))
        g.add((physicality_seq_uri, RDF._1, Literal(row[20], datatype=XSD.integer)))
        g.add((shooting_seq_uri, RDF._1, Literal(row[16], datatype=XSD.integer)))
        g.add((skillmoves_seq_uri, RDF._1, Literal(row[12], datatype=XSD.integer)))
        g.add((goalstotal_seq_uri, RDF._1, Literal(row[31], datatype=XSD.float)))
        g.add((assiststotal_seq_uri, RDF._1, Literal(row[32], datatype=XSD.float)))
        g.add((shotsongoaltotal_seq_uri, RDF._1, Literal(row[41], datatype=XSD.float)))
        g.add((touches_seq_uri, RDF._1, Literal(row[37], datatype=XSD.float)))
        g.add((foulscommitted_seq_uri, RDF._1, Literal(row[39], datatype=XSD.float)))
        g.add((foulssuffered_seq_uri, RDF._1, Literal(row[40], datatype=XSD.float)))
        g.add((interceptions_seq_uri, RDF._1, Literal(row[34], datatype=XSD.float)))
        g.add((clearancessuccessful_seq_uri, RDF._1, Literal(row[35], datatype=XSD.float)))
        g.add((aerialswon_seq_uri, RDF._1, Literal(row[36], datatype=XSD.float)))
        g.add((passescompletelong_seq_uri, RDF._1, Literal(row[38], datatype=XSD.float)))
        g.add((passescompletepercentage_seq_uri, RDF._1, Literal(row[33], datatype=XSD.float)))

In [11]:
data_22 = pd.read_csv("../data/cleaned/Data 22.csv")

# Add 2022 data to existing sequences
for index, row in data_22.iterrows():
    player_id = row[0]
    
    # Get references to existing sequences URIs
    fts_seq_uri = URIRef(fb[f"ftsSeq/{player_id}"])
    intrep_seq_uri = URIRef(fb[f"intrepSeq/{player_id}"])
    potential_seq_uri = URIRef(fb[f"potentialSeq/{player_id}"])
    matchesplayed_seq_uri = URIRef(fb[f"matchesplayedSeq/{player_id}"])
    starts_seq_uri = URIRef(fb[f"startsSeq/{player_id}"])
    value_seq_uri = URIRef(fb[f"valueSeq/{player_id}"])
    minutes_seq_uri = URIRef(fb[f"minutesSeq/{player_id}"])
    overallrating_seq_uri = URIRef(fb[f"overallratingSeq/{player_id}"])
    contractuntil_seq_uri = URIRef(fb[f"contractuntilSeq/{player_id}"])
    club_seq_uri = URIRef(fb[f"clubSeq/{player_id}"])
    
    # Append 2022 data to sequences (at position _2)
    g.add((fts_seq_uri, RDF._2, Literal(row[30], datatype=XSD.float)))
    g.add((intrep_seq_uri, RDF._2, Literal(row[10], datatype=XSD.integer)))
    g.add((potential_seq_uri, RDF._2, Literal(row[5], datatype=XSD.integer)))
    g.add((matchesplayed_seq_uri, RDF._2, Literal(row[27], datatype=XSD.integer)))
    g.add((starts_seq_uri, RDF._2, Literal(row[28], datatype=XSD.integer)))
    g.add((value_seq_uri, RDF._2, Literal(row[8], datatype=XSD.float)))
    g.add((minutes_seq_uri, RDF._2, Literal(row[29], datatype=XSD.integer)))
    g.add((overallrating_seq_uri, RDF._2, Literal(row[4], datatype=XSD.integer)))
    g.add((contractuntil_seq_uri, RDF._2, Literal(row[9], datatype=XSD.float)))
    g.add((club_seq_uri, RDF._2, Literal(row[7])))

    if row['BestPosition'] == 'GK':
        gkdiving_seq_uri = URIRef(fb[f"gkdivingSeq/{player_id}"])
        gkhandling_seq_uri = URIRef(fb[f"gkhandlingSeq/{player_id}"])
        gkkicking_seq_uri = URIRef(fb[f"gkkickingSeq/{player_id}"])
        gkpositioning_seq_uri = URIRef(fb[f"gkpositioningSeq/{player_id}"])
        gkreflexes_seq_uri = URIRef(fb[f"gkreflexesSeq/{player_id}"])
        touches_seq_uri = URIRef(fb[f"touchesSeq/{player_id}"])
        foulscommitted_seq_uri = URIRef(fb[f"foulscommittedSeq/{player_id}"])
        foulssuffered_seq_uri = URIRef(fb[f"foulssufferedSeq/{player_id}"])
        clearancessuccessful_seq_uri = URIRef(fb[f"clearancessuccessfulSeq/{player_id}"])
        aerialswon_seq_uri = URIRef(fb[f"aerialswonSeq/{player_id}"])
        passescompletelong_seq_uri = URIRef(fb[f"passescompletelongSeq/{player_id}"])
        passescompletepercentage_seq_uri = URIRef(fb[f"passescompletepercentageSeq/{player_id}"])
        passing_seq_uri = URIRef(fb[f"passingSeq/{player_id}"])
        physicality_seq_uri = URIRef(fb[f"physicalitySeq/{player_id}"])
        
        g.add((gkdiving_seq_uri, RDF._2, Literal(row[21], datatype=XSD.integer)))
        g.add((gkhandling_seq_uri, RDF._2, Literal(row[22], datatype=XSD.integer)))
        g.add((gkkicking_seq_uri, RDF._2, Literal(row[23], datatype=XSD.integer)))
        g.add((gkpositioning_seq_uri, RDF._2, Literal(row[24], datatype=XSD.integer)))
        g.add((gkreflexes_seq_uri, RDF._2, Literal(row[25], datatype=XSD.integer)))
        g.add((touches_seq_uri, RDF._2, Literal(row[37], datatype=XSD.float)))
        g.add((foulscommitted_seq_uri, RDF._2, Literal(row[39], datatype=XSD.float)))
        g.add((foulssuffered_seq_uri, RDF._2, Literal(row[40], datatype=XSD.float)))
        g.add((clearancessuccessful_seq_uri, RDF._2, Literal(row[35], datatype=XSD.float)))
        g.add((aerialswon_seq_uri, RDF._2, Literal(row[36], datatype=XSD.float)))
        g.add((passescompletelong_seq_uri, RDF._2, Literal(row[38], datatype=XSD.float)))
        g.add((passescompletepercentage_seq_uri, RDF._2, Literal(row[33], datatype=XSD.float)))
        g.add((passing_seq_uri, RDF._2, Literal(row[17], datatype=XSD.integer)))
        g.add((physicality_seq_uri, RDF._2, Literal(row[20], datatype=XSD.integer)))
    else:
        defending_seq_uri = URIRef(fb[f"defendingSeq/{player_id}"])
        attackingworkrate_seq_uri = URIRef(fb[f"attackingworkrateSeq/{player_id}"])
        defensiveworkrate_seq_uri = URIRef(fb[f"defensiveworkrateSeq/{player_id}"])
        dribbling_seq_uri = URIRef(fb[f"dribblingSeq/{player_id}"])
        pace_seq_uri = URIRef(fb[f"paceSeq/{player_id}"])
        passing_seq_uri = URIRef(fb[f"passingSeq/{player_id}"])
        physicality_seq_uri = URIRef(fb[f"physicalitySeq/{player_id}"])
        shooting_seq_uri = URIRef(fb[f"shootingSeq/{player_id}"])
        skillmoves_seq_uri = URIRef(fb[f"skillmovesSeq/{player_id}"])
        goalstotal_seq_uri = URIRef(fb[f"goalstotalSeq/{player_id}"])
        assiststotal_seq_uri = URIRef(fb[f"assiststotalSeq/{player_id}"])
        shotsongoaltotal_seq_uri = URIRef(fb[f"shotsongoaltotalSeq/{player_id}"])
        touches_seq_uri = URIRef(fb[f"touchesSeq/{player_id}"])
        foulscommitted_seq_uri = URIRef(fb[f"foulscommittedSeq/{player_id}"])
        foulssuffered_seq_uri = URIRef(fb[f"foulssufferedSeq/{player_id}"])
        interceptions_seq_uri = URIRef(fb[f"interceptionsSeq/{player_id}"])
        clearancessuccessful_seq_uri = URIRef(fb[f"clearancessuccessfulSeq/{player_id}"])
        aerialswon_seq_uri = URIRef(fb[f"aerialswonSeq/{player_id}"])
        passescompletelong_seq_uri = URIRef(fb[f"passescompletelongSeq/{player_id}"])
        passescompletepercentage_seq_uri = URIRef(fb[f"passescompletepercentageSeq/{player_id}"])
        
        g.add((defending_seq_uri, RDF._2, Literal(row[19], datatype=XSD.integer)))
        g.add((attackingworkrate_seq_uri, RDF._2, Literal(row[13])))
        g.add((defensiveworkrate_seq_uri, RDF._2, Literal(row[14])))
        g.add((dribbling_seq_uri, RDF._2, Literal(row[18], datatype=XSD.integer)))
        g.add((pace_seq_uri, RDF._2, Literal(row[15], datatype=XSD.integer)))
        g.add((passing_seq_uri, RDF._2, Literal(row[17], datatype=XSD.integer)))
        g.add((physicality_seq_uri, RDF._2, Literal(row[20], datatype=XSD.integer)))
        g.add((shooting_seq_uri, RDF._2, Literal(row[16], datatype=XSD.integer)))
        g.add((skillmoves_seq_uri, RDF._2, Literal(row[12], datatype=XSD.integer)))
        g.add((goalstotal_seq_uri, RDF._2, Literal(row[31], datatype=XSD.float)))
        g.add((assiststotal_seq_uri, RDF._2, Literal(row[32], datatype=XSD.float)))
        g.add((shotsongoaltotal_seq_uri, RDF._2, Literal(row[41], datatype=XSD.float)))
        g.add((touches_seq_uri, RDF._2, Literal(row[37], datatype=XSD.float)))
        g.add((foulscommitted_seq_uri, RDF._2, Literal(row[39], datatype=XSD.float)))
        g.add((foulssuffered_seq_uri, RDF._2, Literal(row[40], datatype=XSD.float)))
        g.add((interceptions_seq_uri, RDF._2, Literal(row[34], datatype=XSD.float)))
        g.add((clearancessuccessful_seq_uri, RDF._2, Literal(row[35], datatype=XSD.float)))
        g.add((aerialswon_seq_uri, RDF._2, Literal(row[36], datatype=XSD.float)))
        g.add((passescompletelong_seq_uri, RDF._2, Literal(row[38], datatype=XSD.float)))
        g.add((passescompletepercentage_seq_uri, RDF._2, Literal(row[33], datatype=XSD.float)))

In [12]:
data_21 = pd.read_csv("../data/cleaned/Data 21.csv")

# Add sequences to graph
for index, row in data_21.iterrows():
    player_uri = URIRef(fb[f"player/{row[0]}"])
    
    # Add the sequence relationships to the player
    g.add((player_uri, fb.fts, URIRef(fb[f"ftsSeq/{row[0]}"]) ))
    g.add((player_uri, fb.intreputation, URIRef(fb[f"intrepSeq/{row[0]}"]) ))
    g.add((player_uri, fb.potential, URIRef(fb[f"potentialSeq/{row[0]}"]) ))
    g.add((player_uri, fb.matchesplayed, URIRef(fb[f"matchesplayedSeq/{row[0]}"]) ))
    g.add((player_uri, fb.starts, URIRef(fb[f"startsSeq/{row[0]}"]) ))
    g.add((player_uri, fb.value, URIRef(fb[f"valueSeq/{row[0]}"]) ))
    g.add((player_uri, fb.minutes, URIRef(fb[f"minutesSeq/{row[0]}"]) ))
    g.add((player_uri, fb.overallrating, URIRef(fb[f"overallratingSeq/{row[0]}"]) ))
    g.add((player_uri, fb.contractuntil, URIRef(fb[f"contractuntilSeq/{row[0]}"]) ))
    g.add((player_uri, fb.club, URIRef(fb[f"clubSeq/{row[0]}"]) ))

    if row['BestPosition'] == 'GK':
        g.add((player_uri, fb.gkdiving, URIRef(fb[f"gkdivingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.gkhandling, URIRef(fb[f"gkhandlingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.gkkicking, URIRef(fb[f"gkkickingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.gkpositioning, URIRef(fb[f"gkpositioningSeq/{row[0]}"]) ))
        g.add((player_uri, fb.gkreflexes, URIRef(fb[f"gkreflexesSeq/{row[0]}"]) ))
        g.add((player_uri, fb.touches, URIRef(fb[f"touchesSeq/{row[0]}"]) ))
        g.add((player_uri, fb.foulscommitted, URIRef(fb[f"foulscommittedSeq/{row[0]}"]) ))
        g.add((player_uri, fb.foulssuffered, URIRef(fb[f"foulssufferedSeq/{row[0]}"]) ))
        g.add((player_uri, fb.clearancessuccessful, URIRef(fb[f"clearancessuccessfulSeq/{row[0]}"]) ))
        g.add((player_uri, fb.aerialswon, URIRef(fb[f"aerialswonSeq/{row[0]}"])))
        g.add((player_uri, fb.passescompletelong, URIRef(fb[f"passescompletelongSeq/{row[0]}"])))
        g.add((player_uri, fb.passescompletepercentage, URIRef(fb[f"passescompletepercentageSeq/{row[0]}"])))
    else:
        g.add((player_uri, fb.defending, URIRef(fb[f"defendingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.attackingworkrate, URIRef(fb[f"attackingworkrateSeq/{row[0]}"]) ))
        g.add((player_uri, fb.defensiveworkrate, URIRef(fb[f"defensiveworkrateSeq/{row[0]}"]) ))
        g.add((player_uri, fb.dribbling, URIRef(fb[f"dribblingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.pace, URIRef(fb[f"paceSeq/{row[0]}"]) ))
        g.add((player_uri, fb.passing, URIRef(fb[f"passingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.physicality, URIRef(fb[f"physicalitySeq/{row[0]}"]) ))
        g.add((player_uri, fb.shooting, URIRef(fb[f"shootingSeq/{row[0]}"]) ))
        g.add((player_uri, fb.skillmoves, URIRef(fb[f"skillmovesSeq/{row[0]}"]) ))
        g.add((player_uri, fb.goalstotal, URIRef(fb[f"goalstotalSeq/{row[0]}"]) ))
        g.add((player_uri, fb.assiststotal, URIRef(fb[f"assiststotalSeq/{row[0]}"]) ))
        g.add((player_uri, fb.shotsongoaltotal, URIRef(fb[f"shotsongoaltotalSeq/{row[0]}"]) ))
        g.add((player_uri, fb.touches, URIRef(fb[f"touchesSeq/{row[0]}"]) ))
        g.add((player_uri, fb.foulscommitted, URIRef(fb[f"foulscommittedSeq/{row[0]}"]) ))
        g.add((player_uri, fb.foulssuffered, URIRef(fb[f"foulssufferedSeq/{row[0]}"]) ))
        g.add((player_uri, fb.interceptions, URIRef(fb[f"interceptionsSeq/{row[0]}"]) ))
        g.add((player_uri, fb.clearancessuccessful, URIRef(fb[f"clearancessuccessfulSeq/{row[0]}"]) ))
        g.add((player_uri, fb.aerialswon, URIRef(fb[f"aerialswonSeq/{row[0]}"]) ))
        g.add((player_uri, fb.passescompletelong, URIRef(fb[f"passescompletelongSeq/{row[0]}"]) ))
        g.add((player_uri, fb.passescompletepercentage, URIRef(fb[f"passescompletepercentageSeq/{row[0]}"]) ))

In [13]:
print(f"There are {len(g)} triples in the graph")

There are 143673 triples in the graph


In [14]:
g.serialize(destination='../kg/players.ttl', format='ttl')

<Graph identifier=Na57091eb7a0447509e21ec76501ace18 (<class 'rdflib.graph.Graph'>)>