In [None]:
# Imports

import os
from pathlib import Path
import pandas as pd
import json

# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace

# rdflib knows about some namespaces, like FOAF
from rdflib.namespace import FOAF, XSD

In [None]:
# Resolve the working directory; input datasets and RDF output live beneath it
absPath = str(Path.cwd())
datasetsPath = os.path.join(absPath, "datasets")
rdfPath = os.path.join(absPath, "rdf")

# Create the dataset and RDF directories if they do not exist yet.
# exist_ok=True replaces the exists()/mkdir() pair, which could fail if the
# directory appeared between the check and the creation.
for _directory in (datasetsPath, rdfPath):
    os.makedirs(_directory, exist_ok=True)

# Setup datasets paths (CSV inputs read from the datasets directory)
spotifyChartsPath = os.path.join(datasetsPath, "reducedSpotifyCharts.csv")
genresPath = os.path.join(datasetsPath, "genres.csv")
marketsPath = os.path.join(datasetsPath, "markets.csv")
tracksPath = os.path.join(datasetsPath, "tracks.csv")
albumsPath = os.path.join(datasetsPath, "albums.csv")
artistsPath = os.path.join(datasetsPath, "artists.csv")
peoplePath = os.path.join(datasetsPath, "people.csv")

# Setup Turtle paths (serialized graphs written to the RDF directory)
genresTTLPath = os.path.join(rdfPath, "genres.ttl")
marketsTTLPath = os.path.join(rdfPath, "markets.ttl")
tracksTTLPath = os.path.join(rdfPath, "tracks.ttl")
albumsTTLPath = os.path.join(rdfPath, "albums.ttl")
artistsTTLPath = os.path.join(rdfPath, "artists.ttl")
peopleTTLPath = os.path.join(rdfPath, "people.ttl")
appearanceTTLPath = os.path.join(rdfPath, "appearance.ttl")


In [None]:
# Namespaces for the ontologies that RDFlib does not ship with

# Spotify Ontology: prefix for the album nodes, classes and properties
SO = Namespace("https://www.dei.unipd.it/~martinelli/spotify/spotifyOntology#")

# Country Ontology: prefix for the country nodes an album is linked to
CNS = Namespace("http://eulersharp.sourceforge.net/2003/03swap/countries#")

# Albums

In [None]:
# Start from an empty graph that will collect the album triples
g = Graph()

# Register a short prefix for each namespace so the serialized output
# is more readable
for _prefix, _namespace in (
    ("foaf", FOAF),
    ("xsd", XSD),
    ("countries", CNS),
    ("so", SO),
):
    g.bind(_prefix, _namespace)

In [None]:
# Read the albums CSV into a DataFrame indexed by the album "id" column
# (comma is already the default separator for read_csv)
albums = pd.read_csv(albumsPath, index_col="id")

In [None]:
def _release_date_to_iso(release_date, precision):
    """Return ``release_date`` padded to a full ``YYYY-MM-DD`` string.

    Args:
        release_date: Value of the ``release_date`` column. Usually a string,
            but it may be numeric when pandas inferred a numeric dtype for
            year-only values.
        precision: Value of the ``release_date_precision`` column. ``"year"``
            appends ``-01-01`` and ``"month"`` appends ``-01``; any other
            value leaves the date as it is.

    Returns:
        str: The text to use as the lexical form of the ``xsd:date`` literal.
    """
    # A year-only column with gaps is parsed as float (e.g. 1999.0)
    if isinstance(release_date, float) and release_date.is_integer():
        release_date = int(release_date)
    release_date = str(release_date).strip()

    if precision == "year":
        return release_date + "-01-01"
    if precision == "month":
        return release_date + "-01"
    return release_date


# Iterate over the album DataFrame; the index holds the album id
for albumID, row in albums.iterrows():
    # Create the node to add to the Graph (indexing a Namespace yields a URIRef)
    Album = SO[albumID]

    # Declare the node as an Album
    g.add((Album, RDF.type, SO.Album))

    # Missing cells are skipped below (as already done for the countries)
    # instead of being serialized as bogus "nan" literals.

    # Add the name of the album
    if pd.notna(row["title"]):
        g.add((Album, SO["name"], Literal(row["title"], datatype=XSD.string)))

    # Add the total tracks of the album; int() guards against a float-typed
    # column, whose "12.0" lexical form is not a valid xsd:int
    if pd.notna(row["total_tracks"]):
        g.add((Album, SO["totalTracks"], Literal(int(row["total_tracks"]), datatype=XSD.int)))

    # Add the release date of the album, padded according to its precision
    if pd.notna(row["release_date"]):
        releaseDate = _release_date_to_iso(row["release_date"], row["release_date_precision"])
        g.add((Album, SO["releaseDate"], Literal(releaseDate, datatype=XSD.date)))

    # Add album type
    if pd.notna(row["album_type"]):
        albumType = SO[row["album_type"]]
        g.add((Album, SO["isTypeOf"], albumType))

    # Load countries as array (comma-separated codes, possibly missing)
    countries = row["available_countries"].split(",") if pd.notna(row["available_countries"]) else []

    for country in countries:
        # Normalize the code; ignore empty entries such as a trailing comma
        countryCode = country.strip().lower()
        if not countryCode:
            continue

        # Create the RDF node
        Country = CNS[countryCode]

        # Add the edge connecting the Album and the Country
        g.add((Album, SO["isAvailableIn"], Country))
     


In [None]:
# Serialize the graph as Turtle and write it to the albums output file
print("[💾] SAVING")
Path(albumsTTLPath).write_text(g.serialize(format="turtle"), encoding="utf-8")