## Spotify Data Ingestion

This notebook runs a series of Cypher queries that load the CSV datasets into Neo4j and build the property graph.

In [1]:
# Required libraries
from neo4j import GraphDatabase
import os
from pathlib import Path
from urllib.request import pathname2url


In [2]:
# Absolute path of the working directory; the datasets live in ./datasets below it
absPath = os.path.abspath(os.getcwd())
datasetsPath = os.path.join(absPath, "datasets")

# Create the dataset directory when it is missing
if os.path.exists(datasetsPath) is False:
    os.mkdir(datasetsPath)


# One CSV file per entity type, all located inside the dataset directory
(
    genresPath,
    countriesPath,
    artistsPath,
    albumsPath,
    tracksPath,
    chartsPath,
    peoplePath,
    recordLabelsPath,
) = (
    os.path.join(datasetsPath, f"{stem}.csv")
    for stem in (
        "genres",
        "countries",
        "artists",
        "albums",
        "tracks",
        "charts",
        "people",
        "recordLabels",
    )
)


In [3]:
def getPathURI(fullPath):
    """Return ``fullPath`` as a ``file:`` URL string.

    The path is converted with ``urllib.request.pathname2url`` (which
    percent-encodes characters such as spaces) and prefixed with the
    ``file:`` scheme. The result is what the loader cells pass to
    ``LOAD CSV ... FROM``.
    """
    encodedPath = pathname2url(fullPath)
    return f"file:{encodedPath}"


### Connection to Neo4j

In [4]:
class Neo4jParams:
    """Plain holder for the Neo4j connection settings used by this notebook.

    The five constructor arguments are stored unchanged as attributes of the
    same name: ``user``, ``psw``, ``dbname``, ``dbpsw`` and ``uri``.
    """

    def __init__(self, user, psw, dbname, dbpsw, uri):
        # Account-level settings
        self.user, self.psw = user, psw
        # Database-level settings
        self.dbname, self.dbpsw = dbname, dbpsw
        # Connection address of the server
        self.uri = uri


In [5]:
# DB parameters
# Every setting can be overridden through an environment variable, so real
# credentials do not have to be written into the notebook. The fallbacks are
# the values this notebook has always used, so nothing changes when the
# variables are unset.
# NOTE: getDriver() authenticates with `user` + `dbpsw`; `psw` and `dbname`
# are stored on `params` but not read by any cell in this notebook.
user = os.environ.get("SPOTIFY_NEO4J_USER", "neo4j")
psw = os.environ.get("SPOTIFY_NEO4J_PSW", "neo4j")
dbname = os.environ.get("SPOTIFY_NEO4J_DBNAME", "SpotifyDB")
dbpsw = os.environ.get("SPOTIFY_NEO4J_DBPSW", "SpotifyDB")
uri = os.environ.get("SPOTIFY_NEO4J_URI", "bolt://localhost:7687")

params = Neo4jParams(user, psw, dbname, dbpsw, uri)


In [6]:
def getDriver():
    """Open a Neo4j driver for the connection described by the global ``params``.

    Connects to ``params.uri`` and authenticates as ``params.user`` with
    ``params.dbpsw``. The returned driver is not closed here; the calling
    cell is expected to close it.
    """
    credentials = (params.user, params.dbpsw)
    return GraphDatabase.driver(params.uri, auth=credentials)

## Data ingestion

### Create constraints

In [7]:
# Uniqueness constraints, one per node label.
# IF NOT EXISTS lets this cell be re-run against a database that already has them.
constraintStatements = [
    "CREATE CONSTRAINT IF NOT EXISTS FOR (g: Genre) REQUIRE g.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (c: Country) REQUIRE c.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (a: Artist) REQUIRE a.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (alb: Album) REQUIRE alb.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (t: Track) REQUIRE t.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (ch: Chart) REQUIRE ch.id IS UNIQUE",
    "CREATE CONSTRAINT IF NOT EXISTS FOR (p: Person) REQUIRE p.id IS UNIQUE",
    # Instrument nodes are created with a `name` property by the people loader,
    # so the constraint has to cover `name` (it used to target `instrument`,
    # a property no node ever carries).
    "CREATE CONSTRAINT IF NOT EXISTS FOR (i:Instrument) REQUIRE i.name IS UNIQUE",
    # The label is spelled "RecorLabel" on purpose: the record-label and people
    # loaders in this notebook use the same spelling.
    "CREATE CONSTRAINT IF NOT EXISTS FOR (r:RecorLabel) REQUIRE r.id IS UNIQUE",
]

# The context managers close the session and the driver even if a statement fails.
with getDriver() as driver, driver.session() as session:
    for statement in constraintStatements:
        # consume() waits for the statement to finish so any error is raised here
        session.run(statement).consume()


### Genres

In [8]:
# Load genres: one Genre node per CSV row.
# NOTE(review): USING PERIODIC COMMIT is kept as in the rest of the notebook;
# Neo4j 5 removed it in favour of CALL { ... } IN TRANSACTIONS - confirm the
# target server version before upgrading.
# The context managers close the session and the driver even if the query fails.
with getDriver() as driver, driver.session() as session:
    session.run(
        """
        USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $genresPath AS genre
            CREATE (:Genre { id: genre.id, name: genre.name })
        """,
        genresPath=getPathURI(genresPath)
    ).consume()  # wait for the load to finish so any server-side error is raised in this cell


### Countries

In [9]:
# Load countries: one Country node per CSV row, keyed by the `Code` column.
# The context managers close the session and the driver even if the query fails.
with getDriver() as driver, driver.session() as session:
    session.run(
        """
        USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $countriesPath AS country
            CREATE (:Country { id: country.Code, name: country.Name })
        """,
        countriesPath=getPathURI(countriesPath)
    ).consume()  # wait for the load to finish so any server-side error is raised in this cell


### Artists

In [10]:
# Load artists and link each one to the genres listed (comma-separated) in its row.
# The context managers close the session and the driver even if the query fails.
with getDriver() as driver, driver.session() as session:
    session.run(
        """
        USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $artistsPath AS artist
            CREATE (a:Artist {
                id: artist.id,
                name: artist.name,
                popularity: toInteger(artist.popularity)
            })

            WITH a, split(artist.genres, ",") AS genres
            UNWIND genres AS genre
                MATCH (g:Genre { id: genre })
                CREATE (a)-[:hasGenre]->(g)
        """,
        # popularity is converted with toInteger so it has the same type as
        # Track.popularity (LOAD CSV yields every field as a string).
        artistsPath=getPathURI(artistsPath)
    ).consume()  # wait for the load to finish so any server-side error is raised in this cell


### Albums

In [11]:
# Load albums in three independent passes over the same CSV.
#
# Doing everything in one query chained the artist UNWIND/MATCH into the
# country UNWIND: the country part then ran once per matched artist (duplicate
# isAvailableIn relationships) and not at all when no artist matched.
# Separate passes keep each relationship type independent of the others.
albumQueries = (
    # Pass 1: the Album nodes themselves
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $albumsPath AS album
        CREATE (alb:Album {
            id: album.id,
            name: album.title,
            totalTracks: toInteger(album.total_tracks),
            releaseDate: date(album.release_date),
            albumType: album.album_type
        })
    """,
    # Pass 2: Artist -> Album
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $albumsPath AS album
        MATCH (alb:Album { id: album.id })
        UNWIND split(album.artists, ",") AS artist
            MATCH (a:Artist { id: artist })
            CREATE (a)-[:partecipateIn]->(alb)
    """,
    # Pass 3: Album -> Country
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $albumsPath AS album
        MATCH (alb:Album { id: album.id })
        UNWIND split(album.available_countries, ",") AS country
            MATCH (c:Country { id: country })
            CREATE (alb)-[:isAvailableIn]->(c)
    """,
)

# The context managers close the session and the driver even if a query fails.
with getDriver() as driver, driver.session() as session:
    for query in albumQueries:
        # consume() makes each pass finish (and raise on error) before the next starts
        session.run(query, albumsPath=getPathURI(albumsPath)).consume()


### Tracks

In [12]:
# Load tracks in independent passes over the same CSV.
#
# In a single chained query every MATCH/UNWIND filters or multiplies the rows
# seen by the clauses after it: a track whose album was not found lost its
# artist and country relationships too, and the country part ran once per
# matched artist (duplicate isAvailableIn relationships). Separate passes keep
# each relationship type independent of the others.
trackQueries = (
    # Pass 1: the Track nodes themselves
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $tracksPath AS track
        CREATE (t:Track {
            id: track.id,
            name: track.title,
            duration: toInteger(track.duration),
            popularity: toInteger(track.popularity),
            explicit: toBoolean(track.explicit),
            key: toInteger(track.key),
            tempo: toFloat(track.tempo),
            mode: toInteger(track.mode),
            time_signature: toInteger(track.time_signature),
            acousticness: toFloat(track.acousticness),
            danceability: toFloat(track.danceability),
            energy: toFloat(track.energy),
            loudness: toFloat(track.loudness),
            liveness: toFloat(track.liveness),
            valence: toFloat(track.valence),
            speechiness: toFloat(track.speechiness),
            instrumentalness: toFloat(track.instrumentalness)
        })
    """,
    # Pass 2: Track -> Album
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $tracksPath AS track
        MATCH (t:Track { id: track.id })
        MATCH (alb:Album { id: track.album })
        CREATE (t)-[:isPartOf]->(alb)
    """,
    # Pass 3: Artist -> Track
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $tracksPath AS track
        MATCH (t:Track { id: track.id })
        UNWIND split(track.artists, ",") AS artist
            MATCH (a:Artist { id: artist })
            CREATE (a)-[:partecipateIn]->(t)
    """,
    # Pass 4: Track -> Country
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $tracksPath AS track
        MATCH (t:Track { id: track.id })
        UNWIND split(track.available_countries, ",") AS country
            MATCH (c:Country { id: country })
            CREATE (t)-[:isAvailableIn]->(c)
    """,
)

# The context managers close the session and the driver even if a query fails.
with getDriver() as driver, driver.session() as session:
    for query in trackQueries:
        # consume() makes each pass finish (and raise on error) before the next starts
        session.run(query, tracksPath=getPathURI(tracksPath)).consume()


### Charts

In [13]:
# Load charts in independent passes over the same CSV.
#
# - Chart nodes are MERGEd on `id` only (the property covered by the uniqueness
#   constraint) and the remaining properties are set on creation. Merging on
#   every property fails when one of them is null and collides with the
#   constraint if two rows disagree on a non-key property.
# - The country link and the track position are written by separate passes, so
#   an unknown country code no longer drops the track's chart position.
chartQueries = (
    # Pass 1: the Chart nodes (the CSV repeats a chart on several rows, hence MERGE)
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $chartsPath AS chart
        MERGE (ch:Chart { id: chart.id })
        ON CREATE SET
            ch.name = chart.name,
            ch.date = date(chart.date),
            ch.chartType = chart.type
    """,
    # Pass 2: Chart -> Country
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $chartsPath AS chart
        MATCH (ch:Chart { id: chart.id })
        MATCH (c:Country { id: chart.country_code })
        MERGE (ch)-[:isReferredTo]->(c)
    """,
    # Pass 3: Track -> Chart, carrying the position of the track in that chart
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $chartsPath AS chart
        MATCH (ch:Chart { id: chart.id })
        MATCH (t:Track { id: chart.trackID })
        CREATE (t)-[r:isPositionedIn { position: toInteger(chart.position) }]->(ch)
    """,
)

# The context managers close the session and the driver even if a query fails.
with getDriver() as driver, driver.session() as session:
    for query in chartQueries:
        # consume() makes each pass finish (and raise on error) before the next starts
        session.run(query, chartsPath=getPathURI(chartsPath)).consume()


### Record Labels

In [None]:
# Load record labels and link each one to its country.
#
# The map literal previously ended with a trailing comma, which Cypher rejects
# as a syntax error, so this cell could not run. The node is now MERGEd on `id`
# only (the property covered by the uniqueness constraint) and `name` is set on
# creation.
# The label is spelled "RecorLabel" to match the constraint and the people
# loader elsewhere in this notebook.
# The context managers close the session and the driver even if the query fails.
with getDriver() as driver, driver.session() as session:
    session.run(
        """
        USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $recordLabelsPath AS recordLabel
            MERGE (r:RecorLabel { id: recordLabel.id })
            ON CREATE SET r.name = recordLabel.name

            WITH recordLabel, r
            MATCH (c:Country { id: recordLabel.country })
            MERGE (r)-[:isLocatedIn]->(c)
        """,
        recordLabelsPath=getPathURI(recordLabelsPath)
    ).consume()  # wait for the load to finish so any server-side error is raised in this cell


### People

In [15]:
# Load people in independent passes over the same CSV.
#
# Fixes compared with the single-query version:
# - the Person map literal ended with a trailing comma (Cypher syntax error);
# - Person is MERGEd on `id` only and the other properties are set on creation,
#   so a missing deathdate leaves the property unset instead of making MERGE
#   fail on a null property value;
# - Instrument nodes are MERGEd on their own before the relationship; merging
#   the whole (p)-[:plays]->(i:Instrument {...}) pattern created a new
#   Instrument node for every person;
# - each relationship type has its own pass, so a person without a matching
#   country, artist or instrument still gets the remaining relationships.
peopleQueries = (
    # Pass 1: the Person nodes themselves
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $peoplePath AS person
        MERGE (p:Person { id: person.id })
        ON CREATE SET
            p.name = person.name,
            p.surname = person.surname,
            p.gender = person.gender,
            p.birthDate = date(person.birthdate),
            p.deathDate = date(person.deathdate)
    """,
    # Pass 2: Person -> Country (nationality)
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $peoplePath AS person
        MATCH (p:Person { id: person.id })
        MATCH (c:Country { id: person.nationality })
        MERGE (p)-[:hasNationality]->(c)
    """,
    # Pass 3: Person -> Artist (MERGE keeps the cell re-runnable like the other passes)
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $peoplePath AS person
        MATCH (p:Person { id: person.id })
        MATCH (a:Artist { id: person.artist })
        MERGE (p)-[r:isMemberOf]->(a)
    """,
    # Pass 4: Person -> Instrument (instrument nodes are shared between people)
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $peoplePath AS person
        MATCH (p:Person { id: person.id })
        UNWIND split(person.instruments, ",") AS instrument
            MERGE (i:Instrument { name: instrument })
            MERGE (p)-[:plays]->(i)
    """,
    # Pass 5: Person -> record label ("RecorLabel" spelling matches the label
    # used by the constraint and the record-label loader in this notebook)
    """
    USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM $peoplePath AS person
        MATCH (p:Person { id: person.id })
        UNWIND split(person.recordLabels, ",") AS recordLabel
            MATCH (r:RecorLabel { id: recordLabel })
            MERGE (p)-[:hasContractWith]->(r)
    """,
)

# The context managers close the session and the driver even if a query fails.
with getDriver() as driver, driver.session() as session:
    for query in peopleQueries:
        # consume() makes each pass finish (and raise on error) before the next starts
        session.run(query, peoplePath=getPathURI(peoplePath)).consume()
