## Spotify Query

In this notebook we run some queries on our Spotify property graph. The queries are divided into three parts:
1. ***Example queries***: two example queries that show how the newly added information, such as *record label* and *instruments*, can be used.
1. ***Italian tracks and Italian artists from 2017 to 2020:*** queries about the Italian tracks and artists present in the TOP 100 Italy.
1. ***Italian tracks abroad:*** we want to discover whether Italian tracks are also listened to outside Italy.

In [None]:
# Import required libraries
from neo4j import GraphDatabase
import pandas as pd
import calendar
import numpy as np
import matplotlib.pyplot as plt
import os

### Connection to Neo4j

In [None]:
# Container for the Neo4j connection settings
class Neo4jParams:
    """Plain holder for the values needed to reach the Neo4j database.

    Attributes
    ----------
    user   : user name used for authentication
    psw    : password stored alongside the user name
    dbname : name of the database
    dbpsw  : password of the database
    uri    : connection URI of the server
    """

    def __init__(self, user, psw, dbname, dbpsw, uri):
        # Where the server is and which database it hosts
        self.uri = uri
        self.dbname = dbname
        # Credentials
        self.user, self.psw = user, psw
        self.dbpsw = dbpsw

In [None]:
# DB parameters
# Every value can be overridden through an environment variable, so real
# credentials never have to be written into the notebook. The fallbacks are
# the values the notebook has always used, so nothing changes when the
# variables are not set.
user = os.environ.get("SPOTIFY_NEO4J_USER", "neo4j")
psw = os.environ.get("SPOTIFY_NEO4J_PSW", "neo4j")
dbname = os.environ.get("SPOTIFY_NEO4J_DBNAME", "SpotifyDB")
dbpsw = os.environ.get("SPOTIFY_NEO4J_DBPSW", "SpotifyDB")
uri = os.environ.get("SPOTIFY_NEO4J_URI", "bolt://localhost:7687")

params = Neo4jParams(user, psw, dbname, dbpsw, uri)

In [None]:
def getDriver():
    """Create a Neo4j driver from the notebook-level ``params`` object.

    The credentials sent are ``params.user`` together with ``params.dbpsw``;
    ``params.psw`` is not used here.
    """
    credentials = (params.user, params.dbpsw)
    driver = GraphDatabase.driver(params.uri, auth=credentials)
    return driver

In [None]:
def executeQuery(query, **kwargs):
    """Run a Cypher query and return its records as a pandas DataFrame.

    Parameters
    ----------
    query : str
        Cypher statement to execute.
    **kwargs
        Query parameters, referenced in the statement as ``$name``.

    Returns
    -------
    pandas.DataFrame
        One row per record; the columns are the keys returned by the query.

    Notes
    -----
    A new driver and session are opened for every call. Both are used as
    context managers so they are released even when the query fails.
    """
    with getDriver() as driver:
        with driver.session() as session:
            result = session.run(query, kwargs)

            # The result must be consumed while the session is still open.
            resultDF = pd.DataFrame(result.values(), columns=result.keys())

    return resultDF

## Plot utilities

In [None]:
# --- Text and layout settings read by plotResults -------------------------
# Title: `pad` and `fontsize` passed to plt.title
TITLE_PAD = 30
TITLE_DIM = 30
# Axis labels: `labelpad` and `fontsize` passed to plt.xlabel / plt.ylabel
XY_LABEL_PAD = 23
XY_LABEL_DIM = 21
# Font size of the tick labels on both axes
XY_TICKS_DIM = 18
# Opacity of the grid lines
GRID_ALPHA = 0.2
# Opacity of filled areas (line/polar fill and filled boxplots)
FILL_ALPHA = 0.05
# Total width shared by the bars drawn at one x position
BAR_WIDTH = 0.95
# Default legend location and legend font size
LEGEND_LOC = "upper left"
LEGEND_LABEL_DIM = 15
# Colors assigned to the series by index (first series gets the first color)
COLOR_SCHEME = ["#1DB954", "#22577A", "#A03C78", "#C67ACE"]

# --- Figure and export settings --------------------------------------------
# Default figure size passed to plt.figure
FIG_SIZE = (18, 8)
# Face color for the figure and for saved files; "w" appears to be an alternative value
FIG_BG_COLOR = None # "w"
# Options passed to plt.savefig
FIG_DPI = 500
FIG_BBOX = "tight"
FIG_PAD = .3
# One file per format is written, each in its own sub-directory of "plots"
FIG_FORMATS = ["png", "pdf"]

In [None]:
def plotResults(title=None, x=[], yArr=[],
                xLabel=None, xTicks=None, xTicksPosition=None, xTicksRotation=0,
                yLim=None, yLabel=None, yTicks=None, yTicksPosition=None, yTicksRotation=0,
                legend=None, legendLocation=LEGEND_LOC,
                style="line", showGrid=False, gridAxis="both", fillPlot=False,
                figSize=FIG_SIZE, saveTitle=None):
    """Draw one or more series on a new figure, optionally save it, then show it.

    Parameters
    ----------
    title : str, optional
        Figure title. It is applied after the figure has been saved, so the
        exported files do not contain it.
    x : sequence
        Shared x coordinates for the "line" and "bar" styles. It is replaced
        by evenly spaced angles for "polar" and not used by "boxplot".
    yArr : sequence of sequences
        One inner sequence per series; every value is converted with float().
        Empty series are skipped.
    xLabel, yLabel : str, optional
        Axis labels.
    xTicks : sequence, optional
        Tick labels of the x axis. For "polar" the first label is repeated at
        the end (on a copy; the caller's sequence is left untouched).
    xTicksPosition : sequence, optional
        Tick positions of the x axis. Defaults to ``x`` or, for "boxplot", to
        0..len(yArr)-1.
    xTicksRotation, yTicksRotation : float
        Rotation of the tick labels.
    yLim : (bottom, top), optional
        Explicit y limits. When omitted they are derived from the data.
    yTicks, yTicksPosition : sequence, optional
        Tick labels / positions of the y axis; labels are only used when
        positions are given as well.
    legend : sequence, optional
        Label of each series, by index. Series without an entry are unlabeled.
    legendLocation
        ``loc`` argument passed to plt.legend.
    style : {"line", "bar", "polar", "boxplot"}
        Kind of plot. Any other value draws no data.
    showGrid : bool
        Whether to draw the grid, along ``gridAxis``.
    fillPlot : bool
        Fill the area of line/polar plots, or the boxes of boxplots.
    figSize : (width, height)
        Figure size.
    saveTitle : str, optional
        When given, the figure is written as ``plots/<format>/<saveTitle>.<format>``
        for every format in FIG_FORMATS.

    Returns
    -------
    None
    """
    # NOTE: the list defaults of `x` and `yArr` are never mutated in this
    # function (the polar branch rebinds `x`), so sharing them is harmless.
    plt.figure(figsize=figSize, facecolor=FIG_BG_COLOR)

    maxY = None
    minY = None

    numPlots = len(yArr)
    for i, y in enumerate(yArr):
        # A missing legend, or one shorter than yArr, leaves the series unlabeled.
        try:
            label = legend[i]
        except (TypeError, IndexError, KeyError):
            label = None

        # Cycle through the palette so extra series do not raise IndexError.
        color = COLOR_SCHEME[i % len(COLOR_SCHEME)]

        yNum = [float(yVal) for yVal in y]
        if not yNum:
            # Nothing to draw and nothing to contribute to the y limits.
            continue

        seriesMax = np.max(yNum)
        seriesMin = np.min(yNum)
        maxY = seriesMax if maxY is None else np.max([seriesMax, maxY])
        minY = seriesMin if minY is None else np.min([seriesMin, minY])

        if style == "line":
            plt.plot(x, yNum, label=label, color=color)

            if fillPlot:
                plt.fill_between(x, yNum, alpha=FILL_ALPHA, facecolor=color)

        elif style == "bar":
            # The bars of all series at one x position share BAR_WIDTH and are
            # laid out side by side, centered on that position.
            barWidth = BAR_WIDTH / numPlots
            xOffset = -BAR_WIDTH / 2 + barWidth / 2 + barWidth * i
            plt.bar(np.asarray(x) + xOffset, yNum, width=barWidth,
                    label=label, align="center", color=color)

        elif style == "polar":
            # One angle per value plus a closing point equal to the first one,
            # so the outline returns to its start.
            x = np.linspace(0, 2 * np.pi, len(yNum) + 1, endpoint=True)
            yNum.append(yNum[0])
            plt.polar(x, yNum, label=label, color=color)

            if fillPlot:
                plt.fill(x, yNum, alpha=FILL_ALPHA, facecolor=color)

        elif style == "boxplot":
            lineWidth = 1.5
            boxWidths = [1 / numPlots]

            if fillPlot:
                # A first, outline-free pass that only paints the translucent box.
                plt.boxplot(yNum,
                            positions=[i],
                            notch=True,
                            widths=boxWidths,
                            patch_artist=True,
                            showcaps=False,
                            showfliers=False,
                            showmeans=False,
                            boxprops=dict(
                                facecolor=color,
                                alpha=FILL_ALPHA,
                            ),
                            )

            plt.boxplot(yNum,
                        positions=[i],
                        notch=True,
                        widths=boxWidths,
                        boxprops=dict(color=color, linewidth=lineWidth),
                        capprops=dict(color=color, linewidth=lineWidth),
                        whiskerprops=dict(color=color, linewidth=lineWidth),
                        flierprops=dict(
                            color=color,
                            markeredgecolor=color,
                            markerfacecolor=color,
                        ),
                        medianprops=dict(color=color, linewidth=lineWidth),
                        )

    if xLabel is not None:
        plt.xlabel(xLabel, labelpad=XY_LABEL_PAD, fontsize=XY_LABEL_DIM)

    if yLabel is not None:
        plt.ylabel(yLabel, labelpad=XY_LABEL_PAD, fontsize=XY_LABEL_DIM)

    if xTicks is not None:
        # An explicit emptiness test avoids the ambiguous truth value of
        # NumPy arrays.
        if xTicksPosition is None or len(xTicksPosition) == 0:
            xTicksPosition = x if style != "boxplot" else np.arange(
                0, numPlots)

            if style == "polar":
                # Match the closing point added to the data, without touching
                # the list owned by the caller.
                xTicks = list(xTicks) + [xTicks[0]]

        plt.xticks(xTicksPosition, xTicks, fontsize=XY_TICKS_DIM,
                   rotation=xTicksRotation)
    else:
        plt.xticks(fontsize=XY_TICKS_DIM, rotation=xTicksRotation)

    if yTicks is not None and yTicksPosition is not None:
        plt.yticks(yTicksPosition, yTicks, fontsize=XY_TICKS_DIM,
                   rotation=yTicksRotation)
    elif yTicksPosition is not None:
        plt.yticks(yTicksPosition, fontsize=XY_TICKS_DIM,
                   rotation=yTicksRotation)
    else:
        plt.yticks(fontsize=XY_TICKS_DIM, rotation=yTicksRotation)

    if yLim is not None:
        plt.ylim(top=yLim[1], bottom=yLim[0])
    elif maxY is not None:
        # Automatic limits: 10% of the data range below and 20% above.
        yPad = (maxY - minY) * 0.1
        if yPad == 0:
            # All values are equal: pad relative to the value itself so the
            # two limits do not collapse onto each other.
            yPad = abs(maxY) * 0.1 or 1.0

        topLim = maxY + yPad * 2
        infLim = minY - yPad
        if style == "bar" and 0 <= minY <= yPad:
            # Bars starting close to zero are anchored to zero.
            infLim = 0
        if style == "polar":
            infLim = minY
        plt.ylim(top=topLim, bottom=infLim)

    if showGrid:
        plt.grid(alpha=GRID_ALPHA, axis=gridAxis)

    if legend is not None:
        plt.legend(loc=legendLocation, fontsize=LEGEND_LABEL_DIM)

    if saveTitle is not None:
        baseDir = "plots"

        for figFormat in FIG_FORMATS:
            saveDir = os.path.join(baseDir, figFormat)
            # Creates "plots" as well when it does not exist yet.
            os.makedirs(saveDir, exist_ok=True)

            plt.savefig(os.path.join(saveDir, saveTitle + "." + figFormat),
                        facecolor=FIG_BG_COLOR,
                        dpi=FIG_DPI,
                        bbox_inches=FIG_BBOX,
                        pad_inches=FIG_PAD)

    # The title is added only after saving, so exported files carry no title.
    # NOTE(review): presumably intentional (title supplied elsewhere) - confirm.
    if title is not None:
        plt.title(title, fontsize=TITLE_DIM, pad=TITLE_PAD)

    plt.show()

### Example Queries

##### Q1: Considering only record labels with more than 20 artists, show the top 10 record labels according to the average position of the tracks released by artists of that specific record label.

In [None]:
# Q1: Considering only record labels with more than 20 artists, show the top 10 record labels according to the average position of the tracks released by artists of that specific record label

# The first MATCH/WITH keeps only the labels with more than 20 distinct artists;
# the second MATCH averages the chart positions of their artists' tracks.
# Results are sorted ascending, so the lowest average position comes first.
executeQuery("""
    MATCH (r:RecordLabel)<-[:hasContractWith]-(a:Artist)
    WITH r,COUNT(DISTINCT a) AS numArtists
    WHERE numArtists > 20
    MATCH (r:RecordLabel)<-[:hasContractWith]-(a:Artist)-[:partecipateIn]->(t:Track)-[pos:isPositionedIn]->(ch:Chart)
    RETURN r.name as RecordLabel, AVG(pos.position) AS AvgPosition
    ORDER BY AvgPosition
    LIMIT 10
""")

###### Q1.2

In [None]:
# Q1.2: Variant of Q1 without the 20-artist filter. Each record label gets a
# QualityValue, defined as its average chart position divided by the number of
# distinct tracks of its artists; the 10 lowest values are returned.
executeQuery("""
    MATCH (l:RecordLabel)<-[:hasContractWith]-(a:Artist)-[:partecipateIn]->(t:Track)-[p:isPositionedIn]->(c:Chart)
    WITH l, avg(p.position) AS averagePosition
    MATCH (l)<-[:hasContractWith]-(a:Artist)-[:partecipateIn]->(t:Track)
    WITH l, averagePosition, count(distinct t) AS numTracks
    WITH l.name AS RecordLabel, averagePosition / numTracks AS QualityValue
    RETURN RecordLabel, QualityValue
    ORDER BY QualityValue asc
    LIMIT 10
""")

##### Q2: Show the most common played instrument in rock groups

In [None]:
# Q2: Show the most common played instrument in rock groups

# Counts the distinct persons playing each instrument among the members of
# artists linked to the genre with id "rock". LIMIT 3 returns the three most
# played instruments, not only the first one.
executeQuery("""
    MATCH (g:Genre{id:"rock"})<-[:hasGenre]-(a:Artist)<-[:isMemberOf]-(p:Person)-[:plays]->(i:Instrument)
    RETURN i.name as Instrument, COUNT(DISTINCT p) AS NumPlayers
    ORDER BY NumPlayers DESC
    LIMIT 3
""")

### Italian Tracks and Italian Artists

#### Q3: Show technical features of Italian Tracks in the TOP 100 Italy through the years 2017-2020

In [None]:
# Q3: Show technical features of Italian Tracks in the TOP 100 Italy through the years 2017-2020

# Averages every audio feature per chart year for tracks of artists with at
# least one Italian member, on charts referred to Italy.
# ORDER BY makes the row order deterministic: the plot of this result assigns
# colors and legend entries by row order.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    RETURN
        ch.date.year as year,
        AVG(toInteger(t.explicit)) as explicit,
        AVG(t.danceability) as danceability,
        AVG(t.energy) as energy,
        AVG(t.mode) as mode,
        AVG(t.speechiness) as speechiness,
        AVG(t.acousticness) as acousticness,
        AVG(t.liveness) as liveness,
        AVG(t.valence) as valence
    ORDER BY year
""")

In [None]:
resultDF

In [None]:
featuresToExtract = ["explicit", "danceability", "energy",
                     "mode", "speechiness", "acousticness", "liveness", "valence"]

# Radar chart: one closed outline per year, one spoke per feature.
plotResults(
    title="Technical features",
    yArr=resultDF[featuresToExtract].to_numpy().tolist(),
    # Pass a copy so the polar tick handling in plotResults can never alter
    # featuresToExtract, which is also used to select the columns above.
    xTicks=list(featuresToExtract),
    legend=list(resultDF["year"]),
    yLim=[0, 0.75],
    style="polar",
    showGrid=True,
    fillPlot=True,
    legendLocation=[-0.1, 0.2],
    figSize=(10, 10),
)


#### Q4: Show technical features of tracks in the first 5 positions and in the last 5 positions of the TOP 100 Italy.

In [None]:
# Q4: Show technical features of tracks in the first 5 positions and in the last 5 positions of the TOP 100 Italy.

# Two aggregations joined with UNION: one row for positions 1-5 and one row
# for positions 96-100 of the charts referred to Italy.
# The lower bound is 96 because ">= 95" would select six positions (95-100).
resultDF = executeQuery("""
    MATCH (t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WHERE r.position <= 5
    RETURN
        "Top 5" AS type,
        AVG(toInteger(t.explicit)) as explicit,
        AVG(t.danceability) as danceability,
        AVG(t.energy) as energy,
        AVG(t.mode) as mode,
        AVG(t.speechiness) as speechiness,
        AVG(t.acousticness) as acousticness,
        AVG(t.liveness) as liveness,
        AVG(t.valence) as valence

    UNION

    MATCH (t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WHERE r.position >= 96
    RETURN
        "Last 5" AS type,
        AVG(toInteger(t.explicit)) as explicit,
        AVG(t.danceability) as danceability,
        AVG(t.energy) as energy,
        AVG(t.mode) as mode,
        AVG(t.speechiness) as speechiness,
        AVG(t.acousticness) as acousticness,
        AVG(t.liveness) as liveness,
        AVG(t.valence) as valence
""")

In [None]:
resultDF

In [None]:
featuresToExtract = ["explicit", "danceability", "energy",
                     "mode", "speechiness", "acousticness", "liveness", "valence"]

# Radar chart comparing the "Top 5" and "Last 5" rows, one spoke per feature.
plotResults(
    title="Technical features",
    yArr=resultDF[featuresToExtract].to_numpy().tolist(),
    # Pass a copy so the polar tick handling in plotResults can never alter
    # featuresToExtract, which is also used to select the columns above.
    xTicks=list(featuresToExtract),
    legend=list(resultDF["type"]),
    yLim=[0, 0.75],
    style="polar",
    showGrid=True,
    fillPlot=True,
    legendLocation=[-0.1, 0.2],
    figSize=(10, 10),
)

#### Q5: On average, how many tracks from Italian artists are present in the Top 100 Italy for each year? (bar chart)

In [None]:
# Q5: On average how many tracks from Italian artists are present in Top 100 Italy for each year?

# Step 1 (WITH): count the distinct Italian tracks of every single chart.
# Step 2 (RETURN): average those per-chart counts over the charts of each year.
avgItalianTracksDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WITH ch, ch.date.year AS year, COUNT(DISTINCT t) as numTracks
    RETURN year, avg(numTracks) as avgNumTracks
    ORDER BY year
""")

In [None]:
avgItalianTracksDF


In [None]:
# Bar chart with one bar per year. Bars are placed at 0..n-1 and labeled with
# the years; the figure is also exported under plots/<format>/.
# NOTE(review): the export name "query3" does not follow the Q5 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(avgItalianTracksDF)),
    yArr=[list(avgItalianTracksDF["avgNumTracks"])],
    yLabel="Average number of tracks",
    xLabel="Year",
    xTicks=list(avgItalianTracksDF["year"]),
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query3"
)


As we can notice, in 2017 there were only 31 tracks on average in the TOP 100 Italy. We want to discover whether this is just a coincidence or whether there is a reason behind this result.

So, in the next query we check how many tracks were released by Italian artists in each year.

#### Q6: How many tracks were released by Italian artists every year from 2017 to 2020?

In [None]:
# Q6: How many tracks were released by Italian artists every year from 2017 to 2020?

# A track is attributed to the release year of the album it is part of, and
# distinct tracks are counted per year.
# NOTE(review): the filter has no upper bound, so any release year after 2020
# present in the graph would be returned as well.
tracksReleasedDF = executeQuery("""
    MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPartOf]->(alb:Album)
    WHERE alb.releaseDate.year >= 2017
    WITH alb.releaseDate.year AS year, COUNT(DISTINCT t) as numTracks
    RETURN year, numTracks
    ORDER BY year
""")

In [None]:
tracksReleasedDF


In [None]:
# Bar chart with one bar per release year; also exported under plots/<format>/.
# NOTE(review): the export name "query4" does not follow the Q6 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(tracksReleasedDF)),
    yArr=[list(tracksReleasedDF["numTracks"])],
    yLabel="Number of tracks",
    xLabel="Year",
    xTicks=list(tracksReleasedDF["year"]),
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query4"
)


# Released tracks (left axis, blue) next to the average number of tracks in
# the TOP 100 Italy (right axis, green), year by year.
width = 0.1

# Align the two results on the year, so each pair of bars describes the same
# year even if the two queries return a different set of years.
comparisonDF = tracksReleasedDF.merge(avgItalianTracksDF, on="year")
x = np.arange(len(comparisonDF))

fig, releasedAx = plt.subplots()
releasedAx.bar(x, list(comparisonDF["numTracks"]), color='Blue', width=width)
releasedAx.set_xlabel("Year")
releasedAx.set_ylabel("Number of tracks released", color='Blue')
# Center each year label between its two bars.
releasedAx.set_xticks(x + width)
releasedAx.set_xticklabels(list(comparisonDF["year"]))

# The two measures have different scales, hence the second y axis.
chartedAx = releasedAx.twinx()
chartedAx.bar(x + width * 2,
              list(comparisonDF["avgNumTracks"]), color='Green', width=width)
chartedAx.set_ylabel("Average number of tracks in TOP 100 Italy", color='Green')
# show plot
plt.show()


Since in 2017 Italian artists released only 163 tracks (almost half as many as in 2018, 2019 and 2020), it is understandable why in 2017 there were only 31 tracks on average in the TOP 100 Italy.

#### Q7: How many different Italian artists entered the Top 100 Italy at least once in each year? (bar chart)

In [None]:
# Q7: How many different Italian artists entered the Top 100 Italy at least once in each year?

# Counts the distinct Artist nodes (with at least one Italian member) that
# have a track on a chart referred to Italy, grouped by chart year.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{ id:"IT" })
    RETURN ch.date.year AS year, COUNT(DISTINCT a) as numArtists
    ORDER BY year
""")

In [None]:
resultDF

In [None]:
# Bar chart with one bar per year; also exported under plots/<format>/.
# NOTE(review): the export name "query5" does not follow the Q7 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(resultDF)),
    yArr=[list(resultDF["numArtists"])],
    yLabel="Number of artists",
    xLabel="Year",
    xTicks=list(resultDF["year"]),
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query5"
)

#### Q8: On average, how many tracks from Italian artists are present in the Top 100 Italy through the different months of the year? (line chart)

In [None]:
# Q8: On average how many tracks from Italian artists are present in Top 100 Italy through the different months of the year?

# Same two-step aggregation as the per-year query: distinct Italian tracks are
# counted per chart, then averaged per calendar month. The year is not part of
# the grouping, so charts of the same month in different years are pooled.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WITH ch,ch.date.month AS month, COUNT(DISTINCT t) as numTracks
    RETURN month, avg(numTracks) as avgNumTracks
    ORDER BY month
""")

In [None]:
resultDF

In [None]:
# Filled line chart over the months (tick labels are the month numbers
# returned by the query); also exported under plots/<format>/.
# NOTE(review): the export name "query6" does not follow the Q8 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(resultDF)),
    yArr=[list(resultDF["avgNumTracks"])],
    yLabel="Average number of tracks",
    xLabel="Months",
    xTicks=list(resultDF["month"]),
    showGrid=True,
    gridAxis="y",
    style="line",
    fillPlot=True,
    saveTitle="query6"
)

#### Q9: How many albums from Italian artists are released through the different months of the year? (line chart)

In [None]:
# Q9: How many albums from Italian artists are released through the different months of the year?

# Counts the distinct albums per release month. The year is not part of the
# grouping and there is no year filter, so all release years are pooled.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(alb:Album)
    WITH alb.releaseDate.month AS month, COUNT(DISTINCT alb) as numAlbums
    RETURN month, numAlbums
    ORDER BY month
""")

In [None]:
resultDF

In [None]:
# Filled line chart over the months (tick labels are the month numbers
# returned by the query); also exported under plots/<format>/.
# NOTE(review): the export name "query7" does not follow the Q9 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(resultDF)),
    yArr=[list(resultDF["numAlbums"])],
    yLabel="Number of albums",
    xLabel="Months",
    xTicks=list(resultDF["month"]),
    showGrid=True,
    gridAxis="y",
    style="line",
    fillPlot=True,
    saveTitle="query7"
)

#### Q9: Who is the Italian artist with the highest number of tracks present in the Top 100 Italy for each year? (artist names)

In [None]:
# Q9: Who is the Italian artist with the highest number of tracks present in Top 100 Italy for each year?

# Distinct tracks are counted per (artist, year) and the rows are sorted by
# that count before being collected per year, so element 0 of each collected
# list is the artist with most tracks in that year. This relies on COLLECT
# keeping the order of the incoming rows.
topArtistsDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WITH a, ch.date.year AS year, COUNT(DISTINCT t) as numTracks
    ORDER BY numTracks DESC
    WITH year, COLLECT(a) AS artists, COLLECT(numTracks) as orderedNumTracks
    RETURN DISTINCT year, artists[0].id as artist, artists[0].name as artistName , orderedNumTracks[0] as numTracks
    ORDER BY year
""")

In [None]:
topArtistsDF

#### Q10: Show the distribution of the positions in the TOP 100 Italy reached by the just retrieved artist

In [None]:
# Q10: Show the distribution of the positions in the TOP 100 Italy reached by the just retrieved artist

# $datas holds one {artist, year} pair per row of topArtistsDF. For each pair
# the query collects the positions reached on Italian charts dated within that
# year. Rows are grouped by artist name, so an artist who leads several years
# gets a single list covering all of those years.
positionsDF = executeQuery("""
    UNWIND $datas as data
    MATCH (a:Artist { id: data.artist })-[:partecipateIn]->(t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WHERE ch.date >= date({year: data.year, month: 1, day: 1}) AND ch.date <= date({year: data.year, month: 12, day: 31})
    RETURN a.name as artist, COLLECT(r.position) AS positions
""", datas=[{"artist": x["artist"], "year": x["year"]} for _, x in topArtistsDF.iterrows()])

In [None]:
positionsDF

In [None]:
# One notched box per artist, placed at 0..n-1 and labeled with the artist
# name. No x is passed: the boxplot style positions the boxes by series index.
plotResults(
    yArr=list(positionsDF["positions"]),
    style="boxplot",
    fillPlot=True,
    figSize=(15, 8),
    yLabel="Position",
    showGrid=True,
    xTicks=list(positionsDF["artist"])
)

####  Q11: Show the top 3 Italian artists with the highest number of tracks present in Top 100 Italy at the same time.

In [None]:
# Q11: Show the top 3 Italian artists with the highest number of tracks present in Top 100 Italy at the same time.

# Distinct tracks are counted per (artist, chart) and sorted before being
# collected per artist, so element 0 of each list is the chart where the artist
# had most tracks at once (this relies on COLLECT keeping the row order).
# The final ORDER BY is required: without it the order of the aggregated rows
# is not guaranteed and LIMIT 3 could return any three artists.
topArtitstDF = executeQuery("""
    MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
    WITH a,ch, COUNT(DISTINCT t) as numTracks
    ORDER BY numTracks DESC
    WITH a,COLLECT(ch) AS charts, COLLECT(numTracks) as orderedNumTracks
    RETURN DISTINCT a.id as artist, a.name as artistName, charts[0].date as date, charts[0].name as chart, orderedNumTracks[0] as numTracks
    ORDER BY numTracks DESC
    LIMIT 3
""")

topArtitstDF

The numbers of tracks present simultaneously in the TOP 100 Italy are quite high (24,19 and 18). 

We want to check whether these artists released an album in the same month as (or in the month before) the chart in which they have the highest number of tracks.

In [None]:
# For every {artist, date} pair of topArtitstDF, list the albums (albumType
# "album") released in the month of the chart or in the month before it.
# The window is built with date.truncate and durations instead of literal
# month/day numbers: "month - 1" is not a valid month for January charts and
# "day: 30" is not a valid date in February and leaves out the 31st.
executeQuery("""
    UNWIND $artistsAndAlbums as data
    WITH data, date.truncate('month', data.date) AS chartMonth
    MATCH (a:Artist { id: data.artist })-[:partecipateIn]->(alb:Album { albumType:"album" })
    WHERE alb.releaseDate >= chartMonth - duration({ months: 1 })
        AND alb.releaseDate < chartMonth + duration({ months: 1 })
    RETURN a.name as artist, alb.name as albumName, alb.releaseDate as releaseDate
""", artistsAndAlbums=[{"artist": x["artist"], "date": x["date"]} for _, x in topArtitstDF.iterrows()])


So, these Italian artists managed to have a lot of tracks in the TOP 100 Italy at the same time because they released albums whose tracks probably all entered the TOP 100 Italy.

#### Q12: Show the youngest artist who entered in the first 10 positions of Top 100 Italy for each year.

In [None]:
# Q12: Show the youngest artist who entered in the first 10 positions of Top 100 Italy for each year.

# Rows are sorted by the member's birth date, most recent first, before being
# collected per chart year, so element 0 of each list belongs to the youngest
# person (this relies on COLLECT keeping the row order). Persons without a
# birth date are excluded. The birth date is the one of the Person node, i.e.
# of a single member when the artist is a group.
executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country { id:"IT" })
    WHERE r.position <=10 AND p.birthDate IS NOT NULL
    WITH ch,a,p
    ORDER BY p.birthDate DESC
    WITH ch.date.year AS year, COLLECT(a.name) AS artistsNames, COLLECT(p.birthDate) AS artistsBirthDates
    RETURN year, artistsNames[0] as artist, artistsBirthDates[0] as birthDate
""")

Of course in the case of ***Maneskin*** the birthDate is referred to the youngest member of the group.

### Italian tracks abroad

#### Q13: How many tracks from Italian artists are present in the Top 100 of a different country for each year? (bar chart)

In [None]:
# Q13: How many tracks from Italian artists are present in the Top 100 of a different country for each year?

# Same pattern as the Italian-chart queries, but the chart must be referred to
# a country other than Italy. Distinct tracks are counted per chart year over
# all foreign countries together.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
    WHERE c2.id <> "IT"
    WITH ch.date.year AS year, COUNT(DISTINCT t) AS numItalianTracks
    RETURN year,numItalianTracks
    ORDER BY year
""")

In [None]:
resultDF

In [None]:
# Bar chart with one bar per year; also exported under plots/<format>/.
# NOTE(review): the export name "query11" does not follow the Q13 numbering of
# this section - confirm it is the intended file name.
plotResults(
    x=np.arange(len(resultDF)),
    yArr=[list(resultDF["numItalianTracks"])],
    yLabel="Number of Tracks abroad",
    xLabel="Year",
    xTicks=list(resultDF["year"]),
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query11"
)

#### Q14: Show the countries that listen the most to Italian tracks. (show Names and numbers)

In [None]:
# Q14: Show the countries that listen the most to Italian tracks.

# Counts the distinct Italian tracks charted in each foreign country, highest
# count first. There is no LIMIT: every country with at least one such track
# is returned, which is what the map drawn from this result needs.
resultDF = executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
    WHERE c2.id <> "IT"
    RETURN c2.id as countryCode, c2.name as countryName, COUNT(DISTINCT t) AS numItalianTracks
    ORDER BY numItalianTracks DESC
""")

resultDF

In [None]:
# plotly is imported in this cell rather than with the other imports at the
# top of the notebook; it is only needed for the map below.
import plotly.express as px

# World map colored by the number of Italian tracks charted in each country.
# Countries are matched by name (locationmode 'country names'), so a
# countryName that plotly does not recognise is left uncolored.
fig = px.choropleth(resultDF,
                    locations="countryName",
                    locationmode='country names',
                    color="numItalianTracks",
                    hover_name="countryName",
                    color_continuous_scale='tealgrn')

fig.show()

#### Q15: Who is the artist present in the highest number of TOP 100 of different countries? Show also the countries in which he/she is present.

In [None]:
# Q15: Who is the artist present in the highest number of TOP 100 of different countries? Show also the countries in which he/she is present.

# Rows are grouped by artist name. numCountries counts the distinct foreign
# countries reached; the countries themselves are shown through the names of
# the charts (ch.name), not through the Country nodes.
executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
    WHERE c2.id <> "IT"
    RETURN a.name as artist, COUNT(DISTINCT c2) AS numCountries, COLLECT(DISTINCT ch.name) AS charts
    ORDER BY numCountries DESC
    LIMIT 1
""")

#### Q15: Who is the artist with the highest number of tracks present outside Italy? Show also the names of the tracks.

In [None]:
# Q15: Who is the artist with the highest number of tracks present outside Italy? Show also the names of the tracks.

# Rows are grouped by artist name. numTracks counts the distinct Track nodes
# charted in any country other than Italy; trackNames lists their distinct
# names.
executeQuery("""
    MATCH (c1:Country { id:"IT" })<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
    WHERE c2.id <> "IT"
    RETURN a.name as artist, COUNT(DISTINCT t) AS numTracks, COLLECT(DISTINCT t.name) as trackNames
    ORDER BY numTracks DESC
    LIMIT 1
""")

We have shown that actually there are two different criteria to determine the most "widespread" Italian artist abroad:

**1)** Count how many different countries the artist has reached excluding Italy. In this case, the best is ***Gigi D'Agostino***


**2)** Count the total number of tracks present in countries outside Italy. In this case, the best is ***Sfera Ebbasta***