## Spotify Query

In this notebook we run some queries on our Spotify property graph. In particular, the queries are divided into three parts:
1. ***Example queries***: we perform two example queries that show how it is possible to use the newly added information, such as *record label* and *instruments*.
1. ***Italian tracks and Italian artists from 2017 to 2020:*** we perform some queries about the Italian tracks and artists present in the TOP 100 Italy.
1. ***Italian tracks abroad:*** we want to discover whether Italian tracks are also listened to outside Italy.

In [None]:
# Import required libraries
import pandas as pd
import calendar
import numpy as np
import matplotlib.pyplot as plt
import os

### Connection to Neo4j

In [None]:
# Neo4J params class
class Neo4jParams:
    """Plain container for the Neo4j connection settings used by the notebook.

    Attributes:
        user: Neo4j user name.
        psw: password stored alongside the user name.
        dbname: name of the database.
        dbpsw: password of the database.
        uri: connection URI passed to the driver.
    """

    def __init__(self, user, psw, dbname, db_psw, uri):
        self.user = user
        self.psw = psw
        self.dbname = dbname
        # Store the constructor argument; the previous code read an undefined
        # (or accidentally global) name `dbpsw` instead of `db_psw`.
        self.dbpsw = db_psw
        self.uri = uri

In [None]:
# DB parameters.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be written into the notebook; the defaults are
# the values the notebook has always used.
user = os.environ.get("SPOTIFY_NEO4J_USER", "neo4j")
psw = os.environ.get("SPOTIFY_NEO4J_PSW", "neo4j")
dbname = os.environ.get("SPOTIFY_NEO4J_DBNAME", "SpotifyDB")
dbpsw = os.environ.get("SPOTIFY_NEO4J_DBPSW", "SpotifyDB")
uri = os.environ.get("SPOTIFY_NEO4J_URI", "bolt://localhost:7687")

# The query cells below authenticate with params.user / params.dbpsw.
params = Neo4jParams(user, psw, dbname, dbpsw, uri)

In [None]:
from neo4j import GraphDatabase

# Test class
class Driver:
    """Thin wrapper around a Neo4j driver, used to check that the connection works."""

    def __init__(self, uri, user, password):
        """Open a driver on `uri`, authenticating with `user` / `password`."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def print_greeting(self, message):
        """Create a Greeting node carrying `message` and print the text returned by the query."""
        with self.driver.session() as session:
            reply = session.write_transaction(
                self._create_and_return_greeting, message
            )
            print(reply)

    @staticmethod
    def _create_and_return_greeting(tx, message):
        """Run the greeting query in transaction `tx` and return the first column of its single record."""
        query = (
            "CREATE (a:Greeting) "
            "SET a.message = $message "
            "RETURN a.message + ', from node ' + id(a)"
        )
        record = tx.run(query, message=message).single()
        return record[0]


if __name__ == "__main__":
    # Connection smoke test. NOTE: print_greeting runs a CREATE, so every
    # execution of this cell adds a Greeting node to the database.
    greeter = Driver(params.uri, params.user, params.dbpsw)
    try:
        greeter.print_greeting("hello, world")
    finally:
        # Close the driver even when the greeting query fails
        greeter.close()

## Plot utilities

In [None]:
# --- Text and styling used by plotResults ---
# Padding and font size of the figure title
TITLE_PAD = 30
TITLE_DIM = 30
# Padding and font size of the x/y axis labels
XY_LABEL_PAD = 23
XY_LABEL_DIM = 21
# Font size of the x/y tick labels
XY_TICKS_DIM = 18
# Transparency of the grid lines
GRID_ALPHA = 0.2
# Transparency of filled areas (line/polar fills and boxplot boxes)
FILL_ALPHA = 0.05
# Total width shared by the bars drawn at one x position
BAR_WIDTH = 0.95
# Default legend position and legend font size
LEGEND_LOC = "upper left"
LEGEND_LABEL_DIM = 15
# Colours of the plotted series, picked by series index
COLOR_SCHEME = ["#1DB954", "#22577A", "#A03C78", "#C67ACE"]

# --- Figure creation and export ---
# Default figure size passed to plt.figure
FIG_SIZE = (18, 8)
# Background colour used both on screen and in the saved files
FIG_BG_COLOR = None # "w"
# Options forwarded to plt.savefig (dpi, bbox_inches, pad_inches)
FIG_DPI = 500
FIG_BBOX = "tight"
FIG_PAD = .3
# One file per format is written, each in its own sub-folder of "plots"
FIG_FORMATS = ["png", "pdf"]


In [None]:
def plotResults(title=None, x=None, yArr=None,
                xLabel=None, xTicks=None, xTicksPosition=None, xTicksRotation=0,
                yLim=None, yLabel=None, yTicks=None, yTicksPosition=None, yTicksRotation=0,
                legend=None, legendLocation=LEGEND_LOC,
                style="line", showGrid=False, gridAxis="both", fillPlot=False,
                figSize=FIG_SIZE, saveTitle=None):
    """Draw one or more data series in a new figure, optionally save it, and show it.

    Args:
        title: figure title, or None for no title.
        x: x coordinates shared by all the series (ignored by the "polar" and
            "boxplot" styles, which compute their own positions).
        yArr: list of series; every value is converted to float.
        xLabel, yLabel: axis labels, or None.
        xTicks: labels of the x ticks, or None to keep the default ticks.
        xTicksPosition: positions of the x ticks; when missing or empty they
            are derived from `x` (or from the series index for boxplots).
        xTicksRotation, yTicksRotation: rotation of the tick labels.
        yLim: (bottom, top) limits; when None they are derived from the data.
        yTicks, yTicksPosition: labels and positions of the y ticks; the
            labels are used only when the positions are given too.
        legend: sequence with one label per series, or None for no legend.
        legendLocation: position of the legend.
        style: "line", "bar", "polar" or "boxplot".
        showGrid, gridAxis: whether to draw the grid, and on which axis.
        fillPlot: fill the area of line/polar plots or the boxes of boxplots.
        figSize: size of the figure.
        saveTitle: when given, the figure is saved as
            plots/<format>/<saveTitle>.<format> for every entry of FIG_FORMATS.
    """
    x = [] if x is None else x
    yArr = [] if yArr is None else yArr

    plt.figure(figsize=figSize, facecolor=FIG_BG_COLOR)

    maxY = None
    minY = None

    numPlots = len(yArr)
    for i, y in enumerate(yArr):
        # A missing or too short legend simply leaves the series unlabelled
        try:
            label = legend[i]
        except (TypeError, IndexError, KeyError):
            label = None

        # Cycle through the palette when there are more series than colours
        color = COLOR_SCHEME[i % len(COLOR_SCHEME)]

        yNum = [float(yVal) for yVal in y]
        if yNum:
            # Track the overall extent of the data for the automatic y limits
            maxY = np.max(yNum) if maxY is None else np.max([np.max(yNum), maxY])
            minY = np.min(yNum) if minY is None else np.min([np.min(yNum), minY])

        if style == "line":
            plt.plot(x, yNum, label=label, color=color)

            if fillPlot:
                plt.fill_between(x, yNum, alpha=FILL_ALPHA, facecolor=color)

        elif style == "bar":
            # Split BAR_WIDTH among the series and centre the group on each x
            barWidth = BAR_WIDTH / numPlots
            xOffset = -BAR_WIDTH / 2 + barWidth / 2 + barWidth * i
            plt.bar(np.asarray(x) + xOffset, yNum, width=barWidth,
                    label=label, align="center", color=color)

        elif style == "polar":
            # Spread the values over the full circle and repeat the first one
            # at the end so that the outline is closed
            x = np.linspace(0, 2 * np.pi, len(yNum) + 1, endpoint=True)
            yNum.append(yNum[0])
            plt.polar(x, yNum, label=label, color=color)

            if fillPlot:
                plt.fill(x, yNum, alpha=FILL_ALPHA, facecolor=color)

        elif style == "boxplot":
            lineWidth = 1.5
            boxWidths = [1 / numPlots]

            if fillPlot:
                # First pass: only the translucent filled box
                plt.boxplot(yNum,
                            positions=[i],
                            notch=True,
                            widths=boxWidths,
                            patch_artist=True,
                            showcaps=False,
                            showfliers=False,
                            showmeans=False,
                            boxprops=dict(facecolor=color, alpha=FILL_ALPHA),
                            )

            # Second pass: outline, caps, whiskers, fliers and median
            plt.boxplot(yNum,
                        positions=[i],
                        notch=True,
                        widths=boxWidths,
                        boxprops=dict(color=color, linewidth=lineWidth),
                        capprops=dict(color=color, linewidth=lineWidth),
                        whiskerprops=dict(color=color, linewidth=lineWidth),
                        flierprops=dict(
                            color=color,
                            markeredgecolor=color,
                            markerfacecolor=color,
                        ),
                        medianprops=dict(color=color, linewidth=lineWidth),
                        )

    if xLabel is not None:
        plt.xlabel(xLabel, labelpad=XY_LABEL_PAD, fontsize=XY_LABEL_DIM)

    if yLabel is not None:
        plt.ylabel(yLabel, labelpad=XY_LABEL_PAD, fontsize=XY_LABEL_DIM)

    if xTicks is not None:
        # Explicit None/empty test: `not array` is ambiguous for numpy arrays
        if xTicksPosition is None or len(xTicksPosition) == 0:
            xTicksPosition = x if style != "boxplot" else np.arange(
                0, numPlots)

            if style == "polar" and len(xTicks) > 0:
                # Repeat the first label for the closing point, working on a
                # copy so that the caller's list is left untouched
                xTicks = list(xTicks) + [xTicks[0]]

        plt.xticks(xTicksPosition, xTicks, fontsize=XY_TICKS_DIM,
                   rotation=xTicksRotation)
    else:
        plt.xticks(fontsize=XY_TICKS_DIM, rotation=xTicksRotation)

    if yTicks is not None and yTicksPosition is not None:
        plt.yticks(yTicksPosition, yTicks, fontsize=XY_TICKS_DIM,
                   rotation=yTicksRotation)
    elif yTicksPosition is not None:
        plt.yticks(yTicksPosition, fontsize=XY_TICKS_DIM,
                   rotation=yTicksRotation)
    else:
        plt.yticks(fontsize=XY_TICKS_DIM, rotation=yTicksRotation)

    if yLim is not None:
        plt.ylim(top=yLim[1], bottom=yLim[0])
    elif maxY is not None:
        # Automatic limits: 20% headroom above the data, 10% below it
        yPad = (maxY - minY) * 0.1
        topLim = maxY + yPad * 2
        if style == "polar":
            infLim = minY
        elif style == "bar" and 0 <= minY <= yPad:
            # Bars that start close to zero are anchored to the zero baseline
            infLim = 0
        else:
            infLim = minY - yPad
        plt.ylim(top=topLim, bottom=infLim)

    if showGrid:
        plt.grid(alpha=GRID_ALPHA, axis=gridAxis)

    if legend is not None:
        plt.legend(loc=legendLocation, fontsize=LEGEND_LABEL_DIM)

    if saveTitle is not None:
        for figFormat in FIG_FORMATS:
            saveDir = os.path.join("plots", figFormat)
            os.makedirs(saveDir, exist_ok=True)

            plt.savefig(os.path.join(saveDir, saveTitle + "." + figFormat),
                        facecolor=FIG_BG_COLOR,
                        dpi=FIG_DPI,
                        bbox_inches=FIG_BBOX,
                        pad_inches=FIG_PAD)

    # NOTE(review): the title is set after saving, so the exported files carry
    # no title - presumably intentional for the report; confirm.
    if title is not None:
        plt.title(title, fontsize=TITLE_DIM, pad=TITLE_PAD)

    plt.show()

### Example Queries

##### Q1: Show artists of the same discographic house 

##### Q2: Show the most common played instrument in rock groups

### Italian Tracks and Italian Artists

#### Q3: On average how many tracks from Italian artists are present in Top 100 Italy for each year? (Grafico a barre)

In [None]:
# Q3: On average how many tracks from italian artists are present in Top 100 Italy for each year?

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # For every chart referred to Italy, count the distinct tracks linked
        # to an artist having a member of Italian nationality, then average
        # those per-chart counts over the year of the chart.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WITH ch,ch.date.year AS year, COUNT(DISTINCT t) as numTracks
            RETURN year,avg(numTracks)
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Split the records into the lists used below and by the plot
years = [row[0] for row in rows]
# int() truncates the average to a whole number of tracks
avgNumItalianTracks = [int(row[1]) for row in rows]

# Print the results
for year, avgTracks in zip(years, avgNumItalianTracks):
    print("Year: {}".format(year))
    print("avgNumItalianTracks: {:d}".format(avgTracks))
    print("")


plotResults(
    x=np.arange(len(years)),
    yArr=[avgNumItalianTracks],
    yLabel="Average number of Italian Tracks",
    xLabel="Year",
    xTicks=years,
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query3"
)

As we can notice, in 2017 there were only 31 tracks on average in the TOP 100 Italy. We want to discover whether this is just by chance or whether there is a reason behind this result.

So, in the next query we will check how many tracks were released by Italian artists in each year.

#### Q4: How many tracks were released by Italian artists every year from 2017 to 2020?

In [None]:
# Q4: How many tracks were released from Italian artist every year from 2017 to 2020 ?

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # The release year is taken from the album the track is part of.
        # The upper bound keeps the result inside the 2017-2020 window stated
        # in the question (the query previously had only the lower bound).
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPartOf]->(alb:Album)
            WHERE alb.releaseDate.year >= 2017 AND alb.releaseDate.year <= 2020
            WITH alb.releaseDate.year AS year, COUNT(DISTINCT t) as numTracks
            RETURN year,numTracks
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Split the records into the lists used below and by the plot
years = [row[0] for row in rows]
numItalianTracksReleased = [int(row[1]) for row in rows]

# Print the results
for year, numReleased in zip(years, numItalianTracksReleased):
    print("Year: {}".format(year))
    print("numItalianTracksReleased: {:d}".format(numReleased))
    print("")


plotResults(
    x=np.arange(len(years)),
    yArr=[numItalianTracksReleased],
    yLabel="number of Italian tracks Released",
    xLabel="Year",
    xTicks=years,
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query4"
)

Since in 2017 Italian artists released only 163 tracks (almost half of the 2018, 2019 and 2020 figures), it is understandable why in 2017 there were only 31 tracks on average in the TOP 100 Italy.

#### Q5: How many different Italian artists entered the Top 100 Italy at least once in each year? (Grafico a barre)

In [None]:
# Q5: How many different Italian artist entered at least once in Top 100 Italy for each Year ?

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Count, per chart year, the distinct artists having a member of
        # Italian nationality and a track positioned in a chart referred to Italy.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WITH ch.date.year AS year, COUNT(DISTINCT a) as numArtists
            RETURN year,numArtists
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Split the records into the lists used below and by the plot
years = [row[0] for row in rows]
numItalianArtists = [int(row[1]) for row in rows]

# Print the results
for year, numArtists in zip(years, numItalianArtists):
    print("Year: {}".format(year))
    print("numItalianArtists: {:d}".format(numArtists))
    print("")


plotResults(
    x=np.arange(len(years)),
    yArr=[numItalianArtists],
    yLabel="number of Italian artists",
    xLabel="Year",
    xTicks=years,
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query5"
)


Now, since we have both the ***number of Italian tracks released*** (Q4) and the ***number of Italian artists*** in the chart (Q5) for each year, we can compute the ratio:
$$(numItalianTracks)/(numItalianArtists)$$
to see, on average, how many released tracks there are for each artist present in the TOP 100 Italy.

In [None]:
# Ratio between the tracks released by Italian artists (Q4) and the Italian
# artists that entered the chart (Q5). The two lists are paired by position,
# so they must describe the same years in the same order.
if len(numItalianTracksReleased) != len(numItalianArtists):
    print("WARNING: Q4 and Q5 returned a different number of years; "
          "ratios are shown only for the positions present in both lists")

for year, numReleased, numArtists in zip(years, numItalianTracksReleased, numItalianArtists):
    print("Year: {}".format(year))
    print("Ratio: {:.2f}".format(numReleased / numArtists))
    print("")

It is possible to notice that even if in 2017 there were fewer Italian tracks and artists, the ratio is not so different from 2020.

#### Q6: On average how many tracks from Italian artists are present in Top 100 Italy through the different months of the year ? (Grafico a linea)

In [None]:
# Q6: On average how many tracks from Italian artists are present in Top 100 Italy through the different months of the year ? 

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Same per-chart count as Q3, averaged over the month of the chart
        # (all the years together) instead of over its year.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WITH ch,ch.date.month AS month, COUNT(DISTINCT t) as numTracks
            RETURN month,avg(numTracks)
            ORDER BY month
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Month numbers are turned into abbreviated names for the tick labels
months = [calendar.month_abbr[row[0]] for row in rows]
# int() truncates the average to a whole number of tracks
avgNumItalianTracksByMonth = [int(row[1]) for row in rows]

# Print the results
for month, avgTracks in zip(months, avgNumItalianTracksByMonth):
    print("Month: {}".format(month))
    print("avgNumItalianTracksByMonth: {:d}".format(avgTracks))
    print("")


plotResults(
    x=np.arange(len(months)),
    yArr=[avgNumItalianTracksByMonth],
    yLabel="avgNumItalianTracksByMonth",
    xLabel="Months",
    xTicks=months,
    showGrid=True,
    gridAxis="y",
    style="line",
    fillPlot=True,
    saveTitle="query6"
)


#### Q7: How many albums from Italian artists are released through the different months of the year ? (Grafico a linea)

In [None]:
#Q7: How many albums from Italian artists are released through the different months of the year ? 

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Count the distinct albums per release month (all the years together).
        # The clause keyword was misspelled "ATCH", which made the query invalid.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(alb:Album)
            WITH alb.releaseDate.month AS month, COUNT(DISTINCT alb) as numAlbums
            RETURN month,numAlbums
            ORDER BY month
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Month numbers are turned into abbreviated names for the tick labels
months = [calendar.month_abbr[row[0]] for row in rows]
numItalianAlbumsReleasedByMonth = [int(row[1]) for row in rows]

# Print the results
for month, numAlbums in zip(months, numItalianAlbumsReleasedByMonth):
    print("Month: {}".format(month))
    print("numItalianAlbumsReleasedByMonth: {:d}".format(numAlbums))
    print("")


plotResults(
    x=np.arange(len(months)),
    yArr=[numItalianAlbumsReleasedByMonth],
    yLabel="numItalianAlbumsReleasedByMonth",
    xLabel="Months",
    xTicks=months,
    showGrid=True,
    gridAxis="y",
    style="line",
    fillPlot=True,
    saveTitle="query7"
)

#### Q7: Who is the Italian artist with the highest number of tracks present in Top 100 Italy for each year? (Nomi degli artisti)

In [None]:
# Q7: Who is the Italian artist with the highest number of tracks present in Top 100 Italy for each year?

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Count the distinct charted tracks per (artist, year), sort the pairs
        # by that count and keep, for each year, the first collected artist.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WITH a,ch.date.year AS year, COUNT(DISTINCT t) as numTracks
            ORDER BY numTracks DESC
            WITH year,COLLECT(a) AS artists, COLLECT(numTracks) as orderedNumTracks
            RETURN DISTINCT year,artists[0],orderedNumTracks[0]
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# years and topArtistsByYear are reused by the next cell (Q8)
years = [row[0] for row in rows]
topArtistsByYear = [row[1]["name"] for row in rows]
numTracks = [row[2] for row in rows]

# Print the results
for year, artistName, trackCount in zip(years, topArtistsByYear, numTracks):
    print("Year: {}".format(year))
    print("Artist: {}".format(artistName))
    print("numTracks: {}".format(trackCount))
    print("")


#### Q8: Show the most common position in the TOP 100 Italy obtained by the just retrieved artist.

In [None]:
# Q8: Show the most common position in the TOP 100 Italy obtained by the just retrieved artist.

# One list of chart positions per (artist, year) pair found by the previous cell
positions = []

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        for artistName, year in zip(topArtistsByYear, years):
            # Aggregating without a grouping key always yields exactly one
            # record (an empty list when nothing matches), so `positions`
            # stays aligned with `topArtistsByYear`.
            result = session.run("""
                MATCH (a:Artist {name:$name})-[:partecipateIn]->(t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
                WHERE ch.date >= date({year: $year, month: 1, day: 1}) AND ch.date <= date({year: $year, month: 12, day: 31})
                RETURN COLLECT(r.position) AS positions
            """, name=artistName, year=year)
            record = result.single()
            positions.append(record[0] if record is not None else [])
finally:
    # Release the connection even if a query fails
    driver.close()

# Print the results
for artistName, artistPositions in zip(topArtistsByYear, positions):
    print("Artist: {}".format(artistName))
    print("positions: {}".format(artistPositions))
    print("")

# One box per (artist, year); plt.boxplot places them at x = 1..N
plt.boxplot(positions)
plt.xticks(
    range(1, len(positions) + 1),
    ["{} ({})".format(artistName, year) for artistName, year in zip(topArtistsByYear, years)],
)
plt.ylabel("Position in TOP 100 Italy")
plt.show()



####  Q9: Show the top 3 Italian artists with the highest number of tracks present in Top 100 Italy at the same time.

In [None]:
# Q9: Show the top 3 Italian artists with the highest number of tracks present in Top 100 Italy at the same time.

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Count the distinct tracks per (artist, chart), keep for each artist
        # the chart with the highest count, then sort the artists by that
        # count: without the final ORDER BY, LIMIT 3 would keep three
        # arbitrary artists rather than the best three.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WITH a,ch, COUNT(DISTINCT t) as numTracks
            ORDER BY numTracks DESC
            WITH a,COLLECT(ch) AS charts, COLLECT(numTracks) as orderedNumTracks
            RETURN a, charts[0] AS topChart, orderedNumTracks[0] AS maxNumTracks
            ORDER BY maxNumTracks DESC
            LIMIT 3
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

for artist, chart, maxNumTracks in rows:
    print("Artist: {}".format(artist["name"]))
    print("Chart: {}".format(chart["id"]))
    print("numTracks: {}".format(maxNumTracks))
    print("")


The numbers of tracks present simultaneously in the TOP 100 Italy are quite high (24,19 and 18). 

We want to check whether these three artists released an album in the same month as the chart or in the previous one.

In [None]:
# Artists printed by Q9, each with the year and month used for the lookup below
# NOTE(review): these values are typed in by hand, presumably from the Q9
# output; update them if the data changes.
topThreeArtists = ["tha Supreme","Marracash","Ultimo"]
years = [2019,2019,2019]
months = [11,11,4]

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        for artistName, year, month in zip(topThreeArtists, years, months):

            print("SEARCHING FOR AN ALBUM PUBLISHED BY "+artistName)

            # The window goes from the first day of the previous month to the
            # last day of the given month. It is built with duration
            # arithmetic because `month - 1` is not a valid month for January
            # and a fixed `day: 30` is invalid in February and skips the 31st.
            result = session.run("""
                MATCH (a:Artist {name:$artist})-[:partecipateIn]->(alb:Album {albumType:"album"})
                WITH a, alb, date({year: $year, month: $month, day: 1}) AS chartMonth
                WHERE alb.releaseDate >= chartMonth - duration({months: 1})
                  AND alb.releaseDate < chartMonth + duration({months: 1})
                RETURN a.name, alb.name, alb.releaseDate
                """, artist=artistName, year=year, month=month)

            for r in result:
                returnedData = r.values()
                print("Artist: {}".format(returnedData[0]))
                print("Album: {}".format(returnedData[1]))
                print("Album relaseDate: {}".format(str(returnedData[2])))
                print("")
finally:
    # Release the connection even if a query fails
    driver.close()

So, these Italian artists managed to have a lot of tracks in the TOP 100 Italy at the same time because they released albums whose tracks probably all entered the TOP 100 Italy.

#### Q10: Show the youngest artist who entered in the first 10 positions of Top 100 Italy for each year.

In [None]:
# Q10: Show the youngest artist who entered in the first 10 positions of Top 100 Italy for each year.

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Keep only the top-10 entries of members with a known birth date,
        # sort them from the most recent birth date, and take the first
        # collected (artist, birth date) of every chart year.
        # ORDER BY year makes the output order deterministic, like the other
        # per-year queries of the notebook.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[r:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country{id:"IT"})
            WHERE r.position <=10 AND p.birthDate IS NOT NULL
            WITH ch,a,p
            ORDER BY p.birthDate DESC
            WITH ch.date.year AS year, COLLECT(a.name) AS artistsNames, COLLECT(p.birthDate) AS artistsBirthDates
            RETURN year,artistsNames[0],artistsBirthDates[0]
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

for year, artistName, birthDate in rows:
    print("Year: {}".format(year))
    print("Artist: {}".format(artistName))
    print("BirthDate: {}".format(str(birthDate)))
    print("")

Of course, in the case of ***Maneskin*** the birthDate refers to the youngest member of the group.

### Italian tracks abroad

#### Q11: How many tracks from Italian artists are present in the Top 100 of a different country for each year? (grafico barre)

In [None]:
# Q11: How many tracks from Italian artist are present in a Top 100 of a different country for each year

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Same pattern as the Italian-chart queries, but the chart must be
        # referred to a country whose id is not "IT".
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
            WHERE c2.id<>"IT"
            WITH ch.date.year AS year, COUNT(DISTINCT t) AS numItalianTracks
            RETURN year,numItalianTracks
            ORDER BY year
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Split the records into the lists used below and by the plot
years = [row[0] for row in rows]
numItalianTracksAbroad = [int(row[1]) for row in rows]

# Print the results
for year, numTracksAbroad in zip(years, numItalianTracksAbroad):
    print("Year: {}".format(year))
    print("numItalianTracksAbroad: {:d}".format(numTracksAbroad))
    print("")


plotResults(
    x=np.arange(len(years)),
    yArr=[numItalianTracksAbroad],
    yLabel="number of Italian Tracks Abroad",
    xLabel="Year",
    xTicks=years,
    showGrid=True,
    gridAxis="y",
    style="bar",
    saveTitle="query11"
)

#### Q12: Show the countries that listen the most to Italian tracks. (show Names and numbers)

In [None]:
# Q12: Show the countries that listen the most to Italian tracks.
# (No LIMIT: every country is returned, because the map drawn in the next
# cell uses the whole list.)

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Count, per foreign country, the distinct tracks by Italian artists
        # positioned in a chart referred to that country.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
            WHERE c2.id<>"IT"
            RETURN c2.name, COUNT(DISTINCT t) AS numItalianTracks
            ORDER BY numItalianTracks DESC
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

# Both lists are reused by the choropleth cell
countries = [row[0] for row in rows]
numItalianTracksByCountry = [row[1] for row in rows]

# Print the results
for country, numTracksInCountry in zip(countries, numItalianTracksByCountry):
    print("Country: {}".format(country))
    print("numItalianTracksByCountry: {:d}".format(numTracksInCountry))
    print("")

In [None]:
import plotly.express as px

# Put the two Q12 lists side by side, one row per country
d = dict(
    countries=countries,
    numItalianTracksByCountry=numItalianTracksByCountry,
)
data = pd.DataFrame(d, columns=list(d.keys()))

# World map coloured by the number of Italian tracks found in each country;
# countries are matched to the map by their name
choroplethOptions = dict(
    locations="countries",
    locationmode='country names',
    color="numItalianTracksByCountry",
    hover_name="countries",
    color_continuous_scale='tealgrn',
)
fig = px.choropleth(data, **choroplethOptions)

fig.show()

#### Q13: Who is the artist present in the highest number of TOP 100 of different countries? Show also the countries in which he/she is present.

In [None]:
# Q13: Who is the artist present in the highest number of different countries? Show also the countries in which he/she is present.

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Per artist, count the distinct foreign countries reached and collect
        # their names (the country names, as asked by the question, rather
        # than the names of the charts), then keep the artist with most countries.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
            WHERE c2.id<>"IT"
            RETURN a, COUNT(DISTINCT c2) AS numCountries, COLLECT(DISTINCT c2.name) AS countries
            ORDER BY numCountries DESC
            LIMIT 1
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

for artist, numCountries, countryNames in rows:
    print("Artist: {}".format(artist["name"]))
    print("numCountries: {}".format(numCountries))
    print("Countries: {}".format(countryNames))
    print("")

#### Q14: Who is the artist with the highest number of tracks present outside Italy? Show also the names of the tracks.

In [None]:
# Q14: Who is the artist with the highest number of tracks present oustide Italy ? Show also the names of the tracks. 

# connect to the DB
driver = GraphDatabase.driver(params.uri, auth=(params.user, params.dbpsw))
try:
    with driver.session() as session:
        # Per artist, count the distinct tracks positioned in charts of
        # foreign countries and collect their names, then keep the artist
        # with the highest count.
        result = session.run("""
            MATCH (c1:Country{id:"IT"})<-[:hasNationality]-(p:Person)-[:isMemberOf]->(a:Artist)-[:partecipateIn]->(t:Track)-[:isPositionedIn]->(ch:Chart)-[:isReferredTo]->(c2:Country)
            WHERE c2.id<>"IT"
            RETURN a, COUNT(DISTINCT t) AS numTracks, COLLECT(DISTINCT t.name) as trackNames
            ORDER BY numTracks DESC
            LIMIT 1
        """)
        # Read every record while the session is still open
        rows = [r.values() for r in result]
finally:
    # Release the connection even if the query fails
    driver.close()

for artist, numTracksAbroad, trackNames in rows:
    print("Artist: {}".format(artist["name"]))
    print("numItalianTracks: {}".format(numTracksAbroad))
    print("Tracks: {}".format(trackNames))
    print("")

We have shown that actually there are two different criteria to determine the most "widespread" Italian artist abroad:

**1)** Count how many different countries the artist has reached excluding Italy. In this case, the best is ***Gigi D'Agostino***


**2)** Count the total number of tracks present in countries outside Italy. In this case, the best is ***Sfera Ebbasta***