## Analysis and Visualization notebook

This notebook aims to analyse the data collected from the Spotify API and the worldweather API to see whether climate has an impact on people's music choices in Europe.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import plotly.express as px

# Let pandas display up to 50 columns before it starts eliding them.
pd.options.display.max_columns = 50

In [2]:
# Load the main dataset: one row per charted track per region and date, with
# audio features and weather columns (see the column listing below).
# NOTE(review): this file is read from the working directory, whereas the other
# pickles in this notebook live under "data/" — confirm the path is intended.
# Only unpickle files from a trusted source; unpickling can run arbitrary code.
df = pd.read_pickle('final_spotify.pkl')


In [3]:
# List the column names available in the main dataset.
df.columns

Index(['region', 'date', 'month', 'spotify_id', 'artist', 'track_name',
       'position', 'streams', 'danceability', 'energy', 'instrumentalness',
       'key', 'liveness', 'loudness', 'speechiness', 'acousticness', 'tempo',
       'valence', 'explicit', 'temp', 'rain', 'snow', 'cloud', 'humidity',
       'const'],
      dtype='object')

In [4]:
# Preview the first five rows as a quick sanity check of the loaded data.
df.head()

Unnamed: 0,region,date,month,spotify_id,artist,track_name,position,streams,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,acousticness,tempo,valence,explicit,temp,rain,snow,cloud,humidity,const
0,AUT,2019-01-02,1,7KPGeiXWDsGEvFK62wzU8E,Capital Bra,Benzema,1,35174,0.78,0.69,0.0,10.0,0.12,-6.16,0.33,0.48,100.08,0.26,1.0,-5,1,1,48,69,1
1,AUT,2019-01-03,1,7KPGeiXWDsGEvFK62wzU8E,Capital Bra,Benzema,1,34237,0.78,0.69,0.0,10.0,0.12,-6.16,0.33,0.48,100.08,0.26,1.0,-3,1,1,91,70,1
2,AUT,2019-01-04,1,7KPGeiXWDsGEvFK62wzU8E,Capital Bra,Benzema,1,35199,0.78,0.69,0.0,10.0,0.12,-6.16,0.33,0.48,100.08,0.26,1.0,-2,0,0,14,75,1
3,AUT,2019-01-05,1,7KPGeiXWDsGEvFK62wzU8E,Capital Bra,Benzema,1,32559,0.78,0.69,0.0,10.0,0.12,-6.16,0.33,0.48,100.08,0.26,1.0,0,0,0,47,80,1
4,AUT,2019-01-06,1,7KPGeiXWDsGEvFK62wzU8E,Capital Bra,Benzema,1,26956,0.78,0.69,0.0,10.0,0.12,-6.16,0.33,0.48,100.08,0.26,1.0,0,1,0,80,74,1


## Valence 

Valence describes the musical positiveness of a track: a high valence sounds more positive, whereas a low valence sounds more negative.


In [5]:
# Load the valence data plotted over time in the next cell (its `date` and
# `valence` columns are used there).
# NOTE(review): the file name suggests the valence is weighted — presumably by
# streams; confirm against the notebook that produced this pickle.
val = pd.read_pickle("data/weighted_valence.pkl")

In [42]:
## Line chart of valence over time for the whole dataset
fig = px.line(
    val,
    x="date",
    y="valence",
    title="Overall European Valence",
)
fig.show()

- Overall trend: valence is lower in winter and higher in summer.
- Music industry supply/demand: more positive music in summer, the opposite in winter.
- The general seasonality could be linked to weather or to other factors (music supply, charts, etc.).

In [7]:
# Load the valence table with one row per month and region; it feeds the
# animated map below.
v = pd.read_pickle("data/weighted_valence_month.pkl")

In [40]:
# Preview the first rows of the monthly valence table.
v.head()

Unnamed: 0,month,region,valence
2,1,AUT,0.481
3,1,BEL,0.467
4,1,CHE,0.466
5,1,DEU,0.49
6,1,DNK,0.491


In [44]:
## Measure of positivity (valence) per country, animated with one frame per month.
## `region` holds three-letter country codes, which is the location format
## plotly's choropleth expects by default (ISO-3).
fig = px.choropleth(
    v,
    color="valence",
    locations="region",
    animation_frame="month",
    animation_group="region",
    color_continuous_scale=px.colors.sequential.Viridis,
    # Descriptive title instead of the former "Hello" placeholder.
    title="Monthly Valence by Country",
)
# Keep the map edge-to-edge on the sides and bottom, but reserve space at the
# top so the title has room above the map.
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
fig.show()

## Danceability

In [9]:
# Load the danceability table that feeds the animated map in the next cell
# (that cell uses its `region`, `month` and `danceability` columns).
d = pd.read_pickle("data/weighted_danceability.pkl")

In [10]:
## Danceability per country, animated with one frame per month
fig = px.choropleth(
    d,
    locations="region",
    color="danceability",
    animation_frame="month",
    animation_group="region",
    color_continuous_scale=px.colors.sequential.Viridis,
)
# Drop all figure margins so the map fills the whole output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [11]:
# Load the loudness table that feeds the animated map in the next cell
# (that cell uses its `region`, `month` and `loudness` columns).
# NOTE(review): the name `l` is easily confused with the digit 1; consider
# renaming it together with its use in the plotting cell.
l = pd.read_pickle("data/weighted_loudness.pkl")

In [12]:
## Loudness per country, animated with one frame per month
fig = px.choropleth(
    l,
    locations="region",
    color="loudness",
    animation_frame="month",
    animation_group="region",
    color_continuous_scale=px.colors.sequential.Viridis,
)
# Drop all figure margins so the map fills the whole output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

## Explicit 

In [13]:
# Sum the `explicit` column over every row of each date (all regions pooled),
# then order the result chronologically.
# NOTE(review): `explicit` looks like a 0/1 flag, which would make this a
# per-day count of explicit chart entries — confirm.
e = (
    df.groupby(["date"])
    .agg({"explicit": "sum"})
    .reset_index()
    .sort_values("date")
)


In [14]:
## Daily total of the explicit flag over the full date range
fig = px.line(
    e,
    x="date",
    y="explicit",
)
fig.show()

In [15]:
# Boolean mask for rows dated within December 2019 (both end dates inclusive).
mask = e["date"].between("2019-12-01", "2019-12-31")

In [16]:
# Keep only the rows selected by `mask`, i.e. the whole of December 2019.
christmas = e[mask]


In [17]:
## Daily total of the explicit flag, restricted to December 2019
fig = px.line(
    christmas,
    x="date",
    y="explicit",
)
fig.show()

In [18]:
# Sum the `explicit` column for each (date, artist, track_name) combination;
# rows for the same track on the same date (e.g. from several regions) are
# added together.
christ_exp = (
    df.groupby(["date", "artist", "track_name"])
    .agg({"explicit": "sum"})
    .reset_index()
)


In [19]:
# Boolean mask: rows dated 24–25 December 2019 (inclusive) whose summed
# explicit value is non-zero.
christ_date = (
    christ_exp["date"].between("2019-12-24", "2019-12-25")
    & christ_exp["explicit"].ne(0.0)
)


In [20]:
# Explicit tracks charting on 24–25 December 2019, as selected by `christ_date`.
christmas_explicit = christ_exp[christ_date]

In [21]:
# Display the selected rows (pandas truncates the middle of long frames).
christmas_explicit

Unnamed: 0,date,artist,track_name,explicit
670936,2019-12-24,1.Cuz,FÖRSENT,1.0
670937,2019-12-24,1.Cuz,RÄKNA MINA DAGAR,1.0
670938,2019-12-24,24kGoldn,VALENTINO,1.0
670939,2019-12-24,3robi,Dinero,1.0
670940,2019-12-24,47ter,Côte Ouest,1.0
...,...,...,...,...
674449,2019-12-25,tha Supreme,oh 9od - feat. Nayt,1.0
674450,2019-12-25,tha Supreme,parano1a k1d - feat. Fabri Fibra,1.0
674451,2019-12-25,tha Supreme,"pers0na2 - feat. Gemitaiz, Madman",1.0
674452,2019-12-25,tha Supreme,scuol4,1.0


In [22]:
# Count rows per artist, most frequent first. Each row is one
# (date, artist, track_name) combination, so a track charting on both
# 24 and 25 December is counted twice — these are track-days, not distinct tracks.
christmas_explicit["artist"].value_counts()

tha Supreme    36
Kontrafakt     26
Ian            21
Marracash      19
Rida Radar     19
               ..
Mula B          1
Sivas           1
Kabe            1
K27             1
Lion Music      1
Name: artist, Length: 294, dtype: int64