In [5]:
import numpy as np 
import pandas as pd
import plotly.express as px

# This notebook is an EDA for the research question: <br>
## How do the overall happiness score and music listening frequency on Spotify relate to each other?

In [6]:
# Spotify regional chart export (the `regional_24` file). This cell relies on
# its 'country' and 'streams' columns.
df = pd.read_csv('../data/raw_data/daily_chart_per_area/regional_24/all_regional_24.csv')

# Population per country. This data was manually added from
# https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population
population_data = {
    'Australia': 27_100_000,
    'Spain': 47_890_000,
    'Brazil': 212_812_000,
    'Sweden': 10_500_000,
    'South Africa': 64_747_000,
    'Nigeria': 237_528_000,
    'Greece': 10_300_000,
    'India': 1_454_606_724,
    'Egypt': 105_914_499,
    'Finland': 5_638_675,
    'Germany': 83_555_478,
    'Japan': 123_440_000,
    'Ukraine': 32_962_000,
    'United Kingdom': 68_265_209,
    'United States': 340_110_988
}
df['population'] = df['country'].map(population_data)

# `.map` yields NaN for any country that has no entry in `population_data`,
# which would silently turn its per-capita values into NaN. Report such
# countries instead of hiding them (no exception: the analysis still runs).
unmapped_countries = sorted(
    df.loc[df['population'].isna(), 'country'].dropna().unique()
)
if unmapped_countries:
    print(f'WARNING: no population entry for: {unmapped_countries}')

df['streams_per_capita'] = df['streams'] / df['population']

# Happiness table: keep only the 2024 rows and expose 'Ladder score' under the
# shorter name 'happiness'. Written as one chain so every step returns a new
# frame rather than renaming a filtered slice in place.
happy = (
    pd.read_excel('../data/raw_data/daily_chart_per_area/regional_24/hapiness.xlsx')
    .loc[lambda frame: frame['Year'] == 2024]
    .rename(columns={'Ladder score': 'happiness'})
)


In [7]:
# Display the happiness table (filtered to Year == 2024) for a visual check.
happy

Unnamed: 0,Year,Rank,Country name,happiness,upperwhisker,lowerwhisker,Explained by: Log GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Dystopia + residual
0,2024,1,Finland,7.736,7.810,7.662,1.749,1.783,0.824,0.986,0.110,0.502,1.782
13,2024,2,Denmark,7.521,7.611,7.431,1.825,1.748,0.820,0.955,0.150,0.488,1.535
26,2024,3,Iceland,7.515,7.606,7.425,1.799,1.840,0.873,0.971,0.201,0.173,1.659
45,2024,4,Sweden,7.345,7.427,7.262,1.783,1.698,0.889,0.952,0.170,0.467,1.385
58,2024,5,Netherlands,7.306,7.372,7.240,1.822,1.667,0.844,0.860,0.186,0.344,1.583
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1905,2024,143,Zimbabwe,3.396,3.509,3.283,0.827,0.961,0.178,0.598,0.065,0.131,0.636
1918,2024,144,Malawi,3.260,3.390,3.130,0.588,0.489,0.280,0.677,0.115,0.136,0.975
1931,2024,145,Lebanon,3.188,3.277,3.098,1.223,0.984,0.440,0.279,0.071,0.041,0.150
1943,2024,146,Sierra Leone,2.998,3.102,2.894,0.786,0.692,0.327,0.613,0.138,0.052,0.391


In [8]:
# Collapse the chart rows to one row per country: total streams, plus the
# country's population (taken from the first row of each group).
country_groups = df.groupby('country', as_index=False)
grouped_by_country = country_groups.agg(
    streams=('streams', 'sum'),
    population=('population', 'first'),
)

# Streams per inhabitant over the whole data set.
grouped_by_country['streams_per_capita'] = (
    grouped_by_country['streams'].div(grouped_by_country['population'])
)

grouped_by_country

Unnamed: 0,country,streams,population,streams_per_capita
0,Australia,6419845434,27100000,236.894665
1,Brazil,23678129768,212812000,111.263133
2,Egypt,1337138588,105914499,12.624698
3,Finland,1439767716,5638675,255.33795
4,Germany,10066126407,83555478,120.472369
5,Greece,1860985919,10300000,180.678245
6,India,18606949004,1454606724,12.791739
7,Japan,6786428466,123440000,54.977548
8,Nigeria,2554383731,237528000,10.754032
9,South Africa,1454035269,64747000,22.457184


In [9]:
# Sum streams per capita for every (date, country) pair.
# NOTE: this rebinds `df` to the aggregated frame, which only carries
# 'date', 'country' and 'streams_per_capita' from here on.
df = df.groupby(['date', 'country'], as_index=False).agg(
    {'streams_per_capita': 'sum'}
)

In [10]:


# World map coloured by total streams per capita for each country.
streams_map_options = {
    'locations': 'country',
    'locationmode': 'country names',
    'color': 'streams_per_capita',
    'color_continuous_scale': 'Viridis',
    'template': 'plotly_dark',
    'title': 'Spotify Streams Per Capita by Country (2024)',
}
fig = px.choropleth(grouped_by_country, **streams_map_options)

fig.show()


In [11]:
# World map coloured by the happiness score of each country in `happy`.
# `update_layout` returns the figure, so the geo tweaks are chained directly.
fig = px.choropleth(
    happy,
    locations='Country name',
    locationmode='country names',
    color='happiness',
    color_continuous_scale='Viridis',
    template='plotly_dark',
    title='World Happiness (Happiness Score) - 2024',
).update_layout(
    geo={'showframe': False, 'showcoastlines': True}
)
fig.show()


In [12]:
# Animated world map: one frame per value of 'date', coloured by that day's
# streams per capita.
frame_labels = df['date'].astype(str)
# Fix the colour range across all frames so colours stay comparable over time.
color_ceiling = df['streams_per_capita'].max()

fig = px.choropleth(
    df,
    locations='country',
    locationmode='country names',
    color='streams_per_capita',
    animation_frame=frame_labels,
    range_color=(0, color_ceiling),
    color_continuous_scale='Viridis',
    template='plotly_dark',
    title='Daily Spotify Streams Per Capita by Country (2024)',
)

# "Play" runs the animation from the current frame with a duration of 50 for
# both the frame and the transition.
play_button = {
    'label': 'Play',
    'method': 'animate',
    'args': [None, {
        'frame': {'duration': 50, 'redraw': True},
        'transition': {'duration': 50},
        'fromcurrent': True,
        'mode': 'immediate',
    }],
}
# "Pause" animates to an empty frame list in immediate mode.
pause_button = {
    'label': 'Pause',
    'method': 'animate',
    'args': [[None], {'mode': 'immediate'}],
}

fig.update_layout(
    geo={'showframe': False, 'showcoastlines': True},
    margin={'l': 0, 'r': 0, 't': 50, 'b': 0},
    updatemenus=[{
        'type': 'buttons',
        'showactive': False,
        'buttons': [play_button, pause_button],
    }],
)

fig.show()


In [13]:
# Join the per-country stream totals with the happiness table on the country
# name. The inner join keeps only countries present in both frames.
merged_df = pd.merge(
    grouped_by_country,
    happy,
    how='inner',
    left_on='country',
    right_on='Country name',
)


In [14]:
# Drop the columns listed below and strip the 'Explained by: ' prefix from the
# remaining column names. Written as a chained reassignment instead of
# `inplace=True` so each step returns a new frame; `errors='ignore'` keeps the
# cell re-runnable once the columns are already gone.
merged_df = (
    merged_df
    .drop(columns=['Year', 'Rank', 'upperwhisker', 'lowerwhisker'], errors='ignore')
    .rename(columns=lambda col: col.replace('Explained by: ', ''))
)
merged_df

Unnamed: 0,country,streams,population,streams_per_capita,Country name,happiness,Log GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Dystopia + residual
0,Australia,6419845434,27100000,236.894665,Australia,6.974,1.767,1.647,0.841,0.857,0.164,0.285,1.413
1,Brazil,23678129768,212812000,111.263133,Brazil,6.494,1.391,1.519,0.48,0.83,0.089,0.127,2.057
2,Egypt,1337138588,105914499,12.624698,Egypt,3.817,1.347,1.075,0.473,0.593,0.018,0.103,0.21
3,Finland,1439767716,5638675,255.33795,Finland,7.736,1.749,1.783,0.824,0.986,0.11,0.502,1.782
4,Germany,10066126407,83555478,120.472369,Germany,6.753,1.785,1.625,0.776,0.858,0.148,0.351,1.21
5,Greece,1860985919,10300000,180.678245,Greece,5.776,1.608,1.438,0.725,0.396,0.003,0.063,1.543
6,India,18606949004,1454606724,12.791739,India,4.389,1.149,0.86,0.316,0.914,0.141,0.12,0.889
7,Japan,6786428466,123440000,54.977548,Japan,6.147,1.678,1.55,0.921,0.746,0.014,0.2,1.038
8,Nigeria,2554383731,237528000,10.754032,Nigeria,4.885,0.989,1.245,0.38,0.639,0.17,0.028,1.433
9,South Africa,1454035269,64747000,22.457184,South Africa,5.213,1.283,1.465,0.142,0.676,0.064,0.043,1.54


In [15]:
# Pairwise correlations between all numeric columns of the merged frame.
numeric_df = merged_df.select_dtypes(include='number')
corr = numeric_df.corr()

# Render the correlation matrix as an annotated heatmap.
heatmap_options = {
    'title': 'Heatmap Happiness & Charts',
    'template': 'plotly_dark',
    'color_continuous_scale': 'Viridis',
    'text_auto': True,
    'width': 1200,
    'height': 1000,
}
fig = px.imshow(corr, **heatmap_options)

fig.show()


In [16]:
# Light-themed variant of the correlation heatmap with a transparent
# background and black, bold labels. `px` comes from the import cell at the
# top of the notebook, so it is not re-imported here.
numeric_df = merged_df.select_dtypes(include='number')
corr = numeric_df.corr().round(2)  # round to 2 decimals

fig = px.imshow(
    corr,
    text_auto=True,  # will auto-show the 2-digit rounded values
    color_continuous_scale='Viridis',
    labels=dict(color='Correlation'),
    aspect='auto',
    width=1200,
    height=1000
)

# Both axes share the same styling: black axis/grid lines and an empty title.
axis_style = dict(
    showline=True,
    linecolor='black',
    gridcolor='black',
    zerolinecolor='black',
    title=dict(
        text="",
        font=dict(size=16, family="Arial", color="black", weight="bold")
    )
)

fig.update_layout(
    template='plotly',
    font=dict(size=14, color='black'),

    xaxis=axis_style,
    yaxis=axis_style,

    coloraxis_colorbar=dict(
        # The colorbar title font is set through the nested `title.font`
        # attribute; `titlefont` is only a deprecated alias for it.
        title=dict(
            text="Correlation",
            font=dict(size=14, color='black', weight="bold")
        ),
        tickfont=dict(color='black')
    ),

    margin=dict(l=0, r=0, t=50, b=0),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)'
)
fig.update_xaxes(
    tickfont=dict(size=16, color='black', family='Arial Black'),
    tickangle=45
)

fig.update_yaxes(
    tickfont=dict(size=16, color='black', family='Arial Black')
)

fig.show()


In [17]:

# Scatter plot of happiness (x) against streams per capita (y), one marker per
# country in the merged frame.
fig = px.scatter(
    merged_df,
    x='happiness',
    y='streams_per_capita',
    hover_name='country',
    title='Streams per Capita vs. Ladder Score',
    template='plotly_dark',
    # `labels` is keyed by column name. The x column is 'happiness', so that
    # is the key that must carry the 'Ladder Score' axis label.
    labels={
        'streams_per_capita': 'Streams per Capita',
        'happiness': 'Ladder Score'
    },
    # NOTE: an OLS trendline (trendline='ols') was left disabled by the author.
    color='country'
)
# Enlarge the markers so the individual countries are easy to pick out.
fig.update_traces(marker=dict(size=15))

fig.show()
