In [35]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter
import datetime

import plotly.graph_objs as go
import plotly.offline as pyo

In [36]:
# Load the Spotify streaming-history export, tidy it, and split it into one
# frame of music tracks (df_tracks) and one of podcast episodes (df_podcasts).

# Raw export field -> column label used by the rest of the notebook.
# Selecting and renaming by *name* (rather than dropping a list of columns and
# relabelling whatever is left by position) keeps each label attached to the
# right field even if the export's column order changes, and fails loudly with
# a KeyError naming the column if the export schema is different.
# NOTE(review): raw field names follow Spotify's "endsong" export schema -
# confirm against the actual files.
COLUMN_LABELS = {
    'ts': 'Date',
    'ms_played': 'Duration',
    'master_metadata_track_name': 'Track',
    'master_metadata_album_artist_name': 'Artist',
    'master_metadata_album_album_name': 'Album',
    'episode_name': 'Episode',
    'episode_show_name': 'Podcast',
    'reason_end': 'Reason to End',
}

# The export is split across endsong_0.json ... endsong_3.json.
frames = [pd.read_json(f'my_spotify_data/endsong_{part}.json') for part in range(4)]

df = pd.concat(frames, ignore_index=True)
df = df[list(COLUMN_LABELS)].rename(columns=COLUMN_LABELS)

# The raw timestamp is a string of the form "<date>T<time><suffix char>";
# split it once into a calendar date and a clock time.
# NOTE(review): the trailing character is presumably the UTC marker "Z", which
# would make Hours UTC hours rather than local time - confirm.
stamp_parts = df['Date'].str.split('T', n=1, expand=True)
df['Time'] = stamp_parts[1].str[:-1]
df['Date'] = pd.to_datetime(stamp_parts[0], format='%Y-%m-%d')
df[['Hours', 'Minutes', 'Seconds']] = df['Time'].str.split(':', n=2, expand=True)
df['Hours'] = df['Hours'].astype(int)  # Minutes / Seconds stay as strings

# ms_played -> seconds
df['Duration'] = pd.to_numeric(df['Duration']) / 1000

# Sort chronologically. Sorting on Date alone would leave the plays within a
# day in arbitrary order; the zero-padded HH:MM:SS strings order correctly.
df = df.sort_values(by=['Date', 'Time']).reset_index(drop=True)

# Podcast rows carry an episode name; track rows carry a track name.
df_podcasts = (
    df[df['Episode'].notna()]
    .drop(columns=['Track', 'Artist', 'Album'])
    .reset_index(drop=True)
)

df_tracks = (
    df[df['Track'].notna()]
    .drop(columns=['Episode', 'Podcast'])
    .reset_index(drop=True)
)

In [37]:
# Bar chart of the 100 most-played artists, written to top100_artists.html.

# Tally plays per artist, then keep the 100 largest as a two-column frame:
# column 0 holds the artist, column 1 the play count.
artist_plays = Counter(df_tracks['Artist'])
artists_freq = pd.DataFrame(artist_plays.most_common(100))

data = [
    go.Bar(
        x=artists_freq[0],
        y=artists_freq[1],
        orientation='v',
    )
]

layout = go.Layout(
    title='Top 100 - Artists',
    barmode='stack',
    font={'size': 10},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='top100_artists.html')

'top100_artists.html'

In [38]:
# Bar chart of the 100 most-played tracks, written to top100_tracks.html.

# Count plays per (track title, artist) pair. Counting on the title alone
# merges different songs that happen to share a title across artists, which
# inflates generic titles in the ranking.
track_plays = Counter(
    zip(df_tracks['Track'], df_tracks['Artist'].fillna('Unknown artist'))
)

# Columns stay positional as before: 0 = label, 1 = play count.
# The label is "<title> - <artist>" so same-titled tracks remain distinguishable.
tracks_freq = pd.DataFrame(
    [(f'{title} - {artist}', plays)
     for (title, artist), plays in track_plays.most_common(100)]
)

data = [go.Bar(y=tracks_freq[1],
               x=tracks_freq[0],
               orientation='v')]

layout = go.Layout(dict(
    title='Top 100 - Tracks',
    barmode='stack',
    font=dict(size=10)
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='top100_tracks.html')

'top100_tracks.html'

In [56]:
# Heatmap of listening time by weekday (x) and hour of day (y), written to
# heatmap_duration_week_time.html.

# Sum Duration per (weekday, hour) before plotting. go.Heatmap does not
# aggregate: given one row per play, repeated (x, y) pairs overwrite each
# other, so each cell would show a single play instead of the total time heard.
weekly_listening = (
    df_tracks
    .assign(Weekday=df_tracks['Date'].dt.day_of_week)  # 0 = Monday ... 6 = Sunday
    .groupby(['Weekday', 'Hours'], as_index=False)['Duration']
    .sum()
)

data = [go.Heatmap(x=weekly_listening['Weekday'],
                   y=weekly_listening['Hours'],
                   z=weekly_listening['Duration'],
                   colorscale='YlOrRd')]


layout = go.Layout(dict(
    title='Time heard during the days of a week',
    xaxis=dict(title='Day of the week',
               tickmode='array',
               tickvals=[0,1,2,3,4,5,6],
               ticktext=['Monday','Tuesday', 'Wednesday','Thursday','Friday','Saturday','Sunday']),
    yaxis=dict(title='Time of the day',
               autorange='reversed')  # hour 0 at the top
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='heatmap_duration_week_time.html')

'heatmap_duration_week_time.html'

In [59]:
# Heatmap of listening time by day of the year (x) and hour of day (y),
# written to heatmap_duration_year_time.html.

# Sum Duration per (day of year, hour) before plotting. go.Heatmap does not
# aggregate: given one row per play, repeated (x, y) pairs overwrite each
# other, so each cell would show a single play instead of the total time heard.
# All years in the data are pooled onto the same 1..366 axis.
yearly_listening = (
    df_tracks
    .assign(DayOfYear=df_tracks['Date'].dt.day_of_year)
    .groupby(['DayOfYear', 'Hours'], as_index=False)['Duration']
    .sum()
)

data = [go.Heatmap(x=yearly_listening['DayOfYear'],
                   y=yearly_listening['Hours'],
                   z=yearly_listening['Duration'],
                   colorscale='YlOrRd')]


layout = go.Layout(dict(
    title='Time heard during the days of a year',
    xaxis=dict(title='Day of the year',
               tickmode='array',
               # Each tick sits on the last day of its month in a non-leap
               # year (cumulative day counts), so a label marks the month's end.
               tickvals=[31,59,90,120,151,181,212,243,273,304,334,365],
               ticktext=['Jan','Fev','Mar','Abr','Mai','Jun','Jul','Ago','Set','Out','Nov','Dez']),
    yaxis=dict(title='Time of the day',
               autorange='reversed')  # hour 0 at the top
))

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='heatmap_duration_year_time.html')

'heatmap_duration_year_time.html'

In [40]:
# Disabled cell: a matplotlib alternative that draws the 100 most common
# artists as horizontal bars, one ax.barh call per artist. Nothing here runs.
# If re-enabled it rebinds artists_freq to a NumPy array (reversed order);
# the counts are passed through int() before plotting.

# artists_freq =  np.asarray(Counter(df_tracks['Artist']).most_common(100))

# artists_freq = artists_freq[::-1]

# fig, ax = plt.subplots(figsize =(16, 20))
# for artist, freq in zip(artists_freq[:,0], artists_freq[:,1]):
#     ax.barh(artist,int(freq))
# plt.show()