In [27]:
import os
import pandas as pd

DATA_PATH = './data/'

# List of all relevant files
file_list = [DATA_PATH + filename for filename in os.listdir(DATA_PATH) if filename != '.gitkeep']

# List of Dataframes for each file
df_list = [pd.read_json(file) for file in file_list]

# Concatenating all df's together
df = pd.concat(df_list)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 174902 entries, 0 to 16487
Data columns (total 21 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   ts                                 174902 non-null  object 
 1   username                           174902 non-null  object 
 2   platform                           174902 non-null  object 
 3   ms_played                          174902 non-null  int64  
 4   conn_country                       174902 non-null  object 
 5   ip_addr_decrypted                  174902 non-null  object 
 6   user_agent_decrypted               172199 non-null  object 
 7   master_metadata_track_name         166423 non-null  object 
 8   master_metadata_album_artist_name  166423 non-null  object 
 9   master_metadata_album_album_name   166423 non-null  object 
 10  spotify_track_uri                  166423 non-null  object 
 11  episode_name                       7856 

In [31]:
df[['ts', 'ms_played', 'master_metadata_track_name']].head(20)

Unnamed: 0,ts,ms_played,master_metadata_track_name
0,2014-09-28T16:54:57Z,77475,Like I Love You - Video Edit
1,2014-08-19T13:26:58Z,1439,Rather Be (feat. Jess Glynne)
2,2020-03-17T08:16:55Z,4876,Feel Your Love Tonight - 2015 Remaster
3,2021-11-18T10:34:47Z,230922,CRUDELIA - I nervi
4,2022-07-01T05:24:40Z,23,Mantra
5,2022-01-07T14:55:11Z,129320,Cherry Stones
6,2016-04-08T13:28:31Z,1480,Restless Heart
7,2022-03-13T01:41:43Z,1710,"Mutter, der Mann mit dem Koks ist da - Mother'..."
8,2015-04-07T20:36:04Z,250066,Yeah!
9,2018-04-27T20:28:29Z,30680,Die letzten 30 Sekunden


In [26]:
print(sorted(df['reason_start'].unique().tolist()))
print(sorted(df['reason_end'].unique().tolist()))

['', 'appload', 'backbtn', 'clickrow', 'clickside', 'endplay', 'fwdbtn', 'playbtn', 'popup', 'remote', 'trackdone', 'trackerror', 'unknown', 'uriopen']
['', 'appload', 'backbtn', 'clickrow', 'clickside', 'endplay', 'fwdbtn', 'logout', 'popup', 'remote', 'trackdone', 'trackerror', 'unexpected-exit', 'unexpected-exit-while-paused', 'unknown', 'uriopen']


In [14]:
# Most playtime of all artists
sub_df = df[['master_metadata_album_artist_name', 'ms_played']]
sub_df = sub_df.rename(columns={'master_metadata_album_artist_name': 'artist'})
sub_df.groupby(['artist'])['ms_played'].sum().sort_values(ascending=False)

artist
Led Zeppelin          397958868
LGoony                390587297
Bilderbuch            360850531
Sabaton               275149926
Zugezogen Maskulin    270853651
                        ...    
Maximum Love                  0
Tokyo Blade                   0
Max Prosa                     0
Max Frost                     0
DUCKER SIMONIT                0
Name: ms_played, Length: 14288, dtype: int64

In [15]:
# Spotify API Integration
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

CLIENT_ID = os.environ['SPOTIFY_CLIENT_ID']
CLIENT_SECRET = os.environ['SPOTIFY_CLIENT_SECRET']

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                           client_secret=CLIENT_SECRET))

results = sp.search(q='led zeppelin', limit=20)
for idx, track in enumerate(results['tracks']['items']):
    print(idx, track['name'])

0 Immigrant Song - Remaster
1 Kashmir - Remaster
2 Stairway to Heaven - Remaster
3 Whole Lotta Love - 1990 Remaster
4 Going to California - Remaster
5 Ramble On - 1990 Remaster
6 D'yer Mak'er - Remaster
7 Black Dog - Remaster
8 Over the Hills and Far Away - Remaster
9 Tangerine - Remaster
10 Good Times Bad Times - 1993 Remaster
11 Hey, Hey, What Can I Do - Remaster
12 When the Levee Breaks - Remaster
13 D'yer Mak'er - Remaster
14 Rock and Roll - Remaster
15 Dazed and Confused - 1990 Remaster
16 Heartbreaker - 1990 Remaster
17 Fool in the Rain - Remaster
18 Babe I'm Gonna Leave You - 1990 Remaster
19 All My Love - Remaster


In [11]:
sp.artist('spotify:artist:3oKRxpszQKUjjaHz388fVA')

{'external_urls': {'spotify': 'https://open.spotify.com/artist/3oKRxpszQKUjjaHz388fVA'},
 'followers': {'href': None, 'total': 646864},
 'genres': ['aussietronica', 'indie soul'],
 'href': 'https://api.spotify.com/v1/artists/3oKRxpszQKUjjaHz388fVA',
 'id': '3oKRxpszQKUjjaHz388fVA',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab6761610000e5ebb6edcc3e5c79c2bb67a17d00',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/ab67616100005174b6edcc3e5c79c2bb67a17d00',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/ab6761610000f178b6edcc3e5c79c2bb67a17d00',
   'width': 160}],
 'name': 'Parcels',
 'popularity': 66,
 'type': 'artist',
 'uri': 'spotify:artist:3oKRxpszQKUjjaHz388fVA'}

In [16]:
sp.audio_features('spotify:track:0ax2Np3bXCUXCcYmcX5x1x')

[{'danceability': 0.724,
  'energy': 0.83,
  'key': 8,
  'loudness': -8.531,
  'mode': 0,
  'speechiness': 0.0328,
  'acousticness': 0.0418,
  'instrumentalness': 0.0158,
  'liveness': 0.0853,
  'valence': 0.821,
  'tempo': 119.976,
  'type': 'audio_features',
  'id': '0ax2Np3bXCUXCcYmcX5x1x',
  'uri': 'spotify:track:0ax2Np3bXCUXCcYmcX5x1x',
  'track_href': 'https://api.spotify.com/v1/tracks/0ax2Np3bXCUXCcYmcX5x1x',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0ax2Np3bXCUXCcYmcX5x1x',
  'duration_ms': 237464,
  'time_signature': 4}]

In [17]:
sp.track('spotify:track:0ax2Np3bXCUXCcYmcX5x1x')

{'album': {'album_group': 'album',
  'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3oKRxpszQKUjjaHz388fVA'},
    'href': 'https://api.spotify.com/v1/artists/3oKRxpszQKUjjaHz388fVA',
    'id': '3oKRxpszQKUjjaHz388fVA',
    'name': 'Parcels',
    'type': 'artist',
    'uri': 'spotify:artist:3oKRxpszQKUjjaHz388fVA'}],
  'available_markets': ['AD',
   'AE',
   'AG',
   'AL',
   'AM',
   'AO',
   'AR',
   'AT',
   'AU',
   'AZ',
   'BA',
   'BB',
   'BD',
   'BE',
   'BF',
   'BG',
   'BH',
   'BI',
   'BJ',
   'BN',
   'BO',
   'BR',
   'BS',
   'BT',
   'BW',
   'BY',
   'BZ',
   'CA',
   'CD',
   'CG',
   'CH',
   'CI',
   'CL',
   'CM',
   'CO',
   'CR',
   'CV',
   'CW',
   'CY',
   'CZ',
   'DE',
   'DJ',
   'DK',
   'DM',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'ET',
   'FI',
   'FJ',
   'FM',
   'FR',
   'GA',
   'GB',
   'GD',
   'GE',
   'GH',
   'GM',
   'GN',
   'GQ',
   'GR',
   'GT',
   'GW',
   'GY',
   'H