## Convert JSON Objects into Data Frames

The data was collected by sending `curl` requests from my own terminal to the [Spotify Web API](https://developer.spotify.com/documentation/web-api/reference/) and saving the JSON responses in the `data/` folder.

### Read JSON Files

In [27]:
import json

import pandas as pd

In [40]:
# Time-range labels; each one is substituted into the JSON/CSV file names
# used throughout this notebook.
time_range = [
    'short_term',
    'medium_term',
    'long_term',
]

##### Tracks

In [42]:
# Load the saved top-tracks response for every time range, keyed by the
# time-range label taken from `time_range`.
track_data_json = {}
for term in time_range:
    # JSON is UTF-8 encoded; name the encoding explicitly so non-ASCII
    # track/artist names decode the same way on every platform instead of
    # depending on the locale's default encoding.
    with open(f"data/top_tracks_{term}.json", encoding="utf-8") as json_file:
        track_data_json[term] = json.load(json_file)

##### Artists

In [48]:
# Load the saved top-artists response for every time range, keyed by the
# time-range label taken from `time_range`.
artists_data_json = {}
for term in time_range:
    # JSON is UTF-8 encoded; name the encoding explicitly so non-ASCII
    # artist names decode the same way on every platform instead of
    # depending on the locale's default encoding.
    with open(f"data/top_artists_{term}.json", encoding="utf-8") as json_file:
        artists_data_json[term] = json.load(json_file)

### Organize Artists and Tracks Data

Turn the dictionaries into CSV files and pandas DataFrames.

In [53]:
# Write the 'items' list of each top-artists response to its own CSV file.
for term in time_range:
    artist_items = artists_data_json[term]['items']
    pd.DataFrame(artist_items).to_csv(f'data/top_artists_{term}.csv')

In [57]:
# Write the 'items' list of each top-tracks response to its own CSV file.
for term in time_range:
    track_items = track_data_json[term]['items']
    pd.DataFrame(track_items).to_csv(f'data/top_tracks_{term}.csv')

In [58]:
# Read the per-time-range artist CSVs back in, one DataFrame per time range.
(
    top_artists_short_term_df,
    top_artists_medium_term_df,
    top_artists_long_term_df,
) = (
    pd.read_csv(f'data/top_artists_{window}.csv')
    for window in ('short_term', 'medium_term', 'long_term')
)

In [59]:
# Read the per-time-range track CSVs back in, one DataFrame per time range.
(
    top_tracks_short_term_df,
    top_tracks_medium_term_df,
    top_tracks_long_term_df,
) = (
    pd.read_csv(f'data/top_tracks_{window}.csv')
    for window in ('short_term', 'medium_term', 'long_term')
)

### Keep Popularity and Genres for Artists

In [134]:
# Columns to keep from the top-artists tables.
artist_features = [
    'genres',
    'name',
    'popularity',
    'id',
]

In [135]:
# Narrow each artist table down to the columns listed in `artist_features`.
top_artists_short_term_reduced = top_artists_short_term_df.loc[:, artist_features]
top_artists_medium_term_reduced = top_artists_medium_term_df.loc[:, artist_features]
top_artists_long_term_reduced = top_artists_long_term_df.loc[:, artist_features]

In [138]:
# Save each reduced artist table to its own CSV file.
for window, reduced_df in (
    ('short_term', top_artists_short_term_reduced),
    ('medium_term', top_artists_medium_term_reduced),
    ('long_term', top_artists_long_term_reduced),
):
    reduced_df.to_csv(f'data/artists_{window}_reduced.csv')

In [215]:
# Show the reduced short-term artist table as this cell's output.
top_artists_short_term_reduced

Unnamed: 0,genres,name,popularity,id
0,"['contemporary jazz', 'contemporary post-bop',...",Brad Mehldau,55,2vI9KFm0fwSfPrpEgOeIbq
1,"['contemporary jazz', 'jazz', 'jazz drums', 's...",Ari Hoenig,39,1P6Llrp12ldpVbyC8gCHBz
2,"['contemporary jazz', 'contemporary post-bop',...",Kurt Rosenwinkel,41,253GMpCNwx1TJtASNAeDoP
3,"['contemporary jazz', 'straight-ahead jazz']",Dayna Stephens,16,3Y8rffZJZVJgNWMM6ZVGin
4,"['austrian orchestra', 'classical', 'classical...",Wiener Philharmoniker,70,003f4bk13c6Q3gAUXv7dGJ
5,"['contemporary jazz', 'jazz saxophone']",Mark Turner,31,36kfddkWcVc6XrzNN9BsTP
6,"['modern folk rock', 'modern rock', 'pop rock'...",Mumford & Sons,75,3gd8FJtBJtkRxdfbTu19U2
7,"['neo mellow', 'pop rock', 'singer-songwriter']",John Mayer,82,0hEurMDQu99nJRq8pTxO14
8,"['hip hop', 'pittsburgh rap', 'rap']",Mac Miller,87,4LLpKhyESsyAXpc4laK94U
9,[],Noam Wiesenberg,3,0IV9EI5sd2rlMEoAvDg70M


### Get Musical Features from Track IDs

We'll use the spotipy library to do this.

In [64]:
import spotipy
from config import get_spotipy_client

In [75]:
# Build the client via the helper imported from the local `config` module.
# NOTE(review): presumably returns an authenticated spotipy client — confirm in config.py.
sp = get_spotipy_client()

In [82]:
# Request audio features for the track ids of each time range.
short_term_track_ids = top_tracks_short_term_df['id'].tolist()
medium_term_track_ids = top_tracks_medium_term_df['id'].tolist()
long_term_track_ids = top_tracks_long_term_df['id'].tolist()

short_term_audio_features = sp.audio_features(tracks=short_term_track_ids)
medium_term_audio_features = sp.audio_features(tracks=medium_term_track_ids)
long_term_audio_features = sp.audio_features(tracks=long_term_track_ids)

In [84]:
# Wrap each audio-features response in a DataFrame.
(
    short_term_audio_features_df,
    medium_term_audio_features_df,
    long_term_audio_features_df,
) = map(
    pd.DataFrame,
    (
        short_term_audio_features,
        medium_term_audio_features,
        long_term_audio_features,
    ),
)

#### Add name and release_date column for audio features

In [200]:
import ast

In [88]:
# Copy the track name column from each top-tracks table onto the matching
# audio-features table (pandas aligns the values on the row index).
for features_df, tracks_df in (
    (short_term_audio_features_df, top_tracks_short_term_df),
    (medium_term_audio_features_df, top_tracks_medium_term_df),
    (long_term_audio_features_df, top_tracks_long_term_df),
):
    features_df['name'] = tracks_df['name']

In [196]:
# The 'album' column holds each album as a string (it was read back from CSV),
# so parse it with ast.literal_eval before pulling out the release date.
release_dates = [
    ast.literal_eval(album_repr)['release_date']
    for album_repr in top_tracks_short_term_df['album']
]
short_term_audio_features_df['release_date'] = release_dates

In [201]:
# The 'album' column holds each album as a string (it was read back from CSV),
# so parse it with ast.literal_eval before pulling out the release date.
release_dates = [
    ast.literal_eval(album_repr)['release_date']
    for album_repr in top_tracks_medium_term_df['album']
]
medium_term_audio_features_df['release_date'] = release_dates

In [202]:
# The 'album' column holds each album as a string (it was read back from CSV),
# so parse it with ast.literal_eval before pulling out the release date.
release_dates = [
    ast.literal_eval(album_repr)['release_date']
    for album_repr in top_tracks_long_term_df['album']
]
long_term_audio_features_df['release_date'] = release_dates

In [203]:
# Save each audio-features table (now with name and release_date) to CSV.
for window, features_df in (
    ('short', short_term_audio_features_df),
    ('medium', medium_term_audio_features_df),
    ('long', long_term_audio_features_df),
):
    features_df.to_csv(f'data/audio_features_{window}.csv')

#### All Musical Features
Remove duplicates

In [204]:
# Stack the three per-time-range tables on top of each other; the original
# row labels are kept, so index values repeat across the three parts.
per_term_audio_features = [
    short_term_audio_features_df,
    medium_term_audio_features_df,
    long_term_audio_features_df,
]
all_audio_features_df = pd.concat(per_term_audio_features)

In [205]:
# A track that appears in several time ranges has the same Spotify 'id' in
# each, so de-duplicate on 'id'. De-duplicating on 'name' would also drop
# distinct tracks that merely share a title (e.g. a cover and its original).
# The first occurrence of each track is kept (pandas default).
all_audio_features_df = all_audio_features_df.drop_duplicates(subset='id')

#### Get Averages of Features

Do this for each time range, and for all tracks combined.

In [206]:
# Audio-feature columns to average.
audio_features = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'tempo',
    'liveness',
    'valence',
]

In [207]:
# Column-wise mean of the selected audio features for each time range and
# for the combined, de-duplicated table.
audio_features_averages_short = short_term_audio_features_df.loc[:, audio_features].mean()
audio_features_averages_medium = medium_term_audio_features_df.loc[:, audio_features].mean()
audio_features_averages_long = long_term_audio_features_df.loc[:, audio_features].mean()
audio_features_averages_all = all_audio_features_df.loc[:, audio_features].mean()

In [208]:
# One row of averages per time frame, in the same order as the labels
# assigned to 'time_frame' below.
average_rows = [
    dict(averages)
    for averages in (
        audio_features_averages_short,
        audio_features_averages_medium,
        audio_features_averages_long,
        audio_features_averages_all,
    )
]
audio_features_averages_df = pd.DataFrame(average_rows)
audio_features_averages_df['time_frame'] = ['short_term', 'medium_term', 'long_term', 'all']

In [209]:
# Save the per-time-frame averages table; to_csv also writes the row index
# as the first column by default.
audio_features_averages_df.to_csv('data/audio_features_averages.csv')

### Data So Far...

Now we have audio features for each track, the averages of those features for each time range and overall, track release dates, my top artists, each artist's popularity index, and each artist's genres.

In [214]:
# Show the short-term audio-features table (including the added name and
# release_date columns) as this cell's output.
short_term_audio_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,name,release_date
0,0.767,0.787,3,-9.897,1,0.0331,0.0163,0.583,0.0513,0.964,126.879,audio_features,5QLHGv0DfpeXLNFo7SFEy1,spotify:track:5QLHGv0DfpeXLNFo7SFEy1,https://api.spotify.com/v1/tracks/5QLHGv0DfpeX...,https://api.spotify.com/v1/audio-analysis/5QLH...,266200,4,1979 - Remastered 2012,1995
1,0.794,0.32,1,-12.92,0,0.173,0.853,0.134,0.112,0.241,174.088,audio_features,1DWZUa5Mzf2BwzpHtgbHPY,spotify:track:1DWZUa5Mzf2BwzpHtgbHPY,https://api.spotify.com/v1/tracks/1DWZUa5Mzf2B...,https://api.spotify.com/v1/audio-analysis/1DWZ...,342040,4,Good News,2020-01-17
2,0.804,0.406,7,-11.154,0,0.0472,0.023,0.837,0.106,0.124,120.012,audio_features,2bB6iDVgm25WkYv0yBz1BD,spotify:track:2bB6iDVgm25WkYv0yBz1BD,https://api.spotify.com/v1/tracks/2bB6iDVgm25W...,https://api.spotify.com/v1/audio-analysis/2bB6...,360705,4,Looped,2014-10-27
3,0.835,0.626,1,-5.833,1,0.125,0.0589,6e-05,0.396,0.35,91.03,audio_features,2G7V7zsVDxg1yRsu7Ew9RJ,spotify:track:2G7V7zsVDxg1yRsu7Ew9RJ,https://api.spotify.com/v1/tracks/2G7V7zsVDxg1...,https://api.spotify.com/v1/audio-analysis/2G7V...,217925,4,In My Feelings,2018-06-29
4,0.61,0.389,9,-17.01,0,0.0543,0.849,0.901,0.692,0.621,90.141,audio_features,6d83pCGgsYX5e04z4Ej8VP,spotify:track:6d83pCGgsYX5e04z4Ej8VP,https://api.spotify.com/v1/tracks/6d83pCGgsYX5...,https://api.spotify.com/v1/audio-analysis/6d83...,524067,4,Wonderwall,2008-03-21
5,0.371,0.193,2,-13.38,1,0.0358,0.937,0.849,0.116,0.041,133.649,audio_features,0n6Uyd6PHbsy4d7nu6JZHD,spotify:track:0n6Uyd6PHbsy4d7nu6JZHD,https://api.spotify.com/v1/tracks/0n6Uyd6PHbsy...,https://api.spotify.com/v1/audio-analysis/0n6U...,344920,3,Yessss,2020-06-12
6,0.553,0.652,0,-6.685,1,0.206,0.0897,0.0,0.111,0.551,94.505,audio_features,6YbhspuOar1D9WSSnfe7ds,spotify:track:6YbhspuOar1D9WSSnfe7ds,https://api.spotify.com/v1/tracks/6YbhspuOar1D...,https://api.spotify.com/v1/audio-analysis/6Ybh...,207347,4,"Young, Wild & Free (feat. Bruno Mars)",2011-12-12
7,0.721,0.339,0,-11.195,1,0.0532,0.409,0.00153,0.0973,0.2,129.83,audio_features,7DfFc7a6Rwfi3YQMRbDMau,spotify:track:7DfFc7a6Rwfi3YQMRbDMau,https://api.spotify.com/v1/tracks/7DfFc7a6Rwfi...,https://api.spotify.com/v1/audio-analysis/7DfF...,200747,4,Thinkin Bout You,2012-07-10
8,0.544,0.203,1,-16.655,1,0.0511,0.538,0.208,0.683,0.374,122.882,audio_features,50cggf3pqYdA5rx92e5UDv,spotify:track:50cggf3pqYdA5rx92e5UDv,https://api.spotify.com/v1/tracks/50cggf3pqYdA...,https://api.spotify.com/v1/audio-analysis/50cg...,705947,4,'Teef,2019-06-21
9,0.762,0.701,8,-3.541,1,0.0286,0.235,0.000158,0.123,0.742,110.968,audio_features,748mdHapucXQri7IAO8yFK,spotify:track:748mdHapucXQri7IAO8yFK,https://api.spotify.com/v1/tracks/748mdHapucXQ...,https://api.spotify.com/v1/audio-analysis/748m...,208867,4,Kiss Me More (feat. SZA),2021-04-09
