In [341]:
import pandas as pd
import numpy as np
import json
from IPython.display import display

## MY SPOTIFY STREAMING HISTORY OVER THE PAST YEAR (01/2023 - 01/2024)

In [389]:
# combine streaming history in past year 01/31/2023 - 01/31/2024 
stream_history = pd.DataFrame() 
for file in ['StreamingHistory_music_0.json', 'StreamingHistory_music_1.json', 'StreamingHistory_music_2.json']:
    df = pd.read_json(file, orient='columns')
    stream_history = pd.concat([stream_history, df], ignore_index=True)

stream_history['minPlayed'] = stream_history['msPlayed']/(60 * 1000)
stream_history['hourPlayed'] = stream_history['msPlayed']/3600000

In [390]:
stream_history

Unnamed: 0,endTime,artistName,trackName,msPlayed,minPlayed,hourPlayed
0,2023-01-31 00:07,Joe Hisaishi,One Summer Day,189800,3.163333,0.052722
1,2023-01-31 00:11,Paintamelody,Above the Treetops (Lith Harbor),241893,4.031550,0.067193
2,2023-01-31 00:13,Paintamelody,Cygnus Garden,149472,2.491200,0.041520
3,2023-01-31 00:15,Nemu ネム,Nausicaä Requiem,125869,2.097817,0.034964
4,2023-01-31 00:18,Arcade Player,"Ellinia Tree Dungeon (From ""MapleStory"")",141409,2.356817,0.039280
...,...,...,...,...,...,...
29952,2024-01-31 23:41,BIGBANG,BAE BAE,169429,2.823817,0.047064
29953,2024-01-31 23:45,BIGBANG,Blue,233098,3.884967,0.064749
29954,2024-01-31 23:49,BIGBANG,Bad Boy,236817,3.946950,0.065782
29955,2024-01-31 23:52,G-DRAGON,WHO YOU?,201428,3.357133,0.055952


In [391]:
plays = pd.DataFrame(stream_history[['trackName', 'artistName']].value_counts()).reset_index()
plays

Unnamed: 0,trackName,artistName,count
0,We Might Even Be Falling In Love (Duet) - Spot...,Victoria Monét,225
1,H.S.K.T. (feat. Wonstein),LeeHi,201
2,Hush - Still Woozy Remix,The Marías,176
3,telepatía,Kali Uchis,171
4,Addiction,Doja Cat,167
...,...,...,...
3714,Let Me,ZAYN,1
3715,Let Me Be With You,ROUND TABLE featuring Nino,1
3716,Let Me Calm Down (feat. J. Cole),Nicki Minaj,1
3717,Let Me Explain,Bryson Tiller,1


In [392]:
# clean data 
stream_sum = stream_history.groupby(['artistName', 'trackName']).sum().reset_index()
stream_sum = stream_sum.drop(columns=['endTime','msPlayed'])
stream_sum = stream_sum.merge(plays, on = ['trackName', 'artistName'])
stream_sum = stream_sum[(stream_sum['minPlayed'] > 0) & (stream_sum['count'] > 4)]

In [393]:
# to be fixed: song data not available on Spotify 
ocean_sixteen_data = stream_sum[stream_sum['artistName'] == 'ocean sixteen']
lisandra_data = stream_sum[stream_sum['artistName'] == 'Lisandra']

ocean_sixteen_min_played = ocean_sixteen_data['minPlayed'].values[0]
lisandra_min_played = lisandra_data['minPlayed'].values[0]

ocean_sixteen_hour_played = ocean_sixteen_data['hourPlayed'].values[0]
lisandra_hour_played = lisandra_data['hourPlayed'].values[0]

ocean_sixteen_count = ocean_sixteen_data['count'].values[0]
lisandra_count = lisandra_data['count'].values[0]

min_sum = ocean_sixteen_min_played + lisandra_min_played
hour_sum = ocean_sixteen_hour_played + lisandra_hour_played
count_sum = ocean_sixteen_count + lisandra_count

combined = ['Frank Ocean', 'Songs for Women', min_sum, hour_sum, count_sum]
#stream_sum.loc[len(stream_sum)] = combined

In [394]:
stream_sum = stream_sum[~((stream_sum['artistName'] == 'ocean sixteen') | (stream_sum['artistName'] == 'Lisandra') | (stream_sum['artistName'] == 'extremely bad man'))]
stream_sum.minPlayed = stream_sum.minPlayed.round(2)
stream_sum.hourPlayed = stream_sum.hourPlayed.round(2)
stream_sum = stream_sum.sort_values(by = ['count', 'minPlayed'], ascending = False).reset_index(drop=True)

stream_sum

Unnamed: 0,artistName,trackName,minPlayed,hourPlayed,count
0,Victoria Monét,We Might Even Be Falling In Love (Duet) - Spot...,261.96,4.37,225
1,LeeHi,H.S.K.T. (feat. Wonstein),476.73,7.95,201
2,The Marías,Hush - Still Woozy Remix,359.78,6.00,176
3,Kali Uchis,telepatía,324.70,5.41,171
4,Doja Cat,Addiction,333.91,5.57,167
...,...,...,...,...,...
967,Syd,Fast Car,0.16,0.00,5
968,IU,Friday (feat.Jang Yi-jeong),0.14,0.00,5
969,Harry Styles,Daylight,0.13,0.00,5
970,NIKI,Every Summertime,0.10,0.00,5


In [395]:
track_and_artist = stream_sum.apply(lambda row: [row['trackName'], row['artistName']], axis=1)
track_and_artist

0      [We Might Even Be Falling In Love (Duet) - Spo...
1                     [H.S.K.T. (feat. Wonstein), LeeHi]
2                 [Hush - Still Woozy Remix, The Marías]
3                                [telepatía, Kali Uchis]
4                                  [Addiction, Doja Cat]
                             ...                        
967                                      [Fast Car, Syd]
968                    [Friday (feat.Jang Yi-jeong), IU]
969                             [Daylight, Harry Styles]
970                             [Every Summertime, NIKI]
971                                         [200%, AKMU]
Length: 972, dtype: object

## FIRST ATTEMPT: WEBSCRAPING METRICS ABOUT SONGS IN MY LIBRARY

In [377]:
# click button on homepage and choose "All of my songs in library" 
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

driver = webdriver.Chrome()  # Replace with the appropriate driver for your browser
driver.get("http://organizeyourmusic.playlistmachinery.com/index.html")

select_element = driver.find_element(By.ID, "collection-type")
select = Select(select_element)
select.select_by_value("all")

button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "go")))
button.click()
time.sleep(1)

# log into Spotify using username and password 
username_input = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "login-username")))
password_input = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "login-password")))

username_input.send_keys("##########")
password_input.send_keys("##########")

login_button = driver.find_element(By.ID, "login-button")
login_button.click()

wait = WebDriverWait(driver, 10)
content_element = wait.until(EC.visibility_of_element_located((By.ID, "main-area")))

In [415]:
soup = driver.page_source
soup = BeautifulSoup(soup, "html.parser")

In [416]:
table = soup.find_all('table')

In [417]:
headers = soup.find_all('th')

In [418]:
# get tables from different tabs 
category = soup.find('div', class_ = 'sidebar work')
category = category.find_all('li')
tabs = [element.contents[0].strip() for element in category]
tabs = ['Your Saved Tracks', 'C u girl', 'Pajeon', 'Boy', 'Girl','🍓']

In [419]:
headers = [title.text for title in headers]
headers

['\xa0',
 'sel',
 '',
 'title',
 'artist',
 'top genre',
 'year',
 'added',
 'bpm',
 'nrgy',
 'dnce',
 'dB',
 'live',
 'val',
 'dur',
 'acous',
 'spch',
 'pop']

In [421]:
rows = [] 
for tab in tabs:
    element = driver.find_element(By.XPATH, '//li[text()="' + tab + '"]')
    
    driver.execute_script("arguments[0].click();", element)

    page_source = driver.page_source

    soup = BeautifulSoup(page_source, 'html.parser')
   
    # extract the data from even rows
    even_rows = soup.find_all('tr', class_='google-visualization-table-tr-even')
    for row in even_rows:
        row_data = row.find_all('td')
        ind_row_data = [info.text.strip() for info in row_data]
        rows.append(ind_row_data)

    # extract the data from odd rows
    odd_rows = soup.find_all('tr', class_='google-visualization-table-tr-odd')
    for row in odd_rows:
        row_data = row.find_all('td')
        ind_row_data = [info.text.strip() for info in row_data]
        rows.append(ind_row_data)
tracks = pd.DataFrame(rows, columns=headers)
tracks

Unnamed: 0,Unnamed: 1,sel,Unnamed: 3,title,artist,top genre,year,added,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
0,1,,,Water,Tyla,,2023,2023‑10‑03,117,72,67,-3,14,52,200,9,8,95
1,3,,,Blinding Lights,The Weeknd,canadian contemporary r&b,2020,2020‑10‑29,171,73,51,-6,9,33,200,0,6,93
2,5,,,One Dance,Drake,canadian hip hop,2016,2020‑09‑09,104,63,79,-6,33,37,174,1,5,92
3,7,,,No Role Modelz,J. Cole,conscious hip hop,2014,2020‑09‑09,100,52,69,-8,5,49,293,32,34,91
4,9,,,Shut up My Moms Calling,Hotel Ugly,modern indie pop,2020,2021‑11‑12,138,41,49,-11,10,38,165,33,10,91
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,1,,,Truth or Dare,Tyla,,2023,2024‑02‑19,154,60,61,-6,9,81,190,18,34,81
752,3,,,Steeeam,Shelly,indie pop,2020,2024‑02‑20,120,76,82,-11,12,92,171,55,4,73
753,5,,,EASY,LE SSERAFIM,k-pop girl group,2024,2024‑02‑19,165,70,74,-5,11,60,165,19,5,71
754,2,,,Consideration,Rihanna,barbadian pop,2016,2024‑02‑15,145,58,88,-2,9,78,161,4,5,75


In [422]:
tracks = tracks.drop_duplicates().reset_index(drop=True)

In [423]:
tracks.head(10)

Unnamed: 0,Unnamed: 1,sel,Unnamed: 3,title,artist,top genre,year,added,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
0,1,,,Water,Tyla,,2023,2023‑10‑03,117,72,67,-3,14,52,200,9,8,95
1,3,,,Blinding Lights,The Weeknd,canadian contemporary r&b,2020,2020‑10‑29,171,73,51,-6,9,33,200,0,6,93
2,5,,,One Dance,Drake,canadian hip hop,2016,2020‑09‑09,104,63,79,-6,33,37,174,1,5,92
3,7,,,No Role Modelz,J. Cole,conscious hip hop,2014,2020‑09‑09,100,52,69,-8,5,49,293,32,34,91
4,9,,,Shut up My Moms Calling,Hotel Ugly,modern indie pop,2020,2021‑11‑12,138,41,49,-11,10,38,165,33,10,91
5,11,,,Flashing Lights,Kanye West,chicago rap,2007,2023‑10‑17,90,63,64,-8,39,43,238,4,4,91
6,13,,,Dark Red,Steve Lacy,afrofuturism,2017,2023‑04‑07,172,78,60,-4,12,77,173,45,6,90
7,15,,,The Less I Know The Better,Tame Impala,australian psych,2015,2020‑10‑29,117,74,64,-4,17,79,216,1,3,88
8,17,,,Break from Toronto,PARTYNEXTDOOR,r&b,2013,2023‑09‑25,117,68,60,-5,42,26,99,2,3,88
9,19,,,7 rings,Ariana Grande,pop,2019,2020‑09‑09,140,32,78,-11,9,33,179,59,33,87


In [425]:
tracks.merge(stream_sum, left_on=['title', 'artist'], right_on=['trackName', 'artistName']).sort_values(by=['count'], ascending = False)

Unnamed: 0,Unnamed: 1,sel,Unnamed: 3,title,artist,top genre,year,added,bpm,nrgy,...,val,dur,acous,spch,pop,artistName,trackName,minPlayed,hourPlayed,count
65,153,,,We Might Even Be Falling In Love (Duet) - Spot...,Victoria Monét,alternative r&b,2023,2023‑08‑15,77,42,...,78,90,47,8,75,Victoria Monét,We Might Even Be Falling In Love (Duet) - Spot...,261.96,4.37,225
330,344,,,H.S.K.T. (feat. Wonstein),LeeHi,k-pop,2021,2023‑08‑15,114,67,...,33,204,25,4,59,LeeHi,H.S.K.T. (feat. Wonstein),476.73,7.95,201
320,326,,,Hush - Still Woozy Remix,The Marías,bedroom pop,2021,2022‑07‑05,104,55,...,56,182,40,3,60,The Marías,Hush - Still Woozy Remix,359.78,6.00,176
202,22,,,telepatía,Kali Uchis,colombian pop,2020,2021‑03‑24,84,52,...,55,160,11,5,87,Kali Uchis,telepatía,324.70,5.41,171
109,277,,,Addiction,Doja Cat,dance pop,2019,2023‑09‑27,90,71,...,55,208,1,17,64,Doja Cat,Addiction,333.91,5.57,167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489,55,,,All Night,Girls' Generation,k-pop,2017,2023‑09‑08,130,81,...,73,223,3,5,50,Girls' Generation,All Night,4.03,0.07,5
426,69,,,Castaway,Crush,k-pop,2016,2023‑12‑29,100,53,...,73,195,29,20,32,Crush,Castaway,0.39,0.01,5
49,109,,,No Hay Ley Parte 2 (with Rauw Alejandro),Kali Uchis,colombian pop,2024,2024‑01‑11,120,66,...,19,188,3,6,79,Kali Uchis,No Hay Ley Parte 2 (with Rauw Alejandro),4.17,0.07,5
231,100,,,Every Summertime,NIKI,indonesian r&b,2021,2022‑02‑14,79,68,...,72,216,39,4,79,NIKI,Every Summertime,0.10,0.00,5


#### ISSUES: 
- Organize Your Music had only 584 out of 975 songs from my streaming history
- Updates my library based on my playlists, so if I removed a song I used to listen to from these playlists (from 01/2023 - 01/2024), they will not show up 

## SECOND ATTEMPT: SPOTIFY API CALLS

In [30]:
client_id = '2f6d1ff2547045228babc0a1530b615c'
client_secret = '743702a0239a4216b65652452b5d604f'
redirect_uri = 'localhost:8000/callback'
scope = 'user-library-read'

In [106]:
import requests
from urllib.parse import urlencode
from spotipy.oauth2 import SpotifyOAuth
import spotipy
import json
import time
import pandas as pd
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import webbrowser

# api credentials 
client_id = client_id
client_secret = client_secret 
redirect_uri = 'http://127.0.0.1:5000/redirect'

# endpoint 
authorization_base_url = 'https://accounts.spotify.com/authorize'

scope = 'user-library-read'

oauth = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope
)

# get the authorization URL and automatically open it in a browser 
authorization_url = oauth.get_authorize_url()
webbrowser.open(authorization_url)

# input authorization code
code = input('Enter the authorization code: ')
print('Logged in!')

# exchange authorization code for access token and refresh token
token_info = oauth.get_access_token(code)

Enter the authorization code:  AQCs_jTZWcWTMHOU1asjifuW6NPkTJlrhksb7Pr4P2EeMgSZfaoZdi-dH5sGBPHnSqAwY7_Vd9yx2o80WsYcJh6iclLsrhiy9o5zvI3PisIbBv_W_Jtdy1NlL5LyNhcplrzMS2K6Nji7TPQf2X-6l61mbonrh-csDkbHkDiy7zN6yj_Azxc3WqPfq66qM0farl2bxyM


Logged in!


  token_info = oauth.get_access_token(code)


In [275]:
# extract the access token and refresh token from the token_info
access_token = token_info['access_token']
refresh_token = token_info['refresh_token']

sp = spotipy.Spotify(auth=access_token)

# check if the access token has expired
if oauth.is_token_expired(token_info):
    print('Access token is expired.')

    new_token_info = oauth.refresh_access_token(refresh_token)
    new_access_token = new_token_info['access_token']

    sp = spotipy.Spotify(auth=new_access_token)

    # update the access token with the new access token
    access_token = new_access_token
    print('Access token is refreshed!')

Access token is expired.
Access token is refreshed!


In [249]:
# cannot find Songs for Women by Frank Ocean in API :( 
query = f"track:{'Songs for Women'} artist:{'Frank Ocean'}"
results = sp.search(q=query, type='track', limit=1)
results

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=track%3ASongs+for+Women+artist%3AFrank+Ocean&type=track&offset=0&limit=1',
  'items': [],
  'limit': 1,
  'next': None,
  'offset': 0,
  'previous': None,
  'total': 0}}

In [312]:
# a nested list with each index being [id, track name, artist name]
all_track_ids = []

def get_track_ids(track_names, artist_names):
    track_ids = []
    for i in range(len(track_names)):
        orig_track_name = track_names[i]
        orig_artist_name = artist_names[i]
        # BUG: spotify search api does not recognize apostrophes - need to delete them before searching for track ids
        track_name = track_names[i].replace("'", "")
        artist_name = artist_names[i].replace("'", "")
        query = f"track:{track_name} artist:{artist_name}"
        results = sp.search(q=query, type='track', limit=1)
        track_id = results['tracks']['items'][0]['id']
        track_id = [track_id, orig_track_name, orig_artist_name]
        track_ids.append(track_id)
    return track_ids

# delay between each batch (in seconds)
batch_delay = 2

# how many requests are being sent in each batch 
batch_size = 30

# split track_and_artist list into batches
for i in range(0, len(track_and_artist), batch_size):
    batch = track_and_artist[i:i+batch_size]
    track_names = [pair[0] for pair in batch]
    artist_names = [pair[1] for pair in batch]
    
    batch_track_ids = get_track_ids(track_names, artist_names)
    all_track_ids.extend(batch_track_ids)

    # apply delay between batches
    time.sleep(batch_delay)  

In [315]:
all_track_ids[0:20]

[['0wOtc2nY3NOohp4xSwOyTN',
  'We Might Even Be Falling In Love (Duet) - Spotify Singles',
  'Victoria Monét'],
 ['39382sUtIOwIXftX0i76do', 'H.S.K.T. (feat. Wonstein)', 'LeeHi'],
 ['4dGuRldChjvboZktprNJFM', 'Hush - Still Woozy Remix', 'The Marías'],
 ['6tDDoYIxWvMLTdKpjFkc1B', 'telepatía', 'Kali Uchis'],
 ['2OAcH9SD8ehxuG0tWNe0cU', 'Addiction', 'Doja Cat'],
 ['0wzCQjc8JRa39ej1TFkAFt', 'Endlessly', 'Kali Uchis'],
 ['0O3TAouZE4vL9dM5SyxgvH', 'Fashion Killa', 'A$AP Rocky'],
 ['15EPc80XuFrb2LmOzGjuRg',
  'Crew (feat. Brent Faiyaz & Shy Glizzy)',
  'GoldLink'],
 ['41SwdQIX8Hy2u6fuEDgvWr', '10%', 'KAYTRANADA'],
 ['2p37Mfy2PWajgOS3i2aaep',
  'U Say (feat. Tyler, The Creator & Jay Prince)',
  'GoldLink'],
 ['1HA2V2EfAgJUBVOju4YfiB',
  'Just A Stranger (feat. Steve Lacy)',
  'Kali Uchis'],
 ['16gJmoOqRt2lKmlXtDpWgH', 'Cariño', 'The Marías'],
 ['23zOqixUia8E5BVMYHXL0c', 'Culture', 'KAYTRANADA'],
 ['1DunhgeZSEgWiIYbHqXl0c', 'Latch', 'Disclosure'],
 ['6luBKkFUt5wTwz7hpLhp12', 'drive ME crazy!', 'L

In [299]:
ids = [id[0] for id in all_track_ids]

In [303]:
# set batch size and delay
batch_size = 20
delay = 2

all_features = []
# get track features for a list of track IDs
def get_track_features(track_ids):
    
    for i in range(0, len(track_ids), batch_size):
        batch_track_ids = track_ids[i:i+batch_size]
        
        # track features for the current batch
        track_features = sp.audio_features(batch_track_ids)
        all_features.extend(track_features)

        # delay between batches
        if i + batch_size < len(track_ids):
            time.sleep(delay)

    return all_features

In [305]:
features = pd.DataFrame(get_track_features(ids)) 

In [427]:
features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.731,0.423,6,-10.147,1,0.0784,0.47300,0.000413,0.1290,0.780,76.964,audio_features,0wOtc2nY3NOohp4xSwOyTN,spotify:track:0wOtc2nY3NOohp4xSwOyTN,https://api.spotify.com/v1/tracks/0wOtc2nY3NOo...,https://api.spotify.com/v1/audio-analysis/0wOt...,90111,4
1,0.756,0.672,9,-5.711,0,0.0373,0.25200,0.000341,0.1170,0.327,113.975,audio_features,39382sUtIOwIXftX0i76do,spotify:track:39382sUtIOwIXftX0i76do,https://api.spotify.com/v1/tracks/39382sUtIOwI...,https://api.spotify.com/v1/audio-analysis/3938...,204453,4
2,0.708,0.554,10,-6.830,0,0.0311,0.40300,0.042800,0.1260,0.564,103.999,audio_features,4dGuRldChjvboZktprNJFM,spotify:track:4dGuRldChjvboZktprNJFM,https://api.spotify.com/v1/tracks/4dGuRldChjvb...,https://api.spotify.com/v1/audio-analysis/4dGu...,182347,4
3,0.653,0.524,11,-9.016,0,0.0502,0.11200,0.000000,0.2030,0.553,83.970,audio_features,6tDDoYIxWvMLTdKpjFkc1B,spotify:track:6tDDoYIxWvMLTdKpjFkc1B,https://api.spotify.com/v1/tracks/6tDDoYIxWvML...,https://api.spotify.com/v1/audio-analysis/6tDD...,160191,4
4,0.775,0.708,7,-6.073,1,0.1660,0.00993,0.000537,0.1280,0.548,90.005,audio_features,2OAcH9SD8ehxuG0tWNe0cU,spotify:track:2OAcH9SD8ehxuG0tWNe0cU,https://api.spotify.com/v1/tracks/2OAcH9SD8ehx...,https://api.spotify.com/v1/audio-analysis/2OAc...,208480,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1939,0.822,0.480,11,-7.175,0,0.0475,0.06900,0.004860,0.3220,0.577,111.959,audio_features,0NBTjVJKPWkfsJ7PkUU66K,spotify:track:0NBTjVJKPWkfsJ7PkUU66K,https://api.spotify.com/v1/tracks/0NBTjVJKPWkf...,https://api.spotify.com/v1/audio-analysis/0NBT...,210000,4
1940,0.681,0.525,9,-4.823,1,0.0512,0.61900,0.000000,0.1030,0.551,80.052,audio_features,0GsRx0gPft6RmijIwMsKmG,spotify:track:0GsRx0gPft6RmijIwMsKmG,https://api.spotify.com/v1/tracks/0GsRx0gPft6R...,https://api.spotify.com/v1/audio-analysis/0GsR...,217130,4
1941,0.686,0.445,0,-7.189,1,0.0398,0.48400,0.001440,0.1750,0.626,145.500,audio_features,51Zw1cKDgkad0CXv23HCMU,spotify:track:51Zw1cKDgkad0CXv23HCMU,https://api.spotify.com/v1/tracks/51Zw1cKDgkad...,https://api.spotify.com/v1/audio-analysis/51Zw...,164533,3
1942,0.628,0.676,6,-4.227,1,0.0424,0.39300,0.000069,0.0978,0.723,78.996,audio_features,68HocO7fx9z0MgDU0ZPHro,spotify:track:68HocO7fx9z0MgDU0ZPHro,https://api.spotify.com/v1/tracks/68HocO7fx9z0...,https://api.spotify.com/v1/audio-analysis/68Ho...,215687,4


In [313]:
id_df = pd.DataFrame(all_track_ids, columns=['id', 'trackName', 'artistName'])

In [314]:
id_df

Unnamed: 0,id,trackName,artistName
0,0wOtc2nY3NOohp4xSwOyTN,We Might Even Be Falling In Love (Duet) - Spot...,Victoria Monét
1,39382sUtIOwIXftX0i76do,H.S.K.T. (feat. Wonstein),LeeHi
2,4dGuRldChjvboZktprNJFM,Hush - Still Woozy Remix,The Marías
3,6tDDoYIxWvMLTdKpjFkc1B,telepatía,Kali Uchis
4,2OAcH9SD8ehxuG0tWNe0cU,Addiction,Doja Cat
...,...,...,...
967,0NBTjVJKPWkfsJ7PkUU66K,Fast Car,Syd
968,0GsRx0gPft6RmijIwMsKmG,Friday (feat.Jang Yi-jeong),IU
969,51Zw1cKDgkad0CXv23HCMU,Daylight,Harry Styles
970,68HocO7fx9z0MgDU0ZPHro,Every Summertime,NIKI


In [325]:
features_and_track = features.merge(id_df, on = ['id']).drop_duplicates()
features_and_track

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,trackName,artistName
0,0.731,0.423,6,-10.147,1,0.0784,0.47300,0.000413,0.1290,0.780,76.964,audio_features,0wOtc2nY3NOohp4xSwOyTN,spotify:track:0wOtc2nY3NOohp4xSwOyTN,https://api.spotify.com/v1/tracks/0wOtc2nY3NOo...,https://api.spotify.com/v1/audio-analysis/0wOt...,90111,4,We Might Even Be Falling In Love (Duet) - Spot...,Victoria Monét
2,0.756,0.672,9,-5.711,0,0.0373,0.25200,0.000341,0.1170,0.327,113.975,audio_features,39382sUtIOwIXftX0i76do,spotify:track:39382sUtIOwIXftX0i76do,https://api.spotify.com/v1/tracks/39382sUtIOwI...,https://api.spotify.com/v1/audio-analysis/3938...,204453,4,H.S.K.T. (feat. Wonstein),LeeHi
4,0.708,0.554,10,-6.830,0,0.0311,0.40300,0.042800,0.1260,0.564,103.999,audio_features,4dGuRldChjvboZktprNJFM,spotify:track:4dGuRldChjvboZktprNJFM,https://api.spotify.com/v1/tracks/4dGuRldChjvb...,https://api.spotify.com/v1/audio-analysis/4dGu...,182347,4,Hush - Still Woozy Remix,The Marías
6,0.653,0.524,11,-9.016,0,0.0502,0.11200,0.000000,0.2030,0.553,83.970,audio_features,6tDDoYIxWvMLTdKpjFkc1B,spotify:track:6tDDoYIxWvMLTdKpjFkc1B,https://api.spotify.com/v1/tracks/6tDDoYIxWvML...,https://api.spotify.com/v1/audio-analysis/6tDD...,160191,4,telepatía,Kali Uchis
8,0.775,0.708,7,-6.073,1,0.1660,0.00993,0.000537,0.1280,0.548,90.005,audio_features,2OAcH9SD8ehxuG0tWNe0cU,spotify:track:2OAcH9SD8ehxuG0tWNe0cU,https://api.spotify.com/v1/tracks/2OAcH9SD8ehx...,https://api.spotify.com/v1/audio-analysis/2OAc...,208480,4,Addiction,Doja Cat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1954,0.822,0.480,11,-7.175,0,0.0475,0.06900,0.004860,0.3220,0.577,111.959,audio_features,0NBTjVJKPWkfsJ7PkUU66K,spotify:track:0NBTjVJKPWkfsJ7PkUU66K,https://api.spotify.com/v1/tracks/0NBTjVJKPWkf...,https://api.spotify.com/v1/audio-analysis/0NBT...,210000,4,Fast Car,Syd
1956,0.681,0.525,9,-4.823,1,0.0512,0.61900,0.000000,0.1030,0.551,80.052,audio_features,0GsRx0gPft6RmijIwMsKmG,spotify:track:0GsRx0gPft6RmijIwMsKmG,https://api.spotify.com/v1/tracks/0GsRx0gPft6R...,https://api.spotify.com/v1/audio-analysis/0GsR...,217130,4,Friday (feat.Jang Yi-jeong),IU
1958,0.686,0.445,0,-7.189,1,0.0398,0.48400,0.001440,0.1750,0.626,145.500,audio_features,51Zw1cKDgkad0CXv23HCMU,spotify:track:51Zw1cKDgkad0CXv23HCMU,https://api.spotify.com/v1/tracks/51Zw1cKDgkad...,https://api.spotify.com/v1/audio-analysis/51Zw...,164533,3,Daylight,Harry Styles
1960,0.628,0.676,6,-4.227,1,0.0424,0.39300,0.000069,0.0978,0.723,78.996,audio_features,68HocO7fx9z0MgDU0ZPHro,spotify:track:68HocO7fx9z0MgDU0ZPHro,https://api.spotify.com/v1/tracks/68HocO7fx9z0...,https://api.spotify.com/v1/audio-analysis/68Ho...,215687,4,Every Summertime,NIKI


In [324]:
features_and_track.merge(stream_sum, on = ['artistName', 'trackName'])

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,uri,track_href,analysis_url,duration_ms,time_signature,trackName,artistName,minPlayed,hourPlayed,count
0,0.731,0.423,6,-10.147,1,0.0784,0.47300,0.000413,0.1290,0.780,...,spotify:track:0wOtc2nY3NOohp4xSwOyTN,https://api.spotify.com/v1/tracks/0wOtc2nY3NOo...,https://api.spotify.com/v1/audio-analysis/0wOt...,90111,4,We Might Even Be Falling In Love (Duet) - Spot...,Victoria Monét,261.96,4.37,225
1,0.756,0.672,9,-5.711,0,0.0373,0.25200,0.000341,0.1170,0.327,...,spotify:track:39382sUtIOwIXftX0i76do,https://api.spotify.com/v1/tracks/39382sUtIOwI...,https://api.spotify.com/v1/audio-analysis/3938...,204453,4,H.S.K.T. (feat. Wonstein),LeeHi,476.73,7.95,201
2,0.708,0.554,10,-6.830,0,0.0311,0.40300,0.042800,0.1260,0.564,...,spotify:track:4dGuRldChjvboZktprNJFM,https://api.spotify.com/v1/tracks/4dGuRldChjvb...,https://api.spotify.com/v1/audio-analysis/4dGu...,182347,4,Hush - Still Woozy Remix,The Marías,359.78,6.00,176
3,0.653,0.524,11,-9.016,0,0.0502,0.11200,0.000000,0.2030,0.553,...,spotify:track:6tDDoYIxWvMLTdKpjFkc1B,https://api.spotify.com/v1/tracks/6tDDoYIxWvML...,https://api.spotify.com/v1/audio-analysis/6tDD...,160191,4,telepatía,Kali Uchis,324.70,5.41,171
4,0.775,0.708,7,-6.073,1,0.1660,0.00993,0.000537,0.1280,0.548,...,spotify:track:2OAcH9SD8ehxuG0tWNe0cU,https://api.spotify.com/v1/tracks/2OAcH9SD8ehx...,https://api.spotify.com/v1/audio-analysis/2OAc...,208480,4,Addiction,Doja Cat,333.91,5.57,167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,0.822,0.480,11,-7.175,0,0.0475,0.06900,0.004860,0.3220,0.577,...,spotify:track:0NBTjVJKPWkfsJ7PkUU66K,https://api.spotify.com/v1/tracks/0NBTjVJKPWkf...,https://api.spotify.com/v1/audio-analysis/0NBT...,210000,4,Fast Car,Syd,0.16,0.00,5
912,0.681,0.525,9,-4.823,1,0.0512,0.61900,0.000000,0.1030,0.551,...,spotify:track:0GsRx0gPft6RmijIwMsKmG,https://api.spotify.com/v1/tracks/0GsRx0gPft6R...,https://api.spotify.com/v1/audio-analysis/0GsR...,217130,4,Friday (feat.Jang Yi-jeong),IU,0.14,0.00,5
913,0.686,0.445,0,-7.189,1,0.0398,0.48400,0.001440,0.1750,0.626,...,spotify:track:51Zw1cKDgkad0CXv23HCMU,https://api.spotify.com/v1/tracks/51Zw1cKDgkad...,https://api.spotify.com/v1/audio-analysis/51Zw...,164533,3,Daylight,Harry Styles,0.13,0.00,5
914,0.628,0.676,6,-4.227,1,0.0424,0.39300,0.000069,0.0978,0.723,...,spotify:track:68HocO7fx9z0MgDU0ZPHro,https://api.spotify.com/v1/tracks/68HocO7fx9z0...,https://api.spotify.com/v1/audio-analysis/68Ho...,215687,4,Every Summertime,NIKI,0.10,0.00,5


# NEXT STEPS 
- fix row inconsistencies between merges and on features dataframe
- get genres of each song
- get all end times for each song --> store in list --> add as column to features df --> explode 
- streaming analysis