In [6]:
import spotipy
import base64
import requests
from spotipy.oauth2 import SpotifyClientCredentials
from creds import CLIENT_ID, CLIENT_SECRET
import pandas as pd
import time

In [2]:
# token
# Build a client-credentials auth manager from the CLIENT_ID / CLIENT_SECRET values
# imported from the local `creds` module, then create the module-level spotipy
# client `sp`. The helper functions defined in later cells read `sp` as a global.
auth_manager = SpotifyClientCredentials(CLIENT_ID, CLIENT_SECRET)
sp = spotipy.Spotify(auth_manager=auth_manager)


In [5]:
# Load the local reference dataset (presumably a Kaggle export, going by the
# variable name) and print its column / dtype summary.
# NOTE(review): the saved output lists an 'Unnamed: 0' int64 column, which looks
# like a written-out index -- consider index_col=0 if that is confirmed.
kaggle_df = pd.read_csv('dataset.csv')

kaggle_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114000 entries, 0 to 113999
Data columns (total 21 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Unnamed: 0        114000 non-null  int64  
 1   track_id          114000 non-null  object 
 2   artists           113999 non-null  object 
 3   album_name        113999 non-null  object 
 4   track_name        113999 non-null  object 
 5   popularity        114000 non-null  int64  
 6   duration_ms       114000 non-null  int64  
 7   explicit          114000 non-null  bool   
 8   danceability      114000 non-null  float64
 9   energy            114000 non-null  float64
 10  key               114000 non-null  int64  
 11  loudness          114000 non-null  float64
 12  mode              114000 non-null  int64  
 13  speechiness       114000 non-null  float64
 14  acousticness      114000 non-null  float64
 15  instrumentalness  114000 non-null  float64
 16  liveness          11

In [7]:
#Get spotify playlist ids and names
def get_all_playlists(user_id='spotify'):  # api call
    """Walk every page of a user's playlists and collect their ids and names.

    Each playlist is also printed as "<running number> <uri> <name>" while
    paging. The id is taken as the last 22 characters of the playlist URI.

    Args:
        user_id: User whose playlists are requested through the global ``sp``.

    Returns:
        tuple[list, list]: ``(playlist_ids, playlist_names)`` in page order.
    """
    collected_ids, collected_names = [], []
    page = sp.user_playlists(user_id)  # api call

    # Pagination pattern from the spotipy documentation: follow 'next' until it is empty.
    while page:
        for idx, entry in enumerate(page['items']):
            running_number = idx + 1 + page['offset']
            uri, name = entry['uri'], entry['name']
            print("%4d %s %s" % (running_number, uri, name))
            collected_ids.append(uri[-22:])
            collected_names.append(name)
        page = sp.next(page) if page['next'] else None

    return collected_ids, collected_names

# Playlist id handed to get_tracks() in a later cell, where it is labelled "Today's Top Hits".
todays_top_hits = '37i9dQZF1DXcBWIGoYBM5M'



In [10]:
# Get track info from playlists into a dataframe - separated out API calls

def get_playlist_tracks(playlist_id):  # api call
    """Request up to 100 track entries of a playlist through the global ``sp``.

    Only the track id, name, artists and album id/name are requested via the
    ``fields`` filter.

    Args:
        playlist_id: Playlist identifier forwarded to ``sp.playlist_tracks``.

    Returns:
        Whatever ``sp.playlist_tracks`` returns on success. On any exception
        the error is printed and an empty list is returned instead.
    """
    wanted_fields = 'items(track(id, name, artists, album(id, name)))'
    try:
        return sp.playlist_tracks(playlist_id, limit=100, fields=wanted_fields)
    except Exception as err:
        print(f"get_playlist_tracks error fetching {err}")
        return []

def get_audio_features(track_id):  # api call
    """Fetch the audio-feature record for one track through the global ``sp``.

    Args:
        track_id: Track identifier; a falsy value skips the request.

    Returns:
        The first element of ``sp.audio_features(track_id)``, or ``None`` when
        ``track_id`` is falsy or any exception occurs (the error is printed).
    """
    try:
        if not track_id:
            return None
        feature_list = sp.audio_features(track_id)
        return feature_list[0]
    except Exception as err:
        print(f"get_audio_features error fetching {err}")
        return None
    
def get_tracks(playlist_id, playlist_name):
    """Build one flat record per playlist track: metadata plus audio features.

    Calls ``get_playlist_tracks`` once for the playlist and
    ``get_audio_features`` once per track.

    Args:
        playlist_id: Playlist identifier passed to ``get_playlist_tracks`` and
            stored on every record.
        playlist_name: Display name stored on every record.

    Returns:
        list[dict]: one dict per track with the keys ``track_id``,
        ``track_name``, ``artists`` (names joined with ", "), ``album_name``,
        ``album_id``, ``playlist_id``, ``playlist_name`` and the audio-feature
        keys listed in ``feature_names`` (each ``None`` when no features were
        returned). Entries without a track are skipped. An empty list is
        returned when the playlist lookup yields nothing usable.
    """
    feature_names = (
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'loudness',
        'speechiness',
        'tempo',
        'mode',
        'valence',
    )

    music_data = []
    tracks = get_playlist_tracks(playlist_id)

    # get_playlist_tracks returns [] on failure, so indexing ['items'] directly
    # would raise TypeError; treat anything that is not a dict as "no items".
    items = tracks.get('items') if isinstance(tracks, dict) else None

    for track_info in items or []:
        track = track_info['track']

        # Skip entries without a track instead of falling through with
        # unbound or stale variables from the previous iteration.
        if not track:
            continue

        track_id = track['id']
        audio_stuffs = get_audio_features(track_id)  # api call

        track_data = {
            'track_id': track_id,
            'track_name': track['name'],
            'artists': ', '.join([artist['name'] for artist in track['artists']]),
            'album_name': track['album']['name'],
            'album_id': track['album']['id'],
            'playlist_id': playlist_id,
            'playlist_name': playlist_name,
        }
        for feature in feature_names:
            track_data[feature] = audio_stuffs.get(feature) if audio_stuffs else None

        music_data.append(track_data)

    return music_data

# Fetch the records for a single playlist and turn them into a DataFrame.
# `all_tracks` accumulates the record dicts so the frame is built once at the end.
all_tracks = []  
# playlist_ids, playlist_names = get_all_playlists()

# NOTE(review): if the loop below is restored, `playlist_ids[0]` / `playlist_names[0]`
# are single strings, so zip() would pair up their characters -- slice with [:10] instead.
# The f-string also interpolates the builtin `id`, not `playlist_id`.
# for playlist_id, playlist_name in zip(playlist_ids[0], playlist_names[0]): #Test with first 10 IDs
#     print(f"Getting tracks from playlist ID: {id}")
tracks = get_tracks(todays_top_hits,"Today's Top Hits") #One playlist request plus one audio-features request per track
all_tracks.extend(tracks)
# Pause after the fetch; presumably a throttle left over from the loop version above -- confirm it is still wanted.
time.sleep(5)
    # break # to keep the spotify api server from giving me a 429 error when they finally take me back

spotify_track_df = pd.DataFrame(all_tracks)

spotify_track_df.head()


Unnamed: 0,Playlist Name,Track Name,Artist,Album Name,Playlist ID,Album ID,Track ID,Acousticness,Danceability,Energy,Instrumentalness,Loudness,Speechiness,Tempo,Mode,Valence
0,Today's Top Hits,Please Please Please,Sabrina Carpenter,Please Please Please,37i9dQZF1DXcBWIGoYBM5M,5bBaoign62r1i7OV8w7mi9,5N3hjp1WNayUPZrA8kJmJP,0.274,0.669,0.586,0.0,-6.073,0.054,107.071,1,0.579
1,Today's Top Hits,Si Antes Te Hubiera Conocido,KAROL G,Si Antes Te Hubiera Conocido,37i9dQZF1DXcBWIGoYBM5M,5ylbxH7EqpsmHZCRuiYewS,6WatFBLVB0x077xWeoVc2k,0.446,0.924,0.668,0.000594,-6.795,0.0469,128.027,1,0.787
2,Today's Top Hits,BIRDS OF A FEATHER,Billie Eilish,HIT ME HARD AND SOFT,37i9dQZF1DXcBWIGoYBM5M,7aJuG4TFXa2hmE4z1yxc3n,6dOtVTDdiauQNBQEDOtlAB,0.2,0.747,0.507,0.0608,-10.171,0.0358,104.978,1,0.438
3,Today's Top Hits,"Good Luck, Babe!",Chappell Roan,"Good Luck, Babe!",37i9dQZF1DXcBWIGoYBM5M,1WAjjRMfZjEXtB0lQrAw6Q,0WbMK4wrZ1wFSty9F7FCgu,0.0502,0.7,0.582,0.0,-5.96,0.0356,116.712,0,0.785
4,Today's Top Hits,A Bar Song (Tipsy),Shaboozey,A Bar Song (Tipsy),37i9dQZF1DXcBWIGoYBM5M,6egBeCLeGITzGSo5VyRjwZ,2FQrifJ1N335Ljm3TjTVVf,0.0633,0.722,0.709,0.0,-4.95,0.0273,81.012,1,0.604


In [11]:
# Print the column / dtype summary of the frame built from the get_tracks() records.
# NOTE(review): the saved output lists Title Case columns (e.g. "Playlist Name", "Album ID")
# that the get_tracks() record keys do not produce -- it looks like output from an
# earlier run; restart the kernel and re-run to confirm.
spotify_track_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Playlist Name     50 non-null     object 
 1   Track Name        50 non-null     object 
 2   Artist            50 non-null     object 
 3   Album Name        50 non-null     object 
 4   Playlist ID       50 non-null     object 
 5   Album ID          50 non-null     object 
 6   Track ID          50 non-null     object 
 7   Acousticness      50 non-null     float64
 8   Danceability      50 non-null     float64
 9   Energy            50 non-null     float64
 10  Instrumentalness  50 non-null     float64
 11  Loudness          50 non-null     float64
 12  Speechiness       50 non-null     float64
 13  Tempo             50 non-null     float64
 14  Mode              50 non-null     int64  
 15  Valence           50 non-null     float64
dtypes: float64(8), int64(1), object(7)
memory usag

In [None]:
# model 

