## Extract songs that Brittain loves from Spotify using the Spotipy API

In [None]:
# Load the API key and secret (make sure they are never made publicly visible)

from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment before reading from it.
load_dotenv()

# Spotify app credentials; each is None when its variable is not set.
api_key = os.environ.get("API_KEY_B")
api_secret = os.environ.get("API_SECRET_B")


In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from time import time
from pandas import DataFrame

# Credentials are taken from the environment-backed variables defined earlier;
# never paste literal keys into this notebook before pushing it to GitHub.
cid, secret = api_key, api_secret

# Client built from the app's client id and secret only.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
# Use spotipy to pull britt's loved songs
from spotipy.oauth2 import SpotifyOAuth

# OAuth redirect target.
# NOTE(review): presumably has to match the redirect URI registered for the
# Spotify app -- confirm.
redirect_uri = 'https://example.com/callback'

# Audio-feature fields copied onto each liked track.
FEATURE_KEYS = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]

# Paging state and batch sizes used by the requests below.
OFFSET = 0
SAVED_TRACKS_LIMIT = 50
FEATURE_LIMIT = 100

# Rebind `sp` to a user-authorised client with the "user-library-read" scope,
# which the saved-tracks request below relies on.
auth_manager = SpotifyOAuth(
    client_id=cid,
    client_secret=secret,
    redirect_uri=redirect_uri,
    scope="user-library-read",
)
sp = spotipy.Spotify(auth_manager=auth_manager)

def _summarise_saved_item(item):
    """Keep only the name, id, popularity and duration of one saved-track entry."""
    track = item['track']
    return {
        'name': track['name'],
        'id': track['id'],
        'popularity': track['popularity'],
        'duration': track['duration_ms'],
    }


liked_tracks = []
print('')

# Walk the saved-tracks pages until the response carries no 'next' link.
while True:
    paged_tracks = sp.current_user_saved_tracks(offset=OFFSET, limit=SAVED_TRACKS_LIMIT)
    liked_tracks += [_summarise_saved_item(item) for item in paged_tracks['items']]
    print(f'Fetched {len(liked_tracks)} tracks')
    OFFSET += SAVED_TRACKS_LIMIT
    if paged_tracks['next'] is None:
        break

def get_windowed_track_ids(liked_tracks, limit):
    """Yield ``liked_tracks`` in consecutive windows together with their ids.

    Each yielded pair is ``(window, ids)``: ``window`` is a slice of at most
    ``limit`` track dicts and ``ids`` is the list of their ``'id'`` values in
    the same order. The final window may be shorter than ``limit``.
    """
    window_starts = range(0, len(liked_tracks), limit)
    for start in window_starts:
        stop = start + limit
        window = liked_tracks[start:stop]
        ids = [entry['id'] for entry in window]
        yield window, ids

# Enrich every liked track with its audio features, requesting them in
# batches of FEATURE_LIMIT ids.
track_feature_list = []
print('')

for track_window, track_window_ids in get_windowed_track_ids(liked_tracks, FEATURE_LIMIT):
    track_features = sp.audio_features(tracks=track_window_ids)
    for index, _track in enumerate(track_window):
        _features = track_features[index]
        # The audio-features response holds None at the position of a track
        # with no features available; calling .items() on it would raise.
        # Such a track is kept, just without the feature fields.
        if _features is not None:
            _track.update({k: v for k, v in _features.items() if k in FEATURE_KEYS})
        track_feature_list.append(_track)
    print(f'Fetched features for {len(track_feature_list)} tracks')

# Turn the enriched track dicts into a dataframe and persist it without the
# row index.
loved_songs = 'songs_b_loves.csv'
songs_b_loves_df = DataFrame(track_feature_list)
songs_b_loves_df.to_csv(loved_songs, index=False)
print('')
print(f'Saved features to {loved_songs}')

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the liked-songs frame.
songs_b_loves_df.info()

In [None]:
# Collect the track ids as a plain list; they drive the per-track metadata
# lookups that follow.
id_column = songs_b_loves_df["id"]
print(len(id_column))
track_ids = list(id_column)

In [None]:
# Define a function to pull additional track attributes — track name, album, artist, and release date
def getTrackFeatures(id):
    """Look up one track and return its descriptive metadata.

    Queries the module-level ``sp`` client for the track ``id`` and returns
    ``[name, album, artist, release_date]``. The artist is the first artist
    listed on the track's album entry, and the release date is the album's.
    """
    meta = sp.track(id)
    album = meta['album']
    return [
        meta['name'],
        album['name'],
        album['artists'][0]['name'],
        album['release_date'],
    ]

In [None]:
# Fetch the metadata of every liked track (one lookup per id) and collect the
# resulting rows into a dataframe.

tracks = [getTrackFeatures(track_id) for track_id in track_ids]

attribute_columns = ['name', 'album', 'artist', 'release_date']
loved_tracks_attributes = pd.DataFrame(tracks, columns=attribute_columns)
# index=False is not passed here, so the row index is written to the file too.
loved_tracks_attributes.to_csv("loved_tracks_attributes_Aug2023_britt.csv")

In [None]:
# Copy the row index into a regular column; it is the join key used when the
# track attributes are merged with the liked-songs frame.
loved_tracks_attributes['index_col'] = loved_tracks_attributes.index

In [None]:
# Display the new key column as a quick sanity check.
loved_tracks_attributes.index_col

In [None]:
# NOTE(review): the 5 is received by `verbose`, the first parameter of
# DataFrame.info -- it is not a row count. Confirm that is the intent.
loved_tracks_attributes.info(verbose=5)

In [None]:
# Add the same row-index key to the liked-songs frame. The attributes frame was
# built by iterating this frame's ids in order, so equal index values refer to
# the same track.
songs_b_loves_df['index_col'] = songs_b_loves_df.index

In [None]:
# Join the liked songs with their attributes on the shared row-index key.
loved_tracks_df = songs_b_loves_df.merge(loved_tracks_attributes, on='index_col')

In [None]:
# Inspect the merged column names; both inputs carry a 'name' column, so look
# for the suffixed duplicates before dropping one of them.
loved_tracks_df.columns

In [None]:
# 'name_y' is the second copy of the track name, brought in by the attributes
# frame; drop it and show the remaining columns.
loved_tracks_df = loved_tracks_df.drop('name_y', axis='columns')
loved_tracks_df.columns

In [None]:
# Create a numeric "release year" column from the first four characters of the
# release date string.
loved_tracks_df['release_year'] = pd.to_numeric(loved_tracks_df['release_date'].str[0:4])
# describe must be called: without the parentheses the cell only shows the
# bound-method repr instead of the summary statistics.
loved_tracks_df['release_year'].describe()

In [None]:
# Count how many liked tracks fall into each release year.
loved_tracks_df.release_year.value_counts()

In [None]:
# Write the merged dataset without the row index; this file is the input for
# the EDA and cluster analysis.
final_csv_path = "songs_b_loves_final.csv"
loved_tracks_df.to_csv(final_csv_path, index=False)