In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import os

In [3]:
# Get the current working directory
cwd = os.getcwd()
# create a relative path to the data file
data_file = os.path.join(cwd, 'client_cred', 'creds.txt')

In [5]:
# open the text file
with open(data_file, 'r') as f:
    cid = f.readline().strip()
    secret = f.readline().strip()

In [3]:
#Authentication - without user
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [4]:
def get_playlist_tracks(playlist_id):
    '''
    Params:
        playlist_id: ID of the playlist
    
    Returns:
        track_ids: Id's of all the tracks in the playlist. Works for playlists with >100 tracks as well
        artist_names: Names of the first artist in the playlist
        track_names: Names of the track

    Example: Playlist Link is in the format "https://open.spotify.com/playlist/5S8SJdl1BDc0ugpkEvFsIL"
    Id of this playlist is '5S8SJdl1BDc0ugpkEvFsIL'
    '''
    tracks = []
    temp_track_results = sp.playlist_tracks(playlist_id, offset = 0, limit = 100)
    tracks.extend(temp_track_results['items'])

    while temp_track_results['next']:
        temp_track_results = sp.next(temp_track_results)
        tracks.extend(temp_track_results['items'])

    track_ids = [track['track']['id'] for track in tracks]
    artist_names = [track["track"]["artists"][0]['name'] for track in tracks]
    track_names = [track["track"]["name"] for track in tracks]

    return track_ids, artist_names, track_names

In [5]:
def get_audio_features(track_ids):
    '''
     Get the audio features from sp.audio_features
     Params:
         track_ids: List of all the track_ids
     Returns:
         track_features: Dataframe with all the track features
    '''
    audio_default = {"danceability": 0, "energy": 0, "key": 0, "loudness": 0, "mode": 0, "speechiness": 0,
              "acousticness": 0, "instrumentalness": 0, "liveness": 0, "valence": 0, "tempo": 0,
              "type": "audio_features", 'id' : '0', 'uri':'', 'track_href':'',
             "analysis_url":'', 'duration_ms':0, 'time_signature':0}
    # Loop through each track and get its audio features
    audio_features_list = []
    for i in range(0, len(track_ids), 100):
        # Get the next 100 track IDs from the list
        track_ids_subset = track_ids[i:i+100]

        # Get the audio features for the subset of track IDs
        audio_features_subset = sp.audio_features(track_ids_subset)

        # Loop through each audio feature and add the track ID
        for j in range(len(audio_features_subset)):
            audio_features = audio_features_subset[j]

            if audio_features is None:
                audio_features = {key: audio_default[key] for key in audio_default.keys()}
            audio_features['id'] = track_ids_subset[j]
            audio_features_list.append(audio_features)
    # track_ids_subset = tracks_temp_data['track_id'][9735:9745].tolist()
    # audio = []
    # # Get the audio features for the subset of track IDs
    # for j in range(len(track_ids_subset)):
    #     audio_features_subset = sp.audio_features(track_ids_subset[j])
    #     if audio_features_subset[0] is None:
    #         audio_features_subset = [{key: audio_default[key] for key in audio_default.keys()}]
    #     audio_features_subset[0]['id'] = track_ids_subset[j]
    #     audio.append(audio_features_subset)
    track_features = pd.DataFrame(audio_features_list)
    return track_features

def get_track_audio_features(track_id):
    return sp.audio_features(track_id)

In [14]:
data_file = os.path.join(cwd, 'flask/client_cred', 'playlist_ids.txt')
# open file in read mode
with open(data_file, 'r') as f:
    playlist_ids = [line.strip() for line in f.readlines()]

In [16]:
#import json
#with open('tracks_info', 'w') as fout:
#    json.dump(tracks_info, fout)

track_ids = []
artist_names = []
track_names = []
for playlist_id in playlist_ids:
    temp_track_ids = []
    temp_artist_names = []
    temp_track_names = []
    temp_track_ids, temp_artist_names, temp_track_names = get_playlist_tracks(playlist_id)
    track_ids.extend(temp_track_ids)
    artist_names.extend(temp_artist_names)
    track_names.extend(temp_track_names)

TypeError: 'NoneType' object is not subscriptable

In [25]:
tracks_temp_data = pd.DataFrame(list(zip(track_ids, artist_names, track_names)), 
                                columns = ['track_id', 'artist_name', 'track_title'])
tracks_temp_data = tracks_temp_data.dropna()
tracks_temp_data.reset_index(inplace = True, drop = True)
tracks_temp_data.drop_duplicates(inplace = True, ignore_index = True)
#tracks_data.to_csv("tracks_data.csv", index = False)

In [27]:
audio_features = get_audio_features(tracks_temp_data['track_id'].tolist())

In [41]:
tracks_data = pd.merge(tracks_temp_data, audio_features, left_on = 'track_id', right_on = 'id')
tracks_data = tracks_data[['track_id', 'artist_name', 'track_title', 'danceability', 'energy', 'key', 
                           'loudness', 'mode', 'speechiness', 'acousticness',
                           'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']]

tracks_data = tracks_data[tracks_data['duration_ms'] > 0]
tracks_data = tracks_data[tracks_data['duration_ms'] < 360000]

In [None]:
#tracks_data.to_csv("track_features.csv", index = False)