In [None]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

from dotenv import dotenv_values

In [None]:
# Read the Spotify API settings from the local ".env" file; the values are
# looked up by key when the client is built.
config = dotenv_values(dotenv_path=".env")

In [None]:
# OAuth scope requested for the user token.
scope = "playlist-modify-public"

# Build the OAuth manager from the values in `config` first, then wrap it in
# the client object that the rest of the notebook refers to as `sp`.
oauth_manager = SpotifyOAuth(
    client_id=config["CLIENT_ID"],
    client_secret=config["CLIENT_SECRET"],
    redirect_uri=config["REDIRECT_URL"],
    scope=scope,
)
sp = spotipy.Spotify(auth_manager=oauth_manager)


In [None]:
# Columns kept from every playlist track record.
playlist_track_features = [
    "id",
    "artists",
    "name",
    "album",
    "external_urls",
    "popularity",
]

# Album fields kept by get_album_features_df (renamed there to "album_<field>").
required_album_features = [
    "genres",
    "name",
    "release_date",
]

def extract_artist_names(artists_series) -> list:
    """Build one display string of artist names per track.

    Args:
        artists_series: Iterable with one entry per track. Each entry is a
            list of artist dicts that carry a "name" key.

    Returns:
        A list with one string per entry, in input order. A single artist is
        returned unchanged, several artists are joined as "A, B & C", and an
        empty artist list yields an empty string.
    """
    return_value = []
    for artists_list in artists_series:
        artist_names_list = [artist["name"] for artist in artists_list]
        if not artist_names_list:
            # No artists at all: indexing with [-1] below would raise IndexError.
            artist_names = ""
        elif len(artist_names_list) == 1:
            artist_names = artist_names_list[0]
        else:
            # All but the last name are comma separated; the last is joined with "&".
            *leading_names, last_name = artist_names_list
            artist_names = ", ".join(leading_names) + " & " + last_name

        return_value.append(artist_names)
    return return_value

def get_all_tracks_in_playlist(playlist_id: str, limit: int=100) -> pd.DataFrame:
    """Get some information on all tracks in a playlist and return as pd.DataFrame.

    Pages through the playlist with the module-level ``sp`` client, requesting
    ``limit`` items at a time, and prints the running count after every page.

    Args:
        playlist_id: Playlist identifier passed to ``sp.playlist_tracks``.
        limit: Number of items requested per page.

    Returns:
        One row per track with the columns listed in
        ``playlist_track_features`` plus ``album_id`` and ``artist_names``.
        Rows without a track id or an album id are dropped.
    """
    tracks = []
    # Number of playlist items consumed so far. This (not len(tracks)) is the
    # paging offset, because items without a track are skipped below.
    n_items_seen = 0

    results = sp.playlist_tracks(playlist_id, limit=limit)
    n_total = results["total"]
    while True:
        items = results["items"]
        n_items_seen += len(items)
        # An item whose "track" is None cannot be turned into a row, so leave it out.
        tracks.extend(item["track"] for item in items if item["track"] is not None)
        print(f"Tracks Loaded (Out of Total): {len(tracks)} ({n_total})")

        # Stop when everything was requested, or when a page comes back empty
        # (otherwise the offset would never advance and the loop would not end).
        if not items or n_items_seen >= results["total"]:
            break
        results = sp.playlist_tracks(playlist_id, limit=limit, offset=n_items_seen)

    return (
        pd.DataFrame(tracks)[playlist_track_features]
        .assign(album_id=lambda track: track["album"].str.get("id"))
        .assign(artist_names=lambda track: extract_artist_names(track["artists"]))
        .dropna(subset=["id", "album_id"])
    )

def get_audio_features_df(sp, track_ids, query_limit: int=50) -> pd.DataFrame:
    """
    Get track audio features and return a pandas dataframe.

    Args:
        sp: Client object exposing ``audio_features(list_of_ids)``.
        track_ids: Sequence of track ids; it is sliced into batches.
        query_limit: Maximum number of ids sent per request.

    Returns:
        DataFrame with one row per audio-feature record returned by the
        client. Entries returned as None (presumably ids the API has no
        features for) are skipped.

    Raises:
        ValueError: If ``query_limit`` is smaller than 1.
    """
    if query_limit < 1:
        raise ValueError("query_limit must be a positive integer")

    n_total = len(track_ids)
    result_list = []
    # Walk the ids in fixed-size batches so progress never depends on how many
    # records the API actually returned.
    for start in range(0, n_total, query_limit):
        stop = min(start + query_limit, n_total)
        batch = sp.audio_features(track_ids[start:stop]) or []
        # None entries would break the DataFrame construction below.
        result_list += [features for features in batch if features is not None]
        print(f"Audio Features Loaded for {stop} ({n_total})")

    return pd.DataFrame(result_list)


def get_album_features_df(
    sp, track_df: pd.DataFrame, query_limit: int=20
) -> pd.DataFrame:
    """
    Get album features and return a pandas dataframe.

    Args:
        sp: Client object exposing ``albums(list_of_ids)``, which returns a
            dict with an "albums" list.
        track_df: DataFrame with an ``album_id`` column; one album lookup is
            made per row, in row order.
        query_limit: Maximum number of album ids sent per request.

    Returns:
        DataFrame with one row per returned album entry, in request order,
        holding the fields named in ``required_album_features`` renamed to
        ``album_<field>``. An album entry returned as None becomes a row of
        missing values so that row positions still line up with ``track_df``.

    Raises:
        ValueError: If ``query_limit`` is smaller than 1.
    """
    if query_limit < 1:
        raise ValueError("query_limit must be a positive integer")

    # Materialise the id list once instead of on every loop iteration.
    album_ids = list(track_df["album_id"].values)
    n_total = len(album_ids)
    result_list = []
    for start in range(0, n_total, query_limit):
        stop = min(start + query_limit, n_total)
        results = sp.albums(album_ids[start:stop])
        # Keep a placeholder for None entries: dropping them would shift every
        # following row relative to track_df.
        result_list += [album if album is not None else {} for album in results["albums"]]
        print(f"Album Features Loaded for {stop} ({n_total})")

    return (
        pd.DataFrame(result_list)
        # reindex (rather than [...]) so an empty or all-placeholder result
        # still yields the expected columns instead of raising KeyError.
        .reindex(columns=required_album_features)
        .rename(columns={col_name: f"album_{col_name}" for col_name in required_album_features})
    )

In [None]:
def get_full_track_data_for_playlist(playlist_id: str) -> pd.DataFrame:
    """
    Combine outputs of
        - get_all_tracks_in_playlist,
        - get_audio_features_df and
        - get_album_features_df

    Args:
        playlist_id: Playlist identifier forwarded to
            ``get_all_tracks_in_playlist``.

    Returns:
        DataFrame with one row per playlist track row: the track columns,
        joined on track id with the audio-feature columns and the
        ``album_*`` columns, with ``id`` restored as a regular column.
    """
    df_playlist_tracks = get_all_tracks_in_playlist(playlist_id)
    df_track_audio_features = get_audio_features_df(sp, list(df_playlist_tracks["id"]))
    df_track_album_features = get_album_features_df(sp, df_playlist_tracks)

    # A playlist can hold the same track more than once. Joining on a track id
    # that repeats on both sides would multiply those rows, so each lookup
    # table is reduced to one row per track id before joining.
    audio_by_track_id = (
        df_track_audio_features.drop_duplicates(subset="id").set_index("id")
    )
    # Album rows are built in the row order of df_playlist_tracks, so the
    # track ids are attached to them by position.
    album_by_track_id = df_track_album_features.set_index(df_playlist_tracks["id"])
    album_by_track_id = album_by_track_id[~album_by_track_id.index.duplicated()]

    return (
        df_playlist_tracks.set_index("id")
        .join(audio_by_track_id)
        .join(album_by_track_id)
        .reset_index()
    )


In [None]:
# Download the combined track / audio / album table for this playlist.
df_apres_ski = get_full_track_data_for_playlist(playlist_id="6GxKiCYFF6QxUX6z2SoP2E")

# The cover URL is the "url" of the first entry in each album's "images" list.
apres_ski_album_images = df_apres_ski["album"].str.get("images")
df_apres_ski["album_img_url"] = apres_ski_album_images.str[0].str.get("url")

# Store the result on disk.
df_apres_ski.to_pickle("data/df_apres_ski.pkl")

In [None]:
# Download the combined track / audio / album table for this playlist.
df_top_2000 = get_full_track_data_for_playlist(playlist_id="1DTzz7Nh2rJBnyFbjsH1Mh")

# The cover URL is the "url" of the first entry in each album's "images" list.
top_2000_album_images = df_top_2000["album"].str.get("images")
df_top_2000["album_img_url"] = top_2000_album_images.str[0].str.get("url")

# Store the result on disk.
df_top_2000.to_pickle("data/df_top_2000.pkl")