In [None]:

import base64
import pandas as pd
import requests
import seaborn as sns
import spotipy

from dotenv import dotenv_values
from IPython.display import HTML, display
from matplotlib import pyplot
from time import sleep
from spotipy.oauth2 import SpotifyOAuth

In [None]:
# Read key/value settings from the local ".env" file; the Spotify credentials
# used when creating the client are looked up in this mapping by key.
config = dotenv_values(".env")

In [None]:
# Space-separated Spotify OAuth scopes requested for this session
# (playback control and reading the playback state).
scope = "user-modify-playback-state app-remote-control streaming user-read-playback-state"

# Spotify Web API client shared by the rest of the notebook. Credentials and
# the redirect URL come from the .env-backed `config` mapping; a missing key
# raises KeyError here.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=config["SPOTIFY_CLIENT_ID"],
        client_secret=config["SPOTIFY_CLIENT_SECRET"],
        redirect_uri=config["SPOTIFY_REDIRECT_URL"],
        scope=scope
    )
)


In [None]:
# Name of the Spotify device that all playback in this notebook is sent to.
TARGET_DEVICE_NAME = "Tim’s MacBook Pro (2)"

# One row per device currently reported by Spotify for this account.
devices_df = pd.DataFrame(sp.devices()["devices"])
if devices_df.empty:
    # Without this guard the "name" column lookup below fails with a bare KeyError.
    raise LookupError(
        "Spotify reports no available devices; open Spotify on the target device first."
    )

matching_devices = devices_df[devices_df["name"] == TARGET_DEVICE_NAME]
if matching_devices.empty:
    # Without this guard `.values[0]` fails with an uninformative IndexError.
    raise LookupError(
        f"No Spotify device named {TARGET_DEVICE_NAME!r}; "
        f"available devices: {sorted(devices_df['name'].tolist())}"
    )

# If several devices share the name, the first one listed is used.
device_id = matching_devices["id"].values[0]

In [None]:
def _fetch_image_as_base64(img_url: str, timeout_seconds: float = 10) -> str:
    """Download an image and return its bytes as an ASCII base64 string."""
    response = requests.get(img_url, timeout=timeout_seconds)
    # Fail loudly on 4xx/5xx instead of embedding an error page as "image" data.
    response.raise_for_status()
    return base64.b64encode(response.content).decode('ascii')


def display_two_images(img_url_1: str, img_url_2: str, text_1: str, text_2: str) -> None:
    """Render two captioned images side by side in the notebook output.

    Both images are downloaded and embedded as base64 ``data:`` URIs, so the
    rendered HTML does not reference the remote URLs. Each image is shown at
    300x300 pixels and declared as JPEG regardless of its actual format.

    Args:
        img_url_1: URL of the image shown on the left.
        img_url_2: URL of the image shown on the right.
        text_1: Caption placed above the left image. It is inserted into the
            HTML as-is (not escaped), so it may contain markup such as
            ``<br />``.
        text_2: Caption placed above the right image; handled like ``text_1``.

    Raises:
        requests.RequestException: If a download fails, times out, or returns
            an HTTP error status.
    """
    html_template = """
        <div class="row">
            <div style="float:left;margin-right:30px;width:300px">{text_1}</div>
            <div style="float:left;margin-right:30px;width:300px">{text_2}</div>
        </div>
        <div style="clear:both"></div>
        <div class="row">
            <img style="float:left;margin-right:30px;" src="data:image/jpeg;base64,{b64_img_1}" width="300" height="300" />
            <img style="float:left;margin-right:30px;" src="data:image/jpeg;base64,{b64_img_2}" width="300" height="300" />
        </div>
    """
    b64_img_1 = _fetch_image_as_base64(img_url_1)
    b64_img_2 = _fetch_image_as_base64(img_url_2)
    display(
        HTML(
            html_template.format(
                text_1=text_1, text_2=text_2, b64_img_1=b64_img_1, b64_img_2=b64_img_2
            )
        )
    )


In [None]:
def play_part_of_song(device_id: str, uri: str, start_position_ms: int, n_seconds: int=10) -> None:
    """Play a track from a given offset and pause it again after ``n_seconds``.

    Uses the notebook-level Spotify client ``sp`` and blocks for ``n_seconds``
    while the track is playing.

    Args:
        device_id: Id of the Spotify device to play on.
        uri: Spotify URI of the track to play.
        start_position_ms: Offset into the track, in milliseconds, at which
            playback starts. Fractional values are truncated.
        n_seconds: How long to let the track play before pausing.
    """
    sp.start_playback(
        device_id=device_id,
        uris=[uri],
        # Callers may pass floats or NumPy scalars; a plain int is what the
        # millisecond position is meant to be and is always JSON-serializable.
        position_ms=int(start_position_ms)
    )
    try:
        sleep(n_seconds)
    finally:
        # Pause even when the wait is interrupted (e.g. the kernel is stopped),
        # so the device does not keep playing unattended.
        sp.pause_playback(device_id=device_id)

def play_middle_of_track(device_id: str, track_series: pd.Series, n_seconds: int=9) -> None:
    """Play ``n_seconds`` of a track on a device, starting at its midpoint.

    Args:
        device_id: Id of the Spotify device to play on.
        track_series: Row describing one track; its ``"uri"`` and
            ``"duration_ms"`` entries are read.
        n_seconds: How long to let the track play before pausing.
    """
    # Integer midpoint in milliseconds: true division would yield a float, and
    # the int() cast turns a NumPy scalar into a plain Python int.
    midpoint_ms = int(track_series["duration_ms"]) // 2
    play_part_of_song(
        device_id=device_id,
        uri=track_series["uri"],
        start_position_ms=midpoint_ms,
        n_seconds=n_seconds
    )

def play_audio_feature_extremes(tracks_df, audio_feature, pause_seconds: int=2) -> None:
    """Show and play the tracks with the lowest and highest value of a feature.

    The lowest track is the first row after sorting ``tracks_df`` ascending by
    ``audio_feature``; the highest is the first row after sorting descending.
    Both album covers are displayed side by side with a caption, then the
    middle of the lowest track is played, followed by a pause and the middle
    of the highest track. Playback goes to the notebook-level ``device_id``.

    Args:
        tracks_df: Frame of tracks; the columns ``audio_feature``,
            ``"album_img_url"``, ``"artist_names"``, ``"name"``, ``"uri"`` and
            ``"duration_ms"`` are read.
        audio_feature: Name of the column to rank the tracks by.
        pause_seconds: Silence between the two snippets, in seconds.
    """
    lowest = tracks_df.sort_values(by=audio_feature, ascending=True).iloc[0]
    highest = tracks_df.sort_values(by=audio_feature, ascending=False).iloc[0]

    # Captions contain a literal <br />, which is rendered as a line break.
    lowest_caption = f"Lowest {audio_feature}({lowest[audio_feature]}): <br />{lowest['artist_names']} - {lowest['name']}"
    highest_caption = f"Highest {audio_feature}({highest[audio_feature]}): <br />{highest['artist_names']} - {highest['name']}"

    display_two_images(
        lowest["album_img_url"],
        highest["album_img_url"],
        lowest_caption,
        highest_caption,
    )

    play_middle_of_track(device_id, lowest)
    sleep(pause_seconds)
    play_middle_of_track(device_id, highest)

In [None]:
# Load the après-ski track table from a local pickle file.
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
df_apres_ski = pd.read_pickle("data/df_apres_ski.pkl")

In [None]:
# Load the combined song table and add the track length in minutes,
# derived from the millisecond duration (ms -> s -> min).
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
df_all_songs = (
    pd.read_pickle("data/all_songs_with_lyrics_cleaned.pkl")
    .assign(duration_minutes=lambda track: track["duration_ms"] / 1000 / 60)
)

In [None]:
# Audio-feature columns plotted together on one shared axis
# (presumably all valued between 0 and 1, per the variable name — verify against the data).
zero_to_one_scale = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'speechiness',
    'valence',
]
# Remaining feature columns; each gets its own subplot because their scales differ.
other_features = ['duration_minutes', 'key', 'loudness', 'popularity', 'tempo']

In [None]:
# Keep only the track id, the playlist label and the audio-feature columns
# that are reshaped to long format afterwards.
df_to_melt = df_all_songs[["id", "playlist"] + zero_to_one_scale + other_features]
df_to_melt.head()

In [None]:
# (rows, columns) of the wide frame before melting.
df_to_melt.shape

In [None]:
# Reshape wide -> long: one row per (id, playlist, audio_feature) holding that
# feature's value, so that features can be plotted against a common axis.
melted_df = pd.melt(
    df_to_melt,
    id_vars=["id", "playlist"],
    value_vars=zero_to_one_scale + other_features,
    var_name="audio_feature",
    value_name="value",
)

In [None]:
# Preview the long-format frame.
melted_df.head()

In [None]:
# (rows, columns) of the long-format frame.
melted_df.shape

In [None]:
# Violin plot of the zero-to-one audio features, one violin per feature with
# the playlists as the two halves, saved as a PNG.
a4_dims = (11.7, 8.27)  # A4 landscape; matplotlib figure sizes are in inches
fig, ax = pyplot.subplots(figsize=a4_dims)
fig.suptitle('Zero-to-One Scale Audio Features')
sns.violinplot(
    ax=ax,
    data=melted_df[melted_df["audio_feature"].isin(zero_to_one_scale)],
    x="audio_feature",
    y="value",
    hue="playlist",
    split=True,
)
fig.savefig('images/zero_to_one_scale.png')

# What does this mean?!

In [None]:
# Show the covers and play a snippet of the après-ski tracks with the lowest and highest acousticness.
play_audio_feature_extremes(df_apres_ski, "acousticness")


In [None]:
# Show the covers and play a snippet of the après-ski tracks with the lowest and highest danceability.
play_audio_feature_extremes(df_apres_ski, "danceability")

In [None]:
# play_audio_feature_extremes(df_apres_ski, "energy")

In [None]:
# play_audio_feature_extremes(df_apres_ski, "liveness")

In [None]:
# Show the covers and play a snippet of the après-ski tracks with the lowest and highest instrumentalness.
play_audio_feature_extremes(df_apres_ski, "instrumentalness")
# Takeaway: the lyrics don't need to be complex,
# and sometimes no words are needed at all.

In [None]:
# play_audio_feature_extremes(df_apres_ski, "speechiness")


In [None]:
# Show the covers and play a snippet of the après-ski tracks with the lowest and highest valence.
play_audio_feature_extremes(df_apres_ski, "valence")

In [None]:
# One violin plot per remaining feature, each on its own subplot so that every
# feature keeps its own y-axis scale; the figure is saved as a PNG.
a4_dims = (11.7, 8.27)  # A4 landscape; matplotlib figure sizes are in inches
fig, axs = pyplot.subplots(1, len(other_features), figsize=a4_dims)
fig.suptitle('Hetero-scale Audio Features')
for i, feature in enumerate(other_features):
    sns.violinplot(
        ax=axs[i],
        data=melted_df[melted_df["audio_feature"] == feature],
        x="audio_feature",
        y="value",
        hue="playlist",
        split=True,
    )
fig.savefig('images/other_features.png')

# This kind of reflects what I was thinking when my NLP efforts didn't pay off

In [None]:

# Play 10 seconds of one specific track (looked up by Spotify id), starting 56 s in.
track_series = df_apres_ski[df_apres_ski["id"] == "3S3Zrdg6WDjXTE8BfYGvE9"].iloc[0]
play_part_of_song(
    device_id=device_id,
    uri=track_series["uri"],
    start_position_ms=56000,
    n_seconds=10
)

In [None]:
# Mean value of each zero-to-one feature per playlist (non-numeric columns such as "id" are ignored).
melted_df[melted_df["audio_feature"].isin(zero_to_one_scale)].groupby(["playlist", "audio_feature"]).mean(numeric_only=True)

In [None]:
# Non-words: in "Hurra die Gams" and in "Atemlos durch die Nacht"

In [None]:
# Après-ski tracks with a popularity of at least 40, highest danceability first.
(
    df_apres_ski.query("popularity >= 40")
    .sort_values("danceability", ascending=False)
)

In [None]:
# Après-ski tracks with a popularity of at least 40, highest acousticness first.
(
    df_apres_ski.query("popularity >= 40")
    .sort_values("acousticness", ascending=False)
)

In [None]:

# Play 10 seconds of one specific track (looked up by Spotify id), starting 51 s in.
# NOTE(review): `pre_selection` maps this same track id to a 21 s start — confirm which offset is intended.
track_series = df_apres_ski[df_apres_ski["id"] == "2kjlOZ10YPK3deMN45l4bS"].iloc[0]
play_part_of_song(
    device_id=device_id,
    uri=track_series["uri"],
    start_position_ms=51000,
    n_seconds=10
)

## Based on this query I did a manual pre-selection to exclude songs you will need to learn outside of the office :-)

In [None]:
# (
#     df_all_songs
#     .query("playlist == 'Après Ski'")
#     .query("popularity >= 40")
#     .query("language == 'German'") # This also excludes songs for which we don't have lyrics
# )

In [None]:
# map song ids to starting point
pre_selection = {
    "5Qht2aUJcCjRuhrlHvvKt2": 84,
    "7KSOqRndT6D0d5Ok2yVrSh": 48,
    "1hDNg43vCjUHSJSzETUHdN": 50,
    "4HqlbB0BuahK05r6P2KcwP": 49,
    "1RcWWgnw5fVm3wcNq11zBu": 2,
    "6ksZJSo6h2OAlqQNQZEYIu": 51,
    "1PGSrlMKPm4FsgZvHtOY2Q": 27,
    "2A2PmKoiTzGbQRglDK0v2K": 45,
    "2kjlOZ10YPK3deMN45l4bS": 21,
    "2E68HPSmJxetiG8xjkNAJ3": 15,
    "7gJc9SPshSHnLSqz2d0ETV": 61,
    "65wKbNqjUTmdyk47Z31Czm": 111,
}


In [None]:
# Track ids of the hand-picked pre-selection (a live view onto the dict's keys).
ids = pre_selection.keys()

In [None]:
# Restrict the après-ski table to the pre-selected tracks.
df_selection = df_apres_ski[df_apres_ski["id"].isin(ids)]

In [None]:
# Vote counts per audio feature: how many votes to minimize it versus to
# maximize it. The share of "maximize" votes decides where the target value
# lies between the selection's minimum and maximum (equal counts -> halfway).
scores = {
    "acousticness":     {"minimize": 4, "maximize": 4},
    "danceability":     {"minimize": 4, "maximize": 4},
    "speechiness":      {"minimize": 4, "maximize": 4},
    "instrumentalness": {"minimize": 4, "maximize": 4},

}

In [None]:
# Keep the track id plus the scored feature columns only.
df_selection = df_selection[["id", *scores.keys()]]
# Column-wise minimum and maximum over the selection. The "id" column is
# included in these dicts, but only the feature keys are read afterwards.
selection_min = df_selection.min().to_dict()
selection_max = df_selection.max().to_dict()
# Spread (max - min) of each scored feature within the selection.
selection_range = {feature: (selection_max[feature] - selection_min[feature]) for feature in scores.keys()}

In [None]:
# Share of "maximize" votes per feature: 0.0 means all votes want the feature
# minimized, 1.0 means all want it maximized.
# NOTE: a feature with zero votes in total raises ZeroDivisionError here.
positions = {
    feature: votes["maximize"] / (votes["maximize"] + votes["minimize"])
    for feature, votes in scores.items()
}
# Target value per feature: linear interpolation between the selection's
# minimum and maximum at the voted position.
required_levels = {
    feature: (selection_min[feature] + selection_range[feature] * positions[feature])
    for feature in scores
}


In [None]:
# Inspect the target value computed for each feature.
required_levels

In [None]:
# Tag the target vector with a sentinel id so it can be stacked with the real tracks.
# NOTE: this mutates `required_levels` in place (adds an "id" key).
required_levels["id"] = "required_levels"
# One-row frame with the same columns, in the same order, as df_selection.
df_required_levels = pd.DataFrame([required_levels], columns=df_selection.columns)

In [None]:
# Stack the target row underneath the selected tracks; the index is renumbered
# so the target row ends up as the last row.
selection_plus_required = pd.concat(
    [df_selection, df_required_levels],
    ignore_index=True,
)

In [None]:
from scipy.spatial.distance import pdist, squareform


In [None]:

# Pairwise Euclidean distances between all rows (tracks plus the target row)
# in the space of the scored features. pdist returns the condensed form;
# squareform expands it into a square, symmetric matrix.
distances = pdist(selection_plus_required[scores.keys()].values, metric='euclidean')
dist_matrix = squareform(distances)

In [None]:
# The target row was appended last, so the last row of the matrix holds every
# row's distance to the target (0 for the target row itself).
# NOTE: this adds a column to `selection_plus_required` in place.
selection_plus_required["distance"] = dist_matrix[-1]

In [None]:
# Ids of the three tracks closest to the target levels.
# The target row is excluded by its sentinel id rather than by assuming it
# sorts first: a track that matches the target exactly also has distance 0,
# in which case the sentinel could otherwise end up among the winners.
winners = (
    selection_plus_required[selection_plus_required["id"] != "required_levels"]
    .sort_values("distance")["id"]
    .values[:3]
)

In [None]:
# Play a 10-second snippet of every winning track, starting at its hand-picked offset.
# The loop variable is named `track_id` so the builtin `id` is not shadowed
# for the rest of the kernel session.
for track_id in winners:
    track_series = df_apres_ski[df_apres_ski["id"] == track_id].iloc[0]
    play_part_of_song(
        device_id=device_id,
        uri=track_series["uri"],
        # pre_selection stores the start offset in seconds; playback expects milliseconds.
        start_position_ms=pre_selection[track_id] * 1000,
        n_seconds=10
    )
    

In [None]:
# TODO: create playlist
# TODO: make separate script for scoring
# TODO: make polls