## Imports

In [None]:
import panel as pn
import pandas as pd
from panel.widgets import Tabulator, TextInput, RangeSlider
from bokeh.models.widgets.tables import NumberFormatter
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
import numpy as np
import json

# Load Panel's "tabulator" extension; the Tabulator widgets created below
# rely on it (see the Panel documentation for `pn.extension`).
pn.extension("tabulator")

## Constants

In [None]:
# CSV with the song catalogue that is loaded into the main table.
url = "https://raw.githubusercontent.com/music-recommender/music-recommender/main/data/song_info_complete_rows.csv"

# Feature columns used by the recommender. "Overlapping genres" is not a
# column of the CSV; it is computed per selected song at recommendation time.
recom_cols = [
    "Year",
    "Tempo",
    "Latitude",
    "Longitude",
    "Overlapping genres",
]
# Number of recommendations returned (and base neighbourhood size per song).
k = 5

# Keyword arguments shared by every Tabulator in the app.
tab_kwargs = {
    "hidden_columns": ["index", "ID", "Latitude", "Longitude", "Distance"],
    "layout": "fit_columns",
    "sizing_mode": "stretch_width",
    "disabled": True,
    "formatters": {
        "Tempo": NumberFormatter(format="0"),
        "Year": NumberFormatter(format="0"),
    },
}

# Set the theme on the Tabulator class itself rather than per instance.
Tabulator.theme = "midnight"

# Markdown snippets shown in the page layout (the leading "###" renders them
# as level-3 headings).
intro_text = "### Welcome to Music Recommender! This website can give you new and exciting songs to listen to based on your personal taste!"
songs_text = "### Please choose as many songs as you like from the table below and we will recommend new songs based on your selection.\n### Select a song by clicking on it. You can also use the filters to search for songs."
selected_text = "### Here you can see your selected songs. Remove a song by clicking on it."
recommend_text = "### These are your personal song recommendations. We used your selected songs to do our magic."

## Tabulators

In [None]:
# Main catalogue table. The "Genres" CSV field is a comma-separated string
# that is split into a list of genre names while loading.
songs_tab = Tabulator(
    pd.read_csv(url, converters={"Genres": lambda raw: raw.split(",")}),
    page_size=10,
    pagination="local",
    **tab_kwargs,
)

# The "selected" and "recommended" tables are each seeded with their own copy
# of the catalogue's current selection frame, so they share its columns.
selected_tab, recommend_tab = (
    Tabulator(songs_tab.selected_dataframe.copy(), **tab_kwargs)
    for _ in range(2)
)


def filter(df, value, column):
    """Return the rows of ``df`` that match one filter widget's value.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter.
    value : str or (low, high) sequence
        Current widget value. A falsy value (empty string, ``None``) means
        "no filter" and ``df`` is returned unchanged.
    column : str
        Column the value applies to:

        * ``"Title"``, ``"Artist"``, ``"Location"`` -- case-insensitive
          literal substring match.
        * ``"Tempo"``, ``"Year"`` -- inclusive ``value[0] <= x <= value[1]``.
        * ``"Genres"`` -- every space-separated word of ``value`` must occur
          (case-insensitively) in the string form of the row's genres.

    Returns
    -------
    pandas.DataFrame
        The matching rows; ``df`` itself for an empty value or a column
        this function has no rule for.
    """
    if not value:
        return df

    if column in ("Title", "Artist", "Location"):
        # regex=False: user input is plain text, so "(" or "+" must not be
        # parsed as a pattern. na=False: rows with a missing value simply
        # do not match instead of producing an unusable NaN mask.
        mask = df[column].str.contains(value, case=False, regex=False, na=False)
        return df[mask]

    if column in ("Tempo", "Year"):
        low, high = value[0], value[1]
        return df[(df[column] >= low) & (df[column] <= high)]

    if column == "Genres":
        terms = [term.lower() for term in value.split(" ")]

        def has_all_terms(genres):
            # Compare in lower case on both sides so the match is truly
            # case-insensitive, like the text columns above.
            haystack = str(genres).lower()
            return all(term in haystack for term in terms)

        return df[df[column].apply(has_all_terms)]

    # Unknown column: leave the table untouched rather than returning None.
    return df


# Free-text search boxes, one per searchable text column.
title_filter = TextInput(placeholder='E.g. "Never Gonna Give You Up"', name="Title")
artist_filter = TextInput(placeholder='E.g. "Rick Astley"', name="Artist")
location_filter = TextInput(placeholder='E.g. "England"', name="Location")
genres_filter = TextInput(placeholder='E.g. "Dance pop ballad"', name="Genres")

# Range sliders spanning the full range of values present in the catalogue.
tempo_filter = RangeSlider(
    name="Tempo",
    start=songs_tab.value["Tempo"].min(),
    end=songs_tab.value["Tempo"].max(),
    step=1,
)
year_filter = RangeSlider(
    name="Year",
    start=songs_tab.value["Year"].min(),
    end=songs_tab.value["Year"].max(),
    step=1,
)

# Column name -> (widget, parameter whose changes re-run the filter).
# Text boxes are watched through `value_input`, sliders through
# `value_throttled`.
filters = {
    "Title": (title_filter, title_filter.param.value_input),
    "Artist": (artist_filter, artist_filter.param.value_input),
    "Genres": (genres_filter, genres_filter.param.value_input),
    "Location": (location_filter, location_filter.param.value_input),
    "Tempo": (tempo_filter, tempo_filter.param.value_throttled),
    "Year": (year_filter, year_filter.param.value_throttled),
}


# Give every filter widget the same layout settings and register it on the
# song table: `filter` is bound to the watched parameter and the column name.
for column, (widget, watched_param) in filters.items():
    widget.sizing_mode = "stretch_width"
    widget.align = "center"
    songs_tab.add_filter(pn.bind(filter, value=watched_param, column=column))

## Helper functions

### Recommendation (MSD)

In [None]:
def count_overlapping_genres(song_ats, other_ats):
    """Count how many entries of ``song_ats`` also occur in ``other_ats``.

    Every entry of ``song_ats`` is tested on its own, so a value that appears
    twice in ``song_ats`` is counted twice if ``other_ats`` contains it.
    """
    return sum(1 for attribute in song_ats if attribute in other_ats)


def recommendSongs(selection):
    """Recommend up to ``k`` songs that are close to the selected songs.

    For every selected song a nearest-neighbour search is run over all songs
    by *other* artists, using the standardised features in ``recom_cols``.
    The "Overlapping genres" feature is computed separately for each selected
    song (number of its genres shared with the candidate). The candidates
    found for any selected song are then ranked by the sum of their squared
    distances to all selected songs.

    Parameters
    ----------
    selection : list of int
        Row positions (used with ``.iloc``) into ``songs_tab.value``.
        NOTE(review): the caller passes index labels; this presumably relies
        on the table keeping pandas' default 0..n-1 index -- confirm.

    Returns
    -------
    pandas.DataFrame
        At most ``k`` rows indexed by "ID", ordered by ascending "Distance".
        Besides the song columns it carries an "index" column (the row's
        label in the catalogue) and the float "Distance" column. The frame is
        empty when the selection is empty or no candidate songs remain.
    """
    songs = songs_tab.value.copy()
    # "Overlapping genres" does not exist in the table; it is built below.
    base_cols = [col for col in recom_cols if col != "Overlapping genres"]

    selected_rows = songs.iloc[selection]
    selected_genres = list(selected_rows["Genres"])
    # Explicit copy so adding a column does not write into a slice of `songs`.
    selection_songs = selected_rows[base_cols].copy()
    # A selected song overlaps with itself in all of its genres.
    selection_songs["Overlapping genres"] = [len(g) for g in selected_genres]

    # Remove all songs from the artists in the selection.
    songs = songs[~songs["Artist"].isin(selected_rows["Artist"].tolist())]

    # Ask for k candidates per selected song (more input songs mean a broader
    # search space), but never more than there are candidate rows, which
    # NearestNeighbors would reject.
    n_neighbors = min(k * len(selection), len(songs))

    neighbours = []
    distances = np.zeros(0)
    if n_neighbors > 0:
        # One feature frame per selected song, because the genre overlap is
        # relative to that song.
        candidate_frames = []
        for own_genres in selected_genres:
            frame = songs[base_cols].copy()
            frame["Overlapping genres"] = songs["Genres"].apply(
                lambda other, own=own_genres: count_overlapping_genres(own, other)
            )
            candidate_frames.append(frame)

        # Everything is normalised with the same scaler (fitted on the first
        # frame) so distances are comparable across selected songs.
        scaler = StandardScaler().fit(candidate_frames[0])
        matrices = [scaler.transform(frame) for frame in candidate_frames]
        scaled_selection = scaler.transform(selection_songs)

        # Union of the neighbours of every selected song, without duplicates.
        found = set()
        for song, matrix in zip(scaled_selection, matrices):
            knn = NearestNeighbors(n_neighbors=n_neighbors).fit(matrix)
            found.update(knn.kneighbors([song], return_distance=False)[0].tolist())
        neighbours = sorted(found)

        # Sum of squared Euclidean distances to every selected song.
        distances = np.zeros(len(neighbours))
        for song, matrix in zip(scaled_selection, matrices):
            distances += np.square(matrix[neighbours] - song).sum(axis=1)

    results = songs.iloc[neighbours].copy().reset_index()
    results["Distance"] = distances
    # Sort by least distance and only return the first k elements.
    return (
        results.sort_values(by=["Distance"])
        .iloc[:k]
        .reset_index(drop=True)
        .set_index("ID")
    )

### Comparison (Echo)

In [None]:
def readEchoUserData():
    """Load ``data/echo_user_data.json`` and return its values as sets.

    The file is expected to hold a JSON object whose values are lists; each
    list is converted to a ``set`` so membership tests are cheap.

    Returns
    -------
    dict
        Maps every top-level key of the JSON object (a user, going by the
        file name) to the set of entries in its list.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("data/echo_user_data.json", "r", encoding="utf-8") as f:
        raw = json.load(f)
    return {user: set(entries) for user, entries in raw.items()}


def echoComparison(user_song_id, recommended_songs_df):
    """Score recommended songs by co-occurrence in the Echo user data.

    Among the users whose data contains ``user_song_id``, compute for each
    recommended song the fraction whose data also contains that song.

    Parameters
    ----------
    user_song_id
        ID of the song the user selected.
    recommended_songs_df : pandas.DataFrame
        Recommended songs. Song IDs are read from the "ID" column or, if
        there is no such column, from an index named "ID".

    Returns
    -------
    numpy.ndarray
        One float per recommended song, in row order, each in ``[0, 1]``.
        All zeros when no user has ``user_song_id``.

    Raises
    ------
    KeyError
        If the frame has neither an "ID" column nor an index named "ID".
    """
    if "ID" in recommended_songs_df.columns:
        song_ids = recommended_songs_df["ID"].to_numpy()
    elif recommended_songs_df.index.name == "ID":
        song_ids = recommended_songs_df.index.to_numpy()
    else:
        raise KeyError("ID")

    # Which users have the selected song does not depend on the recommended
    # song, so determine that group once instead of once per song.
    listeners = [
        songs for songs in readEchoUserData().values() if user_song_id in songs
    ]

    scores = np.zeros(len(song_ids))
    if not listeners:
        return scores
    for i, song_id in enumerate(song_ids):
        shared = sum(1 for songs in listeners if song_id in songs)
        scores[i] = shared / len(listeners)
    return scores

## Interactive functions

In [None]:
def handle_drop(i):
    """Remove the clicked song from the selection and refresh recommendations.

    Parameters
    ----------
    i : int
        Position (``.iloc``) of the clicked row in the selected-songs table.
    """
    df = selected_tab.value.copy()
    remaining = df[df.ID != df.iloc[i].ID]
    selected_tab.value = remaining
    # Recommendations are derived from the selection, so recompute them the
    # same way handle_selection does. With nothing left selected there is
    # nothing to recommend from (and `outputs` hides both tables).
    if not remaining.empty:
        recommend_tab.value = recommendSongs(remaining.index.tolist())


def handle_selection(i):
    """Add the clicked catalogue song to the selection and recommend anew.

    Does nothing when a row with index label ``i`` is already selected.

    Parameters
    ----------
    i : int
        Index label of the clicked row in the catalogue table.
    """
    # Logical test with `if`/`return`; bitwise `~` only negates correctly on
    # numpy booleans (on a plain bool it yields a truthy integer).
    already_selected = (selected_tab.value.index == i).any()
    if already_selected:
        return

    clicked_row = songs_tab.value[songs_tab.value.index == i]
    selected_tab.value = pd.concat([selected_tab.value, clicked_row])
    recommend_tab.value = recommendSongs(selected_tab.value.index.tolist())


# Clicking a catalogue row selects that song; clicking a row in the
# selected-songs table removes it again. Only the row of the click event is
# passed on to the handlers.
songs_tab.on_click(lambda event: handle_selection(event.row))
selected_tab.on_click(lambda event: handle_drop(event.row))


@pn.depends(selected_df=selected_tab.param.value)
def outputs(selected_df):
    """Build the lower part of the page from the current selection.

    Re-evaluated whenever the selected-songs table's value changes. Returns a
    Markdown prompt while nothing is selected, otherwise a column holding the
    selected-songs and recommendations sections.
    """
    if not selected_df.empty:
        sections = [
            "# Selected",
            selected_text,
            selected_tab,
            "# Recommendations",
            recommend_text,
            recommend_tab,
        ]
        return pn.Column(*sections)

    return "## Please select something."

## Template

In [None]:
# Page shell: dark Vanilla template with a dark grey header bar.
template = pn.template.VanillaTemplate(
    header_background="#2e2e2e",
    theme="dark",
    title="Music Recommender",
)

# Raw CSS for the page: large, centred, white text for the nav bar and <h1>
# headings with a pulsing pink "glow" text-shadow animation. The same list is
# installed as the template's raw CSS here and reused as a stylesheet for the
# main column further down.
css = ["""

nav, nav *, h1{
  font-size: 35px;
  color: #fff;
  text-align: center;
  animation: glow 1s ease-in-out infinite alternate;
  text-align: center;
}

@-webkit-keyframes glow {
  from {
    text-shadow: 0 0 10px #fff, 0 0 20px #fff, 0 0 30px #e60073, 0 0 40px #e60073, 0 0 50px #e60073, 0 0 60px #e60073, 0 0 70px #e60073;
  }
  
  to {
    text-shadow: 0 0 20px #fff, 0 0 30px #ff4da6, 0 0 40px #ff4da6, 0 0 50px #ff4da6, 0 0 60px #ff4da6, 0 0 70px #ff4da6, 0 0 80px #ff4da6;
  }
}
"""]

template.config.raw_css = css

# Page body, top to bottom: intro, filter row, song catalogue, and the
# reactive selection/recommendation section produced by `outputs`.
template.main.append(
    pn.Column(
        intro_text,
        "# Songs",
        songs_text,
        pn.Row(
            title_filter,
            artist_filter,
            genres_filter,
            location_filter,
            tempo_filter,
            year_filter,
        ),
        songs_tab,
        outputs,
        stylesheets=css,
    ),
)

# Mark the template as the app to serve (e.g. via `panel serve`).
template.servable()