In [1]:
import pandas as pd
import numpy as np
import pickle
import json
import random
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Restore the two clustering models that were previously serialised with pickle.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open("models/model_km.pkl", mode="rb") as km_file:
    kmeans_loaded = pickle.load(km_file)

with open("models/model_agg.pkl", mode="rb") as agg_file:
    agg_loaded = pickle.load(agg_file)

In [3]:
# Read back the list of feature names that was saved as JSON, then echo it
with open("retained_features.json") as features_file:
    retained_features = json.loads(features_file.read())

print("retained_features:", retained_features)

retained_features: ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness']


## Build a recommender system based on the K-Means and agglomerative clusters using Euclidean distances and cosine similarity

In [4]:
def get_recommendations(artist_name, track_name, df, model, top=5):
    """
    Recommend tracks that share a cluster with the given track.

    The input track is looked up in ``df`` by exact artist and track name.
    Every other track with the same cluster label (for the given model) is
    ranked twice on the ``retained_features`` columns: by ascending Euclidean
    distance and by descending cosine similarity to the input track.

    Parameters
    ----------
    artist_name : str
        Value matched exactly against the ``artist`` column.
    track_name : str
        Value matched exactly against the ``track_name`` column.
    df : pandas.DataFrame
        Must contain ``artist``, ``track_name``, the ``retained_features``
        columns and the cluster-label column of the model
        (``clusters_km`` or ``clusters_agg``).
    model : object
        Either ``kmeans_loaded`` or ``agg_loaded``. It is only used to pick
        the cluster-label column and is printed for information.
    top : int, default 5
        Maximum number of recommendations per metric.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` (after printing a message) when the track is not in ``df``.
        Otherwise a frame with the columns ``Euclidean Distance_artist``,
        ``Euclidean Distance_track``, ``Cosine Similarity_artist`` and
        ``Cosine Similarity_track``. The two rankings are outer-joined on the
        row labels of ``df``, so a track picked by only one metric shows
        ``"-"`` in the other metric's columns.

    Raises
    ------
    ValueError
        If ``model`` is neither ``kmeans_loaded`` nor ``agg_loaded``.
    """

    # Resolve which cluster-label column belongs to this model. Identity is
    # what matters here: the question is "which loaded object is this?".
    if model is kmeans_loaded:
        cs = "km"
    elif model is agg_loaded:
        cs = "agg"
    else:
        raise ValueError(
            f"Unknown model {model!r}: expected kmeans_loaded or agg_loaded."
        )
    cluster_col = f"clusters_{cs}"

    # Filter the dataframe by artist and track name
    track_df = df[(df["artist"] == artist_name) & (df["track_name"] == track_name)]
    if track_df.empty:
        print("Track not found!")
        return None

    # Several rows may match the same artist/track; the first one is the query.
    # Keeping it as a one-row frame yields a (1, n_features) array below.
    query_row = track_df.iloc[[0]]
    cluster_num = query_row[cluster_col].values[0]

    print("\n" * 2)
    print(f"cluster_num: {cluster_num}")
    print(model)

    # Candidates are the tracks of the same cluster, minus the query track
    # itself (and duplicate rows of it). Removing it explicitly is safer than
    # assuming it sorts first: ties or identical feature vectors could
    # otherwise push the input track into its own recommendations.
    cluster_df = df[df[cluster_col] == cluster_num]
    candidates = cluster_df.drop(index=track_df.index, errors="ignore")

    x_input = query_row[retained_features].to_numpy()
    X_candidates = candidates[retained_features].to_numpy()

    if len(candidates) > 0:
        # Vectorised over all candidates instead of one call per row
        euclidean_distances = np.linalg.norm(X_candidates - x_input, axis=1)
        cosine_similarities = cosine_similarity(X_candidates, x_input).ravel()
    else:
        # The query track is alone in its cluster: nothing to rank
        euclidean_distances = np.empty(0)
        cosine_similarities = np.empty(0)

    # Positions of the `top` closest / most similar candidates; a stable sort
    # keeps the result deterministic when scores tie.
    n_keep = max(top, 0)
    order_euclidean = np.argsort(euclidean_distances, kind="stable")[:n_keep]
    order_cosine = np.argsort(-cosine_similarities, kind="stable")[:n_keep]

    # Concatenate the two rankings horizontally (aligned on the row labels)
    recommendations = pd.concat(
        [
            candidates.iloc[order_euclidean][["artist", "track_name"]],
            candidates.iloc[order_cosine][["artist", "track_name"]],
        ],
        axis=1,
        join="outer",
    )
    recommendations = recommendations.fillna("-")
    recommendations.columns = [
        "Euclidean Distance_artist",
        "Euclidean Distance_track",
        "Cosine Similarity_artist",
        "Cosine Similarity_track",
    ]
    print("\n", f"Here are {top} similar tracks to: {artist_name} - {track_name}", "\n")
    return recommendations

In [5]:
def get_recommendations_loop(models, song_selection="select"):
    """
    Interactively ask for an artist (and song) and show recommendations.

    Reads the clustered dataset from ``csvs/df_scaled_pca_km_agg.csv``,
    prompts for an artist name, determines a song of that artist and then
    displays the result of ``get_recommendations`` for every model in
    ``models``.

    Parameters
    ----------
    models : iterable
        Models to query; each is passed on to ``get_recommendations``.
    song_selection : {"select", "random"}, default "select"
        ``"select"`` lists up to 10 randomly chosen distinct songs of the
        artist and asks the user to type one; ``"random"`` picks one song of
        the artist at random.

    Returns
    -------
    None
        Results are displayed, not returned. The function returns early
        (after printing a message) for an invalid ``song_selection``, an
        artist with no songs in the dataset, or a song that is not found.
    """
    # Validate the option first so a typo is reported before any file I/O
    # or user prompt takes place.
    if song_selection not in ("select", "random"):
        print(
            f"Invalid song selection option: {song_selection}. Use 'select' or 'random'."
        )
        return

    # Load the dataset
    df = pd.read_csv("csvs/df_scaled_pca_km_agg.csv", encoding="utf-8-sig")

    # Get input from the user
    artist_name = input("Enter artist name: ")

    # Distinct, non-missing song names of the artist; shared by both modes so
    # an unknown artist is handled the same way in each.
    artist_tracks = (
        df.loc[df["artist"] == artist_name, "track_name"].dropna().drop_duplicates()
    )
    if artist_tracks.empty:
        print(f"No songs found for artist {artist_name!r}.", "Try another artist")
        return

    if song_selection == "select":
        # Sample without replacement from the distinct names so the user is
        # offered 10 choices whenever the artist has at least 10 songs.
        n_choices = min(10, len(artist_tracks))
        song_names = sorted(artist_tracks.sample(n=n_choices))

        # Ask the user to select a song from the list
        selected_song = input(
            f"Select a song for {artist_name} from the following list: {song_names}, \n"
        )
    else:
        # Get a random song from the dataset for the selected artist
        selected_song = artist_tracks.sample(n=1).iloc[0]
        print(f"Selected song for {artist_name}: {selected_song}")

    for model in models:

        # Get recommendations based on the selected artist and song
        recommendations = get_recommendations(
            artist_name, selected_song, df, model, top=5
        )

        # None means the track was not found (a message was already printed).
        # The lookup does not depend on the model, so stop instead of
        # displaying `None` once per model.
        if recommendations is None:
            return

        # Display the recommendations
        display(recommendations)

In [8]:
# Query both loaded clustering models for one randomly chosen song of the
# artist entered at the prompt, so their recommendations can be compared.
models = [kmeans_loaded, agg_loaded]
get_recommendations_loop(models, song_selection="random")

Enter artist name:  grandson


Selected song for grandson: Dirty



cluster_num: 2
KMeans(n_clusters=7, n_init=10, random_state=420)

 Here are 5 similar tracks to: grandson - Dirty 



Unnamed: 0,Euclidean Distance_artist,Euclidean Distance_track,Cosine Similarity_artist,Cosine Similarity_track
1393,Two Door Cinema Club,Do You Want It All?,Two Door Cinema Club,Do You Want It All?
17,Inhaler,It Won't Always Be Like This,Inhaler,It Won't Always Be Like This
219,Yeah Yeah Yeahs,Burning,Yeah Yeah Yeahs,Burning
1453,Woodkid,I Love You,Woodkid,I Love You
69,Florence + The Machine,Spectrum,-,-
911,-,-,grandson,Blood // Water - King Kavalier Remix





cluster_num: 5
AgglomerativeClustering(n_clusters=7)

 Here are 5 similar tracks to: grandson - Dirty 



Unnamed: 0,Euclidean Distance_artist,Euclidean Distance_track,Cosine Similarity_artist,Cosine Similarity_track
1393,Two Door Cinema Club,Do You Want It All?,Two Door Cinema Club,Do You Want It All?
1453,Woodkid,I Love You,Woodkid,I Love You
911,grandson,Blood // Water - King Kavalier Remix,grandson,Blood // Water - King Kavalier Remix
548,Electric Callboy,We Got the Moves,Electric Callboy,We Got the Moves
1548,ZHU,Guilty Love,ZHU,Guilty Love


In [10]:
# Another interactive run; relies on `models` defined in an earlier cell
get_recommendations_loop(models, song_selection="random")

Enter artist name:  Noah Slee


Selected song for Noah Slee: Lips



cluster_num: 3
KMeans(n_clusters=7, n_init=10, random_state=420)

 Here are 5 similar tracks to: Noah Slee - Lips 



Unnamed: 0,Euclidean Distance_artist,Euclidean Distance_track,Cosine Similarity_artist,Cosine Similarity_track
222,Arctic Monkeys,There’d Better Be A Mirrorball,Arctic Monkeys,There’d Better Be A Mirrorball
1461,Jadu Heart,U Never Call Me,Jadu Heart,U Never Call Me
1162,Rickyxsan,LOVED FOREVER,Rickyxsan,LOVED FOREVER
287,Noah Slee,RISE,Noah Slee,RISE
707,Masego,Flight 99,-,-
1569,-,-,Kodaline,Shed a Tear





cluster_num: 1
AgglomerativeClustering(n_clusters=7)

 Here are 5 similar tracks to: Noah Slee - Lips 



Unnamed: 0,Euclidean Distance_artist,Euclidean Distance_track,Cosine Similarity_artist,Cosine Similarity_track
222,Arctic Monkeys,There’d Better Be A Mirrorball,Arctic Monkeys,There’d Better Be A Mirrorball
1461,Jadu Heart,U Never Call Me,Jadu Heart,U Never Call Me
1162,Rickyxsan,LOVED FOREVER,Rickyxsan,LOVED FOREVER
287,Noah Slee,RISE,Noah Slee,RISE
707,Masego,Flight 99,-,-
1569,-,-,Kodaline,Shed a Tear
