In [2]:
import pandas as pd
import random
import authorization
import numpy as np
from numpy.linalg import norm

In [3]:
df = pd.read_csv("valence_arousal_dataset.csv")
print(df.shape)
df.head()

(4720, 6)


Unnamed: 0,id,genre,track_name,artist_name,valence,energy
0,4VJgrWjrkodaGiq3xKz62z,acoustic,Sometimes (I Wish),City and Colour,0.135,0.467
1,6wXy0MosS6KGV8yowVradf,acoustic,Luka,Suzanne Vega,0.722,0.229
2,1NdgDGYG4J827IvfeDgF4o,acoustic,Dear Mr. President,Various Artists,0.297,0.299
3,1DWbTRHEL2x9sZmdagh2Su,acoustic,Hometown Glory (Live at Hotel Cafe),Adele,0.181,0.353
4,5ECRlF2VFIdZYoY783Wxgp,acoustic,Sweet Dreams (Are Made of This) - Acoustic,ortoPilot,0.415,0.331


In [4]:
df["mood_vec"] = df[["valence", "energy"]].values.tolist()
df["mood_vec"].head()

0    [0.135, 0.467]
1    [0.722, 0.229]
2    [0.297, 0.299]
3    [0.181, 0.353]
4    [0.415, 0.331]
Name: mood_vec, dtype: object

In [5]:
sp = authorization.authorize()

In [6]:
def recommend(track_id, ref_df, sp, n_recs = 5):
    
    # Crawl valence and arousal of given track from spotify api
    track_features = sp.track_audio_features(track_id)
    track_moodvec = np.array([track_features.valence, track_features.energy])
    print(f"mood_vec for {track_id}: {track_moodvec}")
    
    # Compute distances to all reference tracks
    ref_df["distances"] = ref_df["mood_vec"].apply(lambda x: norm(track_moodvec-np.array(x)))
    # Sort distances from lowest to highest
    ref_df_sorted = ref_df.sort_values(by = "distances", ascending = False)
    # If the input track is in the reference set, it will have a distance of 0, but should not be recommendet
    ref_df_sorted = ref_df_sorted[ref_df_sorted["id"] != track_id]
    
    # Return n recommendations
    return ref_df_sorted.iloc[:n_recs]

In [7]:
track1 = "37LBvhw1k73Xt27jEIL8kI"
recommend(track_id = track1, ref_df = df, sp = sp, n_recs = 5)

mood_vec for 37LBvhw1k73Xt27jEIL8kI: [0.924 0.143]


Unnamed: 0,id,genre,track_name,artist_name,valence,energy,mood_vec,distances
2002,0Kk2cqtBrf1IkjxWUqFcep,heavy-metal,Transilvanian Hunger,Darkthrone,0.0217,1.0,"[0.0217, 1.0]",1.244425
268,5S22lhwri5iTC3CWtouqdA,black-metal,Trolldom,Taake,0.0295,0.998,"[0.0295, 0.998]",1.237399
2919,0egwM3fS9lfjM9Uexzfh3f,metalcore,Revive,The Devil Wears Prada,0.0321,0.996,"[0.0321, 0.996]",1.234137
1703,3a7fL4OU8p4d0pqNRd3Lnq,grindcore,Simple Math,Brutal Truth,0.031,0.994,"[0.031, 0.994]",1.233552
1985,2Uxh6Vjw0GJgmnV2GW9N5y,heavy-metal,In Hell Is Where She Waits for Me,The Black Dahlia Murder,0.0364,0.999,"[0.0364, 0.999]",1.233114


In [8]:
def recommend_mean(track_ids, ref_df, sp, n_recs = 5):
    
    # Initialize a list to store moodvecs of all tracks
    moodvecs = []
    
    # Crawl valence and arousal of given tracks from spotify api
    for track_id in track_ids:
        track_features = sp.track_audio_features(track_id)
        track_moodvec = np.array([track_features.valence, track_features.energy])
        print(f"mood_vec for {track_id}: {track_moodvec}")
        
        # Append the moodvec to the list
        moodvecs.append(track_moodvec)
        
    # Compute the average moodvec
    avg_moodvec = np.mean(moodvecs, axis=0)
    
    # Compute distances to all reference tracks
    ref_df["distances"] = ref_df["mood_vec"].apply(lambda x: norm(avg_moodvec-np.array(x)))
    
    # Sort distances from lowest to highest
    ref_df_sorted = ref_df.sort_values(by = "distances", ascending = False)
    
    # Exclude the input tracks from the recommendations
    ref_df_sorted = ref_df_sorted[~ref_df_sorted["id"].isin(track_ids)]
    
    # Return n recommendations
    return ref_df_sorted.iloc[:n_recs]

In [9]:
track_ids1 = ("0Kk2cqtBrf1IkjxWUqFcep", "37LBvhw1k73Xt27jEIL8kI")
recommend_mean(track_ids = track_ids1, ref_df = df, sp = sp, n_recs = 5)

mood_vec for 0Kk2cqtBrf1IkjxWUqFcep: [0.0217 1.    ]
mood_vec for 37LBvhw1k73Xt27jEIL8kI: [0.924 0.143]


Unnamed: 0,id,genre,track_name,artist_name,valence,energy,mood_vec,distances
1012,0NMaOSNx7NFF21KUCSmbpE,detroit-techno,Passage (Out),Plastikman,0.0307,0.000853,"[0.0307, 0.000853]",0.721897
178,0LfRVw2aFyi3wqYcRjFZpx,ambient,"Broken Harbors, Pt. 2",Stars Of The Lid,0.0257,0.00522,"[0.0257, 0.00522]",0.721537
2972,0dlAN15Cr7eIOqhqtFjfHV,movies,The Passing of Théoden,Various Artists,0.0321,0.0102,"[0.0321, 0.0102]",0.713665
3302,72avKXjk0UTESGbVsxmjwG,piano,"Consolations, S172/R12: No. 2. Un poco piu mosso",Franz Liszt,0.0395,0.0058,"[0.0395, 0.0058]",0.712607
4223,7ocJPvoS4xnFcaFc4B3AJj,sleep,A Lovely Place to Be,Patrick O'Hearn,0.0342,0.012,"[0.0342, 0.012]",0.710953


In [24]:
def recommend_multi(track_ids, ref_df, sp, n_recs=5):
    # Initialize an empty array to store the total differences for each reference track
    total_diffs = np.zeros(len(ref_df))
    for track_id in track_ids:
        # Crawl valence and arousal of given track from Spotify API
        track_features = sp.track_audio_features(track_id)
        track_moodvec = np.array([track_features.valence, track_features.energy])
        print(f"mood_vec for {track_id}: {track_moodvec}")
#
        for index, row in ref_df.iterrows():
            diff = np.abs(row['mood_vec'] - track_moodvec)
            total_diffs[index] += sum(diff)

    # Add the total differences as a new column in the reference DataFrame
    ref_df["total_diffs"] = total_diffs
    
    # Sort reference tracks by the total differences in ascending order
    ref_df_sorted = ref_df.sort_values(by="total_diffs", ascending=True)

    filter_by_genre = input("Хотите отфильтровать рекомендации по жанру? (y/n)").lower()

    if filter_by_genre == "y":
        genres = input("Введите жанр(ы), разделяя пробелом: ").split()
        filtered_df = ref_df_sorted[ref_df_sorted['genre'].isin(genres)]
    else:
        filtered_df = ref_df_sorted.copy()
    # Return the top n recommendations
    return filtered_df.iloc[:n_recs]


In [23]:
recommend_multi(track_ids = track_ids1, ref_df = df, sp = sp, n_recs = 5)

mood_vec for 7InnZ5mRKo2oiipcGGChjE: [0.585 0.771]
mood_vec for 3ZT9yfaYbpPbFFfYUJjVI8: [0.862 0.901]
mood_vec for 2HTzzn4hP4PHlX2nLotWF4: [0.811 0.858]


Unnamed: 0,id,genre,track_name,artist_name,valence,energy,mood_vec,distances,total_diffs
4531,1WLVCxHYQrYyedcCAYPINq,techno,Waves,DJ Satomi,0.738,0.913,"[0.738, 0.913]",0.43235,0.559
4546,7Kq2SpCEOOY1kc1pbASqZF,techno,The Whistle Song - Solar Patrol Mix,Various Artists,0.723,0.966,"[0.723, 0.966]",0.467124,0.733
4540,7kEzaW7IHn2Ur65lzS2GtY,techno,Escape Me,Tiësto,0.517,0.835,"[0.517, 0.835]",0.267173,0.86
4549,619bJQ9uDi8dnXzLebFI7y,techno,When Love Takes Over (feat. Kelly Rowland),David Guetta,0.496,0.863,"[0.496, 0.863]",0.292418,0.905
4551,5CMjjywI0eZMixPeqNd75R,techno,Lose Yourself to Dance (feat. Pharrell Williams),Daft Punk,0.674,0.659,"[0.674, 0.659]",0.219357,0.967
