In [None]:
import pandas as pd
import os
import ast

In [None]:
from extract_youtube_song_feature import get_prob_using_youtube
from reclassify_recommendation_data import reclassify_and_get_recommendation_data
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# YouTube URL of the new song we want recommendations for.
youtube_url = "https://www.youtube.com/watch?v=sV2t3tW_JTQ"

`get_prob_using_youtube` performs the following tasks:
* Download the song from YouTube using its URL (via pytubefix).
* Extract the features for our model using librosa.
* Get the genre probabilities using our optimized model.



In [None]:
# Run the project helper on the URL; the result holds the predicted
# probability of each genre for the new song (one row, one column per genre).
youtube_song_prob = get_prob_using_youtube(youtube_url)

✅ Model loaded successfully
21 Savage - Bank Account (Official Audio) has been successfully downloaded.


  x, sr = librosa.load("song.wav", sr=None, mono=True)
  features[name, 'mean'] = np.mean(values, axis=1)


In [None]:
# Display the predicted genre probabilities for the new song.
youtube_song_prob

Unnamed: 0,Rock,Electronic,Pop,Hip-Hop,Folk
0,0.01954,0.069662,0.025742,0.877906,0.00715


`reclassify_and_get_recommendation_data` performs the following tasks:
* Reclassify tracks whose genre is not one of Hip-Hop, Rock, Pop, Electronic or Folk, as well as tracks with missing genre values.

* Merge the reclassification data with the popularity metadata: listens, interest and favorites.

In [None]:
# Build the recommendation catalogue: per-track genre probabilities plus the
# popularity metadata (listens, favorites, interest), indexed by track_id.
recommendation_data = reclassify_and_get_recommendation_data()

In [None]:
# Display the catalogue (pandas truncates the middle rows in the rendered output).
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,listens,favorites,interest
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20,0.257002,0.125194,0.163086,0.037696,0.417022,361,0,978
26,0.118131,0.064750,0.321094,0.045405,0.450619,193,0,1060
30,0.142537,0.122378,0.454565,0.035769,0.244751,612,0,718
46,0.034783,0.038007,0.361828,0.002768,0.562613,171,0,252
48,0.212671,0.083119,0.235224,0.016218,0.452768,173,0,247
...,...,...,...,...,...,...,...,...
155309,0.399393,0.099351,0.307394,0.061082,0.132779,79,0,90
155310,0.450586,0.036552,0.297130,0.080286,0.135446,84,0,94
155311,0.624200,0.025582,0.142246,0.028306,0.179666,171,0,187
155312,0.253352,0.308128,0.226864,0.148033,0.063623,219,0,230


In [None]:
# Show the new song's genre probabilities again for reference before
# comparing them with the catalogue.
youtube_song_prob

Unnamed: 0,Rock,Electronic,Pop,Hip-Hop,Folk
0,0.01954,0.069662,0.025742,0.877906,0.00715


Main Recommendation Approach


*   Use cosine similarity to find songs in our database whose genre probabilities are similar to those of the new song.


In [None]:
# Compare the new song's genre-probability vector with every catalogue track.
# The genre columns are selected by name (taken from the new song's frame)
# rather than by position, so both matrices are guaranteed to list the genres
# in the same order even if recommendation_data gains or reorders columns.
genre_columns = list(youtube_song_prob.columns)
new_song_similarities = cosine_similarity(
    youtube_song_prob[genre_columns],
    recommendation_data[genre_columns],
)
# cosine_similarity returns one row per query sample; youtube_song_prob holds a
# single song, so row 0 contains its similarity to every catalogue track.
recommendation_data["cosine_similarity"] = new_song_similarities[0]



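*  A low average cosine similarity across the whole catalogue means that most tracks have a genre profile unlike the new song's. This indicates that our model is effectively distinguishing genres, and that the highest-similarity tracks we recommend have truly similar properties and come from the matching genre cluster.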


In [None]:
# Average similarity between the new song and all catalogue tracks.
recommendation_data["cosine_similarity"].mean()

0.17460339

* Normalize the popularity metadata to bring all three features onto the same scale, so that no single feature dominates the others.



In [None]:
# Empty frame, aligned to the catalogue's track index, that will hold the
# min-max scaled popularity metrics.
normalized_features = pd.DataFrame(
    index=recommendation_data.index,
    columns=["normalized_listens", "normalized_favorites", "normalized_interest"],
)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each popularity metric independently to the [0, 1] range.
# The same scaler object is refit for every metric, in this order.
scaler = MinMaxScaler()
column_map = {
    "listens": "normalized_listens",
    "favorites": "normalized_favorites",
    "interest": "normalized_interest",
}
for source_column, target_column in column_map.items():
    # fit_transform returns an (n_tracks, 1) array; keep its only column.
    scaled = scaler.fit_transform(recommendation_data[[source_column]])
    normalized_features[target_column] = scaled[:, 0]


In [None]:
# Weighted popularity score: listens count most (0.5), then favorites (0.3),
# then interest (0.2). The weights sum to 1, so the score stays within [0, 1].
listens_term = 0.5 * normalized_features["normalized_listens"]
favorites_term = 0.3 * normalized_features["normalized_favorites"]
interest_term = 0.2 * normalized_features["normalized_interest"]
recommendation_data["popularity_score"] = listens_term + favorites_term + interest_term

* Getting the top 10 recommendations, i.e. the 10 tracks with the highest cosine similarity to the new song.



In [None]:
# Keep the ten catalogue tracks most similar to the new song. The copy makes
# the shortlist independent of recommendation_data.
top_recommendations = recommendation_data.nlargest(
    n=10,
    columns="cosine_similarity",
).copy()

* Sort the top 10 recommendations by popularity, most popular first.



In [None]:
# Order the shortlist from most to least popular.
final_recommendations = top_recommendations.sort_values(
    by="popularity_score",
    ascending=False,
)

In [None]:
# Load the title/artist metadata, indexed by track_id so that it can be
# looked up with the recommendation index.
track_title_artist = pd.read_csv(
    "../Data/track_title_artist_metadata.csv",
    index_col="track_id",
)

**Recommendations**

In [None]:
# Look up title/artist metadata for the recommended tracks, preserving the
# popularity ordering of final_recommendations.
# NOTE(review): .loc raises KeyError if a recommended track_id is missing from
# the metadata file — confirm that the metadata covers every catalogue track.
track_title_artist.loc[final_recommendations.index]

Unnamed: 0_level_0,title,artist,genre_top,listens,favorites,interest
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
140907,Worries,OpVious,,12678,4,13977
92992,Analytix,Tab & Anitek,,1831,5,2888
123630,The Life and Death of The Party (Edit),Pimpstrut Local #13,Soul-RnB,1997,1,2373
116209,Darkside Imperials,Toussaint Morrison,,1414,1,1840
34661,Blender Tzivoni,51%,,280,1,664
8175,polybag MFR,James Amoeba,,116,1,225
15922,We're moving,Kukkiva Kunnas,,287,0,760
146190,Revolutionary Letters Part 1,Symbol Of Subversion,,233,0,283
71528,Laxatif Rudimentaire,Doctor Bux,Experimental,69,0,257
154096,To Be,Brian Routh,,11,0,13
