In [167]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


%matplotlib inline

<p>Creating dataframes from following csv files</p>

In [168]:
songs_playlist_df = pd.read_csv('/Users/nitanshjain/Documents/Data_Science_Learning/Spotify_Recommender_System/csv/songs_playlist_df.csv')
songs_50_df = pd.read_csv('/Users/nitanshjain/Documents/Data_Science_Learning/Spotify_Recommender_System/csv/songs_50_df.csv')

In [169]:
# dropping columns
songs_playlist_df.drop(columns=["album", "artist_genres","time_signature","artist_name"], axis=1, inplace=True) 
songs_50_df.drop(columns=["artist_genres","time_signature","artist_name"], axis=1, inplace=True)

In [170]:
songs_playlist_df.drop_duplicates(subset='track_name', inplace=True) # removing duplicate songs based on track name
songs_50_df.drop_duplicates(subset='track_name', inplace=True)

In [171]:
songs_playlist_df.head()

Unnamed: 0,track_name,artist_pop,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,genre_score,track_pop
0,You Right,88,0.828,0.621,-6.414,0.0565,0.0164,0.00233,0.0845,0.436,128.986,186173,860.0,85
1,Wild Side (feat. Cardi B),68,0.74,0.576,-6.744,0.146,0.0249,0.0,0.104,0.315,110.095,209476,1227.0,7
2,Peaches (feat. Daniel Caesar & Giveon),90,0.677,0.696,-6.181,0.119,0.321,0.0,0.42,0.464,90.03,198082,597.0,87
3,Chicken Lemon Rice,42,0.516,0.895,-5.77,0.0919,0.000376,0.54,0.492,0.694,199.923,153019,3.0,38
4,LALA (Unlocked) (feat. Swae Lee),76,0.615,0.56,-4.938,0.184,0.0889,0.000114,0.314,0.33,139.441,266937,669.0,48


In [172]:
songs_50_df.head()

Unnamed: 0,track_name,artist_pop,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,genre_score,track_pop
0,Cheerleader - Felix Jaehn Remix Radio Edit,64,0.782,0.685,-6.237,0.0309,0.166,1.2e-05,0.16,0.603,118.016,180566,13.0,79
1,Here Comes The Sun - Remastered 2009,82,0.557,0.54,-10.484,0.0347,0.0339,0.00248,0.179,0.394,129.171,185733,15.0,82
2,Shallow,83,0.572,0.385,-6.362,0.0308,0.371,0.0,0.231,0.323,95.799,215733,34.0,83
3,Sweet but Psycho,80,0.72,0.706,-4.719,0.0473,0.0684,0.0,0.166,0.62,133.002,187436,32.0,81
4,Tacones Rojos,80,0.748,0.856,-3.517,0.0348,0.0824,0.0,0.142,0.927,123.031,189320,14.0,84


<p>Scaling the data using Standard Scaler</p>

In [173]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
songs_playlist_df.loc[:,"artist_pop":"track_pop"] = scaler.fit_transform(songs_playlist_df.loc[:,"artist_pop":"track_pop"])
songs_50_df.loc[:,"artist_pop":"track_pop"] = scaler.fit_transform(songs_50_df.loc[:,"artist_pop":"track_pop"])

In [174]:
songs_playlist_np = songs_playlist_df.copy()
songs_playlist_np.drop("track_name", axis=1, inplace=True)
songs_playlist_np = songs_playlist_np.to_numpy()

songs_50_np = songs_50_df.copy()
songs_50_np.drop("track_name", axis=1, inplace=True)
songs_50_np = songs_50_np.to_numpy()


In [175]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim_playlist = linear_kernel(songs_playlist_np, songs_playlist_np)
cosine_sim_50 = linear_kernel(songs_50_np, songs_50_np)

In [176]:
print(cosine_sim_playlist.shape)
print(cosine_sim_50.shape)

(2165, 2165)
(44, 44)


In [177]:
songs_playlist_indices = pd.Series(songs_playlist_df.index, index=songs_playlist_df["track_name"]).drop_duplicates()
songs_50_indices = pd.Series(songs_50_df.index, index=songs_50_df["track_name"]).drop_duplicates()


In [178]:
def get_recommendations(title):
    
    index = songs_playlist_indices[title]
    # print(index)
    # print(cosine_sim_playlist[index])
    sim_scores = list(enumerate(cosine_sim_playlist[index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    sim_scores = sim_scores[1:11]
    # print(sim_scores)
    
    recommended_songs_indices = [i[0] for i in sim_scores]
    return songs_playlist_df["track_name"].iloc[recommended_songs_indices]

In [179]:
for song in songs_50_df.track_name:
    print("The following songs were recommended for \n{}".format(song))
    try:
        print(get_recommendations(song))
    except:
        print("\n")

The following songs were recommended for 
Cheerleader - Felix Jaehn Remix Radio Edit
1157                                      Me Porto Bonito
2024                                        Tacones Rojos
1167                              Cold Heart - PNAU Remix
187                                                 Woman
1298                      There's Nothing Holdin' Me Back
136                                       Clap Your Hands
267     Bhool Bhulaiyaa 2 Title Track (From "Bhool Bhu...
205                                                  Numb
1323                                               Sucker
2083                                            Moonlight
Name: track_name, dtype: object
The following songs were recommended for 
Here Comes The Sun - Remastered 2009


The following songs were recommended for 
Shallow
1418       Lonely (with benny blanco)
1866    Best Part of Me (feat. YEBBA)
1446                          Falling
1897           exile (feat. Bon Iver)
2025                

```https://towardsdatascience.com/a-one-stop-shop-for-principal-component-analysis-5582fb7e0a9c```
```https://www.datacamp.com/tutorial/recommender-systems-python```