In [242]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import os
import scipy
import warnings 
warnings.filterwarnings("ignore")
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from tqdm import tqdm

In [243]:
data = pd.read_csv("Dataset/preprocessed_data.csv")

In [244]:
data.columns

Index(['artist', 'track_name', 'acousticness', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'popularity'],
      dtype='object')

In [245]:
data.drop(columns='artist',inplace=True)

In [246]:
numerical = data.select_dtypes(exclude='object')

### Nearest Neighbors algorithm 
- using cosine similarity

In [310]:
df = pd.pivot_table(data, index='track_name')

In [311]:
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute', n_neighbors= 10)

In [312]:
model_knn.fit(df)

In [334]:
def song_recommender(data, col, song, n = 5):
    query_index = data[data[col] == song].index[0]
    #print(query_index)
    distances, indices = model_knn.kneighbors(df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = n)
    for i in range(0, len(distances.flatten())):
        if i == 0:
            print('Recommendations for : {0}\n'.format(df.index[query_index]))
        else:
            print('{0}:{1}'.format(i, df.index[indices.flatten()[i]], distances.flatten()[i]))

In [335]:
song_recommender(data = data, col = 'track_name', song = 'Unholy', n =10)

Recommendations for : Like A Rolling Stone (Mono)

1:The Owl
2:Around Ya - Flamingo Star Hippie Remix
3:We Can't Go on Living Like This - 2008 Version
4:Pavanai Pattu
5:Queen Jane Approximately - Take 5, Alternate Take
6:Dandy In The Underworld
7:Loving The Alien - Single Version; 2002 Remaster
8:Everything Is Good About You
9:All Your Goodies Are Gone


In [337]:
song_recommender(data = data, col = 'track_name', song = 'All On Me', n=10)

Recommendations for : HEARTEATER

1:Smell These Roses
2:Smoke A Little Smoke
3:Marcus Garvey
4:Let The Drummer Kick
5:Lose Yourself to Dance (feat. Pharrell Williams)
6:My Neck, My Back (Lick It)
7:still feel.
8:Tip Toe (feat. French Montana)
9:Tuesday (feat. Danelle Sandoval)


#### Recommendation using sigmoid kernel

In [269]:
data_matrix = data.pivot_table(index='track_name').sample(20000)


In [270]:
from sklearn.metrics.pairwise  import sigmoid_kernel

sig = sigmoid_kernel(data_matrix,data_matrix)

In [273]:
sig

array([[0.87641004, 0.86308575, 0.86192981, ..., 0.84820549, 0.86780396,
        0.81690112],
       [0.86308575, 0.860207  , 0.85908052, ..., 0.84288319, 0.85779627,
        0.81819704],
       [0.86192981, 0.85908052, 0.85939811, ..., 0.84043379, 0.85677693,
        0.81795032],
       ...,
       [0.84820549, 0.84288319, 0.84043379, ..., 0.84950012, 0.84370406,
        0.82062338],
       [0.86780396, 0.85779627, 0.85677693, ..., 0.84370406, 0.87789566,
        0.81607364],
       [0.81690112, 0.81819704, 0.81795032, ..., 0.82062338, 0.81607364,
        0.80928759]])

In [291]:
indices = pd.Series(range(20000), index = data_matrix.index).drop_duplicates()

In [292]:
indices

track_name
Yo Quisiera Que Tú                               0
Everyday Is A Winding Road                       1
All On Me                                        2
Waves: Calm Down                                 3
Unbelievable                                     4
                                             ...  
Smokey Joe's La La                           19995
For Aisha (Featured in "The Sky Is Pink")    19996
Lights - Single Version                      19997
Flip Flop and Bop                            19998
Stopover Bombay                              19999
Length: 20000, dtype: int64

In [306]:
def give_rec(title, sig=sig):
    # Get the index corresponding to original_title
    idx = indices[title]

    # Get the pairwsie similarity scores 
    sig_scores = list(enumerate(sig[idx]))

    # Sort the movies 
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)

    # Scores of the 10 most similar movies
    sig_scores = sig_scores[1:11]

    # Song indices
    song_indices = [i[0] for i in sig_scores]

    # Top 10 most similar movies
    return data['track_name'].iloc[song_indices]

In [307]:
give_rec('Flip Flop and Bop')

1692                                           Dhimu Dhimu
13662                One of Us Must Know (Sooner or Later)
18904                                                Shoop
18817                           When I Look Into Your Eyes
9401     I Speak To The Stars (with Leith Stevens & His...
14373                                          Samba Pa Ti
15160                            Don't Ask Me No Questions
13173                               All Summer Long (Mono)
12752                                    Lovers Who Wander
5002                                              Ma Gosse
Name: track_name, dtype: object

In [308]:
give_rec('All On Me')

12019                           Wonderful, Wonderful
16766         The Girl Is Mine (with Paul McCartney)
8795     Consolation No. 3 in D-Flat Major, S. 172/3
4263                                Po Indru Neeyaga
15883         Runnin' with the Devil - 2015 Remaster
6140                  Ce N'est Que Votre Main Madame
3241                                          prozac
5611                                       Fireworks
7021                     A Sailboat In the Moonlight
17127                                I Would Die 4 U
Name: track_name, dtype: object