# Load Packages

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

# Load Data

In [2]:
# Read the cleaned song table, index it by the `songid` column, and drop
# rows whose column values exactly repeat an earlier row.
spotify = (
    pd.read_csv("song_data_cleaned.csv")
    .set_index("songid")
    .drop_duplicates()
)

Keep the song title and artist name in a separate data frame: these columns are not used as model features, but they are needed later to label the recommendations.

In [3]:
# Side table of human-readable labels, sharing the index of `spotify`.
meta_data = spotify.loc[:, ["song_title", "artist_name"]]

Clean data frame

In [4]:
# Remove the `Unnamed: 0` column and the two text columns so that only
# the remaining columns are carried forward to the model.
spotify = spotify.drop(["Unnamed: 0", "song_title", "artist_name"], axis="columns")

In [5]:
# Preview the first five rows of the cleaned frame.
spotify.head(n=5)

Unnamed: 0_level_0,popularity,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,category
songid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4cqVjzC0AxeDmdeJbV3lHH,47,0.515,0.335,7,-12.794,0.0292,0.624,0.00559,0.122,0.0625,95.032,4,Pop
4WtOFEQOJ3TOPL9iRKWJ95,45,0.843,0.431,1,-9.486,0.0692,0.36,3.8e-05,0.297,0.396,97.993,4,Pop
6ht5cDaJsrvWXCOd0vkIsX,45,0.591,0.797,7,-3.544,0.109,0.0994,0.0,0.0797,0.785,89.968,4,Pop
2Ok9PIQhVkKAaPTMbM4EvS,62,0.333,0.349,3,-8.062,0.0398,0.874,0.0,0.144,0.418,208.787,4,Pop
6hGysJfPg591z5fqvTYL8O,55,0.592,0.857,6,-6.817,0.0315,0.0512,3e-06,0.334,0.505,107.092,4,Pop


Convert the genre column (`category`) to a dummy (one-hot) representation so our model will accept it!

In [6]:
# Expand the `category` column into one indicator column per distinct value.
spotify_new = pd.get_dummies(data=spotify, columns=["category"])

Create training and test sets

In [7]:
# Hold out 10% of the rows as the test set; the fixed random_state keeps
# the split identical between runs.
training_set, test_set = train_test_split(
    spotify_new, test_size=0.1, random_state=42
)

Fit the model 

In [8]:
# Neighbour search configured to compare rows by cosine distance
# (not fitted yet at this point).
fit_nn = NearestNeighbors(algorithm="auto", metric="cosine")

In [9]:
# Fit on the training rows; left as a bare expression so the cell shows the estimator.
fit_nn.fit(X=training_set)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=1, n_neighbors=5, p=2, radius=1.0)

Create helper functions to show song and artist info and output top 9 recommendations

In [10]:
# Names of the audio-feature columns, in the order they appear in the data.
my_cols = [
    "danceability",
    "energy",
    "key",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
]
def _song_title_and_artist(song_id):
    """Return ``(song_title, artist_name)`` from the first ``meta_data`` row indexed by ``song_id``."""
    first_match = meta_data.loc[meta_data.index == song_id,
                                ["song_title", "artist_name"]].iloc[0]
    return first_match["song_title"], first_match["artist_name"]


def return_recommendation(n_entry, n_neighbors=9):
    """Print the nearest training-set songs for one song of the test set.

    Parameters
    ----------
    n_entry : int
        Row position of the query song in ``test_set``.
    n_neighbors : int, default 9
        How many neighbours to request from ``fit_nn`` and print.

    Returns
    -------
    None
        Everything is printed. If ``n_entry`` is out of range for
        ``test_set`` a hint with the valid range is printed instead of
        raising.

    Notes
    -----
    Recommendations are numbered starting at 0.
    """
    try:
        # Keep the query as a one-row DataFrame so it carries the same
        # column labels as the frame the model was fitted on.
        my_entry = test_set.iloc[[n_entry]]
    except IndexError:
        # Only the position lookup is guarded, so unrelated IndexErrors are
        # not mislabelled as bad input. The upper bound is computed rather
        # than hard-coded so it stays right if the split changes.
        print("The number you entered is not valid for the current test set! "
              "\nChoose a number between 0-{0}".format(len(test_set) - 1))
        return

    neighbor_positions = fit_nn.kneighbors(my_entry,
                                           n_neighbors=n_neighbors,
                                           return_distance=False).flatten()
    # kneighbors returns row positions within the fitted data; translate
    # them to song ids through the training set's index.
    neighbor_ids = training_set.index[neighbor_positions]

    query_title, query_artist = _song_title_and_artist(my_entry.index[0])
    print("Recommendations for {0} by {1}".format(query_title, query_artist))

    # Look labels up per neighbour id (instead of joining frames and indexing
    # the result with an integer key) so exactly one line is printed per
    # neighbour, in neighbour order, even if a song id repeats in meta_data.
    for rank, song_id in enumerate(neighbor_ids):
        rec_title, rec_artist = _song_title_and_artist(song_id)
        print("{0}. Recommended Song: {1} by {2}".format(rank, rec_title, rec_artist))

In [11]:
def song_lookup(n_entry, print_artist=True):
    """Show which song sits at a given row position of ``test_set``.

    Parameters
    ----------
    n_entry : int
        Row position of the song in ``test_set``.
    print_artist : bool, default True
        When true, print ``Song: '<title>' by '<artist>'`` for that song.

    Returns
    -------
    int
        ``n_entry`` unchanged, so the value can be handed on to
        ``return_recommendation``.

    Raises
    ------
    IndexError
        If ``n_entry`` is outside the bounds of ``test_set``.
    """
    # Resolve the position against test_set itself and then look the id up
    # in meta_data. This avoids joining the whole test set on every call and
    # keeps the position aligned with test_set even if a song id occurs more
    # than once in meta_data.
    song_id = test_set.index[n_entry]
    song_info = meta_data.loc[meta_data.index == song_id,
                              ["artist_name", "song_title"]].iloc[0]
    if print_artist:
        print("Song: '{0}' by '{1}'".format(song_info["song_title"], song_info["artist_name"]))
    return n_entry

Demonstrate example using *Ooh La La* by *HI-LO* (never heard of them).

In [12]:
# Print the song at test-set position 2; the return value is discarded.
_ = song_lookup(n_entry=2, print_artist=True)

Song: 'Ooh La La' by 'HI-LO'


In [13]:
# Same lookup without printing; keeps the position for the next cell.
hi_lo = song_lookup(n_entry=2, print_artist=False)

In [14]:
# Print the recommendations for the song selected above.
return_recommendation(n_entry=hi_lo)

Recommendations for Ooh La La by HI-LO
0. Recommended Song: More Mess - Hugel Remix by Kungs
1. Recommended Song: Animals - Victor Niglio & Martin Garrix Festival Trap Mix by Martin Garrix
2. Recommended Song: Brolab by Tiësto
3. Recommended Song: Real Love - Radio Mix by Antonio Giacca
4. Recommended Song: Revolt by Tiësto
5. Recommended Song: Boombox by Dirtyphonics
6. Recommended Song: Make You Hustle by Croatia Squad
7. Recommended Song: Imjussayin by Convex
8. Recommended Song: Get Down by Hardwell


In [15]:
# Seed Python's RNG so the same sample is drawn on every run, then draw
# eight pseudo-random integers in the hard-coded range 1..407; they are
# used below as row positions for the recommendation demo.
random.seed(420)
rand_list = [random.randint(1, 407) for _ in range(8)]

In [16]:
# Display the sampled positions.
rand_list

[14, 345, 401, 187, 140, 207, 49, 392]

In [17]:
# Print one recommendation list per sampled position, followed by a
# newline string (print adds its own newline, giving two blank lines).
for song_position in rand_list:
    return_recommendation(song_position)
    print("\n")

Recommendations for All We Got (feat. Kanye West & Chicago Childrens Choir) by Chance The Rapper
0. Recommended Song: Perplexing Pegasus by Rae Sremmurd
1. Recommended Song: Runaway Train by Soul Asylum
2. Recommended Song: Too Hotty by Various Artists
3. Recommended Song: Back (feat. Lil Yachty) by Lil Pump
4. Recommended Song: Summertime Sadness by Lana Del Rey
5. Recommended Song: New Freezer (feat. Kendrick Lamar) by Rich The Kid
6. Recommended Song: Dig Down by Muse
7. Recommended Song: Dont Wanna Know - Acoustic Version by The Mayries
8. Recommended Song: Follow You - Tep No Edit by Jamie Brown


Recommendations for World Of Magic by Firelite
0. Recommended Song: Purpose Of Life by RVAGE
1. Recommended Song: Wild Out by Platinum Doug
2. Recommended Song: Bad Bae by ChildsPlay, Onderkoffer
3. Recommended Song: Akira by Ekali
4. Recommended Song: I Was Just a Kid by Nothing But Thieves
5. Recommended Song: Tequila Nites by Michael Woods
6. Recommended Song: I Doubt It (feat. Blackb