This notebook contains the recommender system. 

In [59]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [60]:
#import data
songs = pd.read_csv('/Users/maiaclemons/Capstone/Data/cleandf.csv')
songs.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,...,tempo,duration_ms,time_signature,popularity,release_date,artist_id,genres,release_year,region,era
0,Mobb Deep,The Infamous,Survival of the Fittest,7N1Vjtzr1lmmCW9iasQ8YO,0.813,0.703,11,-5.077,0,0.268,...,94.828,224533,4,75,1995-04-25,6O2zJ0tId7g07yzHtX0yap,"['east coast hip hop', 'hardcore hip hop', 'hi...",1995,East,90s
1,Nas,Illmatic,Represent,6DPrhGVJ1WTZvM9fKptnGe,0.708,0.832,1,-4.964,1,0.318,...,92.428,252600,4,76,1994-04-19,20qISvAhX20dpIbOOzGK3q,"['conscious hip hop', 'east coast hip hop', 'g...",1994,East,90s
2,Gang Starr,Hard To Earn,Mass Appeal,3lGBvPUgO7MJltUnBlOpe9,0.808,0.726,10,-8.435,0,0.252,...,96.12,221027,4,61,1994-03-08,5cMgGlA1xGyeAB2ctYlRdZ,"['alternative hip hop', 'conscious hip hop', '...",1994,East,90s
3,Big L,Lifestylez Ov Da Poor & Dangerous,Put It On,6JI5wNWYdBw68GZjOMmgK5,0.711,0.62,11,-10.174,0,0.212,...,179.817,217627,4,64,1995-03-28,3CygdxquGHurS7f9LjNLkv,"['contemporary country', 'country', 'country d...",1995,Other,90s
4,Sheek Louch,Silverback Gorilla,Lottery (Skit),6VjF67azNjUtShTy0Eaz12,0.393,0.641,6,-17.791,1,0.453,...,118.15,31347,4,37,2008-03-18,14ejEUaU0wE2iAenbbv9b8,['hardcore hip hop'],2008,Other,00s


In [61]:
#encoding region and era with get dummies

eras = pd.get_dummies(songs['era'], prefix='era')
region = pd.get_dummies(songs['region'], prefix='region')

# adding encoded columns to songs
encoded = pd.concat([songs, eras, region], axis=1)

In [62]:
#dropping columns not needed for recommender
encoded.drop(columns=['era','region','artist','track_id','release_date','album','genres','artist_id'], inplace=True)

In [63]:
#make track name the index 
encoded.set_index('track_name', inplace=True)

In [64]:
encoded.head()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,...,release_year,era_00s,era_70s-80s,era_90s,era_Current,region_East,region_Midwest,region_Other,region_South,region_West
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Survival of the Fittest,0.813,0.703,11,-5.077,0,0.268,1e-06,0.237,0.241,94.828,...,1995,0,0,1,0,1,0,0,0,0
Represent,0.708,0.832,1,-4.964,1,0.318,3e-06,0.189,0.618,92.428,...,1994,0,0,1,0,1,0,0,0,0
Mass Appeal,0.808,0.726,10,-8.435,0,0.252,0.276,0.129,0.616,96.12,...,1994,0,0,1,0,1,0,0,0,0
Put It On,0.711,0.62,11,-10.174,0,0.212,0.0,0.134,0.805,179.817,...,1995,0,0,1,0,0,0,1,0,0
Lottery (Skit),0.393,0.641,6,-17.791,1,0.453,0.0,0.508,0.87,118.15,...,2008,1,0,0,0,0,0,1,0,0


In [65]:
#checking data types to make sure they're all numerical
encoded.dtypes

danceability        float64
energy              float64
key                   int64
loudness            float64
mode                  int64
speechiness         float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
duration_ms           int64
time_signature        int64
popularity            int64
release_year          int64
era_00s               uint8
era_70s-80s           uint8
era_90s               uint8
era_Current           uint8
region_East           uint8
region_Midwest        uint8
region_Other          uint8
region_South          uint8
region_West           uint8
dtype: object

In [66]:
#convert df to sparse matrix
sparse_matrix = sparse.csr_matrix(encoded.values)

In [67]:
#calculate cosine similarity matrix
similarity_matrix = cosine_similarity(sparse_matrix)

In [68]:
similarity_matrix

array([[1.        , 0.99999951, 0.99999999, ..., 0.99999923, 0.99999991,
        0.99999983],
       [0.99999951, 1.        , 0.99999936, ..., 0.99999993, 0.99999967,
        0.99999984],
       [0.99999999, 0.99999936, 1.        , ..., 0.99999907, 0.99999989,
        0.99999977],
       ...,
       [0.99999923, 0.99999993, 0.99999907, ..., 1.        , 0.99999953,
        0.99999975],
       [0.99999991, 0.99999967, 0.99999989, ..., 0.99999953, 1.        ,
        0.99999996],
       [0.99999983, 0.99999984, 0.99999977, ..., 0.99999975, 0.99999996,
        1.        ]])

In [77]:
def get_track_name_from_index(index):
    return songs[songs.index == index]["track_name"].values[0]

target_index = 765 

# Get the indices of the top similar songs
similar_songs_indices = np.argsort(similarity_matrix[target_index])[::-1][:15]

# Print the recommended songs
for song_index in similar_songs_indices:
    song_title = get_track_name_from_index(song_index)
    print(song_title)


Feeling Myself
Life Is Good (feat. Drake)
F*ck Up Some Commas
Stay Fly
I Mean It (feat. Remo)
Amazing
Fashion Killa
Portland
The Show Goes On
Gossip Folks (feat. Ludacris)
EVERY CHANCE I GET (feat. Lil Baby & Lil Durk)
LsD
Headlines
Rich Flex
I'm N Luv (Wit a Stripper) (feat. Mike Jones)
