# Step 4: Building Model

In [246]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)


#ignore warning messages 
import warnings
warnings.filterwarnings('ignore')

In [274]:
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from scipy import sparse

In [248]:
spotify_model = pd.read_csv('scaled_spotify_data_model.csv')
spotify_model_ohe = pd.read_csv('ohe_scaled_spotify_data_model.csv')
spotify_original = pd.read_csv("dataset.csv")

In [249]:
# scaler was turning some values into NAN because didnt reset index
spotify_original = spotify_original.dropna()
spotify_original = spotify_original.drop(['Unnamed: 0','key','duration_ms', 'explicit', 'mode', 'time_signature'], axis=1)
spotify_original = spotify_original.reset_index()
spotify = spotify_original.copy()
spotify['Genre_Combined'] = spotify.groupby(['track_id'])['track_genre'].transform(lambda x: ','.join(x))
spotify['Genre_Combined'] = spotify['Genre_Combined'].str.split(',')
spotify = spotify.drop_duplicates(subset=['track_id'])
spotify = spotify.drop_duplicates(subset=['track_name'])
spotify = spotify.reset_index()

In [250]:
spotify_original.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113999 entries, 0 to 113998
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   index             113999 non-null  int64  
 1   track_id          113999 non-null  object 
 2   artists           113999 non-null  object 
 3   album_name        113999 non-null  object 
 4   track_name        113999 non-null  object 
 5   popularity        113999 non-null  int64  
 6   danceability      113999 non-null  float64
 7   energy            113999 non-null  float64
 8   loudness          113999 non-null  float64
 9   speechiness       113999 non-null  float64
 10  acousticness      113999 non-null  float64
 11  instrumentalness  113999 non-null  float64
 12  liveness          113999 non-null  float64
 13  valence           113999 non-null  float64
 14  tempo             113999 non-null  float64
 15  track_genre       113999 non-null  object 
dtypes: float64(9), int64

In [251]:
spotify_dummies = pd.get_dummies(spotify_original['track_genre'])
dummies_cols = list(spotify_dummies.columns)

In [252]:
spotify_dummies = spotify_original.join(spotify_dummies)

In [253]:
mlb = MultiLabelBinarizer(sparse_output=True)

genre_dummies = pd.DataFrame.sparse.from_spmatrix(mlb.fit_transform(spotify.pop('Genre_Combined')),index=spotify.index,columns=mlb.classes_)

In [254]:
genre_dummies_norm = genre_combined_dummies.div(genre_combined_dummies.sum(axis=1), axis=0)

In [255]:
# genre dummies one hot encodes the list of genres
# genre dummies norm normalizes the dummy rows by dividing the row by the sum of the row

spotify_ohe = spotify.join(genre_dummies)
spotify_ohe_norm = spotify.join(genre_dummies_norm)

In [256]:
spotify_ohe.head(5)

Unnamed: 0,level_0,index,track_id,artists,album_name,track_name,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,track_genre,acoustic,afrobeat,alt-rock,alternative,ambient,anime,black-metal,bluegrass,blues,brazil,breakbeat,british,cantopop,chicago-house,children,chill,classical,club,comedy,country,dance,dancehall,death-metal,deep-house,detroit-techno,disco,disney,drum-and-bass,dub,dubstep,edm,electro,electronic,emo,folk,forro,french,funk,garage,german,gospel,goth,grindcore,groove,grunge,guitar,happy,hard-rock,hardcore,hardstyle,heavy-metal,hip-hop,honky-tonk,house,idm,indian,indie,indie-pop,industrial,iranian,j-dance,j-idol,j-pop,j-rock,jazz,k-pop,kids,latin,latino,malay,mandopop,metal,metalcore,minimal-techno,mpb,new-age,opera,pagode,party,piano,pop,pop-film,power-pop,progressive-house,psych-rock,punk,punk-rock,r-n-b,reggae,reggaeton,rock,rock-n-roll,rockabilly,romance,sad,salsa,samba,sertanejo,show-tunes,singer-songwriter,ska,sleep,songwriter,soul,spanish,study,swedish,synth-pop,tango,techno,trance,trip-hop,turkish,world-music
0,0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,0.676,0.461,-6.746,0.143,0.0322,1e-06,0.358,0.715,87.917,acoustic,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,0.42,0.166,-17.235,0.0763,0.924,6e-06,0.101,0.267,77.489,acoustic,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,0.438,0.359,-9.734,0.0557,0.21,0.0,0.117,0.12,76.332,acoustic,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,0.266,0.0596,-18.515,0.0363,0.905,7.1e-05,0.132,0.143,181.74,acoustic,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,0.618,0.443,-9.681,0.0526,0.469,0.0,0.0829,0.167,119.949,acoustic,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [257]:
num_features = ['popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

scaler = StandardScaler()
scaled = scaler.fit_transform(spotify[num_features])
spotify[num_features] = pd.DataFrame(scaled,columns=num_features)

scaled_dummies = scaler.fit_transform(spotify_original[num_features])
spotify_dummies[num_features] = pd.DataFrame(scaled_dummies,columns=num_features)

In [263]:
spotify.to_csv("/Users/scottsmacbook/DataMining/FinalProject/Spotify/spotify_scaled.csv",mode="w+")

In [264]:
spotify_dummies.to_csv("/Users/scottsmacbook/DataMining/FinalProject/Spotify/spotify_scaled_w_dups_OH.csv",mode='w+')

# 1. Content-based recommender system.

In [185]:
train=spotify.sample(frac=0.7,random_state=200)
test=spotify.drop(train.index)

In [186]:
train_dummies=spotify_dummies.sample(frac=0.7,random_state=200)
test_dummies=spotify_dummies.drop(train_dummies.index)

In [187]:
train_numerical = train[num_features]
train_labels = train.drop(columns=num_features)

test_numerical = test[num_features]
test_labels = test.drop(columns=num_features)

In [188]:
num_dummy_features = num_features+dummies_cols

train_numerical_dummies = train_dummies[num_dummy_features]
train_labels_dummies = train_dummies.drop(columns=num_dummy_features)

test_numerical_dummies = test_dummies[num_dummy_features]
test_labels_dummies = test_dummies.drop(columns=num_dummy_features)

In [189]:
np.array(train_numerical_dummies.iloc[0])

array([-0.68320652, -0.35035104, -0.04525629,  0.25708165, -0.30882059,
       -0.94608436,  0.08705761, -0.48090857, -1.4740649 ,  1.73247151,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [190]:
train_numerical

Unnamed: 0,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
70361,-0.957962,0.573166,-1.994505,-1.850426,-0.319708,1.247297,-0.556349,-0.534996,0.180937,-0.642354
31712,-1.010063,-1.174624,1.233386,0.065838,0.332424,-0.973476,0.677796,0.543053,-1.626552,-0.644509
18045,0.917688,-1.510737,1.360955,0.864376,0.164781,-0.974488,-0.016222,-0.500220,-1.019511,0.473150
27486,-0.072238,-0.132672,0.645794,0.032942,0.256984,0.935362,-0.556349,2.321585,0.461168,-0.857898
29200,-0.541151,0.802844,-0.819321,0.048168,-0.416103,-0.368292,-0.523458,-0.534996,0.222593,0.263009
...,...,...,...,...,...,...,...,...,...,...
38804,-1.062164,0.724417,-1.689112,-1.372957,-0.360780,1.320867,-0.556349,-0.445573,1.127663,0.491049
1568,-0.801657,1.077337,0.893201,0.723955,-0.489027,1.108986,1.214905,-0.530028,1.960782,0.925020
57322,0.605080,0.545157,-0.278117,-0.487010,-0.486513,0.440974,2.313263,0.448661,-0.974068,0.026186
70676,-0.436948,1.032521,0.556882,0.283707,-0.444602,-0.538973,-0.551551,-0.296534,1.555583,0.356678


In [191]:
train_numerical.columns

Index(['popularity', 'danceability', 'energy', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'],
      dtype='object')

In [275]:
from spotify_utils import get_playlist_df
from spotify_utils import get_track_features
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from etl_utils import get_unique_map_from_df_col, clean_date
import json

file = open('credentials.json')
credentials = json.load(file)
file.close()

auth_manager = SpotifyClientCredentials(client_id = credentials["client_id"], 
                                        client_secret = credentials["client_secret"])

sp = spotipy.Spotify(auth_manager=auth_manager)

In [193]:
# https://medium.com/analytics-vidhya/build-your-own-playlist-generator-with-spotifys-api-in-python-ceb883938ce4
# import requests

# user_id ='spotify:user:scottmatsubara'

# endpoint_url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
# request_body = json.dumps({
#           "name": "Test Python",
#           "description": "My first programmatic playlist, yooo!",
#           "public": False # let's keep it between us - for now
#         })
# response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
#                         "Authorization":"Bearer "+ token})

In [316]:
def get_distance_recs_song(features,data,data_labels):
    # Get the index of the track with the given ID
    data_new = data.copy()
    features = np.array(features)
    
    distances = []
    for j in range(len(data_new)):
        dist = np.linalg.norm(features - np.array(data_new.iloc[j]))
        if dist==0:
            distances.append(1000000)
        else:
            distances.append(dist)
    
    data_new['Distance'] = distances
    data_new = data_new.join(data_labels)
    data_new = data_new.sort_values(by=['Distance'])
    return data_new

In [209]:
recs = get_distance_recs_song(test_numerical.iloc[4],train_numerical,train_labels)

In [207]:
display_labels = ['Distance','artists','album_name','track_name','track_genre']
recs[display_labels].head(10)

Unnamed: 0,Distance,artists,album_name,track_name,track_genre
45643,0.647351,Sammy Davis Jr.,christmas but not christmas songs,Something's Gotta Give - Single Version,jazz
51081,0.675401,Milton Nascimento,Chill in Brazil,Rosa Maria,mpb
60786,0.765143,"Группа ""Загадка""",Золотая коллекция эстрады. Сиреневый туман,Люблю (Вдыхая розы аромат),romance
64195,0.815612,Krystina Alabado;Shereen Pimentel,Teen Time Travelers: The French Correction,Turbulent Flow,show-tunes
61190,0.824052,"Группа ""Загадка""","20 лучших ресторанных песен. Ой, цветет калина","А за окном то дождь, то снег",romance
68839,0.842305,Junie,LUGNA HITS,Salt I Mina Sår,swedish
45524,0.896937,Dean Martin,All I Want For Christmas Is You,I've Got My Love To Keep Me Warm - Remastered ...,jazz
61096,0.956325,"Группа ""Загадка""",Золотая коллекция эстрады. Сиреневый туман,Сиреневый туман,romance
16034,0.957696,Maddie & Tae,Country Christmas Greatest Hits,Holly Jolly Christmas,country
39030,0.977398,Hank Williams,Low Down Blues,Honky Tonk Blues - Non-Session Demo,honky-tonk


In [212]:
recs_genre = get_distance_recs_song(test_numerical_dummies.iloc[4],train_numerical_dummies,train_labels_dummies)

In [214]:
# recommendations are better if genre is not included.
recs_genre[display_labels].head(10)

Unnamed: 0,Distance,artists,album_name,track_name,track_genre
18,0.269,Jason Mraz;Colbie Caillat,We Sing. We Dance. We Steal Things.,Lucky,acoustic
75,1.084376,Jason Mraz,Love Is a Four Letter Word,93 Million Miles,acoustic
903,1.117485,Zack Tabudlo,Yakap,Yakap,acoustic
20,1.209542,Jason Mraz,We Sing. We Dance. We Steal Things.,I'm Yours,acoustic
583,1.310534,Joshua Hyslop,Ash & Stone,Gentle Heart,acoustic
590,1.318176,The Weepies;Deb Talan;Steve Tannen,Hideaway,Can't Go Back Now,acoustic
162,1.370006,Jason Mraz,I'm Yours,I'm Yours,acoustic
338,1.459133,Joshua Hyslop,Echos,Stand Your Ground,acoustic
5,1.522267,Tyrone Wells,Days I Will Remember,Days I Will Remember,acoustic
19302,1.539808,Shania Twain,Come On Over,You're Still The One,country


In [17]:
test.iloc[4]

index                                   26
track_id            5IfCZDRXZrqZSm8AwE44PG
artists                         Jason Mraz
album_name           Holly Jolly Christmas
track_name               Winter Wonderland
popularity                       -1.791532
danceability                      0.343495
energy                           -1.267752
loudness                         -0.113681
speechiness                      -0.342338
acousticness                      1.344394
instrumentalness                 -0.556344
liveness                          -0.38098
valence                           0.745172
tempo                             0.769598
track_genre                       acoustic
Name: 23, dtype: object

In [240]:
# find distance from all songs in the playlist to all songs in corpus. Then get the average of all these distances to 
# find the songs with the closest average distance to all songs in the playlist

def get_distance_recs_playlist(uri, data, data_labels, OH = False):
    # Get the index of the track with the given ID
    
    df_original = get_playlist_df('spotify', uri, sp, song_limit=10)
    genre = df_original['track_genre']
    df = df_original[num_features]
    scaler = StandardScaler()
    df = pd.DataFrame(scaler.fit_transform(df),columns=num_features)
    if(OH):
        for col in dummies_cols:
            df[col] = 0
        for i in range(len_df):
            df.loc[i,lower(genre)] = 1
        
    
    data_new = data.copy()
    all_distances = []
    
    for i in range(len(df)):
        features = np.array(df.iloc[i])
        distances = []
        for j in range(len(data_new)):
            dist = np.linalg.norm(features - np.array(data_new.iloc[j]))
            if dist==0:
                distances.append(1000000)
            else:
                distances.append(dist)
        all_distances.append(distances)

    all_distances = np.array(all_distances)
    avg_dist = np.sum(all_distances, 0) / len(all_distances)
    
    data_new['Distance'] = avg_dist.tolist()

    data_new = data_new.join(data_labels)
    return data_new.sort_values(by=['Distance']),df_original

In [241]:
test_small = test_numerical.head(10)
playlist_recs,playlist = get_distance_recs_playlist('spotify:playlist:6mQ256r9AwXBIdfd36Gbob',train_numerical,train_labels)

Got track 1 out of 203
Got track 2 out of 203
Got track 3 out of 203
Got track 4 out of 203
Got track 5 out of 203
Got track 6 out of 203
Got track 7 out of 203
Got track 8 out of 203
Got track 9 out of 203
Got track 10 out of 203
Got track 11 out of 203
Got track 12 out of 203


In [224]:
playlist_recs.head(10)

Unnamed: 0,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Distance,level_0,index,track_id,artists,album_name,track_name,track_genre
62015,-0.072238,0.545157,-0.274252,0.159264,-0.488189,0.158467,-0.55633,0.180391,-0.099294,-0.262344,3.095061,95313,95314,0JWnqFLENqVGs4yNss52gy,Willie Colón,Fantasmas,Celo,salsa
27606,-0.020137,0.281868,-0.285849,-0.337378,-0.11686,-0.056357,-0.556349,-0.564804,-0.26213,-0.210107,3.101856,36643,36643,0SwrpK6ZkaoGvBrdbrklFh,Lynda Lemay,Lynda Lemay,Les Souliers verts,french
13536,0.188269,0.097006,0.131651,0.171671,-0.235048,0.60577,-0.556349,-0.321374,0.025674,-0.1996,3.104133,16307,16307,4piMfKVoPcY06x4hTLf9Pl,Rahul Deshpande,Katyar Kaljat Ghusli (Original Motion Picture ...,Dil Ki Tapish,classical
71172,-0.228542,0.343489,-0.200803,0.257766,-0.495733,-0.256466,-0.556349,-0.236918,-0.288639,0.06606,3.109214,110798,110799,3xWwecwIKqwgbMamxQ3vtG,Deny,Invencible,Un Año Más,trance
17165,-0.228542,0.326683,-0.146682,0.448941,-0.167153,-0.32415,-0.556349,-0.505188,-0.046277,-0.073349,3.110256,21531,21531,4MP0DR7DuBXd0w7QV99V92,Malie Donn;Moonskull;Harlem Richard$,We Rising,We Rising,dancehall
64825,0.084066,0.001774,0.135517,0.061139,-0.40772,-0.474232,-0.555631,-0.162399,0.264249,-0.231485,3.116248,99613,99614,1ODitWwCi2P2D489FX5CmY,Jasleen Royal,Jahaan Tu Chala - Midnight Mix (Gully Boy),Jahaan Tu Chala - Midnight Mix (Gully Boy),singer-songwriter
5968,-0.436948,0.040987,-0.200803,0.112645,-0.495733,0.158467,-0.556239,-0.505188,0.199872,0.023402,3.118438,7326,7326,3spGkhIhKBYRZcC3crNiZz,Trampled by Turtles,Sigourney Fever,Pocahontas,bluegrass
12317,-0.280644,-0.054245,-0.459807,0.054748,-0.38425,0.134924,-0.556349,-0.187238,0.453594,-0.112792,3.121258,14789,14789,1ODuiU1H47mNBcxvfQkmUz,Rolf Zuckowski,Rolfs fröhlicher Familientag,Der Morgenmuffel,children
30936,-0.645353,0.253859,0.004082,-0.149022,-0.478969,0.376233,-0.555552,-0.077943,0.124134,-0.4756,3.125234,41592,41592,5UYQSCJ9VDdC9P0akTVaED,Arvid Nero,Mother Earth,Bored With Clay,goth
36172,-0.332745,0.393906,-0.138951,0.195544,-0.443764,-0.041643,-0.331246,-0.660686,-0.515853,-0.071659,3.125306,48971,48971,5X65C51a9CkEjc3Hvamf33,Gloria,Acima do Céu,Voltar pra Casa,hardcore


In [350]:
# get recommendations by taking the average of all features and treating it as one song, takes much less time 

def get_distance_recs_playlist_2(uri, data, data_labels):
    # Get the index of the track with the given ID
    df = get_playlist_df('spotify', uri, sp, song_limit=10)
    df = df[num_features]
    scaler = StandardScaler()
    df = pd.DataFrame(scaler.fit_transform(df),columns=num_features)
    
    return(get_distance_recs_song(df.mean(axis=0),data,data_labels))

In [22]:
playlist_recs_2 = get_distance_recs_playlist_2('spotify:playlist:6mQ256r9AwXBIdfd36Gbob',train_numerical,train_labels)

Got track 1 out of 203
Got track 2 out of 203
Got track 3 out of 203
Got track 4 out of 203
Got track 5 out of 203
Got track 6 out of 203
Got track 7 out of 203
Got track 8 out of 203
Got track 9 out of 203
Got track 10 out of 203
Got track 11 out of 203
Got track 12 out of 203


In [233]:
playlist_recs

Unnamed: 0,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Distance,level_0,index,track_id,artists,album_name,track_name,track_genre
62015,-0.072238,0.545157,-0.274252,0.159264,-0.488189,0.158467,-0.556330,0.180391,-0.099294,-0.262344,3.095061,95313,95314,0JWnqFLENqVGs4yNss52gy,Willie Colón,Fantasmas,Celo,salsa
27606,-0.020137,0.281868,-0.285849,-0.337378,-0.116860,-0.056357,-0.556349,-0.564804,-0.262130,-0.210107,3.101856,36643,36643,0SwrpK6ZkaoGvBrdbrklFh,Lynda Lemay,Lynda Lemay,Les Souliers verts,french
13536,0.188269,0.097006,0.131651,0.171671,-0.235048,0.605770,-0.556349,-0.321374,0.025674,-0.199600,3.104133,16307,16307,4piMfKVoPcY06x4hTLf9Pl,Rahul Deshpande,Katyar Kaljat Ghusli (Original Motion Picture ...,Dil Ki Tapish,classical
71172,-0.228542,0.343489,-0.200803,0.257766,-0.495733,-0.256466,-0.556349,-0.236918,-0.288639,0.066060,3.109214,110798,110799,3xWwecwIKqwgbMamxQ3vtG,Deny,Invencible,Un Año Más,trance
17165,-0.228542,0.326683,-0.146682,0.448941,-0.167153,-0.324150,-0.556349,-0.505188,-0.046277,-0.073349,3.110256,21531,21531,4MP0DR7DuBXd0w7QV99V92,Malie Donn;Moonskull;Harlem Richard$,We Rising,We Rising,dancehall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66093,0.188269,-3.129684,-2.462181,-6.474348,-0.757256,1.891767,-0.553181,-0.604548,-1.769318,-4.048497,9.532550,101442,101443,2QNQCdk29G9JCPTB0uOvsg,White Noise Babies,Loopable White Noise For Baby Sleep,Clean White Noise - Loopable Without Fade,sleep
66097,0.188269,-3.129684,-2.462181,-6.474348,-0.757256,1.891767,-0.553181,-0.604548,-1.769318,-4.048497,9.532550,101446,101447,3FCjw9jRcPqw8A9k2ZkJ5p,White Noise Sleep Sounds;Sleep Sound Library,Loopable White Noise Sleep Sounds,Clean White Noise - Loopable With No Fade,sleep
66534,-0.124340,-3.129684,-2.459793,-6.440136,-0.757256,1.909424,1.957202,-0.617961,-1.769318,-4.048497,9.703986,101938,101939,1gMuHUxFRYRDAmdfG3mmNM,White Noise Babies,Loopable White Noise For Baby Sleep,Air Conditioner - Loopable Without Fade,sleep
66182,0.084066,-3.129684,-2.461266,-6.464761,-0.757256,1.900595,2.307228,-0.612000,-1.769318,-4.048497,9.796908,101539,101540,4YukH0BeXhJmcCmz2BBRke,Baby Sleep White Noise,Relaxing Loopable White Noise,Box Fan - Loopable With No Fade,sleep


In [339]:
# Gets n number of recs from each song then finds recs based on all songs. 


def get_distance_recs_playlist_3(uri, data, data_labels):
    # Get the index of the track with the given ID
    
    df_original = get_playlist_df('spotify', uri, sp, song_limit=10)
    genre = df_original['track_genre']
    df = df_original[num_features]
    
    scaler = StandardScaler()
    df = pd.DataFrame(scaler.fit_transform(df),columns=num_features)
    
    recs = []
    
    for i in range(len(df)):
        song_recs = get_distance_recs_song(df.iloc[i],data,data_labels)
        recs.extend([list(song_recs.iloc[i,:]) for i in range(2)])

    song_recs = get_distance_recs_song(df.mean(axis=0),data,data_labels)
    recs.extend([list(song_recs.iloc[i,:]) for i in range(5)])
    
    data_cols = list(data.columns)
    data_cols.append('Distance') 
    cols = data_cols + list(data_labels.columns)
    
    return pd.DataFrame(recs,columns=cols)

In [340]:
playlist_recs_3 = get_distance_recs_playlist_3('spotify:playlist:6mQ256r9AwXBIdfd36Gbob',train_numerical,train_labels)

Got track 1 out of 203
Got track 2 out of 203
Got track 3 out of 203
Got track 4 out of 203
Got track 5 out of 203
Got track 6 out of 203
Got track 7 out of 203
Got track 8 out of 203
Got track 9 out of 203
Got track 10 out of 203
Got track 11 out of 203
Got track 12 out of 203


In [341]:
playlist_recs_4

Unnamed: 0,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,Distance,level_0,index,track_id,artists,album_name,track_name,track_genre
0,2.272324,0.903678,-0.026844,0.574323,-0.411073,0.835307,-0.556349,-0.569772,0.169576,-1.36343,0.632082,81037,81038,2rOnSn2piaqLAlYjtfUBlY,Tanishk Bagchi;Jubin Nautiyal;Asees Kaur,"Raataan Lambiyan (From ""Shershaah"")","Raataan Lambiyan (From ""Shershaah"")",pop
1,2.324425,0.589972,-0.247191,0.391983,-0.52507,0.829422,-0.555254,-0.680558,0.374069,-1.528692,0.650559,34021,34021,5NORWMFC27ywGSZxi8uquP,The Lumineers,Cleopatra,Ophelia,folk
2,-0.280644,1.038123,0.788826,0.848022,-0.040582,0.78528,-0.556349,0.562925,-0.099294,0.193669,1.046485,78578,78579,37jT5FWjizTz0yzow9mj1G,Michelle,Ich würd' es wieder tun (Deluxe),Vergiss mich nicht,party
3,0.344573,0.741223,0.870007,0.911183,-0.420294,0.317377,-0.556349,1.064689,-0.424967,-0.238181,1.072444,74559,74560,7cXrg0qp2OfIuEYfLi0P5S,Nossa Toca;Giba Moojen;Fabinho K;Allan Furtado...,Não Temos Tempo,Não Temos Tempo,mpb
4,-0.593252,-1.57796,1.364821,0.805726,2.637515,-0.972166,0.907123,0.294654,-0.512066,1.664624,1.257989,58745,58745,4T073IowTzdJRPBaAtHK0Q,Murderdolls,Beyond The Valley Of The Murderdolls,Dawn of the Dead,industrial
5,0.188269,-0.099061,1.391881,1.520989,3.00633,-0.970209,1.779172,0.42879,-0.746854,1.753321,1.663583,27661,27661,6oj95LjRQSKa5oY6aexgpa,Teddy Killerz,INEEDU / SOYUZ,INEEDU,drum-and-bass
6,-0.645353,0.595574,1.086488,0.77471,0.097723,-0.969738,-0.085624,-0.669131,-1.231578,0.193968,0.692784,110495,110496,1fTS5Hq6xICq4VSG0AQsAg,Above & Beyond;Richard Bedford;Maor Levi,10 Years of Group Therapy (Part 2),With Your Hope - Maor Levi Extended Mix,trance
7,-0.384846,0.937289,0.657391,0.592933,-0.00873,-0.477175,-0.556349,-0.385957,-1.314889,0.424858,0.784912,104854,104855,4FTsRd2lE1iQhTg5ylZeg2,ZPU;Mäbu,Quiebro,No Te Sueltes,spanish
8,-0.228542,-2.177362,-1.534482,-1.279532,-0.477292,0.287949,-0.333358,-0.157431,-1.524684,1.532175,1.610257,75194,75195,2GkWQgqVsgEuKbni70d0WW,Jon & Vangelis,Private Collection (Remastered 2016),Deborah - Remastered,new-age
9,-0.384846,-1.230643,-0.819321,-1.041925,-0.485674,-0.963323,-0.477291,-0.800286,-1.034659,1.517591,1.710154,107573,107574,2r69YILaGLD7FW7swtDR20,Étienne Daho,Ouverture,Ouverture - Single Edit,synth-pop


-------------

# Generate Actual Playist

In [354]:
# https://community.spotify.com/t5/Spotify-for-Developers/Redirect-URI-needed/td-p/5067419
def create_playlist(recs,playlist_name,Public=False):
    uris = list(recs.track_id)
    uris = ["spotify:track:" + x for x in uris]
    
    auth_manager = SpotifyOAuth(client_id=credentials["client_id"], client_secret=credentials["client_secret"], redirect_uri = 'http://localhost/',scope='playlist-modify-private playlist-modify-public')
    sp = spotipy.Spotify(auth_manager=auth_manager)

    # Create a new playlist
    playlist_name = playlist_name + "Playlist Recs"
    playlist_description = "Playlist Consisting of Songs Recommended by Data Mining Team 4's Program"
    user_id = sp.me()["id"]
    new_playlist = sp.user_playlist_create(user=user_id, name=playlist_name, public=Public, description=playlist_description)

    sp.playlist_add_items(playlist_id=new_playlist["id"], items=uris)

SyntaxError: invalid syntax (2803674454.py, line 2)

In [355]:
results = sp.user_playlist(user=None, playlist_id='spotify:playlist:6mQ256r9AwXBIdfd36Gbob', fields="name")
playlist_name = results['name']
create_playlist(playlist_recs_4,playlist_name)

NameError: name 'create_playlist' is not defined

## Program in Action

In [24]:
while True:
    try: 
        print("This Program will Generate a Playlist of Reccomendations Based on a Playlist that you Enter. Please make sure the playlist is public before inputting the URI.")
        input_uri = input("Please input a Spotify Playlist URI: ")
        recs = get_distance_recs_playlist(input_uri)
    except: 
        print("URI is not valid")
    else:
        break
    
results = sp.user_playlist(user=None, playlist_id=inpur_uri, fields="name")
playlist_name = results['name']

while True:
    try: 
        num_songs = int(input("How many songs would you like in this playist (max 50): ")
    except: 
        print("Please Input a Valid Number")
    else:
        if(num_songs<=50):
            break
        print("Please input a number less than 50")
            
print("Once You Input Your User information, the screen will show an Error. This is supposed to happen, simply copy the url and paste it back here. ")
create_playlist(recs,num_songs,playlist_name,True)


#print(recs[['artists','album_name','track_name','track_genre']].head(10))