# Model v3


---
**Note**

We could construct a model that predicts whether a user will like a given playlist by comparing the audio features of the user's own playlists and liked playlists with the audio features of our playlist. Train the model on each user's playlists and likes, then classify a candidate playlist as 1 if the user will like it and 0 otherwise.

I believe our final model will look something like this: https://developer.spotify.com/console/get-recommendations/

In [92]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
pd.set_option('display.max_columns', 25)

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LassoCV

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.metrics import confusion_matrix

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import statsmodels.api as sm
from statsmodels.api import OLS

import seaborn as sns
sns.set()

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
import requests

## Initialize API with keys

In [93]:
# Credentials are read from the environment so that secrets never live in the notebook.
# NOTE(review): keys that were previously committed here should be rotated in the Spotify dashboard.
def initiate_api():
    '''
    Build a spotipy client authenticated with the client-credentials flow.

    inputs:
        none; the credentials are read from the environment variables
        SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
    outputs:
        spotipy.Spotify client backed by a SpotifyClientCredentials manager
    raises:
        RuntimeError if either environment variable is missing or empty
    '''
    import os  # local import keeps this cell self-contained

    client_id = os.environ.get("SPOTIPY_CLIENT_ID")
    client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")
    if not client_id or not client_secret:
        raise RuntimeError(
            "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
            "SPOTIPY_CLIENT_SECRET environment variables before running this cell."
        )

    # The credentials manager is handed to the client, so no manual token request is made here.
    client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
    return spotipy.Spotify(client_credentials_manager=client_credentials_manager)
# Shared Spotify client; later cells use it for audio-feature and track lookups.
sp = initiate_api()

### Read in pkl file of songs collected from first 30K playlists in dataset

In [94]:
# Load the pickled table of song audio features.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files from a trusted source.
songs = pd.read_pickle("pickles/songs_30k_dropped.pkl")
# Preview the first rows.
songs.head()

Unnamed: 0,index,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0,0.0311,https://api.spotify.com/v1/audio-analysis/0UaM...,0.904,226864,0.813,0UaMYEvWZi0ZqiDOoHU3YI,0.00697,4,0.0471,-7.105,0,0.121,125.461,4,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,audio_features,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,0.81
1,1,0.0249,https://api.spotify.com/v1/audio-analysis/6I9V...,0.774,198800,0.838,6I9VzXrHxO9rA9A5euc8Ak,0.025,5,0.242,-3.914,0,0.114,143.04,4,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,audio_features,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,0.924
2,2,0.00238,https://api.spotify.com/v1/audio-analysis/0WqI...,0.664,235933,0.758,0WqIKmW4BTrj3eJFmnCKMv,0.0,2,0.0598,-6.583,0,0.21,99.259,4,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,audio_features,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,0.701
3,3,0.202,https://api.spotify.com/v1/audio-analysis/1AWQ...,0.891,267267,0.714,1AWQoqb9bSvzTjaLralEkT,0.000234,4,0.0521,-6.055,0,0.14,100.972,4,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,audio_features,spotify:track:1AWQoqb9bSvzTjaLralEkT,0.818
4,4,0.0561,https://api.spotify.com/v1/audio-analysis/1lzr...,0.853,227600,0.606,1lzr43nnXAijIGYnCT8M8H,0.0,0,0.313,-4.596,1,0.0713,94.759,4,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,audio_features,spotify:track:1lzr43nnXAijIGYnCT8M8H,0.654


### This function calculates the distance in our KNN Model

In [99]:
def distance(songs, seed_song,
             features=('acousticness', 'danceability', 'energy',
                       'instrumentalness', 'liveness', 'speechiness', 'tempo')):
    '''
    Euclidean distance from every song in a dataframe to a single seed song.

    inputs:
        songs     : Dataframe of songs to choose from; must contain every column in `features`
        seed_song : audio features of the seed song, given either as a one-row Dataframe
                    or as a Series / dict keyed by feature name
        features  : names of the columns used in the distance (defaults to the seven
                    audio features this notebook has always used)
    outputs:
        dist : pandas Series, indexed like `songs`, containing the calculated 'distance'
               from each song in the input dataframe to the seed song
    raises:
        ValueError if seed_song is a Dataframe that does not have exactly one row

    Note: the features are used unscaled, so tempo contributes far more to the
    distance than the other features in the data shown in this notebook.
    '''
    features = list(features)

    # Reduce the seed to one scalar per feature. Subtracting a one-row Series from a
    # long Series lets pandas align on index labels, which yields NaN for every song
    # whose label differs from the seed's label.
    if isinstance(seed_song, pd.DataFrame):
        if len(seed_song) != 1:
            raise ValueError(
                "seed_song must contain exactly one row, got %d" % len(seed_song))
        seed_values = [seed_song[name].iloc[0] for name in features]
    else:
        seed_values = [seed_song[name] for name in features]
    seed_vector = np.asarray(seed_values, dtype=float)

    # Plain numpy broadcasting: (n_songs, n_features) - (n_features,)
    diffs = np.asarray(songs[features], dtype=float) - seed_vector
    dist = np.sqrt((diffs ** 2).sum(axis=1))

    # Keep the songs' own index so callers can map distances back to rows.
    return pd.Series(dist, index=songs.index)

### Enter song ID to seed the playlist from

In [143]:
# spotify:track:1ghlpxVfPbFH2jenrv9vVw
# spotify:track:0AJUX8BRUehB6RHPZUOoYS
# spotify:track:5IVuqXILoxVWvWEPm82Jxr
# spotify:track:5IVuqXILoxVWvWEPm82Jxr
# spotify:track:550rQQCGkrTzvp4SfpOPzx
# spotify:track:2c37Gkpu75l3kvh1FUZrHV
# spotify:track:7AJIHT8hK423KPQZtvwEkM
# spotify:track:7N3PAbqfTjSEU1edb2tY8j

In [159]:
# Spotify track ID of the seed song; the commented-out lines are alternative seeds.
song_id = '5cYA45RVGI6F4f06gtWjsd'
# song_id = '1ghlpxVfPbFH2jenrv9vVw'
# song_id = '5IVuqXILoxVWvWEPm82Jxr' # from list of songs

In [160]:
# Look up the seed track's audio features through the Spotify client and wrap the result in a DataFrame.
seed_song = pd.DataFrame(sp.audio_features(song_id))
# Display the seed's feature row.
seed_song

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.903,https://api.spotify.com/v1/audio-analysis/5cYA...,0.349,206960,0.166,5cYA45RVGI6F4f06gtWjsd,0.783,4,0.11,-14.957,1,0.0395,97.972,4,https://api.spotify.com/v1/tracks/5cYA45RVGI6F...,audio_features,spotify:track:5cYA45RVGI6F4f06gtWjsd,0.0895


### Get playlist from seed song

In [161]:
# Fetch the seed track's metadata and show its title.
print("Seed Song: " + sp.track(song_id)['name'])

Seed Song: Concerto For Violin And Strings In E, Op.8, No.1, R.269 "La Primavera": 1. Allegro


### Final Baseline Playlist Generator

In [162]:
def playlist_generator(songs, seed_song, n):
    '''
    Pick the n songs closest to the seed song.

    inputs:
        songs     : Dataframe of songs to choose from
        seed_song : audio features of the seed song, passed through to distance()
        n         : number of songs to return
    outputs:
        topn : rows of `songs` for the n smallest distances, closest first
    '''
    # Explicit positional slice of the sorted distances: the n closest songs.
    topn_dist = distance(songs, seed_song).sort_values().iloc[:n]
    # topn_dist.index holds index *labels*, so select by label (.loc). The positional
    # .iloc only happens to agree when songs has a default 0..N-1 index.
    topn = songs.loc[topn_dist.index]
    return topn

def playlist_printer(playlist):
    '''
    Print every track of a playlist followed by its artists.

    inputs:
        playlist : object whose 'id' entry iterates over Spotify track IDs
                   (the dataframe returned by playlist_generator in this notebook)
    outputs:
        None; for each track prints its name, the word 'by', one artist name
        per line, and a dashed separator line
    '''
    divider = '-----------------------------------'
    for track_id in playlist['id']:
        # One metadata lookup per track through the shared Spotify client.
        info = sp.track(track_id)
        print(info['name'], 'by', sep='\n')
        for performer in info['artists']:
            print(performer['name'])
        print(divider)

In [163]:
# Build a 25-song playlist around the seed song, then print each track with its artists.
playlist = playlist_generator(songs, seed_song, 25)
playlist_printer(playlist)

Processional - Bridal Chorus (Here Comes The Bride) - Wagner
by
The O'Neill Brothers
-----------------------------------
Ramblin' Rose
by
Acker Bilk
-----------------------------------
Varðeldur
by
Sigur Rós
-----------------------------------
When You Wish Upon A Star
by
Royal Philharmonic Orchestra
-----------------------------------
Cello Suite No. 1 in G Major, BWV 1007: I. Prélude
by
Johann Sebastian Bach
Anner Bylsma
-----------------------------------
Doubts 2
by
Ibrahim Maalouf
-----------------------------------
What A Friend We Have In Jesus
by
Jack Jezzro
-----------------------------------
Aria
by
Jan Antonín Losy
Kurt Schneeweiss
-----------------------------------
Liebesleid (Love's Sorrow)
by
Fritz Kreisler
Sergei Rachmaninoff
RueiBin Chen
-----------------------------------
Now Thank We All Our God
by
The O'Neill Brothers
-----------------------------------
Scent of Night
by
Myuu
-----------------------------------
Where Do You Start
by
Brad Mehldau Trio
---------------