# Model v3


---
**Note**

We could construct a model that predicts whether a user will like a given playlist by comparing the audio features of that playlist with those of the user's own playlists and liked playlists. The model would be trained on each user's playlists and likes, and would classify a candidate playlist as 1 if the user is expected to like it and 0 otherwise.

I believe our final model will look something like this: https://developer.spotify.com/console/get-recommendations/

In [92]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
pd.set_option('display.max_columns', 25)

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LassoCV

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.metrics import confusion_matrix

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import statsmodels.api as sm
from statsmodels.api import OLS

import seaborn as sns
sns.set()

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
import requests

## Initialize API with keys

In [93]:
# Background on the raw token request (POST to https://accounts.spotify.com/api/token):
# https://stackoverflow.com/questions/30557409/spotify-api-post-call-response-415
# SpotifyClientCredentials performs that token exchange itself, so no manual
# request is issued here.
def initiate_api(client_id=None, client_secret=None):
    '''
    Build a spotipy client authenticated with the client-credentials flow.

    inputs:
        client_id : Spotify application client ID; when omitted, read from the
                    SPOTIPY_CLIENT_ID environment variable
        client_secret : Spotify application client secret; when omitted, read
                        from the SPOTIPY_CLIENT_SECRET environment variable
    outputs:
        spotipy.Spotify client bound to a SpotifyClientCredentials manager
    raises:
        RuntimeError if either credential is missing
    '''
    import os  # local import so this cell does not depend on the import cell

    # Secrets must not be committed with the notebook; take them from the
    # caller or from the environment instead.
    # NOTE(review): credentials that were previously hard-coded here should be rotated.
    client_id = client_id or os.environ.get("SPOTIPY_CLIENT_ID")
    client_secret = client_secret or os.environ.get("SPOTIPY_CLIENT_SECRET")
    if not client_id or not client_secret:
        raise RuntimeError(
            "Spotify credentials not found: set SPOTIPY_CLIENT_ID and "
            "SPOTIPY_CLIENT_SECRET or pass them to initiate_api()."
        )

    client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
    return spotipy.Spotify(client_credentials_manager=client_credentials_manager)
sp = initiate_api()

### Read in pkl file of songs collected from first 30K playlists in dataset

In [94]:
# Load the previously collected song table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
songs = pd.read_pickle("pickles/songs_30k_dropped.pkl")
# Preview the first rows to see which audio-feature columns are available.
songs.head()

Unnamed: 0,index,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0,0.0311,https://api.spotify.com/v1/audio-analysis/0UaM...,0.904,226864,0.813,0UaMYEvWZi0ZqiDOoHU3YI,0.00697,4,0.0471,-7.105,0,0.121,125.461,4,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,audio_features,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,0.81
1,1,0.0249,https://api.spotify.com/v1/audio-analysis/6I9V...,0.774,198800,0.838,6I9VzXrHxO9rA9A5euc8Ak,0.025,5,0.242,-3.914,0,0.114,143.04,4,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,audio_features,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,0.924
2,2,0.00238,https://api.spotify.com/v1/audio-analysis/0WqI...,0.664,235933,0.758,0WqIKmW4BTrj3eJFmnCKMv,0.0,2,0.0598,-6.583,0,0.21,99.259,4,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,audio_features,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,0.701
3,3,0.202,https://api.spotify.com/v1/audio-analysis/1AWQ...,0.891,267267,0.714,1AWQoqb9bSvzTjaLralEkT,0.000234,4,0.0521,-6.055,0,0.14,100.972,4,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,audio_features,spotify:track:1AWQoqb9bSvzTjaLralEkT,0.818
4,4,0.0561,https://api.spotify.com/v1/audio-analysis/1lzr...,0.853,227600,0.606,1lzr43nnXAijIGYnCT8M8H,0.0,0,0.313,-4.596,1,0.0713,94.759,4,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,audio_features,spotify:track:1lzr43nnXAijIGYnCT8M8H,0.654


### This function calculates the Euclidean distance between each song and the seed song, which our KNN model uses to rank neighbours

In [99]:
def distance(songs, seed_song):
    '''
    Euclidean distance from every song in `songs` to a single seed song,
    computed over seven audio features.

    inputs: 
        songs : Dataframe of songs to choose from; must contain the columns
                acousticness, danceability, energy, instrumentalness,
                liveness, speechiness and tempo
        seed_song : audio features of the seed song, either a DataFrame whose
                    first row is the seed (e.g. built from sp.audio_features)
                    or a Series / dict keyed by feature name
    outputs:
        dist : pandas Series, indexed like `songs`, containing the calculated
               'distance' from each song in the input dataframe to the seed song
    raises:
        ValueError if `seed_song` is an empty DataFrame
    '''
    feature_cols = ['acousticness', 'danceability', 'energy',
                    'instrumentalness', 'liveness', 'speechiness', 'tempo']

    # Reduce the seed to one scalar per feature. Subtracting a one-row Series
    # from a full-length Series would align on index labels in current pandas
    # and yield NaN for every row whose label is not the seed's label.
    if isinstance(seed_song, pd.DataFrame):
        if seed_song.empty:
            raise ValueError("seed_song contains no rows")
        seed_row = seed_song.iloc[0]
    else:
        seed_row = seed_song
    seed_values = np.array([float(seed_row[col]) for col in feature_cols])

    # NOTE(review): in the previewed data tempo is on a much larger scale
    # (e.g. 125.461) than the other features (0-1), so it dominates the
    # distance unless the inputs are scaled beforehand.
    diffs = songs[feature_cols].values.astype(float) - seed_values
    dist = pd.Series(np.sqrt((diffs ** 2).sum(axis=1)), index=songs.index)

    return dist

### Enter song ID to seed the playlist from

In [100]:
# spotify:track:1ghlpxVfPbFH2jenrv9vVw
# spotify:track:0AJUX8BRUehB6RHPZUOoYS
# spotify:track:5IVuqXILoxVWvWEPm82Jxr
# spotify:track:5IVuqXILoxVWvWEPm82Jxr
# spotify:track:550rQQCGkrTzvp4SfpOPzx

In [120]:
# Seed track for the playlist, given as a Spotify track ID
# (the final segment of a spotify:track:<id> URI).
song_id = '550rQQCGkrTzvp4SfpOPzx'
# Alternative seeds, kept for quick switching:
# song_id = '1ghlpxVfPbFH2jenrv9vVw'
# song_id = '5IVuqXILoxVWvWEPm82Jxr' # from list of songs

In [121]:
# Fetch the seed track's audio features from the Spotify API and wrap the
# result in a DataFrame (one row for the single requested track).
song = pd.DataFrame(sp.audio_features(song_id))
# Display the seed track's features.
song

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.454,https://api.spotify.com/v1/audio-analysis/550r...,0.321,268960,0.377,550rQQCGkrTzvp4SfpOPzx,0,10,0.338,-7.385,1,0.0361,119.483,4,https://api.spotify.com/v1/tracks/550rQQCGkrTz...,audio_features,spotify:track:550rQQCGkrTzvp4SfpOPzx,0.367


In [122]:
# Rank every song by its distance to the seed and keep the ten closest.
top10_dist = distance(songs, song).sort_values()[0:10]
# The distance Series carries the index *labels* of `songs`, so rows are
# selected by label with .loc; positional .iloc would return the wrong rows
# whenever the index is not exactly 0..n-1.
top10 = songs.loc[top10_dist.index]
# Spotify track IDs of the selected songs.
hrefs = top10['id']

In [123]:
# Show the track IDs of the ten nearest songs (the `id` column of `top10`).
hrefs

6938      550rQQCGkrTzvp4SfpOPzx
216136    5UJomXuoDQoeBwQ67RVvSH
48478     4HmyfiSCAaZzNWKhUkLM3V
164187    5XBzK0sl4y38zHLojqSIMM
61636     0NZx7rRyIOArlTbRbM3efn
233146    44lGysG4BW1P7ixarRhYRd
300885    4vDJqsXuvoVSbHwfjQVrFp
246800    3ik79qXV15D3ae33uveasw
161775    6VM7mzesPE5YMKqKE0ZquC
174343    3bPSPTXcc8gB7NMOa0LDAo
Name: id, dtype: object

In [124]:
# Show the ten smallest distances to the seed song, in ascending order.
top10_dist

6938      0.000000
216136    0.237121
48478     0.247892
164187    0.254656
61636     0.255003
233146    0.278955
300885    0.279199
246800    0.284026
161775    0.285628
174343    0.286494
dtype: float64

### Get playlist from seed song

In [125]:
# Look up the seed track through the Spotify API and print its title.
print(sp.track(song_id)['name'])

Hallelujah


In [126]:
# Print title and artist(s) for each of the ten nearest songs.
# One Spotify API request is made per track.
for ref in top10['id']:
    track = sp.track(ref)
    print(track['name'])
    print('by')
    # A track can list several artists; print each on its own line.
    for artist in track['artists']:
        print(artist['name'])
    # Visual separator between tracks.
    print('-----------------------------------')

Hallelujah
by
Pentatonix
-----------------------------------
Dancing
by
Elisa
-----------------------------------
Make It Rain (As Heard In Sons of Anarchy)
by
Sofia Karlberg
-----------------------------------
Heroe
by
Il Divo
-----------------------------------
I Go To Sleep
by
Sia
-----------------------------------
Older and Taller
by
Regina Spektor
-----------------------------------
Carrie-Anne
by
The Hollies
-----------------------------------
Love Song for a Vampire
by
Annie Lennox
-----------------------------------
When You're Evil
by
Aurelio Voltaire
-----------------------------------
Jackie's Strength
by
Tori Amos
-----------------------------------


### Final Baseline Playlist Generator

In [38]:
def distance(idkey, songs):
    '''
    Euclidean distance from every song in `songs` to the song identified by
    `idkey`, computed over seven audio features.

    NOTE: this definition replaces the earlier distance(songs, seed_song) in
    this notebook and takes its arguments in a different order.

    inputs: 
        idkey: Input Song Id; its audio features are fetched from the Spotify
               API through the global client `sp`
        songs: songs dataframe containing the columns acousticness,
               danceability, energy, instrumentalness, liveness, speechiness
               and tempo

    returns: pandas Series, indexed like `songs`, of distances from songs in
             dataframe to input song features
    '''
    feature_cols = ['acousticness', 'danceability', 'energy',
                    'instrumentalness', 'liveness', 'speechiness', 'tempo']

    song = pd.DataFrame(sp.audio_features(idkey))
    # `scaler` and `cols` are defined elsewhere in the notebook.
    # NOTE(review): presumably this rescales the seed's features to match
    # `songs` -- confirm against the definition of `scaler`.
    song = scaler(cols, songs, song)

    # Reduce the seed to one scalar per feature. Subtracting a one-row Series
    # from a full-length Series would align on index labels in current pandas
    # and yield NaN for every row whose label is not the seed's label.
    seed_values = np.array(
        [float(np.ravel(getattr(song, col))[0]) for col in feature_cols]
    )

    diffs = songs[feature_cols].values.astype(float) - seed_values
    dist = pd.Series(np.sqrt((diffs ** 2).sum(axis=1)), index=songs.index)

    return dist

def playlist_generator(input_song_uri, n):
    '''
    Select the `n` songs from the global `songs2` dataframe that are closest
    to the input song, and print the input identifier.

    inputs:
        input_song_uri: identifier of the seed song, passed unchanged to
                        distance(); the call in this notebook passes a bare
                        track ID
        n: number of songs to return

    returns: dataframe with the `n` nearest rows of `songs2`, ordered from
             closest to farthest
    '''
    topn_dist = distance(input_song_uri, songs2).sort_values()[0:n]
    # The distance Series carries the index *labels* of `songs2`, so rows are
    # selected by label with .loc; positional .iloc would return the wrong
    # rows whenever the index is not exactly 0..n-1.
    topn = songs2.loc[topn_dist.index]
    print(input_song_uri)
    return topn

def playlist_printer(playlist):
    '''
    Print the title and artist(s) of every track in a playlist.

    inputs:
        playlist: dataframe with an `id` column of Spotify track IDs; each ID
                  is looked up through the global client `sp`

    returns: None (output is written with print)
    '''
    divider = '-----------------------------------'
    for track_id in playlist['id']:
        details = sp.track(track_id)
        print(details['name'])
        print('by')
        # One line per credited artist.
        performers = (entry['name'] for entry in details['artists'])
        for performer in performers:
            print(performer)
        print(divider)

In [39]:
# Build a 25-song playlist around the given seed track ID, then print each
# track's title and artist(s).
playlist = playlist_generator('1ghlpxVfPbFH2jenrv9vVw', 25)
playlist_printer(playlist)

1ghlpxVfPbFH2jenrv9vVw
Vios
by
Roy Rosenfeld
Matt Lange
Daniel Jang
-----------------------------------
Farewell Hands - Original Mix
by
Heartik
-----------------------------------
People
by
James Murphy
-----------------------------------
Rock Me
by
Muddy Waters
-----------------------------------
Am I That Easy To Forget
by
George Jones
-----------------------------------
Candyman - 2013 Remaster
by
Grateful Dead
-----------------------------------
Struggle Phone Call
by
Upchurch
-----------------------------------
Nospheratu (Echospace Reduction)
by
Pulshar
Echospace
-----------------------------------
O Christmas Tree
by
Tony Bennett
-----------------------------------
The Clearing Fills
by
Tortoise
-----------------------------------
The Beginning
by
Frigorex
-----------------------------------
Love Slave
by
Max Richter
-----------------------------------
Where They at Doe
by
Ray Dey
-----------------------------------
Prelude Suite No.1
by
Johann Sebastian Bach
Classical Study Mu

In [52]:
# Inspect the full track object returned by the Spotify API for one song.
# The trailing [''] lookup was removed: indexing the result with an empty key
# raises KeyError instead of showing the track.
sp.track('0WqIKmW4BTrj3eJFmnCKMv')

{'album': {'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6vWDO969PvNqNYHIOW5v0m'},
    'href': 'https://api.spotify.com/v1/artists/6vWDO969PvNqNYHIOW5v0m',
    'id': '6vWDO969PvNqNYHIOW5v0m',
    'name': 'Beyoncé',
    'type': 'artist',
    'uri': 'spotify:artist:6vWDO969PvNqNYHIOW5v0m'}],
  'available_markets': ['CH', 'LI'],
  'external_urls': {'spotify': 'https://open.spotify.com/album/25hVFAxTlDvXbx2X2QkUkE'},
  'href': 'https://api.spotify.com/v1/albums/25hVFAxTlDvXbx2X2QkUkE',
  'id': '25hVFAxTlDvXbx2X2QkUkE',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab67616d0000b27390592f54226a3eb8a99feea6',
    'width': 640},
   {'height': 300,
    'url': 'https://i.scdn.co/image/ab67616d00001e0290592f54226a3eb8a99feea6',
    'width': 300},
   {'height': 64,
    'url': 'https://i.scdn.co/image/ab67616d0000485190592f54226a3eb8a99feea6',
    'width': 64}],
  'name': 'Dangerously In Love (Alben für die Ewigkeit)',
  'rele