### Create Playlist with Type 1 Error Songs  
This notebook creates a playlist of all songs from the random Kaggle sample that the model incorrectly labels as songs from my personal listening history (type 1 errors, i.e. false positives). If a song comes from the random sample but the model thinks its characteristics are in line with my personal listening data, then I might like the song! In fact, this playlist has helped me discover many new artists that I had never heard of, as well as several songs that I love but hadn't heard or thought about in years.  

In [97]:
import json
import os
from datetime import date
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import xgboost
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the tab-separated dataset into a DataFrame.
# NOTE(review): judging by the file name this holds both the personal listening history and
# the Kaggle sample, distinguished by the 'y' column used below -- confirm.
df_master = pd.read_csv(
    'data/spotify_personal_kaggle.csv',
    sep='\t',
)

In [3]:
# Train the model with the hyperparameters that performed best in the model_building notebook.
# Produces y_pred, the test-set predictions that are used to find all type 1 errors.
y = df_master['y']

X = df_master
rs = 22

# 60/20/20 train/validation/test split, stratified on the label so every part keeps the
# same class balance.
X_train_, X_other, y_train, y_other = train_test_split(
    X, y, test_size=0.4, random_state=rs, shuffle=True, stratify=y)
X_val_, X_test_, y_val, y_test = train_test_split(
    X_other, y_other, test_size=0.5, random_state=rs, shuffle=True, stratify=y_other)

# The underscore-suffixed frames keep artistName/trackName so that songs can be identified
# later; the model only sees the remaining columns.
non_feature_cols = ['artistName', 'trackName', 'y']
X_train = X_train_.drop(non_feature_cols, axis=1)
X_val = X_val_.drop(non_feature_cols, axis=1)
X_test = X_test_.drop(non_feature_cols, axis=1)

# StandardScaler returns bare arrays, so the feature names must be captured *before*
# scaling and re-attached afterwards (assigning X_train.columns to itself after the
# transform is a no-op and leaves integer column labels).
feature_names = X_train.columns

# Fit the scaler on the training split only and reuse it for validation/test.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=feature_names)
X_val = pd.DataFrame(scaler.transform(X_val), columns=feature_names)
X_test = pd.DataFrame(scaler.transform(X_test), columns=feature_names)

# eval_metric is given to the constructor: passing it to fit() is deprecated and rejected
# by recent xgboost releases. No eval_set is used, so it does not influence training.
cl = xgboost.XGBClassifier(
    n_estimators=32,
    max_depth=10,
    reg_alpha=10,
    reg_lambda=0,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=rs,
)
cl.fit(X_train, y_train)
y_pred = cl.predict(X_test)

In [83]:
# Create a dictionary {trackName: artistName} with all type 1 errors - songs classified as
# being from my history (prediction == 1) but actually from the random sample (label == 0).
# type_1 holds the positional indices of those songs within the test split.
type_1 = [
    i
    for i, (actual, predicted) in enumerate(zip(y_test, y_pred))
    if actual == 0 and predicted == 1
]
type_1_songs = X_test_.iloc[type_1]
# NOTE: keyed by track name, so two different songs sharing a title collapse into one entry.
d = dict(zip(type_1_songs['trackName'], type_1_songs['artistName']))

# Manual clean-up: re-key a few titles without their apostrophes (and, for one of them,
# replace the artist with an apostrophe-free spelling).
# NOTE(review): presumably these are the songs whose Spotify lookup failed -- confirm.
# Each entry is (original title, replacement title, artist override or None).
apostrophe_fixes = [
    ("I'm with You", "Im with You", None),
    ("You'll Be In My Heart - Instrumental", "Youll Be In My Heart - Instrumental", None),
    ("Don't Cry (Original)", "Dont Cry (Original)", "Guns N Roses"),
    ("Who'll Stop The Rain", "Wholl Stop The Rain", None),
]
for original_title, replacement_title, artist_override in apostrophe_fixes:
    # Skip titles that are not among the current type 1 errors instead of raising KeyError;
    # the set of errors changes whenever the data, split or model changes.
    if original_title in d:
        artist = d.pop(original_title)
        d[replacement_title] = artist if artist_override is None else artist_override

In [155]:
# Initialize the class with spotify_token, spotify_id and a dictionary of all songs.  The
# populate_playlist method will create and populate the playlist.

class type_1_playlist: #partially adapted from https://github.com/TheComeUpCode/SpotifyGeneratePlaylist/blob/master/create_playlist.py
    """Create a playlist of all type 1 errors using the Spotify Web API.

    Parameters
    ----------
    spotify_token : str
        OAuth bearer token sent in the Authorization header of every request.
    spotify_user_id : str
        Id of the Spotify user who will own the new playlist.
    song_dict : dict
        Maps track name -> artist name for every song to look up and add.
    """

    # Largest number of URIs sent in a single add-items request.
    MAX_URIS_PER_REQUEST = 100

    def __init__(self, spotify_token, spotify_user_id, song_dict):
        self.spotify_token = spotify_token
        self.spotify_user_id = spotify_user_id
        self.song_dict = song_dict

    def _headers(self):
        """Return the JSON/authorization headers shared by all requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": "Bearer {}".format(self.spotify_token)
        }

    def _get_spotify_uri(self, song_name, artist):
        """Return the URI of the first search hit for a particular song.

        Raises
        ------
        requests.HTTPError
            If the search request returns an error status.
        LookupError
            If the search returns no tracks (or the response lacks the expected keys).
        """
        # Pass the query through ``params`` so requests URL-encodes it; characters such
        # as '&' or '#' in a title would otherwise cut the query short.
        response = requests.get(
            "https://api.spotify.com/v1/search",
            params={
                "q": "track:{} artist:{}".format(song_name, artist),
                "type": "track",
                "offset": 0,
                "limit": 20,
            },
            headers=self._headers()
        )
        response.raise_for_status()
        songs = response.json()["tracks"]["items"]
        if not songs:
            raise LookupError(
                "no Spotify track found for {!r} by {!r}".format(song_name, artist))

        return songs[0]["uri"]

    def _create_uri_list(self):
        """Return the URIs of all songs in ``song_dict`` that could be found.

        Lookups are best effort: a song whose lookup fails is reported on stdout
        together with the reason and is left out of the result.
        """
        uri_list = []
        for song_name, artist in self.song_dict.items():
            try:
                uri_list.append(self._get_spotify_uri(song_name, artist))
            except Exception as err:
                # Deliberately broad (one bad song must not abort the run) but, unlike
                # a bare ``except:``, it lets KeyboardInterrupt through.
                print(song_name, artist, "-", repr(err))
        return uri_list

    def create_playlist(self):
        """Create a new public playlist and return its id.

        Raises
        ------
        requests.HTTPError
            If Spotify rejects the request (e.g. expired token).
        """
        request_body = json.dumps({
            "name": "Type 1 Error",
            "description": "All Misclassified Songs from Kaggle",
            "public": True
        })

        query = "https://api.spotify.com/v1/users/{}/playlists".format(
            self.spotify_user_id)
        response = requests.post(
            query,
            data=request_body,
            headers=self._headers()
        )
        # Fail with the HTTP error instead of an opaque KeyError on the missing "id".
        response.raise_for_status()

        # playlist id
        return response.json()["id"]

    def populate_playlist(self):
        """Create the playlist and add all type 1 error songs to it.

        The URIs are sent in batches of at most ``MAX_URIS_PER_REQUEST`` so that no
        song is dropped when more than one batch is needed.

        Returns
        -------
        dict
            The JSON body of the last add-items response, the JSON body of the first
            failing response (adding stops there), or an empty dict when no URI was
            found and therefore nothing was added.
        """
        uris = self._create_uri_list()

        playlist_id = self.create_playlist()

        query = "https://api.spotify.com/v1/playlists/{}/tracks".format(
            playlist_id)

        response_json = {}
        batch_size = self.MAX_URIS_PER_REQUEST
        for start in range(0, len(uris), batch_size):
            response = requests.post(
                query,
                data=json.dumps({"uris": uris[start:start + batch_size]}),
                headers=self._headers()
            )
            response_json = response.json()

            # A successful add answers with a 2xx status (201 in practice), so only
            # report real failures rather than everything that is not exactly 200.
            if not response.ok:
                print(response.status_code)
                break

        return response_json

In [145]:
# Credentials are read from environment variables so the notebook runs top to bottom
# without editing and no secret is ever saved in it.
#   SPOTIFY_TOKEN - spotify token, which can be found on Spotify for Developers
#   SPOTIFY_ID    - spotify id of the user who will own the playlist
# A missing variable raises KeyError naming the variable that has to be set.
spotify_token = os.environ["SPOTIFY_TOKEN"]
spotify_id = os.environ["SPOTIFY_ID"]

In [156]:
# Build the playlist from the type 1 error dictionary; the cell's final expression
# displays the JSON returned by populate_playlist().
pl = type_1_playlist(
    spotify_token=spotify_token,
    spotify_user_id=spotify_id,
    song_dict=d,
)
pl.populate_playlist()

201


{'snapshot_id': 'Myw5OTAxMTczOWJiODRjNzA5YTM4YmVlMTJmNGY0ODY4NGYzMTcyNjNl'}