# TEST MODEL ON UNKNOWN DATA

### Import libraries and spotipy authentication

In [19]:
import numpy as np
import pandas as pd

In [20]:
import spotipy
import spotipy.util as util

In [21]:
from sklearn.externals import joblib
from sklearn.preprocessing import MinMaxScaler

In [22]:
import warnings
warnings.filterwarnings('ignore')

In [23]:
# Authentication details
# my_username is the Spotify user the token is requested for; it is also the
# user passed to the playlist read/create calls further down.
my_username = "oso41"
# Space-separated OAuth scopes handed to util.prompt_for_user_token.
# NOTE(review): create_playlists creates its playlists with public=False, which is
# presumably why 'playlist-modify-private' is requested - confirm.
chosen_scope = "user-library-read playlist-modify-private"
# Left blank in the source.
# NOTE(review): presumably these must be filled in with the credentials of a
# registered Spotify application before the token request can succeed - confirm.
my_client_id = ""
my_client_secret = ""

In [24]:
# Token creation
# Requests a user token for my_username with the scopes in chosen_scope, using the
# client credentials defined above.
# NOTE(review): the redirect URI presumably has to match the one registered for the
# Spotify application - confirm.
token = util.prompt_for_user_token(username=my_username,
                                   scope=chosen_scope,
                                   client_id=my_client_id,
                                   client_secret=my_client_secret,
                                   redirect_uri="http://127.0.0.1/")

In [25]:
# Spotipy object
# Client authenticated with the token created above. It is used as a module-level
# global by get_tracks, get_features and create_playlists, so it must exist before
# any of them is called.
sp = spotipy.Spotify(auth=token)

### Load previous classification model

In [26]:
# Load previously trained classification model
# The path is relative, so the file is looked up in the current working directory.
# The loaded object is later passed to predict() as the model.
# NOTE(review): joblib.load unpickles the file's contents, so only load model files
# that come from a trusted source.
# NOTE(review): joblib is imported from sklearn.externals in this file; that module was
# removed in scikit-learn 0.23, so newer versions presumably need `import joblib` - confirm.
filename = 'tree_model.sav'
dtc = joblib.load(filename)

### Functions to retrieve track features from a playlist

In [28]:
def get_tracks(playlist_id, playlist_length):
    '''Gets the main information for all tracks in a playlist

    Pages through the playlist with the module-level spotipy client `sp`, on behalf
    of the module-level `my_username`. Playlist items whose 'track' payload is
    missing (the Spotify API can return null there, e.g. for tracks that are no
    longer available) are skipped instead of raising. Missing individual fields
    are stored as None so that clean_tracks_df can drop those rows.

    Input:
    --------------
    playlist_id (str): playlist id retrieved manually from spotify
    playlist_length (int): number of tracks in the playlist

    Returns:
    --------------
    df (df): dataframe with tracks information (artist_name, track_name, track_id and popularity)
    '''

    columns = ['artist_name', 'track_name', 'track_id', 'popularity']
    rows = []

    for offset in range(0, playlist_length, 100):  # can only retrieve 100 tracks per request
        results = sp.user_playlist_tracks(my_username, playlist_id=playlist_id, limit=100, offset=offset)
        for item in results['items']:
            track = item.get('track')
            if not track:
                # Nothing usable in this playlist entry
                continue
            # Only the first listed artist is kept; tolerate an empty artist list
            artists = track.get('artists') or [{}]
            rows.append((artists[0].get('name'),
                         track.get('name'),
                         track.get('id'),
                         track.get('popularity')))

    # Building the frame row-wise lets pandas infer a dtype per column
    # (e.g. numeric popularity) and still yields the four columns when empty
    return pd.DataFrame(rows, columns=columns)

In [29]:
def clean_tracks_df(df):
    '''Removes repeated songs and incomplete rows from a tracks dataframe

    Input:
    --------------
    df (df): dataframe with tracks information

    Returns:
    --------------
    df (df): new dataframe without duplicates or missing values (input is left untouched)
    '''

    # The same song can show up under different track ids, so uniqueness is decided
    # on the ('artist_name', 'track_name') pair; the first occurrence is kept.
    # Rows with any missing value are discarded afterwards.
    song_key = ['artist_name', 'track_name']
    return df.drop_duplicates(subset=song_key).dropna()

In [30]:
def get_features(df):
    '''Gets the features for all tracks in a dataframe

    Audio features are requested through the module-level spotipy client `sp`
    in batches of 50 track ids. Tracks for which Spotify returns no features
    (a None entry in the response) are left out of the result, and missing
    track ids are not sent at all.

    Input:
    --------------
    df (df): dataframe with tracks information (needs a 'track_id' column)

    Returns:
    --------------
    df (df): dataframe with features for each track (one row per track with features)
    '''

    # Plain list of ids: positional slicing below is then unambiguous,
    # whatever the index of the incoming dataframe looks like
    track_ids = df['track_id'].dropna().tolist()

    # Create empty list to store songs features
    rows = []

    # Retrieve audio features for every track
    for start in range(0, len(track_ids), 50):
        batch = sp.audio_features(track_ids[start:start + 50]) or []
        # A None entry means no features are available for that track
        rows.extend(features for features in batch if features is not None)

    return pd.DataFrame(rows)

In [31]:
def clean_features_df(df):
    '''Cleans the dataframe containing tracks features

    Input:
    --------------
    df (df): dataframe with tracks features

    Returns:
    --------------
    df (df): new dataframe without the unused columns and with 'id' renamed to 'track_id'
    '''

    # Columns that carry no useful information for the model
    unused_columns = ['analysis_url', 'track_href', 'type', 'uri']

    # 'id' becomes 'track_id' so it matches the key of the tracks dataframe (prepare for merging)
    return df.drop(columns=unused_columns).rename(columns={'id': 'track_id'})

In [32]:
def merge(df1,df2):
    '''Joins tracks information with tracks features on 'track_id'

    Only tracks present in both dataframes are kept (inner join).

    Input:
    --------------
    df1 (df): dataframe with tracks information
    df2 (df): dataframe with features for each track

    Returns:
    --------------
    df (df): dataframe with tracks information and features for each track
    '''

    return pd.merge(left=df1, right=df2, how='inner', on='track_id')

In [33]:
def scaling(df):
    '''Returns a dataframe with scaled features.

    Only 'popularity' is rescaled: it is divided by 100. The input dataframe
    is not modified, so calling this twice on the same frame cannot scale
    the column twice by accident.

    Input:
    --------------
    df (df): dataframe without all features scaled (needs a 'popularity' column)

    Returns:
    --------------
    df (df): new dataframe with all features scaled
    '''

    # to_numeric turns an object-dtype column of numbers into a numeric one,
    # so the scaled column always comes out as float
    scaled_popularity = pd.to_numeric(df['popularity']) / 100

    # assign() works on a copy and keeps the original column order
    return df.assign(popularity=scaled_popularity)

In [34]:
def pipeline(playlist_id, playlist_length):
    '''Runs the whole preparation chain for one playlist

    Applies, in this order: get_tracks, clean_tracks_df, get_features,
    clean_features_df, merge and scaling.

    Input:
    --------------
    playlist_id (str): playlist id retrieved manually from spotify
    playlist_length (int): number of tracks in the playlist

    Returns:
    --------------
    df (df): dataframe with tracks information and features for each track
    '''

    # Tracks information, cleaned
    tracks = clean_tracks_df(get_tracks(playlist_id, playlist_length))
    # Audio features for the tracks that are left, cleaned
    features = clean_features_df(get_features(tracks))
    # Join both on the track id and scale
    return scaling(merge(tracks, features))

### Functions to predict and create new playlists

In [35]:
def predict(model, df):
    '''Predicts the likeableness for songs contained in a dataframe

    The predictions are returned in a new dataframe; the dataframe passed in
    is not modified.

    Input:
    --------------
    model (sklearn object): Previously trained classification model to be used for predictions
    df (df): Dataframe with tracks information and features for each track

    Returns:
    --------------
    df (df): new dataframe with an added 'like' column holding the predictions
    '''

    # Columns handed to the model, in this order
    feature_columns = ['popularity', 'acousticness', 'danceability', 'energy',
                       'instrumentalness', 'liveness', 'mode', 'speechiness', 'valence']

    # assign() adds the column on a copy instead of writing into the caller's frame
    return df.assign(like=model.predict(df[feature_columns]))

In [36]:
def _create_private_playlist(username, name, track_ids):
    '''Creates a private playlist called `name` for `username` and adds `track_ids` to it'''

    # Create playlist
    playlist = sp.user_playlist_create(user=username, name=name, public=False)
    # The last segment of the playlist uri is what is passed on as the playlist id
    playlist_id = playlist['uri'].split(':')[-1]
    # Create list of track uris
    track_uris = [f'spotify:track:{track_id}' for track_id in track_ids]
    # Add tracks to playlist, at most 100 per request
    for start in range(0, len(track_uris), 100):
        sp.user_playlist_add_tracks(username, playlist_id, track_uris[start:start + 100])


def create_playlists(df,my_username):
    '''Creates two playlists in Spotify for liked and disliked songs

    'Spotipie' receives the tracks with like == 1 and 'Spotimal' the tracks with
    like == 0. Both playlists are private and are created (in that order) through
    the module-level spotipy client `sp`, even if there are no tracks to add.

    Input:
    --------------
    df (df): Dataframe with tracks information, features and predictions
             (needs the 'track_id' and 'like' columns)
    my_username (str): The user name for which the playlist will be created
    '''

    for playlist_name, like_value in (('Spotipie', 1), ('Spotimal', 0)):
        selected_ids = df.loc[df['like'] == like_value, 'track_id']
        _create_private_playlist(my_username, playlist_name, selected_ids)

### Predictions

In [39]:
# Get dataframe for a specific playlist ready for predictions
# First argument is the playlist id, second one is playlist_length. get_tracks only
# uses playlist_length to decide how many pages of up to 100 tracks to request.

unk_df = pipeline('3exlzRegV9HmAaWSqlUD8I',50)

# Predict
# dtc is the model loaded from 'tree_model.sav'; the result carries a 'like' column

unk_df = predict(dtc, unk_df)

# Create playlists from predictions
# Creates the 'Spotipie' (like == 1) and 'Spotimal' (like == 0) playlists for my_username

create_playlists(unk_df, my_username)