In [113]:
import numpy as np
import pandas as pd
import pickle
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline

# Raise the column display limit so wide frames (30+ columns further down) are not elided.
pd.set_option('display.max_columns', 500)
from IPython.core.interactiveshell import InteractiveShell
# Echo every top-level expression in a cell, not only the last one.
# Cells that end in two bare expressions (e.g. the predictions peek) rely on this.
InteractiveShell.ast_node_interactivity = "all"

In [87]:
# Base S3 prefix (trailing slash included) that file names are appended to.
s3_location = "s3://djk-ml-sagemaker/music_lyrics/"

In [88]:
# Load the previously saved model object from disk.
# The context manager guarantees the file handle is closed even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code; only load files you produced or trust.
# NOTE(review): presumably a fitted scikit-learn Pipeline (text vectorizer + XGBoost) — confirm.
with open('xgb_model.sav', 'rb') as model_file:
    xgb_model = pickle.load(model_file)



In [89]:
# Candidate songs to score; the frame carries raw lyrics plus a `cleaned_lyrics` text column.
eligible_songs = pd.read_csv(filepath_or_buffer='eligible_song_pool.csv')

In [90]:
# Preview the first five candidate songs.
eligible_songs.head(n=5)

Unnamed: 0,lyrics,song_title,artist_name,liked,cleaned_lyrics
0,\n\n[Verse 1]\nI can't remember\nThe words wer...,If You Want It,Jay Som,,remember word form mouth have find bring joy p...
1,\n\n[Verse 1]\nI'm not that kind of fool\nWho ...,Superbike,Jay Som,,kind fool need read room somebody tell fall li...
2,\n\n[Verse 1]\nPoint me to my chair\nMake me s...,Peace Out,Jay Som,,point chair sing awful song bear go hard hard ...
3,\n\n[Verse 1]\nUsed to be the one to cry\nAnd ...,Devotion,Jay Som,,cry feel emotion need path find strange devoti...
4,\n\n[Verse 1]\nI'm sinking in my bed\nWe’re le...,Nighttime Drive,Jay Som,,sink bed be leave town tomorrow memory feel nu...


In [91]:
# Per-class probabilities for every candidate song, computed from its cleaned lyrics.
predictions = xgb_model.predict_proba(eligible_songs.loc[:, 'cleaned_lyrics'])

In [111]:
# Hard class labels for the same songs (only inspected below, not used for ranking).
preds = xgb_model.predict(eligible_songs.loc[:, 'cleaned_lyrics'])

In [114]:
# Compare probabilities and hard labels for the first five songs side by side.
predictions[0:5]
preds[0:5]

array([[0.51532006, 0.4846799 ],
       [0.4579932 , 0.5420068 ],
       [0.61323786, 0.38676214],
       [0.45571375, 0.54428625],
       [0.5001302 , 0.49986985]], dtype=float32)

array([0., 1., 0., 1., 0.])

In [94]:
# Use the second probability column (index 1) as the recommendation score.
# In the peek above, rows where this column exceeds 0.5 are the rows predicted as class 1.
# NOTE(review): class 1 presumably means "liked" — confirm against the training labels.
# A vectorized column slice replaces the per-row Python loop; values are unchanged
# (the column may be stored as float32, the dtype predict_proba returned).
eligible_songs['recommendation'] = predictions[:, 1]

In [95]:
# Confirm the new `recommendation` column landed on the expected rows.
eligible_songs.head(n=5)

Unnamed: 0,lyrics,song_title,artist_name,liked,cleaned_lyrics,recommendation
0,\n\n[Verse 1]\nI can't remember\nThe words wer...,If You Want It,Jay Som,,remember word form mouth have find bring joy p...,0.48468
1,\n\n[Verse 1]\nI'm not that kind of fool\nWho ...,Superbike,Jay Som,,kind fool need read room somebody tell fall li...,0.542007
2,\n\n[Verse 1]\nPoint me to my chair\nMake me s...,Peace Out,Jay Som,,point chair sing awful song bear go hard hard ...,0.386762
3,\n\n[Verse 1]\nUsed to be the one to cry\nAnd ...,Devotion,Jay Som,,cry feel emotion need path find strange devoti...,0.544286
4,\n\n[Verse 1]\nI'm sinking in my bed\nWe’re le...,Nighttime Drive,Jay Som,,sink bed be leave town tomorrow memory feel nu...,0.49987


In [96]:
# Full music-library export (track metadata), read from the S3 prefix configured above.
all_music = pd.read_csv(filepath_or_buffer=f'{s3_location}all_music.csv')

In [97]:
# Preview the library export's metadata columns.
all_music.head(n=5)

Unnamed: 0,Name,Artist,Composer,Album,Grouping,Work,Movement Number,Movement Count,Movement Name,Genre,Size,Time,Disc Number,Disc Count,Track Number,Track Count,Year,Date Modified,Date Added,Bit Rate,Sample Rate,Volume Adjustment,Kind,Equalizer,Comments,Plays,Last Played,Skips,Last Skipped,My Rating,Location
0,Bunny,Tourist,William Phillips,Bunny - Single,,,,,,Electronica,7738301,214.0,1.0,1.0,1.0,1.0,2019.0,"9/5/19, 5:49 PM","9/5/19, 5:49 PM",256,44100.0,,Apple Music AAC audio file,,,,,,,,
1,1990,PINES,Adam Dormand & James Kenneally,1990 - Single,,,,,,Electronic,6603206,183.0,1.0,1.0,1.0,1.0,2019.0,"9/4/19, 11:11 AM","9/4/19, 11:11 AM",256,44100.0,,Apple Music AAC audio file,,,3.0,"9/4/19, 7:34 PM",,,,
2,Scarlett Groove (feat. Saint Saviour),Maribou State,Maribou State & Saint Saviour,Scarlett Groove - EP,,,,,,Electronic,9867297,282.0,1.0,1.0,1.0,4.0,2012.0,"8/27/19, 10:39 AM","8/27/19, 10:39 AM",256,44100.0,,Apple Music AAC audio file,,,4.0,"9/3/19, 8:07 PM",,,,
3,If You Want It,Jay Som,,Anak Ko,,,,,,,7745797,193.0,,,1.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:11 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/01 If ...
4,Superbike,Jay Som,,Anak Ko,,,,,,,9366424,233.0,,,2.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:14 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/02 Sup...


In [98]:
# Attach scores to library tracks by exact (artist, title) match.
# The inner join keeps only tracks present in both frames; matching is case-sensitive.
playlist_df = all_music.merge(
    eligible_songs,
    how='inner',
    left_on=['Artist', 'Name'],
    right_on=['artist_name', 'song_title'],
)

In [99]:
# Row and column count of the joined frame, as a quick check on how many rows the merge produced.
playlist_df.shape

(40997, 37)

In [100]:
# Keep only rows that actually carry a score.
has_score = playlist_df['recommendation'].notna()
playlist_df = playlist_df.loc[has_score]

In [101]:
# Preview the joined frame: library metadata followed by lyric columns and the score.
playlist_df.head(n=5)

Unnamed: 0,Name,Artist,Composer,Album,Grouping,Work,Movement Number,Movement Count,Movement Name,Genre,Size,Time,Disc Number,Disc Count,Track Number,Track Count,Year,Date Modified,Date Added,Bit Rate,Sample Rate,Volume Adjustment,Kind,Equalizer,Comments,Plays,Last Played,Skips,Last Skipped,My Rating,Location,lyrics,song_title,artist_name,liked,cleaned_lyrics,recommendation
0,If You Want It,Jay Som,,Anak Ko,,,,,,,7745797,193.0,,,1.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:11 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/01 If ...,\n\n[Verse 1]\nI can't remember\nThe words wer...,If You Want It,Jay Som,,remember word form mouth have find bring joy p...,0.48468
1,Superbike,Jay Som,,Anak Ko,,,,,,,9366424,233.0,,,2.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:14 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/02 Sup...,\n\n[Verse 1]\nI'm not that kind of fool\nWho ...,Superbike,Jay Som,,kind fool need read room somebody tell fall li...,0.542007
2,Peace Out,Jay Som,,Anak Ko,,,,,,,10301608,257.0,,,3.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:19 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/03 Pea...,\n\n[Verse 1]\nPoint me to my chair\nMake me s...,Peace Out,Jay Som,,point chair sing awful song bear go hard hard ...,0.386762
3,Devotion,Jay Som,,Anak Ko,,,,,,,8511696,212.0,,,4.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,3.0,"9/3/19, 8:22 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/04 Dev...,\n\n[Verse 1]\nUsed to be the one to cry\nAnd ...,Devotion,Jay Som,,cry feel emotion need path find strange devoti...,0.544286
4,Nighttime Drive,Jay Som,,Anak Ko,,,,,,,7779236,193.0,,,5.0,,2019.0,"8/24/19, 3:39 AM","8/24/19, 3:28 AM",320,44100.0,,MPEG audio file,,plixid.com,4.0,"9/3/19, 8:25 PM",,,,The Library/Music/Music/Jay Som/Anak Ko/05 Nig...,\n\n[Verse 1]\nI'm sinking in my bed\nWe’re le...,Nighttime Drive,Jay Som,,sink bed be leave town tomorrow memory feel nu...,0.49987


In [102]:
# Remove the columns that came from the lyrics frame; only library metadata and the score remain.
playlist_df = playlist_df.drop(
    ['lyrics', 'song_title', 'artist_name', 'liked', 'cleaned_lyrics'],
    axis='columns',
)

In [103]:
# Lower-cased helper keys used for case-insensitive de-duplication in the next step.
# The vectorized `.str.lower()` leaves missing values as NaN instead of raising
# AttributeError the way calling `.lower()` on a float NaN would.
playlist_df['name_lower'] = playlist_df['Name'].str.lower()
playlist_df['Artist_lower'] = playlist_df['Artist'].str.lower()

In [104]:
# Keep the first occurrence of each (title, artist) pair, compared case-insensitively.
is_repeat = playlist_df.duplicated(subset=['name_lower', 'Artist_lower'])
playlist_df = playlist_df.loc[~is_repeat]

In [105]:
# Histogram of scores in 20 equal-width bins, highest bin first.
(
    playlist_df['recommendation']
    .value_counts(bins=20)
    .sort_index(ascending=False)
)

(0.67, 0.69]        12
(0.649, 0.67]       67
(0.628, 0.649]     108
(0.608, 0.628]     240
(0.587, 0.608]     716
(0.567, 0.587]    3867
(0.546, 0.567]    2736
(0.526, 0.546]    3546
(0.505, 0.526]    4394
(0.484, 0.505]    4983
(0.464, 0.484]    4847
(0.443, 0.464]    4105
(0.423, 0.443]    2851
(0.402, 0.423]    1932
(0.381, 0.402]    1106
(0.361, 0.381]     640
(0.34, 0.361]      320
(0.32, 0.34]       158
(0.299, 0.32]       58
(0.277, 0.299]      20
Name: recommendation, dtype: int64

In [106]:
# The lower-cased helper keys were only needed for de-duplication; discard them.
playlist_df = playlist_df.drop(['name_lower', 'Artist_lower'], axis='columns')

In [107]:
# Rank by score, best first, and keep the top 100 tracks for the playlist.
ranked_tracks = playlist_df.sort_values(by='recommendation', ascending=False)
export_df = ranked_tracks.head(100)

In [108]:
# List the columns carried into the export frame (library metadata plus `recommendation`).
export_df.columns

Index(['Name', 'Artist', 'Composer', 'Album', 'Grouping', 'Work',
       'Movement Number', 'Movement Count', 'Movement Name', 'Genre', 'Size',
       'Time', 'Disc Number', 'Disc Count', 'Track Number', 'Track Count',
       'Year', 'Date Modified', 'Date Added', 'Bit Rate', 'Sample Rate',
       'Volume Adjustment', 'Kind', 'Equalizer', 'Comments', 'Plays',
       'Last Played', 'Skips', 'Last Skipped', 'My Rating', 'Location',
       'recommendation'],
      dtype='object')

In [110]:
# Eyeball the 20 highest-scoring tracks.
export_df.loc[:, ['Name', 'Artist', 'recommendation']].head(n=20)

Unnamed: 0,Name,Artist,recommendation
13489,Is It Worth It,Chase & Status,0.690204
13003,I Don't Know,Plastikman,0.687797
33841,What Do You Wanna Be,Dispatch,0.681355
22307,Through the Walls,RJD2,0.677297
23760,16 Years,Phantogram,0.677272
14614,Cape Cod Kwassa Kwassa,Vampire Weekend,0.67684
31125,Roseablility,Idlewild,0.676242
12756,Floorplan,Tegan And Sara,0.675556
10461,Always (Route 94 Remix),MK,0.674566
24998,World's End Rhapsody,Nujabes,0.671324


In [119]:
# Select exactly the library-export columns, in their original order, leaving out the
# model score, and take an independent copy for writing.
apple_music_playlist = export_df.loc[
    :,
    [
        'Name', 'Artist', 'Composer', 'Album', 'Grouping', 'Work',
        'Movement Number', 'Movement Count', 'Movement Name',
        'Genre', 'Size', 'Time',
        'Disc Number', 'Disc Count', 'Track Number', 'Track Count',
        'Year', 'Date Modified', 'Date Added',
        'Bit Rate', 'Sample Rate', 'Volume Adjustment',
        'Kind', 'Equalizer', 'Comments',
        'Plays', 'Last Played', 'Skips', 'Last Skipped',
        'My Rating', 'Location',
    ],
].copy()

In [120]:
# Write the playlist as a tab-separated file without the index column.
# Letting pandas write to the path directly avoids building the whole file as one
# in-memory string, and fixes the encoding to UTF-8 instead of the platform default.
# NOTE(review): the file name looks like a typo for "recommendations" and has no
# extension; it is kept unchanged in case something downstream expects this exact name.
apple_music_playlist.to_csv('recommedations_v0_3', sep='\t', index=False, encoding='utf-8')

26575