In [1]:
# import requests
from __future__ import print_function    # (at top of module)
import time
import sys
import os
import pandas as pd
import numpy as np
import pickle

PROJ_ROOT = os.path.join(os.getcwd(), os.pardir)

%load_ext watermark
%watermark -a "Thomas Turner" -d -t -v -p numpy,pandas -g

Thomas Turner 07/31/2016 11:03:05 

CPython 3.5.2
IPython 5.0.0

numpy 1.11.1
pandas 0.18.1
Git hash: 23ee99f8aa682e9b4da872442de9df32b7a22b7a


In [2]:
# load environment variables from .env file using dotenv.
from dotenv import load_dotenv
dotenv_path = os.path.join(PROJ_ROOT, '.env')
load_dotenv(dotenv_path)

# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

# add the 'src' directory as one where we can import modules
src_dir = os.path.join(PROJ_ROOT, 'src')
sys.path.append(src_dir)

# import my methods from the source code
%aimport data.spotipy_functions
from data.spotipy_functions import *

In [155]:
# run this to generate a fresh 100 track recomendations from a seed
init_seed = ['5Tfgr1akxH3ioDz1ugiVLW','7fwSIxrSHymHbUtItEmMJv','3wZ5PrSdb3QNgBVYb5udiQ']
trackset = get_new_recs_and_feats(init_seed,100)
trackset = band_BPMs(trackset,80,170)

# run this to SAVE trackset to file
# trackset.to_pickle(os.path.join(PROJ_ROOT,'data','interim','recommends_data_proto1.pkl'))

# run this to LOAD trackset from file
# trackset = pd.read_pickle(os.path.join(PROJ_ROOT,'data','interim','recommends_data_proto1.pkl'))


In [82]:
training_set = get_tracks_details(init_seed).merge(get_features_for_tracks(init_seed), on='id')
training_set['status'] = np.ones(len(init_seed))
training_set['weight'] = np.ones(len(init_seed))
training_set['P_accept'] = np.zeros(len(init_seed))

In [83]:
gui_cols = ['artist_name','track_name','tempo','uri','status','P_accept']
training_set[gui_cols]

Unnamed: 0,artist_name,track_name,tempo,uri,status,P_accept
0,Daphni,Ye Ye,129.018,spotify:track:5Tfgr1akxH3ioDz1ugiVLW,1.0,0.0
1,Frits Wentink,"Rising Sun, Falling Coconut",124.128,spotify:track:7fwSIxrSHymHbUtItEmMJv,1.0,0.0
2,Bodhi,Brawd,124.003,spotify:track:3wZ5PrSdb3QNgBVYb5udiQ,1.0,0.0


In [134]:
# ONLY RUN THIS TO ACCEPT/REJECT A TRACK

id_to_process = 84
trackset.loc[id_to_process,'status'] = 1
training_set.loc[len(training_set)+1] = trackset.loc[id_to_process]
trackset = trackset.drop([id_to_process])


In [135]:
training_set[gui_cols]

Unnamed: 0,artist_name,track_name,tempo,uri,status,P_accept
0,Daphni,Ye Ye,129.018,spotify:track:5Tfgr1akxH3ioDz1ugiVLW,1.0,0.0
1,Frits Wentink,"Rising Sun, Falling Coconut",124.128,spotify:track:7fwSIxrSHymHbUtItEmMJv,1.0,0.0
2,Bodhi,Brawd,124.003,spotify:track:3wZ5PrSdb3QNgBVYb5udiQ,1.0,0.0
4,Brandt Brauer Frick,Bop,119.996,spotify:track:7IIStj2J5RwY15wellEs9O,-1.0,1.0
5,Maya Jane Coles,Protect Them - Bonus Track,122.003,spotify:track:1O6KUE9O09eczRQvYzhl4O,-1.0,1.0
6,Isolée,Paloma Triste,111.909,spotify:track:4atM2g2NaTWrn0cswBhSyj,-1.0,1.0
7,Maya Jane Coles,Blame,110.027,spotify:track:2EG4aHhnfbK7T0YtiIQ2Zb,-1.0,1.0
8,Motor City Drum Ensemble,Feel the Love,115.006,spotify:track:5VM11vVe6Ib8YIYZg7NKuM,1.0,1.0
9,Fort Romeau,Say Something,125.04,spotify:track:1QtOnUVFzMD7g4E9tjofTw,-1.0,1.0
10,Jimmy Edgar,Hrt Real Good,108.053,spotify:track:0VDErq8M39gjVh013GEItl,-1.0,1.0


In [156]:
# np.set_printoptions(precision=4)
# np.set_printoptions(suppress=True)

useful_features = ['acousticness','danceability','instrumentalness','energy','speechiness','tempo','valence']
X = training_set[useful_features]
Y = training_set.status
w = training_set.weight

In [157]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y, sample_weight=w)

GaussianNB()

In [158]:
pd.options.display.float_format = '{:,.3f}'.format
predicts = pd.DataFrame(clf.predict_proba(trackset[useful_features]))
predicts.columns = ['P_reject','P_accept']
trackset.P_accept = predicts['P_accept']
trackset = trackset.sort_values(by=['P_accept'], ascending=False)
trackset[gui_cols]

Unnamed: 0,artist_name,track_name,tempo,uri,status,P_accept
31,Fort Romeau,IKB,134.994,spotify:track:2tVs6akVfFJFUm0MEFvCVH,0.000,1.000
18,Fort Romeau,Nights Bridge,125.998,spotify:track:3IfHaF1kje7F1ANEXDQdqw,0.000,1.000
13,Scuba,Minerals,136.992,spotify:track:7sj1qZp33UVyc8fVROa5ns,0.000,0.986
28,Brandt Brauer Frick,Caffeine,111.985,spotify:track:6ms26kXKrgrUoFp5Mpcw1h,0.000,0.985
76,Daniel Avery,Free Floating,121.969,spotify:track:0FbA1IhV7pzUPFiexkARyd,0.000,0.981
92,Fort Romeau,Insides,116.968,spotify:track:34KD9rEJujgvmNLQblQAjw,0.000,0.975
55,Maya Jane Coles,What They Say,125.012,spotify:track:7HlvFIJDmcQRRn45aRd46t,0.000,0.973
27,Daniel Avery,Flashlights - Original Mix,115.022,spotify:track:77mLxqm1lFnBWWfoybyHj6,0.000,0.973
70,Brandt Brauer Frick,Iron Man,124.000,spotify:track:7v7PGRp84YqsAaxx4DGelQ,0.000,0.973
69,Fort Romeau,I Knew,121.754,spotify:track:0JWipkRSb9TTyH8IFjGtsL,0.000,0.973


In [148]:
import seaborn as sb
#sb.pairplot(trackset[useful_features + ['P_accept']].dropna())
sb.pairplot(training_set[useful_features + ['status']].dropna(), hue='status')
sb.plt.show()