## Create Model

In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors


# Load the Spotify audio-features dataset.
df = pd.read_csv('SpotifyAudioFeaturesApril2019.csv')

# The first 3 columns identify a track (artist, song_id, song name);
# every remaining column is a candidate feature.
target, features = df.columns[:3], df.columns[3:]
y = df[target]

# Potentially unnecessary columns, left out of the feature matrix.
drop_cols = ['duration_ms', 'key', 'mode', 'time_signature', 'popularity', 'tempo']
X = df[features].drop(columns=drop_cols)

# Fit a min-max scaler on the feature matrix.
# NOTE(review): the array returned by fit_transform is not kept, so X is
# still unscaled when the model is fit below. Any change here must be
# mirrored wherever the model is queried — confirm the intent.
scaler = MinMaxScaler()
scaler.fit_transform(X)

# Nearest-neighbours index (10 neighbours, KD-tree) over the feature matrix.
model = NearestNeighbors(algorithm='kd_tree', n_neighbors=10)
model.fit(X)

NearestNeighbors(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                 radius=1.0)

## Test Flask in Production

In [185]:
import requests
import json

In [6]:
import json
# One sample track, given as a JSON document.
data = """
{"artist_name":"YG","track_id":"2RM4jf1Xa9zPgMGRDiht8O",
"track_name":"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj",
"acousticness":0.00582,"danceability":0.743,"duration_ms":238373,
"energy":0.339,"instrumentalness":0.0,"key":1,"liveness":0.0812,
"loudness":-7.678,"mode":1,"speechiness":0.409,"tempo":203.927,
"time_signature":4,"valence":0.118,"popularity":15}
"""

# Keys removed from the record before it is used as a query:
# the three identifier fields plus six audio/metadata fields.
labels = [
    'artist_name', 'track_id', 'track_name',
    'duration_ms', 'key', 'mode', 'tempo', 'time_signature', 'popularity',
]

# Parse the JSON into a Series and drop the listed keys.
track_record = pd.Series(json.loads(data))
predictor = track_record.drop(labels=labels)

In [7]:
# Display the Series left after dropping the labelled keys.
predictor

acousticness        0.00582
danceability          0.743
energy                0.339
instrumentalness          0
liveness             0.0812
loudness             -7.678
speechiness           0.409
valence               0.118
dtype: object

In [8]:
# kneighbors returns a (distances, indices) pair; keep the index row that
# belongs to the single query passed in.
_distances, neighbor_rows = model.kneighbors([predictor])
recommendations = neighbor_rows[0]

In [9]:
# Text rendering of the first two recommended rows of `y`.
str(y.iloc[recommendations].head(2))

'       artist_name                track_id  \\\n0               YG  2RM4jf1Xa9zPgMGRDiht8O   \n123910          YG  0ZNrc4kNeQYD9koZ3KvCsy   \n\n                                              track_name  \n0         Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj  \n123910  BIG BANK (feat. 2 Chainz, Big Sean, Nicki Minaj)  '

In [183]:
# End-to-end check in one cell: parse a sample track, drop the keys that are
# not model features, query the model, and show the matching rows of `y`.
data = """
{"artist_name":"YG","track_id":"2RM4jf1Xa9zPgMGRDiht8O",
"track_name":"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj",
"acousticness":0.00582,"danceability":0.743,"duration_ms":238373,
"energy":0.339,"instrumentalness":0.0,"key":1,"liveness":0.0812,
"loudness":-7.678,"mode":1,"speechiness":0.409,"tempo":203.927,
"time_signature":4,"valence":0.118,"popularity":15}
"""
labels = ['artist_name', 'track_id', 'track_name', 'duration_ms', 'key', 'mode', 'tempo', 'time_signature', 'popularity']
# Use the `labels` list defined on the line above; the previous name `labs`
# is not defined anywhere in this notebook and fails on a fresh kernel.
predictor = pd.Series(json.loads(data)).drop(labels=labels)
# kneighbors(...)[1] is the neighbour-index array; [0] selects the row for
# the single query.
recommendations = model.kneighbors([predictor])[1][0]
y.iloc[recommendations]

[acousticness        0.00582
 danceability          0.743
 energy                0.339
 instrumentalness          0
 liveness             0.0812
 loudness             -7.678
 speechiness           0.409
 valence               0.118
 dtype: object]

In [10]:
# Address of the app when it is served locally.
url = 'http://127.0.0.1:5000'

# Sample track used as the test request, written as a JSON document.
track_json = """
{"artist_name":"YG","track_id":"2RM4jf1Xa9zPgMGRDiht8O",
"track_name":"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj",
"acousticness":0.00582,"danceability":0.743,"duration_ms":238373,
"energy":0.339,"instrumentalness":0.0,"key":1,"liveness":0.0812,
"loudness":-7.678,"mode":1,"speechiness":0.409,"tempo":203.927,
"time_signature":4,"valence":0.118,"popularity":15}
"""

# NOTE(review): json.dumps is applied to text that is already JSON, so the
# payload is a JSON string literal wrapping the document (hence the escaped
# quotes in the output). Presumably the endpoint decodes it twice — verify
# against the app before changing this.
data = json.dumps(track_json)
data

'"\\n{\\"artist_name\\":\\"YG\\",\\"track_id\\":\\"2RM4jf1Xa9zPgMGRDiht8O\\",\\n\\"track_name\\":\\"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj\\",\\n\\"acousticness\\":0.00582,\\"danceability\\":0.743,\\"duration_ms\\":238373,\\n\\"energy\\":0.339,\\"instrumentalness\\":0.0,\\"key\\":1,\\"liveness\\":0.0812,\\n\\"loudness\\":-7.678,\\"mode\\":1,\\"speechiness\\":0.409,\\"tempo\\":203.927,\\n\\"time_signature\\":4,\\"valence\\":0.118,\\"popularity\\":15}\\n"'

In [12]:
import requests 
# POST the encoded payload to the local app and show the response status.
r_survey = requests.post(url, data=data)
print(r_survey)

<Response [200]>


In [13]:
# Send the same payload to the local app again, keeping the response under
# the name `send_request`.
send_request = requests.post(url, data=data)
print(send_request)

<Response [200]>


In [20]:
# Decode the JSON body of the local response and print it.
local_result = send_request.json()
print(local_result)

{'results': {'results': '       artist_name                track_id                                        track_name\n0               YG  2RM4jf1Xa9zPgMGRDiht8O    Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj\n123910          YG  0ZNrc4kNeQYD9koZ3KvCsy  BIG BANK (feat. 2 Chainz, Big Sean, Nicki Minaj)'}}


## Test App on Heroku

In [16]:
# Address of the deployed Heroku app (change to your app name).
heroku_url = 'https://spotify-flask-model.herokuapp.com/'

# Sample track used as the test request, written as a JSON document.
track_json = """
{"artist_name":"YG","track_id":"2RM4jf1Xa9zPgMGRDiht8O",
"track_name":"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj",
"acousticness":0.00582,"danceability":0.743,"duration_ms":238373,
"energy":0.339,"instrumentalness":0.0,"key":1,"liveness":0.0812,
"loudness":-7.678,"mode":1,"speechiness":0.409,"tempo":203.927,
"time_signature":4,"valence":0.118,"popularity":15}
"""

# NOTE(review): json.dumps is applied to text that is already JSON, so the
# payload is a JSON string literal wrapping the document (hence the escaped
# quotes in the output). Presumably the endpoint decodes it twice — verify
# against the app before changing this.
data = json.dumps(track_json)
data

'"\\n{\\"artist_name\\":\\"YG\\",\\"track_id\\":\\"2RM4jf1Xa9zPgMGRDiht8O\\",\\n\\"track_name\\":\\"Big Bank feat. 2 Chainz, Big Sean,Nicki Minaj\\",\\n\\"acousticness\\":0.00582,\\"danceability\\":0.743,\\"duration_ms\\":238373,\\n\\"energy\\":0.339,\\"instrumentalness\\":0.0,\\"key\\":1,\\"liveness\\":0.0812,\\n\\"loudness\\":-7.678,\\"mode\\":1,\\"speechiness\\":0.409,\\"tempo\\":203.927,\\n\\"time_signature\\":4,\\"valence\\":0.118,\\"popularity\\":15}\\n"'

In [17]:
# POST the encoded payload to the Heroku app and show the response status.
r_survey = requests.post(heroku_url, data=data)
print(r_survey)

<Response [200]>


In [19]:
# get prediction from the Heroku deployment: `r_survey` holds the response
# to the POST against `heroku_url`, whereas `send_request` is the earlier
# response from the local server.
print(r_survey.json())

{'results': {'results': '       artist_name                track_id                                        track_name\n0               YG  2RM4jf1Xa9zPgMGRDiht8O    Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj\n123910          YG  0ZNrc4kNeQYD9koZ3KvCsy  BIG BANK (feat. 2 Chainz, Big Sean, Nicki Minaj)'}}
