In [9]:
import pandas as pd
import sagemaker
from sagemaker.amazon.knn import KNN
from sagemaker.predictor import csv_serializer, json_deserializer

# Execution role obtained from the SageMaker SDK; passed to the KNN estimator as `role`.
role = sagemaker.get_execution_role()

In [10]:
# S3 bucket that holds the training CSV (read from its 'train_data/' prefix).
BUCKET = 'ioccino-data'
# Object name of the training CSV inside that prefix.
FILENAME = 'mturk-coffee-flavors-2019-06-16_22:01:18.780587.csv'

In [11]:
# Read the training CSV straight from S3 and drop the 'Unnamed: 0' column
# (presumably an index that was written out with the file -- TODO confirm).
train_csv_uri = 's3://' + BUCKET + '/train_data/' + FILENAME
df = pd.read_csv(train_csv_uri).drop(columns=['Unnamed: 0'])

In [12]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0,Answer.age,Gender,Mood,Temperature,Target
0,0.067406,0.0,0.0,0.028396,1
1,0.035767,0.0,0.0,0.028396,1
2,0.023386,0.0,0.064897,0.01893,2


In [13]:
# Split the label column off the feature frame: `pop` hands back the 'Target'
# Series and removes it from `df`, so `df` keeps only the feature columns.
# NOTE: this mutates `df` in place -- re-run the load cell before re-running this one.
df_target = df.pop('Target')

In [14]:
# S3 location handed to the estimator as `output_path`.
# Note: this is the 'ioccino-train' bucket, not the BUCKET the training CSV is read from.
output_path = 's3://ioccino-train/output/output'
# Unused: explicit S3 location for the training data, left disabled.
#s3_train_data = 's3://{}/train/{}'.format(BUCKET, 'knn')

In [15]:
# Configure the k-NN training job.
# Algorithm hyperparameters are grouped separately from the infrastructure
# settings (role, instance count/type, output location).
knn_hyperparameters = {
    'k': 49,
    'sample_size': 10000,
    'predictor_type': 'classifier',
    'index_type': 'faiss.Flat',
    'index_metric': 'INNER_PRODUCT',
}
knn_estimator = KNN(
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m5.2xlarge',
    output_path=output_path,
    **knn_hyperparameters
)

In [16]:
# Wrap the feature matrix and label vector as the 'train' channel record set,
# then launch training with that single channel.
train_records = knn_estimator.record_set(df.values, df_target.values, channel='train')
knn_estimator.fit([train_records])

2019-06-16 22:38:45 Starting - Starting the training job...
2019-06-16 22:38:47 Starting - Launching requested ML instances......
2019-06-16 22:39:53 Starting - Preparing the instances for training...
2019-06-16 22:40:39 Downloading - Downloading input data...
2019-06-16 22:41:04 Training - Downloading the training image.
[31mDocker entrypoint called with argument(s): train[0m
[31m[06/16/2019 22:41:20 INFO 139887857596224] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-conf.json: {u'index_metric': u'L2', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'_log_level': u'info', u'faiss_index_ivf_nlists': u'auto', u'epochs': u'1', u'index_type': u'faiss.Flat', u'_faiss_index_nprobe': u'5', u'_kvstore': u'dist_async', u'_num_kv_servers': u'1', u'mini_batch_size': u'5000'}[0m
[31m[06/16/2019 22:41:20 INFO 139887857596224] Reading provided configuration from /opt/ml/input/config/hyperparameters.json: {u'index_metric': u'INN

In [17]:
# Stand up an endpoint for the fitted estimator on a single ml.t2.medium instance.
# NOTE(review): no cell in this notebook tears the endpoint down -- confirm it is
# deleted elsewhere once it is no longer needed.
knn_predictor = knn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium'
)

---------------------------------------------------------------------------------------------------------------------------!

In [51]:
# Configure how the predictor talks to the endpoint: requests are sent as CSV
# and responses are parsed from JSON. `csv_serializer` and `json_deserializer`
# come from the notebook's top import cell rather than being imported here, so
# every dependency is visible in one place.
knn_predictor.content_type = 'text/csv'
knn_predictor.serializer = csv_serializer
knn_predictor.deserializer = json_deserializer
# Ask for the verbose response format so the reply also includes the actual neighbors.
knn_predictor.accept = 'application/json; verbose=true'

In [78]:
# Query the endpoint with one sample of four values (one per feature column).
# NOTE(review): the training frame previewed earlier holds small fractional
# values, while this sample uses raw integers -- confirm the same preprocessing
# is applied before prediction.
sample_features = [2, 1, 0, 1]
result = knn_predictor.predict(sample_features)

In [79]:
# Collect the predicted label of every entry under 'predictions' in the reply.
response = [prediction['predicted_label'] for prediction in result['predictions']]

In [80]:
# Display the collected labels.
response

[1.0]