In [1]:
import boto3
import re
from sagemaker import get_execution_role
from io import StringIO

# Execution role obtained through the SageMaker SDK; it is handed to the
# PyTorch estimator as `role=` when the training job is configured.
role = get_execution_role()

In [2]:
import torch
import pandas as pd
import numpy as np
import io
import os
import sys
import time
import json
import sagemaker
from sagemaker.pytorch import PyTorch

# Check dataset.

In [12]:
# Local folders referenced by this notebook.
# dataset_dir holds the review/sentence CSV files and the embedding array.
dataset_dir = './dataset'
# NOTE(review): presumably the folder with entry_point.py (the estimator is
# created with source_dir='source_dir') — confirm; unused in the visible cells.
model_dir = './source_dir'

In [13]:
# Load the three dataset artifacts from dataset_dir for inspection:
# the review table, the sentence table and the embedding array.
review_csv = os.path.join(dataset_dir, '10000_review.csv')
sentence_csv = os.path.join(dataset_dir, '10000_sentence.csv')
embedding_npy = os.path.join(dataset_dir, '10000_embedding.npy')

reviews = pd.read_csv(review_csv)
sentences = pd.read_csv(sentence_csv)
embeddings = np.load(embedding_npy)

In [16]:
# Display the first review row as a quick check of the table's columns.
reviews.head(1)

Unnamed: 0,review_id,product_id,product_title,star_rating,review_headline,review_body
0,RDIJS7QYB6XNR,B00EDBY7X8,Monopoly Junior Board Game,5.0,Five Stars,Excellent!!!


In [17]:
# Display the first sentence row; it carries the review_id it was taken from.
sentences.head(1)

Unnamed: 0,review_id,sentence
0,RDIJS7QYB6XNR,excellent!!


# Training

In [4]:
# Copy the local dataset folder to S3 under the 'data' key prefix and report
# the resulting S3 location.
sess = sagemaker.Session()
s3_train_data = sess.upload_data(
    path=dataset_dir,
    key_prefix='data',
)
print(f"Training data is uploaded to {s3_train_data}")

# The uploaded data is passed to the training job as its 'train' channel.
data_channels = {'train': s3_train_data}

Training data is uploaded to s3://sagemaker-us-west-2-010942746803/data


In [5]:
# Describe the training job: entry_point.py taken from source_dir, shipped
# together with the local 'search' dependency, on one ml.m4.xlarge instance
# using framework version 1.3.1.
estimator_settings = dict(
    entry_point='entry_point.py',
    source_dir='source_dir',
    dependencies=['search'],
    role=role,
    framework_version='1.3.1',
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
)
estimator = PyTorch(**estimator_settings)

In [6]:
# Start the training job on the uploaded data channels; the job's log is
# printed below the cell as it runs.
estimator.fit(data_channels)

2020-01-24 15:41:37 Starting - Starting the training job...
2020-01-24 15:41:39 Starting - Launching requested ML instances......
2020-01-24 15:42:40 Starting - Preparing the instances for training......
2020-01-24 15:43:47 Downloading - Downloading input data...
2020-01-24 15:44:11 Training - Downloading the training image..
bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2020-01-24 15:44:49,416 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training
2020-01-24 15:44:49,420 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
2020-01-24 15:44:49,435 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2020-01-24 15:44:49,436 sagemaker_pytorch_container.training INFO     Invoking user training script.

2020-01-24 15:44:48 Training - Training image download completed. Training in progress.

# Inference

In [7]:
# Host the trained model behind an endpoint backed by one ml.m4.xlarge
# instance; `predictor` is the client used for the requests below.
predictor = estimator.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1)

-------------------!

In [8]:
from sagemaker.predictor import json_serializer, json_deserializer

# Exchange JSON with the endpoint: requests are declared as application/json
# and encoded with json_serializer, responses are decoded with
# json_deserializer.
predictor.content_type = 'application/json'
predictor.serializer = json_serializer
predictor.deserializer = json_deserializer

In [9]:
# Send a single free-text query to the endpoint and keep the decoded reply.
request = {'query': 'it still looks brand new too'}
response = predictor.predict(request)

In [10]:
# Print each returned hit as "- field" followed by its value, with a dashed
# rule in front of every hit.
rule = '-' * 10
for hit in response.values():
    print(rule)
    for field, content in hit.items():
        print(f'- {field}')
        print(f'{content}')

----------
- review_id
R1V5I8W64XQA8R
- product_id
B00388C3C4
- product_title
Fisher-Price Laugh & Learn Learning Kitchen Activity Center
- star_rating
5.0
- review_headline
One of our most beloved toys, even 1.5 yrs later
- review_body
My son received this as a gift when he was 9 months old. He played with it daily (sometimes 30 min at a time) until he was almost 2, when he graduated to a big kitchen. This was put away for a few months, but recently came out again now that his little sister is 6 months old. It's one of the few things that they can really play with together, one on each side.  My son (now 2.5) loves playing with it with his sis (and she of course loves it too). We have a lot of toys, but this one stands apart as versatile, fun, and extremely long lived! It still looks brand new too.
- product_search_score
0.9975305795669556
----------
- review_id
R2UIBI7R96HXAP
- product_id
B0007IG43W
- product_title
Monsties Thly HP
- star_rating
5.0
- review_headline
For those saying

In [11]:
# Delete the endpoint created by estimator.deploy() now that inference is done.
predictor.delete_endpoint()