# Modeling - Sagemaker
This notebook is Iteration 5 of Model 1, set up to run on AWS SageMaker.  
It takes the pickled extracted features, the dictionary of descriptions, and the train, test, and validation lists from Model 1.  
*To-do: For the full deployment, the feature extraction portion should be migrated to AWS.*


In [144]:
# Reload the autoreload extension and re-import edited modules (mode 2: all
# modules) before each cell executes, so changes under SCRIPT/ are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
import os
import sagemaker
import tensorflow as tf
from sagemaker.tensorflow import TensorFlow
import pickle
import numpy as np
from s3fs.core import S3FileSystem

import time

from SCRIPT.sequence_generator import *
from SCRIPT.evaluation_tools import *

## Setting up

In [6]:
# SageMaker session used by this notebook (also used later to delete the endpoint).
sess = sagemaker.Session()
# Execution role returned by SageMaker; passed to the TensorFlow estimators as `role`.
role = sagemaker.get_execution_role()
# Region name read from the session's underlying boto session.
region = sess.boto_session.region_name

In [52]:
# S3 bucket holding the pickled inputs; read by load_files().
bucket = 'sagemaker-descriptor'
# S3 URI of the PKL prefix in that bucket; used as the training input location.
pkl_dir = f's3://{bucket}/PKL'

## Loading files into sagemaker
This step is not necessary for TensorFlow. Its only purpose is to derive other parameters from our data for review.

In [76]:
def load_files(fname):
    """Load one pickled object from the ``PKL`` prefix of the S3 bucket.

    Parameters
    ----------
    fname : str
        File name under ``{bucket}/PKL/``, e.g. ``'full_features.pkl'``.

    Returns
    -------
    object
        The object stored in the pickle file.

    Notes
    -----
    Reads the module-level ``bucket`` name, so the configuration cell must
    have been run first.
    """
    s3_file = S3FileSystem()
    # The context manager closes the S3 file handle even if unpickling raises;
    # the previous version left the handle open.
    with s3_file.open('{}/PKL/{}'.format(bucket, fname), mode='rb') as pkl_file:
        # SECURITY: pickle.load can execute arbitrary code. Only use this on
        # objects written by this project to a bucket you control.
        return pickle.load(pkl_file)

In [84]:
#os.mkdir('data')

In [86]:
# Fetch every pickled artifact from S3 in a single pass. map() yields results
# in the same order as the file names, so each object is bound to the name in
# the matching position on the left-hand side.
(descriptions,
 features,
 train_list_full,
 val_list_full,
 test_list,
 test_list_art) = map(load_files, (
    'full_descriptions.pkl',
    'full_features.pkl',
    'train_list_full.pkl',
    'val_list_full.pkl',
    'test_list.pkl',
    'test_list_art.pkl',
))

In [134]:
# Build the project's sequence generator from the loaded descriptions and features.
processor = sequence_generator(descriptions, features)

# Produce the three outputs for the training list and for the validation list.
# NOTE(review): presumably X1 = image features, X2 = input token sequences,
# Y = next-word targets; confirm against SCRIPT/sequence_generator.py.
train_X1, train_X2, train_Y = processor.train_generator(train_list_full)
val_X1, val_X2, val_Y = processor.validation_generator(val_list_full)

# get params
# Read back the tokenizer, maximum length and vocabulary size from the processor for review.
# NOTE(review): these getters are called after train_generator(); the order may
# matter if the tokenizer is fitted there. Confirm before reordering.
tokenizer = processor.get_tokenizer()
max_length = processor.get_max_length()
num_vocab = processor.get_num_vocab()

Gracefully stopping... (press Ctrl+C again to force)


## Local Mode
Run it in local mode first to check the performance.

In [123]:
# Training hyperparameters; both are forwarded to the training script through
# the estimator's `hyperparameters` argument.
batch_size, epochs = 32, 10

In [124]:
# Local-mode estimator: train_instance_type='local' selects SageMaker local
# mode, so the script can be checked before launching a GPU training job.
# NOTE(review): the GPU cell further down passes 'batch-size' (hyphen) and
# framework_version '1.15.2'; confirm which spelling and version the entry
# point script expects.
tf_estimator = TensorFlow(
    entry_point='SCRIPT/nlg_model_tf_1.py',
    role=role,
    train_instance_count=1,
    train_instance_type='local',
    py_version='py37',
    framework_version='2.2',
    script_mode=True,
    hyperparameters={'epochs': epochs, 'batch_size': batch_size},
)

In [None]:
# Training input channel pointing at the PKL prefix on S3 (pkl_dir).
train_channel = sagemaker.session.s3_input(pkl_dir)

In [None]:
# Start training on the S3 channel. When cells are run in order, tf_estimator
# here is the local-mode estimator defined just above.
tf_estimator.fit(train_channel)

## GPU Mode

In [None]:
# GPU estimator: same entry point as the local-mode cell, but trained on an
# ml.p2.xlarge instance. Rebinding tf_estimator replaces the local-mode one.
tf_estimator = TensorFlow(entry_point = 'SCRIPT/nlg_model_tf_1.py', 
                         role = role, 
                         train_instance_count = 1, 
                         train_instance_type = 'ml.p2.xlarge',
                         py_version = 'py37', 
                         # NOTE(review): the local-mode cell uses framework_version
                         # '2.2'. Training on 1.15.2 here means the local check ran
                         # on a different TensorFlow major version; confirm which
                         # version the script targets.
                         framework_version = '1.15.2',
                         script_mode = True, 
                         # NOTE(review): this key is 'batch-size' (hyphen) but the
                         # local-mode cell passes 'batch_size' (underscore). Only one
                         # spelling can match the script's argument definition;
                         # confirm against SCRIPT/nlg_model_tf_1.py and align both cells.
                         hyperparameters = {'epochs': epochs, 
                                           'batch-size': batch_size}
                         )

In [None]:
# Start training on the S3 channel. When cells are run in order, tf_estimator
# here is the ml.p2.xlarge estimator defined just above.
tf_estimator.fit(train_channel)

## Deploy

In [None]:
# Endpoint name with a UTC timestamp suffix (time.gmtime()), so each
# deployment gets a distinct name.
tf_endpoint = 'nlg-model-tf-'+time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())

# GPU
# Deploy the most recently assigned tf_estimator to a single ml.p2.xlarge
# instance under the endpoint name built above.
tf_predictor = tf_estimator.deploy(initial_instance_count = 1, 
                                  instance_type = 'ml.p2.xlarge', 
                                  endpoint_name = tf_endpoint)

## Delete Endpoint

In [None]:
# Delete the endpoint named tf_endpoint (set in the Deploy section) once it is
# no longer needed.
sess.delete_endpoint(endpoint_name = tf_endpoint)