In [25]:
import sagemaker
from sagemaker import get_execution_role
import json
import boto3

sess = sagemaker.Session()
role = get_execution_role()

bucket = "mastering-ml-aws"

prefix = "chapter2/blazingtext"


In [16]:
SRC_PATH = '/home/ec2-user/SageMaker/mastering-ml-on-aws/chapter2/'

with open(SRC_PATH + 'dem.txt', 'r') as file:
    dem_text = ["__label__0 " + line.strip('\n') for line in file]

with open(SRC_PATH + 'gop.txt', 'r') as file:
    gop_text = ["__label__1 " + line.strip('\n') for line in file]
    
corpus = dem_text + gop_text
    
from sklearn.model_selection import train_test_split
corpus_train, corpus_test = train_test_split(corpus, test_size=0.25, random_state=42)    



    

In [20]:
corpus_train_txt = "\n".join(corpus_train)
corpus_test_txt = "\n".join(corpus_test)
with open('tweets.train', 'w') as file:
    file.write(corpus_train_txt)    
with open('tweets.test', 'w') as file:
    file.write(corpus_test_txt)    


In [26]:

train_path = prefix + '/train'
validation_path = prefix + '/validation'

sess.upload_data(path='tweets.train', bucket=bucket, key_prefix=train_path)
sess.upload_data(path='tweets.test', bucket=bucket, key_prefix=validation_path)

s3_train_data = 's3://{}/{}'.format(bucket, train_path)
s3_validation_data = 's3://{}/{}'.format(bucket, validation_path)

In [29]:
container = sagemaker.amazon.amazon_estimator.get_image_uri('us-east-1', "blazingtext", "latest")

s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)


In [33]:
bt_model = sagemaker.estimator.Estimator(container,
                                         role, 
                                         train_instance_count=1, 
                                         train_instance_type='ml.c4.4xlarge',
                                         train_volume_size = 30,
                                         train_max_run = 360000,
                                         input_mode= 'File',
                                         output_path=s3_output_location,
                                         sagemaker_session=sess)

In [50]:
bt_model.set_hyperparameters(mode="supervised",
                            epochs=3,
                            min_count=2,
                            learning_rate=0.05,
                            vector_dim=10,
                            early_stopping=True,
                            patience=4,
                            min_epochs=5,
                            word_ngrams=2)

train_data = sagemaker.session.s3_input(s3_train_data, distribution='FullyReplicated', 
                        content_type='text/plain', s3_data_type='S3Prefix')
validation_data = sagemaker.session.s3_input(s3_validation_data, distribution='FullyReplicated', 
                             content_type='text/plain', s3_data_type='S3Prefix')
data_channels = {'train': train_data, 'validation': validation_data}

In [51]:
bt_model.fit(inputs=data_channels, logs=True)


INFO:sagemaker:Creating training-job with name: blazingtext-2018-12-20-23-04-29-407


2018-12-20 23:04:29 Starting - Starting the training job...
2018-12-20 23:04:34 Starting - Launching requested ML instances.........
2018-12-20 23:06:22 Starting - Preparing the instances for training......
2018-12-20 23:07:29 Downloading - Downloading input data..
[31mArguments: train[0m
[31m[12/20/2018 23:07:41 INFO 139960165136192] nvidia-smi took: 0.0251619815826 secs to identify 0 gpus[0m
[31m[12/20/2018 23:07:41 INFO 139960165136192] Running single machine CPU BlazingText training using supervised mode.[0m
[31m[12/20/2018 23:07:41 INFO 139960165136192] Processing /opt/ml/input/data/train/tweets.train . File size: 0 MB[0m
[31m[12/20/2018 23:07:41 INFO 139960165136192] Processing /opt/ml/input/data/validation/tweets.test . File size: 0 MB[0m
[31mRead 0M words[0m
[31mNumber of words:  727[0m
[31mLoading validation data from /opt/ml/input/data/validation/tweets.test[0m
[31mLoaded validation data.[0m
[31m##### Alpha: -0.0014  Progress: 102.89%  Million Words/sec: 0.

In [35]:
bt_model

<sagemaker.estimator.Estimator at 0x7f2cf9a19470>

In [52]:
transformer = bt_model.transformer(instance_count=1, instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: blazingtext-2018-12-20-23-04-29-407


In [53]:
transformer.transform('s3://mastering-ml-aws/chapter2/blazingtext/train/').wait()

INFO:sagemaker:Creating transform job with name: blazingtext-2018-12-20-23-10-36-250


In [66]:
bt_model.deploy(initial_instance_count = 1,instance_type = 'ml.m4.large')


INFO:sagemaker:Creating model with name: blazingtext-2018-12-20-23-33-57-687
INFO:sagemaker:Creating endpoint with name blazingtext-2018-12-20-23-04-29-407


-----------------------------------------------------------------------------------------------------------------------------------------*

ValueError: Error hosting endpoint blazingtext-2018-12-20-23-04-29-407: Failed Reason:  The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.

In [None]:
!aws s3 ls --recursive s3://mastering-ml-aws/chapter2/blazingtext
    
    


In [62]:
predictor = sagemaker.RealTimePredictor(endpoint=bt_model.endpoint, 
                                        sagemaker_session=sess,
                                        serializer=json.dumps,
                                        deserializer=sagemaker.predictor.json_deserializer)

corpus_test_no_labels = [x[11:] for x in corpus_test]

payload = {"instances" : corpus_test_no_labels}

response = predictor.predict(json.dumps(payload))

predictions = json.loads(response)
print(json.dumps(predictions, indent=2))

AttributeError: 'Estimator' object has no attribute 'endpoint'

In [61]:
bt_model

<sagemaker.estimator.Estimator at 0x7f2cf9a19470>

In [65]:
sess.delete_endpoint('blazingtext-2018-12-20-23-04-29-407')

INFO:sagemaker:Deleting endpoint with name: blazingtext-2018-12-20-23-04-29-407
