# Training and Deploying a BlazingText model

In [2]:
import sagemaker
from sagemaker import get_execution_role
import json
import boto3
# Region of the default boto3 session; used later to look up the
# region-specific BlazingText container image.
region_name = boto3.Session().region_name

# SageMaker session handed to the Estimator, and the IAM execution role
# that the training job and endpoint will run under.
session = sagemaker.Session()
role = get_execution_role()

In [3]:
# Restore variables that were persisted with `%store` in an earlier session
# (presumably by a previous notebook in this series -- confirm). Both names
# are required below to build the S3 input and output locations.
%store -r s3_bucket
%store -r prefix

In [4]:
# S3 locations used by the training job, all rooted at s3://<bucket>/<prefix>:
#   input/synthetic.train.txt       -> training channel
#   input/synthetic.validation.txt  -> validation channel
#   output                          -> passed to the Estimator as output_path
s3_train_data = f's3://{s3_bucket}/{prefix}/input/synthetic.train.txt'
s3_validation_data = f's3://{s3_bucket}/{prefix}/input/synthetic.validation.txt'
s3_output_location = f's3://{s3_bucket}/{prefix}/output'

In [5]:
from sagemaker.image_uris import retrieve

# Look up the BlazingText container image URI (version "1") for the
# region this notebook is running in.
container = retrieve(framework="blazingtext", region=region_name, version="1")

In [6]:
# Generic Estimator wrapping the BlazingText container.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=session,
    # Training hardware: a single ml.c4.xlarge instance.
    instance_count=1,
    instance_type='ml.c4.xlarge',
    # Data handling: 'File' input mode, results written to the output prefix.
    input_mode='File',
    output_path=s3_output_location,
)

In [7]:
# "supervised" selects BlazingText's supervised (labelled text) mode.
# NOTE(review): min_count=2 presumably drops words seen fewer than two
# times from the vocabulary -- confirm against the BlazingText docs.
estimator.set_hyperparameters(mode="supervised", min_count=2)

In [8]:
from sagemaker.inputs import TrainingInput

# Both channels are plain-text objects addressed by S3 prefix and fully
# replicated; only the S3 location differs between them.
train_data = TrainingInput(
    s3_data=s3_train_data,
    s3_data_type='S3Prefix',
    content_type='text/plain',
    distribution='FullyReplicated',
)

validation_data = TrainingInput(
    s3_data=s3_validation_data,
    s3_data_type='S3Prefix',
    content_type='text/plain',
    distribution='FullyReplicated',
)

In [9]:
# Map channel names to their inputs; this dict is what estimator.fit() receives.
data_channels = dict(train=train_data, validation=validation_data)

In [10]:
%%time
estimator.fit(
    inputs=data_channels, 
    logs=True
)

2022-03-25 00:58:40 Starting - Starting the training job...ProfilerReport-1648169920: InProgress
...
2022-03-25 00:59:27 Starting - Preparing the instances for training......
2022-03-25 01:00:27 Downloading - Downloading input data...
2022-03-25 01:01:08 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[03/25/2022 01:01:11 INFO 140120604190528] nvidia-smi took: 0.025195837020874023 secs to identify 0 gpus[0m
[34m[03/25/2022 01:01:11 INFO 140120604190528] Running single machine CPU BlazingText training using supervised mode.[0m
[34mNumber of CPU sockets found in instance is  1[0m
[34m[03/25/2022 01:01:11 INFO 140120604190528] Processing /opt/ml/input/data/train/synthetic.train.txt . File size: 0.14569377899169922 MB[0m
[34m[03/25/2022 01:01:11 INFO 140120604190528] Processing /opt/ml/input/data/validation/synthetic.validation.txt . File size: 0.0477447509765625 MB[0m
[34mRead 0M words[0m
[34mNumber of words:  69[0m
[34m####

In [11]:
# Deploy the trained model to an inference endpoint backed by a single
# ml.r5.large instance. The endpoint is deleted in the last cell.
endpoint = estimator.deploy(initial_instance_count=1, instance_type='ml.r5.large')

------!

In [12]:
# Sample sentences to classify with the deployed model.
sentences = [
    "that is bad",
    "the apple tastes good",
    "i would recommend it to my friends",
]

# Request body: the sentences wrapped under the "instances" key.
payload = dict(instances=sentences)

In [13]:
from sagemaker.serializers import JSONSerializer

# Serialize the request payload as JSON before it is sent to the endpoint.
endpoint.serializer = JSONSerializer()
response = endpoint.predict(payload)
# Parse the endpoint's reply as JSON; the recorded output shows one
# {"label": [...], "prob": [...]} entry per input sentence.
predictions = json.loads(response)

# Pretty-print the parsed predictions for readability.
print(json.dumps(predictions, indent=2))

[
  {
    "label": [
      "__label__negative"
    ],
    "prob": [
      0.8499948382377625
    ]
  },
  {
    "label": [
      "__label__positive"
    ],
    "prob": [
      0.8464159369468689
    ]
  },
  {
    "label": [
      "__label__positive"
    ],
    "prob": [
      0.6532424092292786
    ]
  }
]


In [14]:
# Name of the most recent training job launched by this estimator.
tn = estimator.latest_training_job.name
training_job_name = tn
# Persist the job name with %store so that it can be restored in another
# session via `%store -r training_job_name`.
%store training_job_name

Stored 'training_job_name' (str)


In [15]:
# Clean up: delete the inference endpoint created by estimator.deploy().
endpoint.delete_endpoint()