In [12]:
import sagemaker
import boto3
from sagemaker.estimator import Estimator
from sagemaker import image_uris
import json
session = sagemaker.Session()

role = sagemaker.get_execution_role()

In [10]:
session.boto_region_name

'ap-southeast-1'

In [7]:
role

'arn:aws:iam::244995231168:role/sagemaker_role'

In [11]:
container = image_uris.retrieve('blazingtext', session.boto_region_name, version='latest')

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


In [26]:
blazingText = Estimator(
    image_uri = container,
    role=role,
    instance_count = 1,
    instance_type = 'ml.m5.large',
    output_path = "s3://ud-sg-essentials/l2e1/"
)

In [27]:
train_location = "s3://ud-sg-essentials/l2e1/hello_blaze_train"
val_location = "s3://ud-sg-essentials/l2e1/hello_blaze_validation"

In [28]:
s3_input_train = sagemaker.inputs.TrainingInput(s3_data = train_location, content_type="fastFile")
s3_input_val = sagemaker.inputs.TrainingInput(s3_data = val_location, content_type="fastFile")

In [29]:
blazingText.set_hyperparameters(mode="skipgram",
    epochs=5,
    min_count=5,
    sampling_threshold=0.0001,
    learning_rate=0.05,
    window_size=5,
    early_stopping=True,
    vector_dim=100,
    negative_samples=5,
    batch_size=11,  #  = (2*window_size + 1) (Preferred. Used only if mode is batch_skipgram)
    evaluation=True,  # Perform similarity evaluation on WS-353 dataset at the end of training
    subwords=False,)

In [30]:
blazingText.fit({"train": s3_input_train, "val": s3_input_val}, logs=True)

INFO:sagemaker:Creating training-job with name: blazingtext-2024-03-15-09-24-27-678


2024-03-15 09:24:27 Starting - Starting the training job...
2024-03-15 09:24:41 Starting - Preparing the instances for training......
2024-03-15 09:25:35 Downloading - Downloading input data...
2024-03-15 09:26:25 Downloading - Downloading the training image...
2024-03-15 09:26:40 Training - Training image download completed. Training in progress....[34mArguments: train[0m
  self.stdout = io.open(c2pread, 'rb', bufsize)[0m
[34m[03/15/2024 09:26:44 INFO 139791572899648] nvidia-smi took: 0.025183677673339844 secs to identify 0 gpus[0m
[34m[03/15/2024 09:26:44 INFO 139791572899648] Running single machine CPU BlazingText training using skipgram mode.[0m
[34mNumber of CPU sockets found in instance is  1[0m
[34m[03/15/2024 09:26:44 INFO 139791572899648] Processing /opt/ml/input/data/train/hello_blaze_train . File size: 40.886911392211914 MB[0m
[34mRead 8M words[0m
[34mNumber of words:  33240[0m
[34m##### Alpha: 0.0489  Progress: 2.14%  Million Words/sec: 0.39 #####[0m
[34m#

In [3]:
!pip install -U sagemaker

Collecting sagemaker
  Downloading sagemaker-2.212.0-py3-none-any.whl.metadata (14 kB)
Downloading sagemaker-2.212.0-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.210.0
    Uninstalling sagemaker-2.210.0:
      Successfully uninstalled sagemaker-2.210.0
Successfully installed sagemaker-2.212.0


In [6]:
words = ["awesome", "blazing"]

payload = {"instances": words}

AttributeError: module 'sagemaker' has no attribute 'invoke_endpoint'

In [9]:
#instantiate a Predictor
predictor = sagemaker.predictor.Predictor(
    "blazing-text-console",
    sagemaker_session=sagemaker.Session(),
)

#prepare one image for prediction
predictor.serializer = sagemaker.base_serializers.JSONSerializer("application/json")

#use the predictor to make a prediction
inference = predictor.predict(payload)

In [10]:
inference

b'[{"vector": [-0.10311742126941681, 0.09651123732328415, -0.6314752697944641, -0.2233518660068512, 0.1533883661031723, -0.21545320749282837, 0.02982252649962902, 0.08699091523885727, 0.18451273441314697, 0.10264405608177185, -0.2975276708602905, -0.10732537508010864, -0.37441951036453247, 0.38601168990135193, 0.19506926834583282, 0.2224438637495041, 0.2920026183128357, -0.0716804713010788, 0.5326611995697021, -0.3413856029510498, 0.9123349189758301, 0.021400179713964462, 0.16851000487804413, -0.636429488658905, -0.28933459520339966, -0.04792143777012825, 0.17606140673160553, -0.15283504128456116, 0.341959685087204, 0.045655082911252975, -0.23628978431224823, 0.08396004140377045, -0.38644617795944214, -0.38151219487190247, -0.19461144506931305, 0.5576109290122986, 0.4155610501766205, 0.0008213674300350249, -0.23428258299827576, -0.10016001015901566, -0.1456056833267212, 0.1363823562860489, 0.26975947618484497, 0.2711084187030792, 0.5214126706123352, -0.20061133801937103, 0.266617417335

In [13]:
json.loads(inference)

[{'vector': [-0.10311742126941681,
   0.09651123732328415,
   -0.6314752697944641,
   -0.2233518660068512,
   0.1533883661031723,
   -0.21545320749282837,
   0.02982252649962902,
   0.08699091523885727,
   0.18451273441314697,
   0.10264405608177185,
   -0.2975276708602905,
   -0.10732537508010864,
   -0.37441951036453247,
   0.38601168990135193,
   0.19506926834583282,
   0.2224438637495041,
   0.2920026183128357,
   -0.0716804713010788,
   0.5326611995697021,
   -0.3413856029510498,
   0.9123349189758301,
   0.021400179713964462,
   0.16851000487804413,
   -0.636429488658905,
   -0.28933459520339966,
   -0.04792143777012825,
   0.17606140673160553,
   -0.15283504128456116,
   0.341959685087204,
   0.045655082911252975,
   -0.23628978431224823,
   0.08396004140377045,
   -0.38644617795944214,
   -0.38151219487190247,
   -0.19461144506931305,
   0.5576109290122986,
   0.4155610501766205,
   0.0008213674300350249,
   -0.23428258299827576,
   -0.10016001015901566,
   -0.1456056833267212,