In [206]:
import boto3
import sagemaker


# Train a BlazingText supervised (text classification) model on SageMaker.
# Inputs are read from, and model artifacts written to, the project bucket.
proj_bucket = "nikhil-spam-ham"

# Built-in BlazingText algorithm image for the region of the default boto3 session.
container = sagemaker.image_uris.retrieve('blazingtext', boto3.Session().region_name)

# Training artifacts land under s3://<proj_bucket>/training_output/output.
output_path = 's3://{}/{}/output'.format(proj_bucket, "training_output")

estimator = sagemaker.estimator.Estimator(
    container,
    sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=output_path,
    sagemaker_session=sagemaker.Session(),
    hyperparameters={
        "mode": "supervised",
        "epochs": 40,
        "min_count": 2,
        "learning_rate": 0.05,
        "vector_dim": 10,
        "early_stopping": True,
        "patience": 4,
        "min_epochs": 20,
        "word_ngrams": 2,
    },
)

# Channel locations. These are S3 *prefixes*, not object keys: with the default
# S3Prefix data type every object whose key starts with the prefix is pulled into
# the channel.
# NOTE(review): the previous code called `.format(proj_bucket, "train.csv")` on
# these already-interpolated strings, which was a no-op (no `{}` placeholders
# remain), so the effective URIs were always the bare prefixes below. If only
# train.csv / validation.csv are meant to be used, point s3_data at the full
# object URI instead - confirm the actual keys in the bucket first.
train_path = f"s3://{proj_bucket}/train"
validation_path = f"s3://{proj_bucket}/validation"

# NOTE(review): BlazingText's supervised mode is documented as taking text/plain
# input with `__label__` prefixes - confirm that content_type='csv' is intended.
s3_input_train = sagemaker.inputs.TrainingInput(s3_data=train_path, content_type='csv')
s3_input_validation = sagemaker.inputs.TrainingInput(s3_data=validation_path, content_type='csv')

# Launch the training job on both channels.
# NOTE(review): per the SageMaker SDK docs, fit() returns None outside a
# PipelineSession, so `model` is presumably not a model object; the binding is
# kept only so the notebook namespace is unchanged. Use `estimator` downstream.
model = estimator.fit({'train': s3_input_train, 'validation': s3_input_validation})

2022-10-26 23:26:48 Starting - Starting the training job...ProfilerReport-1666826808: InProgress
...
2022-10-26 23:27:32 Starting - Preparing the instances for training............
2022-10-26 23:29:32 Downloading - Downloading input data...
2022-10-26 23:30:13 Training - Downloading the training image..[34mArguments: train[0m
[34m[10/26/2022 23:30:28 INFO 140130880444224] nvidia-smi took: 0.07566452026367188 secs to identify 0 gpus[0m
[34m[10/26/2022 23:30:28 INFO 140130880444224] Running single machine CPU BlazingText training using supervised mode.[0m
[34mNumber of CPU sockets found in instance is  1[0m
[34m[10/26/2022 23:30:28 INFO 140130880444224] 109 files found in train channel. Using /opt/ml/input/data/train/train.csv for training...[0m
[34m[10/26/2022 23:30:28 INFO 140130880444224] Processing /opt/ml/input/data/train/train.csv . File size: 25.89203453063965 MB[0m
[34m[10/26/2022 23:30:28 INFO 140130880444224] Processing /opt/ml/input/data/validation/validation.csv 

In [207]:
# Deploy the estimator behind an endpoint backed by a single ml.m4.xlarge instance.
predictor = estimator.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

--------!

In [209]:
# Report the endpoint name carried by the predictor returned from estimator.deploy().
print(f"Predictor name: {predictor.endpoint_name}")

Predictor name: blazingtext-2022-10-26-23-32-03-964
