# MLOps - NLP Lab with Amazon SageMaker

**Step 3B** - *Predict in batch using SageMaker Batch Transform*
## Initialization
---

In [None]:
import os
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.utils import name_from_base
from sagemaker.pytorch import PyTorchModel

# IAM role of the current notebook environment; it is handed to the SageMaker
# model below through its `role` argument.
role = get_execution_role()

### Retrieve the locations saved by the previous notebooks

In [None]:
def read_location(fname):
    """Return the first line of the text file `fname` without surrounding whitespace.

    The location files read below are written by earlier notebooks and are
    expected to hold a single location string on their first line.
    `readline()` keeps the line terminator when the file ends with one, and a
    location string that ends in a newline character is not usable as an S3
    URI, so the value is stripped before being returned.

    Raises:
        OSError: if `fname` cannot be opened (callers below check for
            existence first).
    """
    with open(fname, 'r') as f:
        return f.readline().strip()


# Location of the trained model artifact, saved by the training notebook.
model_artifact_fname = '../2_train_model/model_artifact_location.txt'
if os.path.exists(model_artifact_fname):
    model_artefact = read_location(model_artifact_fname)

    print(model_artefact)

else:
    # `model_artefact` stays undefined in this case; the model creation cell
    # further down needs it, so run the previous notebook first.
    print(f'Model artifact location file not found ({model_artifact_fname}): check that the previous notebook was fully executed.')

# Location of the processing job input, saved by the data preparation notebook.
input_location_fname = '../1_prepare_data/processing_input_location.txt'
if os.path.exists(input_location_fname):
    processing_input = read_location(input_location_fname)

    print(f'Processing input location | {processing_input}')

else:
    print(f'Processing input location file not found ({input_location_fname}): check that the previous notebook was fully executed.')

## Create SageMaker model
---

In [None]:
# Wrap the trained artifact in a SageMaker PyTorch model. The inference code
# is `predict_batch.py`, taken from the local `source_dir` directory, and the
# model gets a unique name derived from the 'bert-model' base.
model = PyTorchModel(
    model_data=model_artefact,
    name=name_from_base('bert-model'),
    role=role,
    entry_point='predict_batch.py',
    source_dir='source_dir',
    framework_version='1.5.0',
    # SageMaker Python SDK v2 raises a ValueError when `py_version` is missing
    # and no `image_uri` is given; 'py3' is also what SDK v1 used by default,
    # so stating it explicitly keeps the cell working on both major versions.
    py_version='py3',
)

## Launch batch predictions
---

Configure the batch transform job. Before running the cell below, replace the following placeholders (each one is the bucket name and key prefix only: the `s3://` scheme is already part of the string):
* **`<YOUR-TEST-DATA-S3-PATH>`**: S3 location of the processing job output that contains `test_batch_transform.csv`
* **`<YOUR-OUTPUT-RESULTS-S3-PATH>`**: S3 location where you want the predictions to be written

In [None]:
# Input file and output prefix on S3 -- both placeholders must be replaced
# with real locations before this cell is executed.
test_data = 's3://<YOUR-TEST-DATA-S3-PATH>/test_batch_transform.csv'
s3_output = 's3://<YOUR-OUTPUT-RESULTS-S3-PATH>'

# Settings of the batch transform job, gathered in one place so they are easy
# to review and tune.
transform_settings = {
    'instance_count': 1,
    'instance_type': 'ml.m5.xlarge',
    'strategy': 'SingleRecord',
    'assemble_with': 'Line',
    'accept': 'text/csv',
    'max_concurrent_transforms': 50,
    'output_path': s3_output,
}

transformer = model.transformer(**transform_settings)

Request the batch predictions and wait for the process to finish:

In [None]:
# Start the batch transform job on the CSV test file: the input is split line
# by line and `join_source='Input'` asks for each input record to be joined
# with its prediction in the output.
transformer.transform(data=test_data, content_type='text/csv', split_type='Line', join_source='Input')

In [None]:
# Block this cell until the batch transform job started above has finished.
transformer.wait()