# Deploying Models on Amazon SageMaker

In [None]:
# download necessary libraries
!pip install sagemaker --upgrade

In [2]:
# Resolve the IAM execution role ARN and open a SageMaker session.
import sagemaker
import boto3

# The IAM client is created through boto3; the sagemaker package has no `client` factory.
iam_client = boto3.client("iam")

# get_role takes the role name via the RoleName keyword, and the ARN is stored
# under the "Arn" key of the returned "Role" entry.
# Fill in the name of your SageMaker execution role (placeholder left empty).
role = iam_client.get_role(RoleName="")["Role"]["Arn"]

sess = sagemaker.Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## There are two options when deploying a model
  * The model is trained on SageMaker
  * The model is available on the Hugging Face Hub

### When the model is trained on SageMaker

   * The model can first be trained and then deployed directly
   * Any trained model stored in another location within AWS can also be deployed

Deploy after training

In [None]:


# ------------- Deploy right after training -------------
# Train a model with the Hugging Face estimator, then deploy that same estimator.

from sagemaker.huggingface import HuggingFace

# Training itself is covered in the "training large language models in
# SageMaker" notebook / repository; only a bare outline is written here.

############# pseudo code start ############

# Step 1: build the Hugging Face estimator.
huggingfaceEstimator = HuggingFace()

# Step 2: launch training through fit().
huggingfaceEstimator.fit()

############ pseudo code end ############

# Deploy the estimator we just trained as an endpoint.
predictor_pipeline = huggingfaceEstimator.deploy(
    initial_instance_count=1,
    instance_type="chose your desired required instance",
)

# The Inference Toolkit builds on top of the pipeline feature from
# 🤗 Transformers, so the input dictionary is sent to the predictor as-is.
input_data = {"inputs": "sentence"}

predictor_pipeline.predict(input_data)

In [None]:
# --------- Deleting the endpoint ------------
# Remove the endpoint created by the deploy() call once it is no longer needed.
predictor_pipeline.delete_endpoint()


Deploy trained model data stored in S3

In [None]:
# If the model has already been trained and saved somewhere, give SageMaker
# the location (address/path) of the model data and tokenizer data.

# Deploying from model data goes through the HuggingFaceModel class.
# Note the exact casing: the class is `HuggingFaceModel`.
from sagemaker.huggingface.model import HuggingFaceModel

huggingfacemodel = HuggingFaceModel(
    model_data = "",                  # path where your model is stored
    role = role,                      # execution role ARN resolved earlier
    transformers_version = "",        # fill in the versions matching your model
    pytorch_version = "",
    py_version = ""
)

# Deploy the model object as an endpoint.
predictor_pipeline = huggingfacemodel.deploy(
    initial_instance_count = 1,
    instance_type = "your aws instance where you want to deploy"
)

# The request payload uses the "inputs" key, matching the other payloads
# in this notebook.
data = {
    "inputs" : "sentence"
}

predictor_pipeline.predict(data)

In [None]:
# Delete the endpoint afterwards, once you no longer need it.
predictor_pipeline.delete_endpoint()

### Deploying a model from the Hugging Face Hub

In [None]:
# To deploy straight from the Hugging Face Hub we need:
#   * the model id of the model on the Hub
#   * the task for which the pipeline will be created
# Both are handed over as environment variables.
from sagemaker.huggingface.model import HuggingFaceModel

hub = {
    "HF_MODEL_ID" : "",   # model id on the Hub
    "HF_TASK" : ""        # pipeline task
}


# Create the HuggingFaceModel object from the Hub configuration.

huggingfacemodel = HuggingFaceModel(
    env = hub,
    role = role,
    transformers_version = "",
    pytorch_version = "",
    py_version = ""
)

# Deploy it as an endpoint. The result must be bound to `predictor_pipeline`,
# the name used by the predict() and delete_endpoint() calls below.
predictor_pipeline = huggingfacemodel.deploy(
    initial_instance_count = 1,
    instance_type = ''
)

# Payloads are wrapped in the "inputs" key. A payload that carries a context
# corresponds to a question-answering style task, which takes a question /
# context pair; for single-text tasks use {"inputs": "<text>"} instead.
# NOTE(review): adjust the payload to whatever HF_TASK you set above.
data  = {
    "inputs" : {
        "question" : "",
        "context" : ""
    }
}

predictor_pipeline.predict(data)


# Delete the endpoint later, when it is no longer required.
predictor_pipeline.delete_endpoint()


### Run batch transform with 🤗 Transformers and SageMaker

In [None]:
# If you trained a model using the Hugging Face Estimator, call its
# transformer() method to create a transform job for a model based on the
# training job.

# transformer() is called on the estimator object created in the training
# cell (`huggingfaceEstimator` — the name is case-sensitive).
batch_job = huggingfaceEstimator.transformer(
    instance_count = 1,            # number of instances; must be an integer
    instance_type = "",            # fill in the instance type to run the job on
    strategy = "SingleRecord"      # process a single record at a time
)

batch_job.transform(
    data = "s3 location where data is stored in json format",
    content_type = "application/json",   # lowercase MIME type
    split_type = "Line"                  # split the input file line by line
)


# The input file is expected in this format, one JSON record per line:
# {"inputs":"this movie is terrible"}
# {"inputs":"this movie is amazing"}
# {"inputs":"SageMaker is pretty cool"}
# {"inputs":"SageMaker is pretty cool"}
# {"inputs":"this movie is terrible"}
# {"inputs":"this movie is amazing"}

