# Package Huggingface Setfit Model for SageMaker MME with DJL
Example 4: Train, evaluate, and package a setfit sentence classification model for deployment on a SageMaker Multi-Model Endpoint with DJL 

* This was tested with an AWS SageMaker conda_pytorch_p310 kernel
* This will run best on an instance with a GPU such as an ml.g4dn.xlarge

Read in essential static variables used across notebooks from the store. These values are set in notebook 00

In [None]:
%store -r

In [None]:
%pip install setfit

In [None]:
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset
from datasets import load_dataset
import os

## Step 1: Define and train the example model

Leverage an open source sentiment analysis dataset for the example training data

In [None]:
# Load the SetFit/sst2 sentiment dataset, keep its "test" split for evaluation,
# and draw a small few-shot training subset (num_samples=8) from the "train" split
dataset = load_dataset("SetFit/sst2")
test_dataset = dataset["test"]
train_dataset = sample_dataset(
    dataset["train"],
    label_column="label",
    num_samples=8,
)

## Create the model

Here we're utilizing the BAAI/bge-small-en-v1.5 model as the base model, and setting up the labelled output

In [None]:
# Build a fresh SetFit model on top of the pretrained base checkpoint and
# attach the class names used for labelled output
base_model_name = "BAAI/bge-small-en-v1.5"
class_labels = ["negative", "positive"]
model = SetFitModel.from_pretrained(base_model_name, labels=class_labels)

Configure the trainer. 

These training parameters are enough to demonstrate training, but not enough to reach 90%+ accuracy. The intention here is to show the process.

In [None]:
# Hyperparameters for the demonstration training run

training_config = {
    "batch_size": 32,
    "num_epochs": 10,
}
args = TrainingArguments(**training_config)

In [None]:
# Wire the model, the training arguments and the few-shot training set together
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

## Train the model

In [None]:
%%time
# This trains in less than a minute on an ml.g4dn.xlarge (single GPU) instance;
# it will take significantly longer on a CPU-based instance and may fail.

trainer.train()

### Evaluate the model

In [None]:
# Score the trained model on the held-out "test" split and show the metrics
metrics = trainer.evaluate(test_dataset)
print(metrics)
# => {'accuracy': 0.8511806699615596}  (example output from a previous run)

## Step 2: Export the trained model

### Make sure we're starting from a known place in the filesystem

In [None]:
%cd ~/SageMaker

### Export the model

In [None]:
# Directory name the trained model is saved under; it must match the model_id
# hard-coded in model.py, which loads the model from that directory
model_id = "setfit-bge-small-v1.5-sst2-8-shot"
# S3 key prefix under which the packaged tarball is uploaded
s3_model_prefix = "djl-mme-sklearn-examples"
# Object name of the uploaded tarball beneath s3_model_prefix
setfit_model_reference_name = "setfit-classifier.tar.gz"

In [None]:
# Create the setfit-classifier directory (and any missing parents) if needed

target_dir = "setfit-classifier"
target_path = f"./models/{target_dir}"

# exist_ok=True keeps this cell safe to re-run and avoids the race between
# checking for the directory and creating it
os.makedirs(target_path, exist_ok=True)

In [None]:
%cd $target_path

In [None]:
# Saving the trained model into a directory named after model_id,
# relative to the current working directory
model.save_pretrained(model_id)

### Test the exported model
Instantiate a new instance of the model from the saved files and test it with a few sample sentences

In [None]:
# Loading a trained model: rebinds `model` to a fresh instance read back from
# the directory written by save_pretrained, to verify the export round-trips
model = SetFitModel.from_pretrained(model_id) # Load from a local directory

In [None]:
# Run the reloaded model on a few example sentences
sample_sentences = [
    "It's a charming and often affecting journey.",
    "It's slow -- very, very slow.",
    "A sometimes tedious film.",
]
preds = model.predict(sample_sentences)
print(preds)
# => ["positive", "negative", "negative"]

## Step 3: Deep Java Library (DJL) artifact creation

We now have our model artifact, but we need the following for our DJL Serving Engine

model.py: Inference script with custom model loading + pre/processing code

requirements.txt: Additional dependencies, in this case we need to install setfit

serving.properties: Environment variables for DJL Serving, can adjust number of workers here

In [None]:
%%writefile model.py
#!/usr/bin/env python
#
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file
# except in compliance with the License. A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
# the specific language governing permissions and limitations under the License.

import logging
import numpy as np
import time
import os
from setfit import SetFitModel
from djl_python import Input
from djl_python import Output


class SetFitClassifier:
    """DJL Serving handler wrapping a SetFit classifier stored next to this script.

    The model is loaded once by :meth:`initialize` and then reused by
    :meth:`inference` for every request.
    """

    # Name of the directory holding the saved SetFit model.
    MODEL_ID = "setfit-bge-small-v1.5-sst2-8-shot"

    def __init__(self):
        self.initialized = False
        # Set by initialize(); defined here so the attribute always exists.
        self.model = None

    def initialize(self, properties: dict):
        """
        Initialize model.

        :param properties: properties supplied by the serving engine. When a
            ``model_dir`` entry is present the model directory is looked up
            beneath it first; otherwise (or if it is not found there) the
            lookup falls back to the current working directory.
        :raises ValueError: if the model directory cannot be found
        """
        print("SetFitClassifier: initialize: " + str(os.listdir()))
        model_id = self.MODEL_ID

        # Prefer an explicit model_dir over relying on the process cwd, but keep
        # the cwd-relative lookup as a fallback so existing deployments still work.
        candidates = [model_id]
        model_dir = (properties or {}).get("model_dir")
        if model_dir:
            candidates.insert(0, os.path.join(model_dir, model_id))

        model_path = next((path for path in candidates if os.path.exists(path)), None)
        if model_path is None:
            raise ValueError(f"Failed to find {model_id} directory for SetFit Model Loading")

        self.model = SetFitModel.from_pretrained(model_path)
        print("SetFitClassifier: model loaded during initialization")
        self.initialized = True

    def inference(self, inputs):
        """
        Custom service entry point function.

        :param inputs: the Input object; its JSON body is passed unchanged to
            the model's ``predict`` method
        :return: the Output object to be sent back, holding the predictions as
            JSON on success or an error payload if any step raised
        """

        # example input: ["It's a charming and often affecting journey.", "It's slow -- very, very slow."]

        try:
            data = inputs.get_as_json()

            res = self.model.predict(data)
            # predict() can return an array-like (e.g. numpy array or tensor)
            # instead of a plain list; convert so it can be serialised as JSON.
            if hasattr(res, "tolist"):
                res = res.tolist()
            outputs = Output()
            outputs.add_as_json(res)

        except Exception as e:
            # Deliberately broad: any failure is reported back to the caller as
            # an error response instead of propagating out of the handler.
            logging.exception("SetFitClassifier: inference failed: %s", e)
            outputs = Output().error(str(e))

        return outputs


_service = SetFitClassifier()


def handle(inputs: Input):
    """
    Module-level request handler.

    Loads the model on the first call, then returns ``None`` for an empty
    request or the result of running inference on the request otherwise.
    """
    # One-time lazy load; the service keeps the model for later calls
    if not _service.initialized:
        _service.initialize(inputs.get_properties())

    # An empty request carries nothing to predict on
    return None if inputs.is_empty() else _service.inference(inputs)

In [None]:
%%writefile requirements.txt
setfit

In [None]:
%%writefile serving.properties
engine=Python
# idle time in seconds before the worker thread is scaled down, the default is 60 seconds
max_idle_time=600

### Tarball Creation

In [None]:
# Build tar file with the saved SetFit model directory + inference code
import subprocess

# Pass the arguments as a list so paths containing spaces are not split apart
tar_command = [
    "tar",
    "-cvpzf",
    "model.tar.gz",
    model_id,
    "requirements.txt",
    "model.py",
    "serving.properties",
]
# check=True raises CalledProcessError if tar fails, so a broken or partial
# archive is never silently uploaded by the cells that follow
result = subprocess.run(tar_command, capture_output=True, text=True, check=True)
output, error = result.stdout, result.stderr

Take a quick look at what's in the tarball

In [None]:
!tar -tvf model.tar.gz

### Upload the tarball to target location on Amazon S3

In [None]:
from sagemaker import session as sagemaker_session
from boto3 import client as boto3_client

# S3 client used to upload the tarball
s3_client = boto3_client("s3")

# SageMaker session for interacting with different AWS APIs; its default
# bucket houses the artifacts
sess = sagemaker_session.Session()
bucket = sess.default_bucket()

In [None]:
# Upload the tarball to Amazon S3, where it will be used to back model requests
s3_key = f"{s3_model_prefix}/{setfit_model_reference_name}"
with open("model.tar.gz", "rb") as tarball:
    s3_client.upload_fileobj(tarball, bucket, s3_key)

In [None]:
# S3 URI of the prefix that holds the packaged model artifacts
mme_artifacts = f"s3://{bucket}/{s3_model_prefix}/"

In [None]:
# Verify that the tar ball is saved to the target location
!aws s3 ls {mme_artifacts}

## Training and packaging complete