In [2]:
import os
import pandas as pd
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.s3 import s3_path_join
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

import boto3
import sagemaker
import threading
import time
import json
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format','{:.3f}'.format)



# Hub Model configuration. https://huggingface.co/models
hub = {
    'HF_MODEL_ID':'unitary/unbiased-toxic-roberta',
    'HF_TASK':'text-classification'
}

#### IF LOCAL
# ROLE = os.getenv('SAGEMAKER_ROLE')
# sagemaker_session_bucket = 'sagemaker-godeltech'
# sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)


#### IF SAGEMAKER STUDIO
ROLE = sagemaker.get_execution_role()
sess = sagemaker.Session()


print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker bucket: sagemaker-eu-west-1-595779774521
sagemaker session region: eu-west-1


In [3]:
repository="unitary/unbiased-toxic-roberta"
model_id = "unbiased-toxic-roberta"

SageMaker doesn't support git lfs by default. You need to install it ....
```
sudo curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
yum install git-lfs -y
git lfs install
```
...and download model (it is almost 1 Gb).

```
git clone https://huggingface.co/unitary/unbiased-toxic-roberta
```

Then copy the `inderence.py` to folder with model and pack into the ***tar.gz*** archive (SageMaker prefers tar.gz 😀)

In [50]:
%%bash


cd $model_id
mkdir code
cp -r ../godel_demo/aux/inference.py ./code/inference.py
tar zcvf model.tar.gz *

README.md
code/
code/.ipynb_checkpoints/
code/.ipynb_checkpoints/inference-checkpoint.py
code/inference.py
config.json
flax_model.msgpack
merges.txt
pytorch_model.bin
special_tokens_map.json
tokenizer_config.json
vocab.json


In [8]:
!pwd

/root/sagemaker_exp/godel_demo


In [4]:
# cust_model_url = sess.upload_data(
#     path=f"{model_id}/model.tar.gz",
#     key_prefix="custom_model",
# )
# print('Uploaded!')
cust_model_url = f's3://{sess.default_bucket()}/custom_model/model.tar.gz'

In [5]:
%%time

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=cust_model_url,    # path to your model and script
   role=ROLE,                    # iam role with permissions to create an Endpoint
   transformers_version="4.12",  # transformers version used
   pytorch_version="1.9",        # pytorch version used
   py_version='py38',            # python version used
)

# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name=f"toxicbert-custom-endpoint",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
    )

-----!CPU times: user 361 ms, sys: 33.1 ms, total: 394 ms
Wall time: 2min 32s


<p><span style="background-color: #cc99ff;"><strong>Let's try our model on kayne west scandal tweet</strong></span> <a href="https://pitchfork.com/news/kanye-west-locked-out-of-twitter-following-anti-semitic-tweet/">Link</a></p>

<div>
<img src="https://images.ctfassets.net/tyagww523muq/hpJbwB0JgrBUNP7noWfuR/8630a435de4aa0f9a1478200366edd7a/Untitled_design__21_.png?w=1045&h=588&q=50&fit=fill&f=faces" width="650"/>
</div>

In [6]:
kayne_west_tweet = """I’m a bit sleepy tonight but when I wake up I’m going death con 3 On JEWISH PEOPLE.
The funny thing is I actually can’t be Anti Semitic because black people are actually Jew 
also You guys have toyed with me and tried to black ball anyone whoever opposes your agenda."""

In [7]:
test = predictor.predict({'inputs' : [kayne_west_tweet]})
test

['[{"toxicity": "0.587", "severe_toxicity": "0.001", "obscene": "0.008", "identity_attack": "0.521", "insult": "0.086", "threat": "0.014", "sexual_explicit": "0.0", "male": "0.007", "female": "0.001", "homosexual_gay_or_lesbian": "0.001", "christian": "0.003", "jewish": "0.998", "muslim": "0.003", "black": "0.972", "white": "0.006", "psychiatric_or_mental_illness": "0.001"}]',
 'application/json']

In [8]:
runtime= boto3.client('runtime.sagemaker')
response = runtime.invoke_endpoint(EndpointName = 'toxicbert-custom-endpoint',
                                       ContentType = 'application/json',
                                       Body = json.dumps({'inputs' : kayne_west_tweet})
                                  )

In [9]:
json.loads(response['Body'].read().decode('utf-8'))

['[{"toxicity": "0.587", "severe_toxicity": "0.001", "obscene": "0.008", "identity_attack": "0.521", "insult": "0.086", "threat": "0.014", "sexual_explicit": "0.0", "male": "0.007", "female": "0.001", "homosexual_gay_or_lesbian": "0.001", "christian": "0.003", "jewish": "0.998", "muslim": "0.003", "black": "0.972", "white": "0.006", "psychiatric_or_mental_illness": "0.001"}]',
 'application/json']

In [10]:
test_inputs = [kayne_west_tweet,
    "I like you. I love you", "There is nothing toxic in this comment",
    "last time I saw Joshua he looked like a dickhead",
    "I think modern politics are hypocritics",
    "Somebody tell this noob to plant the bomb on the B, not the A.",
    "I thing all Kryptonians are gangsters and going to rob me",
    "lalallalalalalalala this is a non-sense text", "98766654321", "      ",
    "We're watching you. Scum. Stop right there, criminal bastard!", 
    "I used to be an adventurer like you, until I took an arrow to the knee.",
   ]

In [11]:
input_text = pd.DataFrame(test_inputs, columns=['input_text'])    

In [12]:
outputs = json.loads(predictor.predict({'inputs' : test_inputs})[0])

In [13]:
merged = pd.concat([input_text, pd.json_normalize(outputs)], axis=1)
merged

Unnamed: 0,input_text,toxicity,severe_toxicity,obscene,identity_attack,insult,threat,sexual_explicit,male,female,homosexual_gay_or_lesbian,christian,jewish,muslim,black,white,psychiatric_or_mental_illness
0,I’m a bit sleepy tonight but when I wake up I’m going death con 3 On JEWISH PEOPLE.\nThe funny thing is I actually can’t be Anti Semitic because black people are actually Jew \nalso You guys have toyed with me and tried to black ball anyone whoever opposes your agenda.,0.587,0.001,0.008,0.521,0.086,0.014,0.0,0.007,0.001,0.001,0.003,0.998,0.003,0.972,0.006,0.001
1,I like you. I love you,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,There is nothing toxic in this comment,0.001,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,last time I saw Joshua he looked like a dickhead,0.99,0.0,0.014,0.001,0.986,0.001,0.002,0.005,0.001,0.0,0.001,0.0,0.0,0.0,0.001,0.003
4,I think modern politics are hypocritics,0.983,0.0,0.0,0.002,0.971,0.0,0.0,0.002,0.001,0.0,0.001,0.0,0.0,0.0,0.0,0.001
5,"Somebody tell this noob to plant the bomb on the B, not the A.",0.248,0.0,0.003,0.001,0.089,0.024,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001
6,I thing all Kryptonians are gangsters and going to rob me,0.657,0.0,0.0,0.016,0.423,0.01,0.0,0.001,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.001
7,lalallalalalalalala this is a non-sense text,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,98766654321,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
predictor.delete_model()
predictor.delete_endpoint()