In [193]:
import os
import pandas as pd
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.s3 import s3_path_join
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer


import sagemaker
import threading
import time
import json
import matplotlib.pyplot as plt

# Notebook display settings: show every column, never truncate cell text,
# and render floats with three decimals.
for _option, _value in (
    ('display.max_columns', None),
    ('display.max_colwidth', None),
    ('display.float_format', '{:.3f}'.format),
):
    pd.set_option(_option, _value)


# Hugging Face Hub model settings (https://huggingface.co/models).
# NOTE(review): `hub` is not passed to any call in the cells visible here.
hub = dict(
    HF_MODEL_ID='unitary/unbiased-toxic-roberta',
    HF_TASK='text-classification',
)

# --- Running locally: take the role from the environment and pin the bucket ---
# ROLE = os.getenv('SAGEMAKER_ROLE')
# sagemaker_session_bucket = 'sagemaker-godeltech'
# sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# --- Running inside SageMaker Studio: use the execution role and a default session ---
ROLE = sagemaker.get_execution_role()
sess = sagemaker.Session()

# Echo the bucket and region the session resolved to.
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker bucket: sagemaker-eu-west-1-595779774521
sagemaker session region: eu-west-1


In [136]:
# Hugging Face Hub repository ("<owner>/<model name>") that gets cloned below.
repository = "unitary/unbiased-toxic-roberta"
# `git clone` names the checkout after the last path component of the repository,
# so derive the directory name instead of keeping a second literal in sync by hand.
model_id = repository.rsplit("/", 1)[-1]

In [6]:
%cd ../tmp
!git clone https://huggingface.co/$repository

/home/vlad/sagemaker_exp/tmp
fatal: destination path 'unbiased-toxic-roberta' already exists and is not an empty directory.


In [10]:
%cd $model_id
%cp -r ../godel_demo/aux/inference.py ./code/inference.py
!tar zcvf model.tar.gz *

code/
code/inference.py
config.json
flax_model.msgpack
merges.txt
pytorch_model.bin
README.md
special_tokens_map.json
tokenizer_config.json
vocab.json


In [137]:
# S3 location of the packaged model archive (model.tar.gz) in the session's default bucket.
#
# The archive is not uploaded by this cell. It can be pushed with:
#     cust_model_url = sess.upload_data(
#         path="./model.tar.gz",
#         key_prefix="transformers/cust_model",
#     )
# Previously used fixed location:
#     's3://sagemaker-godeltech/transformers/cust_model/model.tar.gz'
# NOTE(review): the upload prefix above ("transformers/cust_model") differs from the
# "custom_model/" key used below -- confirm where the archive actually lives.
_model_bucket = sess.default_bucket()
cust_model_url = f's3://{_model_bucket}/custom_model/model.tar.gz'


In [138]:
%%time

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=cust_model_url,    # path to your model and script
   role=ROLE,                    # iam role with permissions to create an Endpoint
   transformers_version="4.12",  # transformers version used
   pytorch_version="1.9",        # pytorch version used
   py_version='py38',            # python version used
)

# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=f"toxicbert-custom-endpoint"
    )

----!CPU times: user 464 ms, sys: 29.1 ms, total: 493 ms
Wall time: 2min 2s


In [194]:
# Sample text used for a single smoke-test request (and reused as the first test input later).
kayne_west_tweet = """I’m a bit sleepy tonight but when I wake up I’m going death con 3 On JEWISH PEOPLE.
The funny thing is I actually can’t be Anti Semitic because black people are actually Jew 
also You guys have toyed with me and tried to black ball anyone whoever opposes your agenda."""

# Send one request to the endpoint and display the raw response.
test = predictor.predict({'inputs': kayne_west_tweet})
test

{'toxicity': '0.5870796',
 'severe_toxicity': '0.0011920218',
 'obscene': '0.007903993',
 'identity_attack': '0.5209994',
 'insult': '0.0861265',
 'threat': '0.013513568',
 'sexual_explicit': '0.00039305032',
 'male': '0.0069997045',
 'female': '0.00088978146',
 'homosexual_gay_or_lesbian': '0.001269252',
 'christian': '0.002529395',
 'jewish': '0.99786276',
 'muslim': '0.0030005148',
 'black': '0.9719561',
 'white': '0.00563967',
 'psychiatric_or_mental_illness': '0.0013548189'}

In [199]:
# Texts to score: the sample tweet, short friendly/abusive phrases, gaming chatter,
# degenerate inputs (gibberish, digits, whitespace, punctuation) and a long multi-line quote.
test_inputs = [
    kayne_west_tweet,
    "I like you. I love you",
    "Go fuck yourself, motherfucker",
    "last time I saw Joshua he looked like a dickhead",
    "I thing modern politics are hypocritics",
    "Somebody tell this noob to plant the bomb on the B, not the A.",
    "I thing all Kryptonians are gangsters and going to rob me",
    "lalallalalalalalala",
    "98766654321",
    "      ",
    "!!!!!$^%#$%",
    "We're watching you. Scum.",
    "Stop right there, criminal scum!",
    "I used to be an adventurer like you, until I took an arrow to the knee.",
    """When life gives you lemons, don't make lemonade. Make life take the lemons back! Get mad!
    I don't want your damn lemons! What am I supposed to do with these?! Demand to see life's manager! 
    Make life rue the day it thought it could give Cave Johnson lemons! Do you know who I am?! 
    I'm the man who's gonna burn your house down! With the lemons! 
    I'm gonna get my engineers to invent a combustible lemon that burns your house down!""",
]

In [200]:
# One-column frame holding the raw texts, one row per test input.
input_text = pd.DataFrame({'input_text': test_inputs})

In [204]:
# Score every test input with one endpoint request each. The response maps each label
# to its score (returned as strings in the sample response), so cast the values to float.
outputs = []
for text in test_inputs:
    response = predictor.predict({'inputs': text})
    outputs.append({label: float(score) for label, score in response.items()})

In [209]:
# Expand the per-input score dicts into columns, then place them next to the input texts
# (column-wise concat, aligned on the row index).
score_columns = pd.json_normalize(outputs)
merged = pd.concat([input_text, score_columns], axis=1)
merged

Unnamed: 0,input_text,toxicity,severe_toxicity,obscene,identity_attack,insult,threat,sexual_explicit,male,female,homosexual_gay_or_lesbian,christian,jewish,muslim,black,white,psychiatric_or_mental_illness
0,I’m a bit sleepy tonight but when I wake up I’m going death con 3 On JEWISH PEOPLE.\nThe funny thing is I actually can’t be Anti Semitic because black people are actually Jew \nalso You guys have toyed with me and tried to black ball anyone whoever opposes your agenda.,0.587,0.001,0.008,0.521,0.086,0.014,0.0,0.007,0.001,0.001,0.003,0.998,0.003,0.972,0.006,0.001
1,I like you. I love you,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Go fuck yourself, motherfucker",0.998,0.007,0.975,0.033,0.992,0.033,0.166,0.061,0.008,0.016,0.009,0.01,0.002,0.007,0.004,0.015
3,last time I saw Joshua he looked like a dickhead,0.99,0.0,0.014,0.001,0.986,0.001,0.002,0.005,0.001,0.0,0.001,0.0,0.0,0.0,0.001,0.003
4,I thing modern politics are hypocritics,0.983,0.0,0.0,0.002,0.973,0.0,0.0,0.002,0.001,0.0,0.001,0.0,0.0,0.0,0.0,0.001
5,"Somebody tell this noob to plant the bomb on the B, not the A.",0.248,0.0,0.003,0.001,0.089,0.024,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001
6,I thing all Kryptonians are gangsters and going to rob me,0.657,0.0,0.0,0.016,0.423,0.01,0.0,0.001,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.001
7,lalallalalalalalala,0.005,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001
8,98766654321,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [134]:
# Tear down what the deploy cell created so the endpoint stops running.
# NOTE(review): the model is removed before the endpoint; presumably the order matters
# because the model lookup goes through the endpoint's configuration -- confirm against
# the SageMaker SDK before reordering.
predictor.delete_model()
predictor.delete_endpoint()