<a href="https://colab.research.google.com/github/matthewchung74/inference_nbs/blob/dev/huggingface/imdb_classification_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path

def create_requirements_file():
    """Create ``requirements.txt`` in the working directory if it is missing.

    An existing file is left untouched. Otherwise the file is written with one
    package name per line, each terminated by a newline.
    """
    target = Path("requirements.txt")
    if target.exists():
        # Respect a requirements file that is already there.
        return
    packages = ["transformers"]
    target.write_text("".join(f"{package}\n" for package in packages))
    
# Make sure requirements.txt exists before the install cell reads it.
create_requirements_file()

In [None]:
!pip install -r requirements.txt



In [None]:
!wget https://ml-inference.s3-us-west-2.amazonaws.com/hugging_face_imdb_training.zip

--2021-04-14 17:13:58--  https://ml-inference.s3-us-west-2.amazonaws.com/hugging_face_imdb_training.zip
Resolving ml-inference.s3-us-west-2.amazonaws.com (ml-inference.s3-us-west-2.amazonaws.com)... 52.218.237.105
Connecting to ml-inference.s3-us-west-2.amazonaws.com (ml-inference.s3-us-west-2.amazonaws.com)|52.218.237.105|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 246898598 (235M) [application/zip]
Saving to: ‘hugging_face_imdb_training.zip.1’


2021-04-14 17:14:05 (32.7 MB/s) - ‘hugging_face_imdb_training.zip.1’ saved [246898598/246898598]



In [None]:
!unzip hugging_face_imdb_training.zip

Archive:  hugging_face_imdb_training.zip
   creating: model/
  inflating: model/config.json       
   creating: model/tokenizer/
  inflating: model/tokenizer/vocab.txt  
  inflating: model/pytorch_model.bin  


In [None]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
# Directory produced by unzipping the downloaded archive; per the unzip output it
# holds config.json, pytorch_model.bin and a tokenizer/ sub-folder with vocab.txt.
model_path = Path("./model")
# Load the sequence-classification model (config + weights) from that directory.
model_new = DistilBertForSequenceClassification.from_pretrained(model_path)
# The tokenizer files live separately under model/tokenizer.
tokenizer_new = DistilBertTokenizer.from_pretrained(model_path/"tokenizer")

In [None]:
!pip install -q git+https://github.com/matthewchung74/inference_params.git

  Building wheel for inference-params (setup.py) ... done


In [None]:
import torch.nn.functional as F
import torch
from inference_params.inference_params import inference_test, FieldType, inference_predict

# predict() indexes this list with the arg-max class id, so position 0 is
# reported as "negative" and position 1 as "positive".
class_names = [
    "negative",
    "positive",
]

# Input schema: the key ("text input") is the label shown for the app input.
input = {
    "text input": FieldType.Text,
}
# Output schema: the key ("result") is the label shown for the app output.
output = {
    "result": FieldType.Text,
}

@inference_predict(input=input, output=output)
def predict(params):
    """Classify the sentiment of a single piece of text.

    Args:
        params: Mapping that contains the key ``'text input'`` with the text
            to classify.

    Returns:
        dict: ``{"result": "<class name>, confidence: <probability>"}``, where
        the class name is taken from ``class_names`` and the probability is
        the softmax score of the predicted class.

    Note:
        The function is wrapped by ``inference_predict``; the recorded cell
        outputs show decorated calls returning ``(result_dict, seconds)``.
    """
    text = params['text input']
    # Truncate to the model's position-embedding limit so very long inputs do
    # not overflow the model. The limit is passed explicitly because the
    # tokenizer directory appears to contain only vocab.txt, so the tokenizer
    # may not carry its own maximum length (assumption -- confirm).
    inputs = tokenizer_new(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model_new.config.max_position_embeddings,
    )
    # Inference only: no labels are passed (a loss is not needed for
    # prediction) and autograd bookkeeping is disabled.
    with torch.no_grad():
        logits = model_new(**inputs).logits
    probabilities = F.softmax(logits, dim=1)
    confidence, predicted_class = torch.max(probabilities, dim=1)
    # .item() turns the one-element tensors into plain Python numbers.
    return {"result": f"{class_names[predicted_class.item()]}, confidence: {confidence.item()}"}

In [None]:
# Run predict through the package's test helper with a sample input; per the
# recorded output it writes the result to result.json and reports the duration.
inference_test(predict_func=predict, params={'text input': 'loved that movie'})

wrote results to result.json duration: 0.038301 seconds


In [None]:
# Spot-check: a clearly positive review.
predict({'text input': 'this movie is awesome'})

({'result': 'positive, confidence: 0.9962551593780518'}, 0.040082454681396484)

In [None]:
# Spot-check: a clearly negative review.
predict({'text input': 'this movie is very bad'})

({'result': 'negative, confidence: 0.999083399772644'}, 0.0460205078125)