<a href="https://colab.research.google.com/github/matthewchung74/inference_nbs/blob/main/huggingface/imdb_classification_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# main_branch
from pathlib import Path

def create_requirements_file():
    if not Path("requirements.txt").exists():
        requirements = ["transformers"]
        with open('requirements.txt', 'w') as filehandle:
            for listitem in requirements:
                filehandle.write('%s\n' % listitem)
    
create_requirements_file()

In [2]:
!pip install -r requirements.txt



In [3]:
!wget https://ml-inference.s3-us-west-2.amazonaws.com/hugging_face_imdb_training.zip

--2021-04-27 00:18:28--  https://ml-inference.s3-us-west-2.amazonaws.com/hugging_face_imdb_training.zip
Resolving ml-inference.s3-us-west-2.amazonaws.com (ml-inference.s3-us-west-2.amazonaws.com)... 52.218.177.49
Connecting to ml-inference.s3-us-west-2.amazonaws.com (ml-inference.s3-us-west-2.amazonaws.com)|52.218.177.49|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 246898598 (235M) [application/zip]
Saving to: ‘hugging_face_imdb_training.zip.3’


2021-04-27 00:18:34 (43.1 MB/s) - ‘hugging_face_imdb_training.zip.3’ saved [246898598/246898598]



In [4]:
!unzip -n hugging_face_imdb_training.zip

Archive:  hugging_face_imdb_training.zip


In [5]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
model_path = Path("./model")
model_new = DistilBertForSequenceClassification.from_pretrained(model_path)
tokenizer_new = DistilBertTokenizer.from_pretrained(model_path/"tokenizer")

In [6]:
!pip install -q git+https://github.com/matthewchung74/inference_params.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
  Building wheel for inference-params (PEP 517) ... [?25l[?25hdone


In [7]:
import torch.nn.functional as F
import torch
from inference_params.inference_params import inference_test, FieldType, inference_predict

class_names =["negative", "positive"]

# text input will be the label for the app input
input = {"text input": FieldType.Text}
# result will be the label for the app output
output = {"result": FieldType.Text}

@inference_predict(input=input, output=output)
def predict(params):
    text = params['text input']
    inputs = tokenizer_new(text, return_tensors="pt")
    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
    outputs = model_new(**inputs, labels=labels)
    logits = outputs.logits
    probabilities = F.softmax(logits, dim=1)
    confidence, predicted_class = torch.max(probabilities, dim=1)
    return {"result": f"{class_names[predicted_class]}, confidence: {confidence.item()}"}

In [8]:
from inference_params.inference_params import in_colab

if in_colab():
    inference_test(predict_func=predict, params={'text input': 'loved that movie'})

Wrote results to result.json duration: 0.052086 seconds
Please take a look and verify the results


In [9]:
if in_colab():
    predict({'text input':'this movie is awesome'}) 

In [10]:
if in_colab():
    predict({'text input':'this movie is very bad'})