# Imports

In [1]:
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer
from transformers.onnx import FeaturesManager
from transformers import AdamW, get_linear_schedule_with_warmup
from azureml.core.model import Model, InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import random
import os
import logging
import json
import onnxruntime
from azureml.core import Workspace, Environment, conda_dependencies

  from .autonotebook import tqdm as notebook_tqdm


# Load secrets

In [2]:
# Read the Azure identifiers (subscription, resource group, workspace name)
# from a local JSON file instead of writing them into the notebook.
with open('secrets.json', 'r') as secrets_file:
    secrets = json.load(secrets_file)

In [3]:
# Connect to the Azure ML workspace described by the values in `secrets`.
# `Workspace` is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.
_required_keys = ('subscription_id', 'resource_group', 'workspace_name')
_missing_keys = [key for key in _required_keys if key not in secrets]
if _missing_keys:
    # Report every missing key at once instead of a bare KeyError on the first one.
    raise KeyError(f"secrets.json is missing required key(s): {', '.join(_missing_keys)}")

ws = Workspace(subscription_id=secrets['subscription_id'],
               resource_group=secrets['resource_group'],
               workspace_name=secrets['workspace_name'])

Performing interactive authentication. Please follow the instructions on the terminal.


The default web browser has been opened at https://login.microsoftonline.com/organizations/oauth2/v2.0/authorize. Please continue the login in the web browser. If no web browser is available or if the web browser fails to open, use device code flow with `az login --use-device-code`.


Interactive authentication successfully completed.


# PyTorch Settings and random seeds

In [4]:
# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and PyTorch CUDA) with the same value for reproducibility.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Pick the compute device: fall back to the CPU when CUDA is not available,
# otherwise use the GPU and report what was found.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")

    gpu_count = torch.cuda.device_count()
    print('There are %d GPU(s) available.' % gpu_count)

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# File name used for the exported ONNX model.
model_path = "albert.onnx"

No GPU available, using the CPU instead.


# Create Albert Model and export ONNX model

In [5]:
# Load the pretrained ALBERT encoder and its tokenizer, then switch the model
# to evaluation mode before it is traced for export.
albert = AlbertModel.from_pretrained('albert-base-v2')
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
albert.eval()

# create example inputs
sentence = "Classic action movie with Tom Cruise!"
tokens = tokenizer.tokenize(sentence)
token_tensor = torch.tensor(tokenizer.encode(tokens)).unsqueeze(0)
# All-ones tensor with one entry per token id in token_tensor.
# NOTE(review): despite its name this tensor is passed as the model's second
# positional input and is exported under the input name 'attention_mask'.
segments_tensor = torch.tensor([1] * token_tensor.shape[1]).unsqueeze(0)

# variables
model_path = "albert.onnx"
# NOTE(review): not used by the export below, which names its axes
# 'batch_size' / 'sequence'; kept in case a later cell relies on it.
symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}

# set config
# Raises if the model is not supported for ONNX export; the resulting
# config objects are not consumed by the torch.onnx.export call below.
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(albert)
onnx_config = model_onnx_config(albert.config)

# create input
inputs = {
    'token_tensor': token_tensor,
    'segments_tensor': segments_tensor,
}

torch.onnx.export(albert,                                         # model being run
                  (inputs['token_tensor'],
                   inputs['segments_tensor']),                    # model inputs, in positional order
                  model_path,                                     # where to save the model (can be a file or file-like object)
                  opset_version=11,                               # the ONNX version to export the model to
                  do_constant_folding=True,                       # whether to execute constant folding for optimization
                  input_names=['input_ids',
                               'attention_mask'],                 # the model's input names
                  output_names=['last_hidden_state',
                                'pooler_output'],                 # the model's output names
                  # Declare the variable-length axes for the outputs as well as the
                  # inputs, so the exported graph carries consistently named dynamic
                  # dimensions on both sides.
                  dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'},
                                'attention_mask': {0: 'batch_size', 1: 'sequence'},
                                'last_hidden_state': {0: 'batch_size', 1: 'sequence'},
                                'pooler_output': {0: 'batch_size'}},
                  )

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.dense.bias', 'predictions.LayerNorm.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.decoder.bias', 'predictions.bias', 'predictions.dense.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Score.py File

In [6]:
%%writefile score.py
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer
from transformers.onnx import FeaturesManager
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import logging
import json
import onnxruntime

def init():
    '''
    Loads the pretrained ALBERT model and its tokenizer into global variables so that they can be reused by later calls.

    The model is loaded with output_hidden_states=True, moved to the GPU when one is available (CPU otherwise) and put into evaluation mode.
    '''
    global tokenizer, albert, session, device
    # `device` is not defined anywhere else in this file, so it has to be chosen
    # here; referencing it without this assignment raises a NameError.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load ALBERT model
    albert = AlbertModel.from_pretrained('albert-base-v2', output_hidden_states=True).to(device)
    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    albert.eval()

    # create onnx runtime and load onnx model
    # session = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])

def preprocess(text:str):
    '''
    Converts a sentence coming from the speech2text service (https://azure.microsoft.com/en-us/products/cognitive-services/speech-to-text/#features) into tensors, using the tokenizer stored in the global variable.

    Params
    ------
        text:str - Sentence from speech2text

    Returns
    -------
        token_tensor:torch.Tensor - Encoded ids with an added leading dimension of size 1
        segments_tensor:torch.Tensor - Tensor of ones with one entry per id in token_tensor
    '''
    # Split the sentence into tokens first, then let the tokenizer encode them.
    pieces = tokenizer.tokenize(text)
    ids = tokenizer.encode(pieces)
    token_tensor = torch.tensor(ids).unsqueeze(0)

    # One "1" for every position along the second axis of token_tensor.
    seq_len = token_tensor.size(1)
    segments_tensor = torch.tensor([1] * seq_len).unsqueeze(0)
    return token_tensor, segments_tensor

def run(input:str) -> torch.Tensor:
    '''
    Turns the text of a JSON request into an embedding with the Albert Model stored in the global variable.

    Params
    ------
        input:str - JSON document with a 'text' entry holding the sentence to embed

    Returns
    -------
        embedding_vector:torch.Tensor - Mean over output[2][1:] (stacked), followed by a mean over dim 1
    '''
    # read and log input
    logging.info("Request received")
    input = json.loads(input)
    logging.info(input)

    # preprocess
    logging.info("Preprocessing ...")
    token_tensor, segments_tensor = preprocess(text=input['text'])

    # The model may live on a GPU; its inputs have to be on the same device,
    # otherwise the forward pass fails with a device mismatch.
    model_device = next(albert.parameters()).device
    token_tensor = token_tensor.to(model_device)
    segments_tensor = segments_tensor.to(model_device)

    # process
    logging.info("Processing ...")
    albert.eval()
    with torch.no_grad():
        output = albert(token_tensor, segments_tensor)
    # presumably output[2] is the tuple of hidden states requested via
    # output_hidden_states=True when the model was loaded - TODO confirm.
    # Its first entry is skipped; the rest are averaged.
    hidden_states = output[2][1:]
    # assumes each hidden state is (batch, tokens, hidden), so dim=1 averages over tokens - TODO confirm
    embedding = torch.stack(hidden_states, dim=0).mean(dim=0).mean(dim=1)
    # The tensor must go through a %s placeholder: passing it as an extra
    # argument without one makes the logging call fail while formatting.
    logging.info("Processed:\n%s", embedding)

    # create json
    logging.info("Creating json ...")
    # NOTE(review): no JSON is built yet and a torch.Tensor is not JSON
    # serialisable; if the caller needs JSON, something like embedding.tolist()
    # is required - confirm the expected response format before changing the return type.
    return embedding

Overwriting score.py


# Create environment file

In [5]:
# read requirements
with open('requirements.txt', 'r') as f:
    # Strip the trailing newline/whitespace from every line and drop blank
    # lines and '#' comment lines, so that only requirement specifiers are
    # handed to add_pip_package below.
    reqs = [
        line.strip()
        for line in f
        if line.strip() and not line.strip().startswith('#')
    ]

# create environment
myenv = Environment(name="venv")
conda_dep = conda_dependencies.CondaDependencies()

# Register every requirement as a pip package of the conda specification.
for req in reqs:
    conda_dep.add_pip_package(req)

myenv.python.conda_dependencies = conda_dep

In [6]:
from azureml.core import Environment

# Build the 'albert-env' environment straight from requirements.txt and
# write its definition into the local "albert-env" directory, replacing any
# previous copy.
env = Environment.from_pip_requirements(name='albert-env', file_path='requirements.txt')
env.save_to_directory(path="albert-env", overwrite=True)

# Register Environment to ML Workspace

In [9]:
# Register the environment in the workspace `ws`; the call's return value is
# the cell output.
env.register(workspace=ws)

{
    "assetId": "azureml://locations/switzerlandnorth/workspaces/d0fc41ec-9cbe-4bde-bd01-d7b30df6209b/environments/albert-env/versions/5",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221101.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "albert-env",
    