In [1]:
import json

# import azure ml libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)
from azure.identity import DefaultAzureCredential

# open secrets
with open('secrets.json', 'r') as f:
    secrets = json.load(f)

# set variables
subscription_id = secrets['subscription_id']
resource_group = secrets['resource_group']
workspace = secrets['workspace_name']

# get a handle to the workspace
ml_client = MLClient(
    DefaultAzureCredential(), subscription_id, resource_group, workspace
)
print(f"Connected to {workspace}!")

Connected to fhnw-cic-rsfm!


# Create and store albert model

In [2]:
# imports for albert model
import torch
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = AlbertModel.from_pretrained('albert-base-v2', torchscript=True)
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model.eval()

# create example inputs
sentence = "Classic action movie with Tom Cruise!"
tokens = tokenizer.tokenize(sentence)
token_tensor = torch.tensor(tokenizer.encode(tokens)).unsqueeze(0)
segments_tensor = torch.tensor([1] * token_tensor.shape[1]).unsqueeze(0)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
traced_model = torch.jit.trace(model, (token_tensor, segments_tensor))
torch.jit.save(traced_model, "traced-albert.pt")

# Create score.py File

In [10]:
%%writefile score.py
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer
from transformers.onnx import FeaturesManager
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import logging
import json
import onnxruntime

def init():
    global tokenizer, albert, session
    # load ALBERT model
    albert = AlbertModel.from_pretrained('albert-base-v2', output_hidden_states=True).to(device)
    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    albert.eval()

    # create onnx runtime and load onnx model
    # session = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])

def preprocess(text:str):
    '''
    This function preprocesses text data from the speech2text (https://azure.microsoft.com/en-us/products/cognitive-services/speech-to-text/#features) and uses the tokenizer stored in the global variable.

    Params
    ------
        text:str - Sentencte from speech2text

    Returns
    -------
        token_tensor:torch.Tensor - Tensor with ids
        segments_tensor:torch.Tensor - Tensor with segments
    '''
    tokens = tokenizer.tokenize(text)
    token_tensor = torch.tensor(tokenizer.encode(tokens)).unsqueeze(0)
    segments_tensor = torch.tensor([1] * token_tensor.shape[1]).unsqueeze(0)
    return token_tensor, segments_tensor

def run(input:str) -> torch.Tensor:
    '''
    Transforms tokens_tensor and segments_tensor into Embedding with the Albert Model.

    Params
    ------
        token_tensor:torch.Tensor - Tensor with ids
        segments_tensor:torch.Tensor - Tensor with segments

    Returns
    -------
        embedding_vector:torch.Tensor - Vector with Embeddings from Albert
    '''
    # read and log input
    logging.info("Request received")
    input = json.loads(input)
    logging.info(input)

    # preprocess
    logging.info("Preprocessing ...")
    token_tensor, segments_tensor = preprocess(text=input['text'])

    # process
    logging.info("Processing ...")
    albert.eval()
    with torch.no_grad():
        output = albert(token_tensor, segments_tensor)
    hidden_states = output[2][1:]
    embedding = torch.stack(hidden_states, dim=0).mean(dim=0).mean(dim=1)
    logging.info("Processed:\n", embedding)

    # create json
    logging.info("Creating json ...")
    


    return [1,2,3]

Overwriting score.py


# Create local endpoint for testing

In [11]:
# Creating a local endpoint
import datetime

local_endpoint_name = "local-" + datetime.datetime.now().strftime("%m%d%H%M%f") + "%s"

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
  name=local_endpoint_name, description="this is a sample local endpoint"
)

# Load model and define Environment

In [12]:
model = Model(path="traced-albert.pt")
env = Environment(
    conda_file="albert-env/conda_dependencies.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)

blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=local_endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(
        scoring_script="score.py"
    ),
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

In [13]:
ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

ml_client.online_deployments.begin_create_or_update(
    deployment=blue_deployment, local=True, vscode_debug=False
)

Creating local endpoint (local-12270956488903%s) .Done (0m 5s)
Creating local deployment (local-12270956488903%s / blue) .Done (0m 5s)


RequiredLocalArtifactsNotFoundError: ("Local endpoints only support local artifacts. '%s' did not contain required local artifact '%s' of type '%s'.", 'Local deployment (local-12270956488903%s / blue)', 'code_configuration.code', "<class 'str'>")