# Create and test Albert Model, Environment and Endpoint

## Create Albert Model

In [52]:
# imports for albert model
import torch
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer

In [53]:
model = AlbertModel.from_pretrained('albert-base-v2', torchscript=True)
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model.eval()

# create example inputs
sentence = "Classic action movie with Tom Cruise!"
tokens = tokenizer.tokenize(sentence)
token_tensor = torch.tensor(tokenizer.encode(tokens)).unsqueeze(0)
segments_tensor = torch.tensor([1] * token_tensor.shape[1]).unsqueeze(0)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.bias', 'predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [54]:
traced_model = torch.jit.trace(model, (token_tensor, segments_tensor))
torch.jit.save(traced_model, "traced-albert.pt")

## Create Azure ML Model

In [58]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential
import json
from azure.ai.ml import MLClient

In [59]:
with open("../secrets.json", "r") as f:
    secrets = json.load(f)

# enter details of your AzureML workspace
subscription_id = secrets['subscription_id']
resource_group = secrets['resource_group']
workspace = secrets['workspace_name']

# get a handle to the workspace
ml_client = MLClient(
    DefaultAzureCredential(), subscription_id, resource_group, workspace
)

In [60]:
file_model = Model(
    path="traced-albert.pt",
    type=AssetTypes.CUSTOM_MODEL,
    name="traced-albert",
    description="Model created from local file.",
)
ml_client.models.create_or_update(file_model)

DefaultAzureCredential failed to retrieve a token from the included credentials.
Attempted credentials:
	EnvironmentCredential: EnvironmentCredential authentication unavailable. Environment variables are not fully configured.
Visit https://aka.ms/azsdk/python/identity/environmentcredential/troubleshoot to troubleshoot.this issue.
	ManagedIdentityCredential: ManagedIdentityCredential authentication unavailable, no response from the IMDS endpoint.
	SharedTokenCacheCredential: Azure Active Directory error '(invalid_grant) AADSTS70043: The refresh token has expired or is invalid due to sign-in frequency checks by conditional access. The token was issued on 2022-12-26T10:19:15.8483413Z and the maximum allowed lifetime for this request is 36000.
Trace ID: ab4c1d28-f09f-4db5-93f9-939119b11d00
Correlation ID: c5de35f1-b301-4083-8d76-f232f179bdd8
Timestamp: 2022-12-27 10:30:29Z'
Content: {"error":"invalid_grant","error_description":"AADSTS70043: The refresh token has expired or is invalid due t

ClientAuthenticationError: DefaultAzureCredential failed to retrieve a token from the included credentials.
Attempted credentials:
	EnvironmentCredential: EnvironmentCredential authentication unavailable. Environment variables are not fully configured.
Visit https://aka.ms/azsdk/python/identity/environmentcredential/troubleshoot to troubleshoot.this issue.
	ManagedIdentityCredential: ManagedIdentityCredential authentication unavailable, no response from the IMDS endpoint.
	SharedTokenCacheCredential: Azure Active Directory error '(invalid_grant) AADSTS70043: The refresh token has expired or is invalid due to sign-in frequency checks by conditional access. The token was issued on 2022-12-26T10:19:15.8483413Z and the maximum allowed lifetime for this request is 36000.
Trace ID: ab4c1d28-f09f-4db5-93f9-939119b11d00
Correlation ID: c5de35f1-b301-4083-8d76-f232f179bdd8
Timestamp: 2022-12-27 10:30:29Z'
Content: {"error":"invalid_grant","error_description":"AADSTS70043: The refresh token has expired or is invalid due to sign-in frequency checks by conditional access. The token was issued on 2022-12-26T10:19:15.8483413Z and the maximum allowed lifetime for this request is 36000.\r\nTrace ID: ab4c1d28-f09f-4db5-93f9-939119b11d00\r\nCorrelation ID: c5de35f1-b301-4083-8d76-f232f179bdd8\r\nTimestamp: 2022-12-27 10:30:29Z","error_codes":[70043],"timestamp":"2022-12-27 10:30:29Z","trace_id":"ab4c1d28-f09f-4db5-93f9-939119b11d00","correlation_id":"c5de35f1-b301-4083-8d76-f232f179bdd8","suberror":"token_expired"}
To mitigate this issue, please refer to the troubleshooting guidelines here at https://aka.ms/azsdk/python/identity/defaultazurecredential/troubleshoot.

## Create scoring file

In [8]:
%%writefile score.py
from transformers import AlbertConfig, AlbertModel, AlbertTokenizer
from transformers.onnx import FeaturesManager
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import logging
import json
import onnxruntime

def init():
    global tokenizer, albert, session
    # load ALBERT model
    albert = AlbertModel.from_pretrained('albert-base-v2', output_hidden_states=True).to(device)
    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    albert.eval()

    # create onnx runtime and load onnx model
    # session = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])

def preprocess(text:str):
    '''
    This function preprocesses text data from the speech2text (https://azure.microsoft.com/en-us/products/cognitive-services/speech-to-text/#features) and uses the tokenizer stored in the global variable.

    Params
    ------
        text:str - Sentencte from speech2text

    Returns
    -------
        token_tensor:torch.Tensor - Tensor with ids
        segments_tensor:torch.Tensor - Tensor with segments
    '''
    tokens = tokenizer.tokenize(text)
    token_tensor = torch.tensor(tokenizer.encode(tokens)).unsqueeze(0)
    segments_tensor = torch.tensor([1] * token_tensor.shape[1]).unsqueeze(0)
    return token_tensor, segments_tensor

def run(input:str) -> torch.Tensor:
    '''
    Transforms tokens_tensor and segments_tensor into Embedding with the Albert Model.

    Params
    ------
        token_tensor:torch.Tensor - Tensor with ids
        segments_tensor:torch.Tensor - Tensor with segments

    Returns
    -------
        embedding_vector:torch.Tensor - Vector with Embeddings from Albert
    '''
    # read and log input
    logging.info("Request received")
    input = json.loads(input)
    logging.info(input)

    # preprocess
    logging.info("Preprocessing ...")
    token_tensor, segments_tensor = preprocess(text=input['text'])

    # process
    logging.info("Processing ...")
    albert.eval()
    with torch.no_grad():
        output = albert(token_tensor, segments_tensor)
    hidden_states = output[2][1:]
    embedding = torch.stack(hidden_states, dim=0).mean(dim=0).mean(dim=1)
    logging.info("Processed:\n", embedding)

    # create json
    logging.info("Creating json ...")
    


    return [1,2,3]

Overwriting score.py


## Connect to Azure Machine Learning workspace

In [2]:
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)

## Create local endpoint

In [4]:
# Creating a local endpoint
import datetime

local_endpoint_name = "local-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
  name=local_endpoint_name, description="this is a sample local endpoint"
)

In [44]:
env = Environment(
    conda_file="environment.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)

blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=local_endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(scoring_script="score.py", ),
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

In [45]:
ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

Updating local endpoint (local-12271054622660) .Done (0m 5s)


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': None, 'scoring_uri': None, 'openapi_uri': None, 'name': 'local-12271054622660', 'description': 'this is a sample local endpoint', 'tags': {}, 'properties': {}, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/Users/ronnyschneeberger/.azureml/inferencing/local-12271054622660'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fa0c496ab90>, 'auth_mode': 'key', 'location': None, 'identity': None, 'traffic': {}, 'mirror_traffic': {}, 'kind': None})

In [46]:
ml_client.online_deployments.begin_create_or_update(
    deployment=blue_deployment, local=True
)

Exception: ['Non-string passed to RegistryStr for model', 'Non-string passed to ArmStr for model', "Value <bound method Module.type of AlbertModel(\n  (embeddings): AlbertEmbeddings(\n    (word_embeddings): Embedding(30000, 128, padding_idx=0)\n    (position_embeddings): Embedding(512, 128)\n    (token_type_embeddings): Embedding(2, 128)\n    (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)\n    (dropout): Dropout(p=0, inplace=False)\n  )\n  (encoder): AlbertTransformer(\n    (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)\n    (albert_layer_groups): ModuleList(\n      (0): AlbertLayerGroup(\n        (albert_layers): ModuleList(\n          (0): AlbertLayer(\n            (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (attention): AlbertAttention(\n              (query): Linear(in_features=768, out_features=768, bias=True)\n              (key): Linear(in_features=768, out_features=768, bias=True)\n              (value): Linear(in_features=768, out_features=768, bias=True)\n              (attention_dropout): Dropout(p=0, inplace=False)\n              (output_dropout): Dropout(p=0, inplace=False)\n              (dense): Linear(in_features=768, out_features=768, bias=True)\n              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            )\n            (ffn): Linear(in_features=768, out_features=3072, bias=True)\n            (ffn_output): Linear(in_features=3072, out_features=768, bias=True)\n            (activation): NewGELUActivation()\n            (dropout): Dropout(p=0, inplace=False)\n          )\n        )\n      )\n    )\n  )\n  (pooler): Linear(in_features=768, out_features=768, bias=True)\n  (pooler_activation): Tanh()\n)> passed is not in set ['custom_model', 'mlflow_model', 'triton_model']"]