### Install azureml library

In [None]:
! pip install azureml-core

### Connect to the Azure Machine Learning workspace (from config.json)

In [None]:
from azureml.core import Workspace

# Base folder (note the trailing slash); other paths in this notebook are built by
# string concatenation onto it.
source_dir = "./source_dir/"

# Build the workspace handle from the config.json stored under source_dir.
workspace_config = source_dir + "config.json"
ws = Workspace.from_config(path=workspace_config)
print(ws)

### Re-training LSTM model from Kaggle with same seed

In [21]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import os
import io
import json

import tensorflow as tf

from sklearn.model_selection import train_test_split

In [22]:
# One shared seed for every random number generator used below.
SEED = 3

# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment
# only influences processes spawned from this kernel, not the kernel itself.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [23]:
# train.csv is later split into training and evaluation rows;
# test.csv is only used for prediction.
data_dir = source_dir + "data/"
train_eval_data = pd.read_csv(data_dir + "train.csv")
pred_data = pd.read_csv(data_dir + "test.csv")

In [24]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
train_data, eval_data = train_test_split(
    train_eval_data,
    test_size=0.2,
    random_state=42,
)

In [25]:
# 'excerpt' column of each split as a NumPy array.
train_text = train_data['excerpt'].values
eval_text = eval_data['excerpt'].values
pred_text = pred_data['excerpt'].values

# 'target' column, read for the train and eval splits only.
train_targets = train_data['target'].values
eval_targets = eval_data['target'].values

In [26]:
# Use the public tf.keras API; `tensorflow.python.*` is a private module path.
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()

# The vocabulary and the padding length are both derived from all three splits
# (train, eval and prediction text together).
total_text = np.concatenate((train_text, eval_text, pred_text))
All_text = np.array([text_to_word_sequence(s) for s in total_text], dtype = "object")

tokenizer.fit_on_texts(All_text)

# Length (in words) of the longest excerpt; every sequence is padded to this length.
max_length = max(len(s) for s in All_text)

# +1 because word indices start at 1; index 0 is left for padding.
vocab_size = len(tokenizer.word_index) + 1

# Map each excerpt to its list of word indices.
train_text_tokens = tokenizer.texts_to_sequences(train_text)
eval_text_tokens = tokenizer.texts_to_sequences(eval_text)
pred_text_tokens = tokenizer.texts_to_sequences(pred_text)

# Left-pad ("pre") with zeros so all sequences share the same length.
pad_train_tokens = pad_sequences(train_text_tokens, maxlen = max_length, padding = "pre")
pad_eval_tokens = pad_sequences(eval_text_tokens, maxlen = max_length, padding = "pre")
pad_pred_tokens = pad_sequences(pred_text_tokens, maxlen = max_length, padding = "pre")

In [27]:
# Import everything from tf.keras so the layers, loss and optimizer all come from
# the same Keras implementation (the optimizer below is already tf.keras), and to
# avoid the private `keras.layers.embeddings` module path.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Embedding
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.losses import MeanSquaredError

EMBEDDING_SIZE = 90

# Embedding -> bidirectional LSTM -> single linear output (regression).
model = Sequential()
model.add(Embedding(vocab_size, EMBEDDING_SIZE, input_length = max_length, mask_zero = True,
                    embeddings_initializer = GlorotNormal()))
model.add(Bidirectional(LSTM(units = 128, dropout = 0.8)))
model.add(Dense(activation="linear", units = 1))

model.compile(loss = MeanSquaredError(), optimizer = tf.keras.optimizers.Adam(
              learning_rate=2e-4), metrics = ['mse'])

In [None]:
# Train for 10 epochs in batches of 100, validating on the held-out split.
model.fit(
    pad_train_tokens,
    train_targets,
    validation_data=(pad_eval_tokens, eval_targets),
    batch_size=100,
    epochs=10,
)

### Save pre-trained model

In [None]:
# Write the trained model to disk under source_dir.
export_dir = source_dir + 'models/Commonlit-Bi-LSTM'
model.save(export_dir)



INFO:tensorflow:Assets written to: ./outputs/model2/assets


INFO:tensorflow:Assets written to: ./outputs/model2/assets


### Save fitted Tokenizer for this model as tokenizer.json

In [29]:
# tokenizer.to_json() already returns a JSON string; it is JSON-encoded a second
# time before being written, so the file holds a JSON string literal.
# NOTE(review): presumably the scoring script json.load()s the file and passes the
# resulting string to tokenizer_from_json — confirm before changing this format.
tokenizer_path = source_dir + 'models/Commonlit-Bi-LSTM/assets/tokenizer.json'
encoded_tokenizer = json.dumps(tokenizer.to_json(), ensure_ascii=False)
with io.open(tokenizer_path, 'w', encoding='utf-8') as out_file:
    out_file.write(encoded_tokenizer)

### Save max_length attribute which determines the necessary padding

In [None]:
# Store the padding length as plain text next to the tokenizer.
max_length_path = source_dir + 'models/Commonlit-Bi-LSTM/assets/max_length.txt'
with open(max_length_path, 'w') as out_file:
    out_file.write(str(max_length))

### Test loading of pre-trained model

In [41]:
# Reload the model from the directory it was saved to and print its layer summary.
saved_model_dir = source_dir + 'models/Commonlit-Bi-LSTM'
model_load = tf.keras.models.load_model(saved_model_dir)
model_load.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 215, 90)           2581650   
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               224256    
_________________________________________________________________
dense (Dense)                (None, 1)                 257       
Total params: 2,806,163
Trainable params: 2,806,163
Non-trainable params: 0
_________________________________________________________________


### Make predictions and store in np.ndarray

In [42]:
# Score the padded prediction excerpts with the reloaded model.
test_preds = model_load.predict(pad_pred_tokens)
test_preds

array([[-1.2834516 ],
       [-0.22421896],
       [-0.595436  ],
       [-2.2895405 ],
       [-1.713674  ],
       [-0.2228559 ],
       [-0.02093622]], dtype=float32)

### Convert test_preds to list of labels of type 'float'

In [44]:
# Collapse the prediction array to one dimension and convert it to a plain Python list.
test_preds_list = test_preds.ravel().tolist()
test_preds_list

[-1.2834515571594238,
 -0.2242189645767212,
 -0.5954359769821167,
 -2.2895405292510986,
 -1.7136739492416382,
 -0.22285589575767517,
 -0.02093621902167797]

### Register saved model on Azure

In [None]:
from azureml.core.model import Model

# Register the saved model directory in the workspace.
# NOTE: this rebinds `model`, which up to this point referred to the Keras network;
# from here on `model` is the registered Azure ML model.
model = Model.register(
    workspace=ws,
    model_name="Commonlit-Bi-LSTM",
    model_path=source_dir + "models/Commonlit-Bi-LSTM",
    model_framework="TensorFlow",
)

Registering model Commonlit-BiLSTM-Seed3


### Create environment with necessary dependencies

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig

env = Environment(name = "Commonlit-Bi-LSTM")
conda_dep = CondaDependencies()
conda_dep.add_conda_package("numpy")

# pip dependencies of the scoring environment, added in the order listed.
pip_packages = (
    "keras",
    "pandas",
    "tensorflow==2.6.0",
    # The PyPI distribution is named "scikit-learn"; the old "sklearn" alias is
    # deprecated and its installation fails.
    "scikit-learn",
    "azureml-defaults",
    "azureml",
    "azureml-contrib-functions",
)
for pip_package in pip_packages:
    conda_dep.add_pip_package(pip_package)

env.python.conda_dependencies = conda_dep

### Create inference configuration using scoring function

In [None]:
# Pair the environment with the scoring script that lives in ./source_dir.
inference_config = InferenceConfig(
    entry_script="./Bi-LSTM_score_TRAIN.py",
    source_directory="./source_dir",
    environment=env,
)

### Create Azure Container Instance and deploy model to container

In [None]:
from azureml.core.webservice import AciWebservice
# Container size for the Azure Container Instance: 2 CPU cores, 2 GB of memory.
aci_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=2)

# Deploy the registered model; overwrite=True replaces an existing service of the same name.
service = Model.deploy(
    workspace=ws,
    name="commonlit-bi-lstm",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-08-27 02:00:02+00:00 Creating Container Registry if not exists.
2021-08-27 02:00:02+00:00 Registering the environment.
2021-08-27 02:00:05+00:00 Use the existing image.
2021-08-27 02:00:05+00:00 Generating deployment configuration.
2021-08-27 02:00:06+00:00 Submitting deployment to compute..
2021-08-27 02:00:10+00:00 Checking the status of deployment commonlit-bilstm-seed3..
2021-08-27 02:04:53+00:00 Checking the status of inference endpoint commonlit-bilstm-seed3.
Succeeded
ACI service creation operation finished, operation "Succeeded"


### Endpoint Consumption - Test 1

In [None]:
import requests
import json

# POST a single excerpt to the scoring endpoint of the service deployed above.
# (The earlier placeholder `requests.get("")` was removed: an empty URL raises
# MissingSchema before the POST is ever sent.)
uri = service.scoring_uri
headers = {"Content-Type": "application/json"}
data = {
    'data': ['Cell division is the process by which a parent cell divides into two or more daughter cells. Cell division usually occurs as part of a larger cell cycle.\n In eukaryotes, there are two distinct types of cell division: a vegetative division, whereby each daughter cell is genetically identical to the parent cell (mitosis), and a reproductive cell division, whereby the number of chromosomes in the daughter cells is reduced by half, to produce haploid gametes (meiosis). \nMeiosis results in four haploid daughter cells by undergoing one round of DNA replication followed by two divisions: homologous chromosomes are separated in the first division, and sister chromatids are separated in the second division.\nBoth of these cell division cycles are used in sexually reproducing organisms at some point in their life cycle, and both are believed to be present in the last eukaryotic common ancestor. Prokaryotes also undergo a vegetative cell division known as binary fission, where their genetic material is segregated equally into two daughter cells. All cell divisions, regardless of organism, are preceded by a single round of DNA replication.']
}
# Serialise into a separate name so `data` keeps referring to the dict.
request_body = json.dumps(data)
# A timeout keeps the cell from hanging forever on an unresponsive endpoint.
response = requests.post(uri, data = request_body, headers = headers, timeout = 60)
# Surface HTTP errors explicitly instead of failing later while decoding JSON.
response.raise_for_status()
print(response.json())

[-2.2895405292510986]


### Endpoint Consumption - Test 2

In [None]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server-certificate verification for HTTPS clients.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable
    is unset or empty, and the ``ssl`` module provides
    ``_create_unverified_context``, that factory is installed as
    ``ssl._create_default_https_context``. Otherwise nothing is changed.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
data = {
    'data': ['Dotty continued to go to Mrs. Gray\'s every night with the milk. Sometimes Katie went with her, and then they always paused a while under the acorn-tree and played "King and Queen." Dotty said she wished they could ever remember to bring their nipperkins, for in that case the milk would taste a great deal more like nectar. The "nipperkins" were a pair of handled cups which the children supposed to be silver, and which they always used at table.\nDotty knew she was doing wrong every time she played "King and Queen." She knew the milk was not hers, but Mrs. Gray\'s; still she said to herself, "Ruthie needn\'t give so much measure, all pressed down and run over. If Queenie and I should drink a great deal more, there would always be a quart left. Yes, I know there would."\nMrs. Gray never said anything about the milk; she merely poured it out in a pan, and gave back the pail to Dotty, asking her at the same time as many questions as the child would stay to hear.',
             'Cell division is the process by which a parent cell divides into two or more daughter cells. Cell division usually occurs as part of a larger cell cycle.\n In eukaryotes, there are two distinct types of cell division: a vegetative division, whereby each daughter cell is genetically identical to the parent cell (mitosis), and a reproductive cell division, whereby the number of chromosomes in the daughter cells is reduced by half, to produce haploid gametes (meiosis). \nMeiosis results in four haploid daughter cells by undergoing one round of DNA replication followed by two divisions: homologous chromosomes are separated in the first division, and sister chromatids are separated in the second division.\nBoth of these cell division cycles are used in sexually reproducing organisms at some point in their life cycle, and both are believed to be present in the last eukaryotic common ancestor. Prokaryotes also undergo a vegetative cell division known as binary fission, where their genetic material is segregated equally into two daughter cells. All cell divisions, regardless of organism, are preceded by a single round of DNA replication.'],
}

body = str.encode(json.dumps(data))

# Use the endpoint of the service deployed above; an empty URL makes
# urllib.request.Request raise ValueError before anything is sent.
url = service.scoring_uri
api_key = '' # Set this only if key authentication is enabled on the web service
headers = {'Content-Type':'application/json'}
if api_key:
    # Send the Authorization header only when there is a key to send.
    headers['Authorization'] = 'Bearer ' + api_key

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    error_body = error.read().decode("utf8", 'ignore')
    try:
        print(json.loads(error_body))
    except json.JSONDecodeError:
        # The error payload is not always JSON; show it verbatim in that case.
        print(error_body)


b'[-0.22421890497207642, -2.2895405292510986]'
