In [1]:
%autosave 300
%autoreload 2
%reload_ext autoreload
%config Completer.use_jedi = False

Autosaving every 300 seconds


In [2]:
import os

# Switch the working directory to the project root. The location can be overridden
# with the BERT_CLASSIFIER_PROJECT_ROOT environment variable so the notebook is not
# tied to a single machine; the original absolute path remains the default.
# NOTE(review): presumably needed so the `src` package imports resolve - confirm.
PROJECT_ROOT = os.environ.get(
    "BERT_CLASSIFIER_PROJECT_ROOT",
    r"/home/azureuser/cloudfiles/code/Users/soutrik.chowdhury/abi_genai_bert_classifier",
)
os.chdir(PROJECT_ROOT)

Steps:
* Download the model from Azure Blob Storage
* Download the tokenizer from Hugging Face
* Pass the incoming data through the data loader
* Set up the model based on requirements
* Pass the data through the inference pipeline

In [3]:
from src.settings import (
    DataSettings,
    env_settings,
    ModelSettings,
    TokenizerSettings,
    AzureblobSettings,
    LoggerSettings,
)
from src.pretrained_model import tokenizer, pretrained_model
from src.dataloader import create_data_loader
from src.model import BertSentimentClassifier, BertSentimentClassifierAdvanced
from src.utils.azure_connector import AzureBlobConnection
from src.utils.logger import setup_logging
from src.utils.model_helpers import get_device
import os, glob
import torch
import numpy as np




In [4]:
# Collect the connection arguments from env_settings first, then open the
# blob connection with them.
blob_credentials = {
    "storage_account": env_settings.STORAGE_ACCOUNT,
    "client_id": env_settings.CLIENT_ID,
    "tenant_id": env_settings.TENANT_ID,
    "client_secret": env_settings.SECRET_ID,
}
az_connection = AzureBlobConnection(**blob_credentials)

In [5]:
# Download from the configured container/blob path into the local input path,
# relative to the current working directory.
# NOTE(review): an empty `file_names` list presumably means "no file filter" - confirm
# against AzureBlobConnection.azblob_download.
blob_settings = AzureblobSettings()
az_connection.azblob_download(
    container_name=env_settings.CONTAINER_NAME,
    root_path=os.getcwd(),
    local_output_path=blob_settings.input_path,
    blob_path=blob_settings.blob_path,
    file_names=[],
)

In [6]:
def saved_model_path(model_path=None):
    """Map each model variant to its checkpoint file inside ``model_path``.

    Args:
        model_path: Directory to search. When omitted, ``AzureblobSettings().input_path``
            is used; it is resolved at call time rather than once at definition time.

    Returns:
        dict: Contains the key ``"advanced"`` and/or ``"base"`` mapped to the path of a
        matching file. A key is absent when no file for that variant is found.
    """
    if model_path is None:
        model_path = AzureblobSettings().input_path

    model_path_dict = {}

    # The pattern matches every name ending in "pt" (kept as in the original).
    # Sorting makes the outcome deterministic when several files match a variant
    # (the alphabetically last one wins).
    for path in sorted(glob.glob(os.path.join(model_path, "*pt"))):
        # Inspect only the file name: a directory called e.g. "advanced_models"
        # must not cause every checkpoint in it to be classified as "advanced".
        file_name = os.path.basename(path)
        if "advanced" in file_name:
            model_path_dict["advanced"] = path
        elif "base" in file_name:
            model_path_dict["base"] = path

    return model_path_dict

In [46]:
class ModelInference:
    """Binary-classification inference around a BERT-based classifier.

    The constructor builds the classifier variant selected by ``model_type``, loads
    its weights from the checkpoint located by ``saved_model_path()`` and moves it to
    the device returned by ``get_device()``. ``predict`` then scores a single query.
    """

    # Values of ``model_type`` that the constructor knows how to build.
    _SUPPORTED_MODEL_TYPES = ("base", "advanced")

    def __init__(self, tokenizer, model_type, pretrained_model, max_len, prob_thresh):
        """Build the classifier and load its saved weights.

        Args:
            tokenizer: Tokenizer forwarded to ``create_data_loader`` in ``predict``.
            model_type: ``"base"`` or ``"advanced"``; selects the classifier class and
                the checkpoint to load.
            pretrained_model: Backbone passed to the classifier as ``bert``.
            max_len: Maximum sequence length forwarded to ``create_data_loader``.
            prob_thresh: Probabilities strictly above this value are labelled 1.0,
                all others 0.0.

        Raises:
            ValueError: If ``model_type`` is not one of the supported values.
            KeyError: If no checkpoint for ``model_type`` was found on disk.
        """
        # Fail fast: previously an unknown type left ``bert_classifier`` undefined and
        # the error only surfaced later as an AttributeError inside ``predict``.
        if model_type not in self._SUPPORTED_MODEL_TYPES:
            raise ValueError(
                f"Unsupported model_type {model_type!r}; "
                f"expected one of {self._SUPPORTED_MODEL_TYPES}"
            )

        self.device = get_device()
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.prob_thresh = prob_thresh
        model_path_dict = saved_model_path()

        # Model declaration; both variants share the same constructor arguments.
        classifier_cls = (
            BertSentimentClassifier
            if model_type == "base"
            else BertSentimentClassifierAdvanced
        )
        model_settings = ModelSettings()
        self.bert_classifier = classifier_cls(
            bert=pretrained_model,
            n_classes=model_settings.num_classes,
            dropout=model_settings.drop_out,
        )

        # NOTE(security): torch.load unpickles the file. Only load checkpoints from a
        # trusted source, or consider ``weights_only=True`` where the installed torch
        # version supports it.
        state_dict = torch.load(f=model_path_dict[model_type], map_location=self.device)
        self.bert_classifier.load_state_dict(state_dict)
        self.bert_classifier.to(self.device)

    def _get_predictions(self, data_loader, model):
        """Score every batch of ``data_loader`` with ``model``.

        Each batch must provide ``"review_text"``, ``"input_ids"`` and
        ``"attention_mask"``. The model output is flattened, passed through a sigmoid
        and thresholded with ``self.prob_thresh``.

        Returns:
            tuple: ``(review_texts, predictions, prediction_probs)`` where
            ``review_texts`` is a list, ``predictions`` is a NumPy array of 0.0/1.0
            labels and ``prediction_probs`` is a NumPy array of probabilities rounded
            to 3 decimals. Both arrays are empty when the loader yields no batch.
        """
        review_texts = []
        pred_batches = []
        prob_batches = []

        model.eval()
        with torch.no_grad():
            for batch in data_loader:
                input_ids = batch["input_ids"].to(self.device)
                attention_mask = batch["attention_mask"].to(self.device)

                logits = model(
                    input_ids=input_ids, attention_mask=attention_mask
                ).flatten()
                probs = torch.sigmoid(logits)
                # float32 1.0 / 0.0 labels, same values as the previous torch.where form.
                preds = (probs > self.prob_thresh).float()

                review_texts.extend(batch["review_text"])
                # Keep whole batches and concatenate once instead of splitting every
                # batch into 0-d tensors and stacking them again.
                pred_batches.append(preds)
                prob_batches.append(probs)

        if not prob_batches:
            # torch.cat / torch.stack reject an empty list; return empty results instead.
            return review_texts, np.empty(0, dtype=np.float32), np.empty(0, dtype=np.float32)

        predictions = torch.cat(pred_batches).cpu().numpy()
        prediction_probs = np.round(torch.cat(prob_batches).cpu().numpy(), 3)

        return review_texts, predictions, prediction_probs

    def predict(self, user_query):
        """Classify a single query string.

        Args:
            user_query: Text to score.

        Returns:
            dict: ``{text: (label, probability)}`` for the query, using the texts
            yielded by the data loader as keys.
        """
        query_loader = create_data_loader(
            question=[user_query],
            targets=None,
            max_len=self.max_len,
            batch_size=1,
            shuffle=False,
            # Use the tokenizer given to the constructor, not a notebook-level global.
            tokenizer=self.tokenizer,
        )
        review_texts, predictions, prediction_probs = self._get_predictions(
            data_loader=query_loader,
            model=self.bert_classifier,
        )

        return dict(zip(review_texts, zip(list(predictions), list(prediction_probs))))

In [47]:
# Read the settings objects once, then wire them into the inference wrapper.
model_settings = ModelSettings()
tokenizer_settings = TokenizerSettings()

infer_model = ModelInference(
    model_type=model_settings.model_type,
    prob_thresh=model_settings.binary_thresh,
    max_len=tokenizer_settings.max_length,
    tokenizer=tokenizer,
    pretrained_model=pretrained_model,
)

In [103]:
# Score one example query with the loaded classifier.
sample_query = "What is the notion of people for the new iPhone 13?"
model_op = infer_model.predict(sample_query)

In [104]:
# Display the {text: (label, probability)} mapping returned by predict().
model_op

{'What is the notion of people for the new iPhone 13?': (1.0, 0.937)}

Autosaving every 300 seconds
