In [16]:
import torch
from transformers import BertModel, BertTokenizerFast
import torch.nn as nn
import pickle

#------------------------Setup--------------------------#
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

class CustomBERTModel(nn.Module):
    def __init__(self):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained('prajjwal1/bert-tiny')
        self.classifier_stock = nn.Linear(self.bert.config.hidden_size, 6) # stock classification head
        self.classifier_sentiment = nn.Linear(self.bert.config.hidden_size, 3) # sentiment classification head
        self.regression = nn.Linear(self.bert.config.hidden_size, 1) # regression head
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )
        pooled_output = outputs[1]
        stock_labels = self.classifier_stock(pooled_output)
        sentiment_labels = self.classifier_sentiment(pooled_output)
        regression_values = self.regression(pooled_output)
        
        return stock_labels, sentiment_labels, regression_values

deployed_model = CustomBERTModel()
deployed_model.load_state_dict(torch.load("082_081.pt"))
deployed_model = deployed_model.to("cpu")

def prepare_input(text, tokenizer):
    """
    Takes a string, tokenizes, and prepares it into expected format (list of token ids, attention masks, etc.) ready for model input

    Arguments:
    text -- string, Raw text string
    tokenizer -- transformers.Tokenizer, Initialized tokenizer

    Returns:
    input_dict -- dictionary, Contains required inputs for model
    """

    # Tokenize the text
    encoding = tokenizer.encode_plus(
        text,
        truncation=True, 
        padding=True,
        return_tensors='pt'  # Return PyTorch tensors
    )

    # Get the input ids and attention masks from tokenizer and convert to tensors
    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']

    # Put all tensor entries into a single dictionary
    input_dict = {
        'input_ids': input_ids,
        'token_type_ids': torch.zeros(input_ids.shape, dtype=torch.long),
        'attention_mask': attention_mask,
    }
    
    return input_dict

def predict_loaded_from_loaded_model(model, res, scaler):
    model.eval()
    with torch.no_grad():
        inputs = {key: val.to("cpu") for key, val in res.items()}  

        stock_labels_pred, sentiment_labels_pred, regression_values_pred = model(inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"])
        
        stock_label = torch.argmax(stock_labels_pred, dim=1).item()
        sentiment_label = torch.argmax(sentiment_labels_pred, dim=1).item()
        regression_value = scaler.inverse_transform(regression_values_pred.cpu().numpy()) # inverse transform of scaling

    return stock_label, sentiment_label, regression_value[0][0]  # return the single value 

deployed_tokenizer = BertTokenizerFast.from_pretrained('prajjwal1/bert-tiny')


#-------------------------------------------------------#



In [17]:
res = prepare_input("@TechTrends: Cisco faces challenges in its supply chain, affecting the stock. A tough challenge to overcome. #SupplyChainIssues #TechStocks", deployed_tokenizer)
res

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'input_ids': tensor([[  101,  1030,  6627,  7913, 18376,  1024, 26408,  5344,  7860,  1999,
           2049,  4425,  4677,  1010, 12473,  1996,  4518,  1012,  1037,  7823,
           4119,  2000,  9462,  1012,  1001,  4425, 24925,  8977,  6342,  2229,
           1001,  6627, 14758,  2015,   102]]),
 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [18]:
predict_loaded_from_loaded_model(deployed_model, res, scaler)

(5, 2, -0.032974098)

In [11]:
# check pytorch version
# update pytorch to 1.8.1



'2.1.1'