In [1]:
import torch
import numpy as np
import pandas as pd
from pathlib import Path
from nlp_models.multi_task_model.mtl import MTLInference

In [2]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

DATA_FOLDER = Path('../data/0_external/google-quest-challenge/')
MODEL_DIR = Path('../models/multi-task-model/')
HF_MODEL_CARD = 'sentence-transformers/multi-qa-mpnet-base-dot-v1'

In [3]:
output_dir = MODEL_DIR / 'multi-task-model-finetuned-classification-layer-20230609'
tokenizer_dir = output_dir / 'tokenizer'
model_file = output_dir / 'mtl.bin'

In [4]:
df_train = pd.read_csv(DATA_FOLDER / 'train.csv')
label_dict = dict([(k,v) for k, v in enumerate(df_train.category.unique())])

In [5]:
mtl_model = MTLInference(tokenizer_dir, model_file, pretrained_model=False)

In [8]:
query = 'Using a ski helmet for winter biking'
pred = mtl_model.predict(query)
print(f'classification probabilities: {pred[0]}')
print(f'sentence embeddings: {pred[1]}')

classification probabilities tensor([[0.2659, 0.2525, 0.1171, 0.1082, 0.2971]])
sentence embeddings tensor([[-5.5846e-03, -8.3601e-02, -3.9778e-02,  5.0667e-03,  1.3832e-02,
         -2.4812e-02,  1.2418e-01, -1.4370e-02,  6.9655e-02,  5.4987e-02,
          1.8685e-02,  8.9461e-03,  2.4301e-02,  3.2691e-03,  3.7805e-02,
         -2.8740e-02, -1.5769e-02, -6.4413e-02, -1.3930e-02,  1.7328e-02,
          4.9049e-02,  6.9645e-03, -4.9728e-02, -1.0012e-03,  3.6664e-02,
         -2.2294e-02,  7.5014e-02,  5.7905e-03,  3.8401e-02,  9.5102e-03,
          2.8432e-02, -3.0667e-02, -2.5576e-03,  5.4535e-03, -1.7073e-05,
          1.0844e-02,  1.9808e-02, -3.2601e-02,  2.3568e-02,  2.5074e-02,
          3.2901e-02, -2.3803e-02,  1.4860e-02,  2.5398e-03, -4.3857e-04,
          5.6636e-02, -1.9603e-03, -2.9645e-02,  2.4378e-03,  1.1967e-02,
          3.9961e-02, -8.8684e-05, -3.8535e-02,  4.2042e-02, -1.2100e-02,
          2.7294e-02, -2.2590e-02, -2.8260e-02,  4.6821e-02,  7.4480e-03,
         -2.