## Load models 

In [1]:
from transformers import BertForSequenceClassification
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import pandas as pd
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
import os
from transformers import BertTokenizer
import torch
from transformers import DistilBertTokenizer

# Inference is pinned to the CPU. To use a GPU when one is available, switch to:
#     torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# One tokenizer instance is shared by every classifier: prediction_loop
# tokenizes the text once and feeds the same encoding to each model.
tokenizer = DistilBertTokenizer.from_pretrained("bert-base-uncased")

In [2]:


def load_models(model_paths, device, label_dicts):
    """Load one fine-tuned sequence classifier per entry in ``model_paths``.

    The architecture is chosen from the checkpoint itself: a state dict whose
    keys start with ``bert.`` is loaded into ``BertForSequenceClassification``;
    anything else falls back to ``DistilBertForSequenceClassification``.
    Loading a BERT state dict into a DistilBERT model (or vice versa) would
    otherwise fail the strict key check in ``load_state_dict``.

    Args:
        model_paths: Mapping of model name -> path of a saved ``state_dict``.
        device: ``torch.device`` each model is moved to.
        label_dicts: Mapping of model name -> {label: class index}. Its length
            sets ``num_labels`` for that model's classification head.

    Returns:
        dict mapping each model name to its loaded model, in eval mode.

    Raises:
        KeyError: If a name in ``model_paths`` is missing from ``label_dicts``.
    """
    models = {}
    for model_name, model_path in model_paths.items():
        label_dict = label_dicts[model_name]

        # Read the checkpoint first so a missing/corrupt file fails before the
        # (slow) base-model initialisation.
        # NOTE(security): torch.load unpickles the file - only load checkpoints
        # from a trusted source.
        state_dict = torch.load(f'{model_path}', map_location=torch.device('cpu'))

        # Parameter names carry the backbone prefix ("bert." / "distilbert."),
        # which identifies the architecture the checkpoint was saved from.
        if any(key.startswith("bert.") for key in state_dict):
            model_class = BertForSequenceClassification
        else:
            model_class = DistilBertForSequenceClassification

        model = model_class.from_pretrained("bert-base-uncased",
                                            num_labels=len(label_dict),
                                            output_attentions=False,
                                            output_hidden_states=False)

        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()  # inference only: disables dropout
        models[model_name] = model
    return models

## Producing a prediction

In [3]:
## Classification 
def predict(inputs, model, label_dict):
    """Return the label whose class index is the model's top-scoring class.

    Args:
        inputs: Keyword arguments forwarded to the model as ``model(**inputs)``.
        model: Callable whose result exposes a ``logits`` tensor.
        label_dict: Mapping of label -> class index.

    Returns:
        The first label in ``label_dict`` whose index equals the argmax of the
        logits, or ``None`` when no label has that index (previously this case
        raised ``UnboundLocalError``).

    Note:
        ``.item()`` needs a single-element result, so ``inputs`` must hold
        exactly one example.
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    predicted_class_idx = torch.argmax(outputs.logits, dim=-1).item()

    # Reverse lookup index -> label; None signals "index not in label_dict".
    return next(
        (label for label, idx in label_dict.items() if idx == predicted_class_idx),
        None,
    )

def label_exists(label, dictionary):
    """Report whether ``label`` is contained in ``dictionary`` (``in`` test)."""
    is_present = label in dictionary
    return is_present

def prediction_runner(inputs, models, label_dicts, label):
    """Refine ``label`` by descending the classifier hierarchy.

    While ``label`` is a key of ``label_dicts``, the classifier stored under
    that key in ``models`` is run on ``inputs`` and its prediction becomes the
    next label. The first label that has no entry in ``label_dicts`` is
    returned.
    """
    # Leaf reached: nothing left to refine.
    if not label_exists(label, label_dicts):
        return label

    child_label = predict(inputs, models[label], label_dicts[label])
    return prediction_runner(inputs, models, label_dicts, child_label)
    
    

def prediction_loop(text, models, label_dicts):
    """Classify ``text`` from the grocery check down to the most specific label.

    The text is tokenized once with the module-level ``tokenizer`` and moved to
    the module-level ``device``. The 'is_grocery' model runs first; unless it
    answers 'GROC' its prediction is returned as-is. Otherwise the
    'food_beverage' model runs and its prediction is refined by
    ``prediction_runner``. "Critical Error" is returned if the 'food_beverage'
    prediction is falsy.
    """
    print(f'Got the text {text}')
    encoded = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)

    # Stage 1: is the item a grocery item at all?
    grocery_pred = predict(encoded, models['is_grocery'], label_dicts['is_grocery'])
    if grocery_pred != 'GROC':
        return grocery_pred

    # Stage 2: food vs. beverage, then walk the rest of the hierarchy.
    branch_pred = predict(encoded, models['food_beverage'], label_dicts['food_beverage'])
    if not branch_pred:
        return "Critical Error"
    return prediction_runner(encoded, models, label_dicts, branch_pred)

## Label hierarchy and model loading

In [4]:
## Main: label hierarchy, checkpoint locations, and model loading

# Label hierarchy. Each key names a classifier; its value maps that
# classifier's output labels to class indices (the number of entries is used
# as num_labels in load_models). A label that is itself a key here is refined
# further by the classifier of the same name (see prediction_runner); a label
# that is not a key is a final prediction.
label_dicts = {
    "is_grocery" : {'GROC': 0, 'NONG': 1},
    "food_beverage" : {'FDAA': 0, 'BEVA': 1},
    "FDAA" : {'BAKA': 0,'MISC': 1,'ANIP': 2,'FRTV': 3,'CUFD': 4,},
    "BEVA" : {'SODR': 0, 'WATR': 1, 'CTCC': 2},
    "SODR" : {'FIZD': 0, 'ENRD': 1, 'FRUJ': 2},
    "WATR" : {'FLWR': 0, 'SMWR': 1},
    "CTCC" : {'COFF': 0, 'HTCD': 1, 'SBTB': 2},
    "BAKA" : {'BAKY': 0, 'BKGD': 1},
    "MISC" : {'CHSW': 0, 'JAMH': 1, 'SAUC': 2},
    "ANIP" : {'DAIR': 0, 'FISH': 1, 'MEAT': 2},
    "FRTV" : {'FRFR': 0, 'FRVG': 1, 'POTA': 2},
    "CUFD" : {'BRKF': 0, 'BSOI': 1, 'CANF': 2, 'RICE': 3},
    "BAKY" : {'BRRL': 0, 'INCK': 1, 'PKWT': 2, 'WHBR': 3, 'WHMB': 4},
    "BKGD" : {'SRFL': 0, 'OTFL': 1, 'PWSU': 2},
    "CHSW" : {'CHOC': 0, 'GUMM': 1, 'OTSW': 2},
    "JAMH" : {'HONY': 0, 'JAMM': 1, 'CRSP': 2},
    "DAIR" : {'CHCH': 0, 'EGGS': 1, 'SKML': 2, 'SFCH': 3, 'WHMK': 4},
    "FISH" : {'FSFF': 0, 'FWFF': 1, 'PRAW': 2},
    "MEAT" : {'PORK': 0, 'BEEF': 1, 'CHCK': 2, 'TURK': 3},
    "FRFR" : {'AVOC': 0, 'BANA': 1, 'BLUE': 2, 'APPL': 3, 'GRAP': 4, 'KIWI': 5, 'LEMN': 6, 'ORNG': 7, 'PINE': 8, 'PLUM': 9, 'RASP': 10},
    "FRVG" : {'BROC': 0, 'CARR': 1, 'CAUL': 2, 'CUCU': 3, 'LETT': 4, 'MUSH': 5, 'ONIO': 6, 'PEPP': 7, 'TOMA': 8, 'MXVG': 9},
    "POTA" : {'POTT': 0, 'SWPT': 1},
    "BRKF" : {'BCGF': 0, 'BCER': 1},
    "BSOI" : {'BUTT': 0, 'MARG': 1, 'OLOL': 2},
    "CANF" : {'CTUN': 0, 'BKBN': 1, 'CNFR': 2},
    "RICE" : {'BSRX': 0, 'JSRX': 1, 'LGRX': 2},

}
# Saved state_dict file for each classifier, keyed by the same names as
# label_dicts (load_models looks up label_dicts[name] for every name here).
# NOTE(review): the epoch number in the file name differs per model (3, 4 or 5)
# - presumably the best epoch for that model; confirm.
model_paths = {
    "is_grocery" : "models/is_grocery_bert.model",
    "food_beverage" : "models/food_beverage_models/finetuned_BERT_epoch_5.model",
    "FDAA" : "models/fdaa_models/finetuned_BERT_epoch_5.model",
    "BEVA" : "models/beva_models/finetuned_BERT_epoch_4.model",
    "SODR" : "models/sodr_models/finetuned_BERT_epoch_5.model",
    "WATR" : "models/watr_models/finetuned_BERT_epoch_5.model",
    "CTCC" : "models/ctcc_models/finetuned_BERT_epoch_5.model",
    "BAKA" : "models/baka_models/finetuned_BERT_epoch_5.model",
    "MISC" : "models/misc_models/finetuned_BERT_epoch_5.model",
    "ANIP" : "models/anip_models/finetuned_BERT_epoch_3.model",
    "FRTV" : "models/frtv_models/finetuned_BERT_epoch_5.model",
    "CUFD" : "models/cufd_models/finetuned_BERT_epoch_5.model",
    "BAKY" : "models/baky_models/finetuned_BERT_epoch_5.model",
    "BKGD" : "models/bkgd_models/finetuned_BERT_epoch_5.model",
    "CHSW" : "models/chsw_models/finetuned_BERT_epoch_5.model",
    "JAMH" : "models/jamh_models/finetuned_BERT_epoch_5.model",
    "DAIR" : "models/dair_models/finetuned_BERT_epoch_3.model",
    "FISH" : "models/fish_models/finetuned_BERT_epoch_4.model",
    "MEAT" : "models/meat_models/finetuned_BERT_epoch_5.model",
    "FRFR" : "models/frfr_models/finetuned_BERT_epoch_5.model",
    "FRVG" : "models/frvg_models/finetuned_BERT_epoch_5.model",
    "POTA" : "models/pota_models/finetuned_BERT_epoch_5.model",
    "BRKF" : "models/brkf_models/finetuned_BERT_epoch_5.model",
    "BSOI" : "models/bsoi_models/finetuned_BERT_epoch_5.model",
    "CANF" : "models/canf_models/finetuned_BERT_epoch_5.model",
    "RICE" : "models/rice_models/finetuned_BERT_epoch_3.model",
}

# Models are loaded onto the CPU; prediction_loop moves the tokenized inputs
# to this same module-level `device`.
device = torch.device("cpu")
# Loads every checkpoint listed above (one model per hierarchy node).
models = load_models(model_paths, device, label_dicts)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

## Testing Loop

In [5]:
# End-to-end evaluation: run the full classifier hierarchy on every labelled
# product name and report the fraction whose predicted label matches CATEGORY.
COLS = ["CATEGORY", "NAME"]
# Column names are supplied explicitly, so the first line of the file is read
# as data rather than as a header.
df = pd.read_csv('../Data/SingleDataset.csv', names=COLS, index_col=False)

# Counter names (including the `accuate_results` spelling) are kept unchanged
# in case other cells read them.
accuate_results = 0
total_results = 0
for index, row in df.iterrows():
    text_string = row["NAME"]
    result = prediction_loop(text=text_string, models=models, label_dicts=label_dicts)
    if row["CATEGORY"] == result:
        accuate_results += 1
    total_results += 1

# Accuracy is undefined for an empty dataset; report NaN instead of raising
# ZeroDivisionError.
accuracy = accuate_results / total_results if total_results else float("nan")

print(accuracy)


Got the text Panini 4 Pack
Got the text Tesco Chunky Cheese Rolls 4 Pack
Got the text Ciabatta Roll 4 Pack
Got the text St Pierre 6 Brioche Burger Buns
Got the text Tesco Large White Hot Dog Rolls 6 Pack
Got the text Big Bite Soft White Roll 6Pack
Got the text 6 Pack White Finger Rolls
Got the text 6 Pack Soft White Rolls
Got the text Large White Bap 4 Pack
Got the text Baker Street Burger Buns 6 Pack
Got the text Baker Street 4 Hot Dog Rolls
Got the text Tesco Finest 4 Mango  Passion Fruit Buttermilk Pancakes
Got the text Warburtons Sliced Sandwich Rolls 12 Pack
Got the text Tesco Finest 4 Onion Brioche Hot Dog Rolls
Got the text Tesco Finest 4 Brioche Burger Buns
Got the text Tesco Wholemeal Deli Rolls 4 Pack
Got the text Warburtons Hot Dog Rolls X 6
Got the text Tesco Fire Pit 6 Jumbo Brioche Hot Dog Rolls
Got the text Tesco 4 Brioche Buns
Got the text Tesco Finest 2 Turmeric Brioche Burger Buns
Got the text Warburtons 6 Sliced Sandwich Rolls
Got the text Tesco Large Wholemeal Baps 