In [1]:
# uninstall first
!pip uninstall torch torchvision torchaudio

# e.g. for CUDA Toolkit 1.8
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Found existing installation: torch 2.3.0+cu121
Uninstalling torch-2.3.0+cu121:
  Would remove:
    /usr/local/bin/convert-caffe2-to-onnx
    /usr/local/bin/convert-onnx-to-caffe2
    /usr/local/bin/torchrun
    /usr/local/lib/python3.10/dist-packages/functorch/*
    /usr/local/lib/python3.10/dist-packages/torch-2.3.0+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torch/*
    /usr/local/lib/python3.10/dist-packages/torchgen/*
Proceed (Y/n)? Y
  Successfully uninstalled torch-2.3.0+cu121
Found existing installation: torchvision 0.18.0+cu121
Uninstalling torchvision-0.18.0+cu121:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/torchvision-0.18.0+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libcudart.7ec1eba6.so.12
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libjpeg.ceea7512.so.62
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libnvjpeg.f00ca762.so.12
    /usr/local/lib/python3.10/dist-packages/torc

In [2]:
import yaml
import os
import sys
import torch
import json
import joblib
import numpy as np
import pandas as pd
from torch import nn
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup

In [3]:
# Name of the pretrained BERT checkpoint.
bert_model_name = 'bert-base-uncased'

# Class index -> category name; used to turn a predicted index into a label.
# NOTE(review): some entries look like near-duplicates (e.g. 14 vs 16) —
# presumably they mirror the label encoding the checkpoint was trained with;
# confirm before editing, since the indices must keep matching the model output.
id2label = {
    0: 'Household',
    1: 'Dairy',
    2: 'galletas',
    3: 'Beverages',
    4: 'Meat/Poultry/Seafood',
    5: 'bombones',
    6: 'Other',
    7: 'AlcoholicBeverages',
    8: 'Snacks/Candy',
    9: 'Canned/JarredGoods',
    10: 'PersonalCare',
    11: 'FrozenFoods',
    12: 'Pasta/Grains',
    13: 'Bakery',
    14: 'Prepared/Ready-Made Foods',
    15: 'Toys: Other',
    16: 'Prepared/Ready-Made_Foods',
    17: 'Electronics',
    18: 'Baby',
    19: 'Pet',
    20: 'FreshProduce',
    21: 'Toys',
    22: 'Produce',
    23: 'Vitamins: Other',
    24: 'Medicines: Household',
}

# Location of the fine-tuned classifier weights on the mounted Google Drive.
model_path = "/content/drive/MyDrive/Shopfully/FewSelectedCategory/bert_classifier.pth"

# Maximum sequence length, in tokens.
max_length = 128

# Derived from the label map so the two can never drift apart (currently 25).
num_labels = len(id2label)

In [4]:
# Mount Google Drive at /content/drive; model_path points below this mount
# point, so this cell must run before the checkpoint is loaded.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
class BERTClassifier(nn.Module):
    """BERT encoder followed by dropout and a single linear classification layer.

    Args:
        bert_model_name: Name or path passed to ``BertModel.from_pretrained``.
        num_classes: Number of output logits produced by the linear layer.
    """

    def __init__(self, bert_model_name, num_classes):
        super().__init__()
        # The attribute names below become the state_dict key prefixes, so
        # they must stay as-is for saved checkpoints to keep loading.
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.1)
        encoder_width = self.bert.config.hidden_size
        self.fc = nn.Linear(encoder_width, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for the given encoder inputs.

        The encoder's ``pooler_output`` is passed through dropout and then
        through the linear layer; no softmax is applied.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(self.dropout(encoded.pooler_output))

def get_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# Loaded (model, tokenizer) pairs, keyed by the settings that decide what gets
# loaded. Without this, every prediction rebuilt BERT and re-read the
# checkpoint from disk. Clear the dict (or re-run this cell) to force a reload,
# e.g. after the checkpoint file or the BERTClassifier definition changes.
_PREDICTOR_CACHE = {}


def _load_predictor(device):
    """Return the (model, tokenizer) pair for ``device``, loading it on first use."""
    cache_key = (bert_model_name, num_labels, model_path, str(device))
    if cache_key not in _PREDICTOR_CACHE:
        tokenizer = BertTokenizer.from_pretrained(bert_model_name)
        model = BERTClassifier(bert_model_name, num_labels)
        # Map tensors to CPU while loading, then move the whole model to the
        # target device in one step.
        # NOTE(review): torch.load unpickles the file — only load checkpoints
        # from a trusted source.
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
        _PREDICTOR_CACHE[cache_key] = (model, tokenizer)
    return _PREDICTOR_CACHE[cache_key]


def predict_category(text):
    """Predict the category label for a piece of text.

    The model configuration (``bert_model_name``, ``num_labels``,
    ``model_path``, ``max_length``) and the label map ``id2label`` are read
    from module-level variables, so the dataset does not need to be read again.

    Args:
        text: Input passed to the tokenizer. The prediction is read with
            ``.item()``, so exactly one prediction is expected per call.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    device = get_device()
    print(device)
    model, tokenizer = _load_predictor(device)

    encoding = tokenizer(text, return_tensors='pt', max_length=max_length, padding='max_length', truncation=True)
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, preds = torch.max(outputs, dim=1)

    # Return the label corresponding to the predicted numerical ID from id2label
    return id2label[preds.item()]

def preprocess(dict_request):
    """Flatten a request dict into one ``key=value`` string.

    Every value is stripped of surrounding whitespace and lower-cased; keys
    are used as given. Pairs are joined with single spaces in the dict's
    iteration order.
    """
    return ' '.join(
        f"{field}={raw.strip().lower()}" for field, raw in dict_request.items()
    )

def form_response(dict_request):
    """Preprocess a request dict, print the resulting text and return its prediction.

    Errors raised by ``preprocess`` or ``predict_category`` are not caught here.
    """
    model_input = preprocess(dict_request)
    print(model_input)
    return predict_category(model_input)


def api_response(dict_request):
    """Preprocess a request dict and return its prediction as a dict.

    Previously the success path computed the prediction but never returned
    it, so callers always received ``None``.

    Args:
        dict_request: Request dict passed to ``preprocess``.

    Returns:
        ``{"response": <predicted label>}`` on success, or
        ``{"error": <message>}`` if preprocessing or prediction raised.
    """
    try:
        # Preprocess the input data
        input_data = preprocess(dict_request)

        # Get the prediction
        response = predict_category(input_data)
        return {"response": response}

    except Exception as e:
        # Report the failure to the caller instead of raising.
        return {"error": str(e)}

In [19]:
# Sample product description used for the first prediction.
text = "For Children Baby 3 Months+ Concentrated Liquid Strawberry Flavour 200mg/5ml 50ml,product_brand: Nurofen"
# NOTE(review): predict_category creates its own tokenizer and does not read
# this module-level one; it looks unused in the cells shown here.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [20]:
# Classify the sample text; predict_category also prints the device it runs on.
predicted_label = predict_category(text)
print("Predicted label:", predicted_label)

cpu
Predicted label: Household


In [21]:
# Second sample: a different product description, classified the same way.
text = "mortadela especial taco,product_brand: la europea"
predicted_label = predict_category(text)
print("Predicted label:", predicted_label)

cpu
Predicted label: Meat/Poultry/Seafood
