In [1]:
# Import Libraries
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from gensim.utils import simple_preprocess
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix

import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader

from transformers import get_linear_schedule_with_warmup, AutoTokenizer, AutoModel, logging

import warnings
# Silence every Python warning for the rest of the session to keep cell output readable.
warnings.filterwarnings("ignore")

# `logging` is the transformers logging module imported above; restrict it to error messages.
logging.set_verbosity_error()

In [2]:
# Mount Google Drive so files under /content/drive become readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Choose the compute device: CUDA when a GPU is visible to torch, otherwise the CPU.
import torch
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

Mounted at /content/drive


device(type='cpu')

In [3]:
class SentimentClassifier(nn.Module):
    """PhoBERT encoder followed by dropout and a linear classification head.

    Args:
        n_classes: Number of output classes (size of the last dimension of the
            returned logits).
        device: Device handle stored on ``self.device``. The constructor does
            not move any parameters to it; callers must do that themselves.
    """

    def __init__(self, n_classes, device):
        super(SentimentClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained("vinai/phobert-base")
        self.drop = nn.Dropout(p=0.3)
        self.fc = nn.Linear(self.bert.config.hidden_size, n_classes)
        nn.init.normal_(self.fc.weight, std=0.02)
        # Start the bias at exactly zero. The previous call,
        # nn.init.normal_(bias, 0), only fixed the mean and left std at its
        # default of 1.0, producing a bias far larger than the 0.02-scale weights.
        nn.init.zeros_(self.fc.bias)
        self.device = device

    def forward(self, input_ids, attention_mask):
        """Compute unnormalised class scores for a batch of encoded inputs.

        Args:
            input_ids: Token id tensor passed straight to the encoder.
            attention_mask: Attention mask tensor passed straight to the encoder.

        Returns:
            Tensor of logits whose last dimension has ``n_classes`` entries.
        """
        # With return_dict=False the encoder returns a tuple; only its second
        # element is fed to the classification head, the first is discarded.
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )

        return self.fc(self.drop(pooled_output))

In [4]:
# "load_model_and_tokenizer" function
def load_model_and_tokenizer(model_class, tokenizer_class, model_path, tokenizer_path, device, n_classes=7):
    """Rebuild a saved classifier and its tokenizer, ready for inference.

    Args:
        model_class: Callable invoked as ``model_class(n_classes=..., device=...)``
            that returns a ``torch.nn.Module``.
        tokenizer_class: Object exposing ``from_pretrained(path)``.
        model_path: Path of the file holding the saved ``state_dict``.
        tokenizer_path: Path handed to ``tokenizer_class.from_pretrained``.
        device: Device the returned model is moved to.
        n_classes: Number of output classes the model is built with. Defaults
            to 7, the value that was previously hard-coded.

    Returns:
        ``(model, tokenizer)`` with the model in eval mode and placed on ``device``.
    """
    # Load model
    model = model_class(n_classes=n_classes, device=device)
    # NOTE(security): torch.load unpickles the file; only load checkpoints you trust.
    # Weights are materialised on the CPU first so a checkpoint saved on a GPU
    # can still be opened on a CPU-only runtime.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    # Place the model on the requested device; callers move their input
    # tensors to the same device, so leaving the model on the CPU would fail
    # with a device mismatch whenever CUDA is selected.
    model.to(device)
    model.eval()

    # Load tokenizer
    tokenizer = tokenizer_class.from_pretrained(tokenizer_path)

    return model, tokenizer

# Drive locations of the saved weights file and of the tokenizer files.
model_path = '/content/drive/MyDrive/[AI] Ngoc Quy/Bully/bully_detection_model.pth'
tokenizer_path = '/content/drive/MyDrive/[AI] Ngoc Quy/Bully/bully_detection_model'
# Load saved model and tokenizer
loaded_model, loaded_tokenizer = load_model_and_tokenizer(
    model_class=SentimentClassifier,
    tokenizer_class=AutoTokenizer,
    model_path=model_path,
    tokenizer_path=tokenizer_path,
    device=device,
)

config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

In [None]:
# NOTE(review): only two labels are listed, while load_model_and_tokenizer builds the
# model with n_classes=7 — predicted indices >= 2 have no entry here; confirm the label set.
class_names = [0, 1]
# "predict_sentiment" function
def predict_sentiment(sentence, tokenizer, model, device, max_len=120):
    """Classify a single sentence and return the index of the top-scoring class.

    Args:
        sentence: Text to classify.
        tokenizer: Object exposing ``encode_plus``; used to encode ``sentence``.
        model: Callable taking ``(input_ids, attention_mask)`` and returning scores.
        device: Device the encoded tensors are moved to before the model call.
        max_len: Length the encoding is padded/truncated to.

    Returns:
        The position of the largest score along dim 1, as a Python ``int``.
        ``.item()` requires a single-element result, so one sentence per call.
    """
    # Encode to fixed-length PyTorch tensors; token type ids are not requested.
    encoding = tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors='pt',
    )

    ids = encoding['input_ids'].to(device)
    mask = encoding['attention_mask'].to(device)

    # Inference only: no gradient bookkeeping is needed.
    with torch.no_grad():
        scores = model(ids, mask)
        best_index = torch.max(scores, dim=1).indices

    return best_index.item()

# Example usage
sentence_to_predict = "Chào buổi sáng"
predicted_label = predict_sentiment(sentence_to_predict, loaded_tokenizer, loaded_model, device)

# The model is built with more output classes than `class_names` has entries,
# so a direct lookup can raise IndexError. Fall back to the raw class index
# when no display name exists for the predicted label.
if 0 <= predicted_label < len(class_names):
    label_to_show = class_names[predicted_label]
else:
    label_to_show = predicted_label

print(f"Predicted Sentiment Label: {label_to_show}")