# Hungarian Sentiment Analysis with HuBERT

This notebook demonstrates sentiment analysis using a fine-tuned Hungarian BERT model.

## 1. Installation

In [2]:
!pip install torch transformers




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## 2. Imports and Configuration

In [3]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel

# Configuration
# Hugging Face checkpoint id used for both tokenizer and encoder (noted as the same as training).
MODEL_NAME = "SZTAKI-HLT/hubert-base-cc"
# Sequences are padded/truncated to this many tokens (noted as the same as training).
MAX_LENGTH = 128
# Class index -> human-readable label.
LABEL_MAP = {
    0: "negative",
    1: "neutral",
    2: "positive",
}
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using device: {DEVICE}")

Using device: cpu


## 3. Model Architecture

In [4]:
class SentimentClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a linear classification head.

    The encoder is loaded with ``AutoModel.from_pretrained(model_name)``. The
    hidden state at sequence position 0 is passed through dropout and a linear
    layer mapping the encoder's ``hidden_size`` to ``num_classes`` scores.
    """

    def __init__(self, model_name, num_classes):
        """Create the encoder and the classification head.

        Args:
            model_name: Identifier or path handed to ``AutoModel.from_pretrained``.
            num_classes: Number of scores the head produces per input sequence.
        """
        super().__init__()
        # The attribute names below are also the state_dict key prefixes, so
        # they must stay unchanged for previously saved weights to load.
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(p=0.1)
        self.fc = nn.Linear(
            in_features=self.bert.config.hidden_size,
            out_features=num_classes,
        )

    def forward(self, input_ids, attention_mask):
        """Compute raw (unnormalised) class scores for a batch.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            The output of the linear head: one row of ``num_classes`` scores
            per input sequence.
        """
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 of every sequence holds the [CLS] token representation.
        cls_vector = encoder_out.last_hidden_state[:, 0, :]
        return self.fc(self.dropout(cls_vector))

## 4. Model Loading

In [5]:
def load_model(model_path):
    """Rebuild the classifier and restore its trained weights for inference.

    Args:
        model_path: Checkpoint location passed to ``torch.load``; it must hold
            a state dict that ``SentimentClassifier.load_state_dict`` accepts.

    Returns:
        The ``SentimentClassifier`` placed on ``DEVICE`` and switched to eval mode.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. for a missing
        file or a state dict whose keys or shapes do not match the model.
    """
    # Initialize model with same architecture as training; the head size follows
    # LABEL_MAP so the label lookup and the output layer cannot drift apart.
    model = SentimentClassifier(MODEL_NAME, num_classes=len(LABEL_MAP)).to(DEVICE)

    # weights_only=True restricts unpickling to tensors and plain containers, so
    # a tampered checkpoint file cannot run arbitrary code while being loaded.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)

    # Eval mode turns off dropout so predictions are deterministic.
    model.eval()
    return model

## 5. Prediction Function

In [6]:
def predict_sentiment(text, model, tokenizer):
    """Classify a single text and return its sentiment label.

    Args:
        text: The text to classify.
        model: Callable taking ``(input_ids, attention_mask)`` and returning
            per-class scores with the classes along dimension 1.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors when asked for ``return_tensors='pt'``.

    Returns:
        The ``LABEL_MAP`` entry for the highest-scoring class.
    """
    # Pad/truncate to the fixed MAX_LENGTH and request PyTorch tensors.
    batch = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=MAX_LENGTH,
    )
    ids = batch['input_ids'].to(DEVICE)
    mask = batch['attention_mask'].to(DEVICE)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(ids, mask)

    best_class = torch.max(logits, dim=1).indices
    # .item() requires exactly one element, i.e. a single input text.
    return LABEL_MAP[best_class.cpu().item()]

## 6. Demo

In [7]:
# The tokenizer comes from the same checkpoint name as the encoder.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Expects 'best_model.pt' in the working directory; replace with your actual model path.
model = load_model("best_model.pt")

# Sample Hungarian sentences to classify
texts = [
    "Ez a film fantasztikus volt!",
    "Nem tetszett a könyv.",
    "Átlagos élmény volt, semmi különös.",
    "Süt a nap.",
    "Esik az eső.",
    "Szép időnk van ma.",
]

# One three-line report per sentence: text, predicted label, separator rule.
for text in texts:
    sentiment = predict_sentiment(text, model, tokenizer)
    print(f"Text: {text}", f"Sentiment: {sentiment}", "-" * 50, sep="\n")

Text: Ez a film fantasztikus volt!
Sentiment: positive
--------------------------------------------------
Text: Nem tetszett a könyv.
Sentiment: negative
--------------------------------------------------
Text: Átlagos élmény volt, semmi különös.
Sentiment: negative
--------------------------------------------------
Text: Süt a nap.
Sentiment: neutral
--------------------------------------------------
Text: Esik az eső.
Sentiment: neutral
--------------------------------------------------
Text: Szép időnk van ma.
Sentiment: positive
--------------------------------------------------


## 7. Interactive Prediction

In [8]:
#@title Enter your own Hungarian text
user_text = "Ez a modell nagyon jól működik!" #@param {type:"string"}

# Guard clause: whitespace-only input is not sent to the model.
if not user_text.strip():
    print("Please enter some text for analysis.")
else:
    sentiment = predict_sentiment(user_text, model, tokenizer)
    print(f"Text: {user_text}", f"Predicted sentiment: {sentiment}", sep="\n")

Text: Ez a modell nagyon jól működik!
Predicted sentiment: positive
