In [None]:
# Install required libraries
!pip install transformers datasets scikit-learn gradio -q

from datasets import load_dataset
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
import torch
import numpy as np


In [None]:
# Load dataset
ag_news = load_dataset("ag_news")

# Name the checkpoint once so the tokenizer and the model always come from the same weights.
MODEL_CHECKPOINT = "bert-base-uncased"
# Size the classification head from the dataset's label names rather than a hard-coded count.
NUM_LABELS = len(ag_news["train"].features["label"].names)

# Tokenizer and model
tokenizer = BertTokenizerFast.from_pretrained(MODEL_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(MODEL_CHECKPOINT, num_labels=NUM_LABELS)


In [None]:
def tokenize(batch):
    """Tokenize the ``text`` field of a batch of examples.

    Args:
        batch: Mapping with a ``'text'`` entry holding the strings to encode
            (this is what ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        The tokenizer's encoding for the batch, truncated to the model's
        maximum length but left unpadded.
    """
    # No padding here: padding inside a batched map would pad every row to the
    # longest text in the map chunk and store those pad tokens in the dataset.
    # The Trainer receives the tokenizer and pads each training batch on the fly.
    return tokenizer(batch['text'], truncation=True)

# Map each class name to its integer id, in the order the label feature declares them.
label_feature = ag_news['train'].features['label']
tag2idx = {name: idx for idx, name in enumerate(label_feature.names)}

# Apply tokenize to the dataset in batches; the result keeps the original columns
# and adds the tokenizer's outputs.
ag_news_encoded = ag_news.map(tokenize, batched=True)

In [None]:
def compute_metrics(pred):
    """Score a set of predictions with accuracy and weighted F1.

    Args:
        pred: Object exposing ``label_ids`` (the true class ids) and
            ``predictions`` (per-class scores, one row per example).

    Returns:
        dict with the keys ``'accuracy'`` and ``'f1'``.
    """
    y_true = pred.label_ids
    # The predicted class is the index of the highest score in each row.
    y_pred = np.argmax(pred.predictions, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    return {'accuracy': accuracy, 'f1': weighted_f1}

In [None]:

import inspect

# transformers renamed two keyword arguments across releases:
#   TrainingArguments(evaluation_strategy=...)  ->  eval_strategy
#   Trainer(tokenizer=...)                      ->  processing_class
# The install cell does not pin a version, so pick whichever spelling the
# installed release accepts instead of failing with a TypeError.
_training_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_kwarg = (
    "eval_strategy" if "eval_strategy" in _training_args_params else "evaluation_strategy"
)

_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

training_args = TrainingArguments(
    output_dir="bert-news",
    logging_dir='./logs',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    **{_eval_strategy_kwarg: "epoch"},  # evaluate at the end of every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    # Train on a fixed-seed random subset of 5000 examples and evaluate on the
    # first 1000 test examples to keep the run short.
    train_dataset=ag_news_encoded['train'].shuffle(seed=42).select(range(5000)),
    eval_dataset=ag_news_encoded['test'].select(range(1000)),
    compute_metrics=compute_metrics,
    # Passing the tokenizer lets the Trainer pad batches and save it with checkpoints.
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()

In [None]:
import gradio as gr

def classify(text):
    """Predict AG News class probabilities for a piece of text.

    Args:
        text: The string to classify.

    Returns:
        dict mapping each label name of the dataset to the model's softmax
        probability for that label (a Python float).
    """
    label_names = ag_news['train'].features['label'].names

    # The Trainer may have moved the model to an accelerator; the inputs must
    # live on the same device or the forward pass raises a device-mismatch error.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(model.device)

    # Inference only: disable dropout and skip building the autograd graph.
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    # One input text -> take the single row of probabilities as plain floats.
    probs = torch.softmax(logits, dim=1)[0].tolist()
    return {name: float(prob) for name, prob in zip(label_names, probs)}

# Wrap classify in a text-in / label-out Gradio interface and start it.
demo = gr.Interface(fn=classify, inputs="text", outputs="label")
demo.launch()
