In [16]:
import torch
# Report whether CUDA is usable and, if it is, the name of device 0.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)
if cuda_ok:
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU found")

True
NVIDIA GeForce GTX 1660


In [6]:
pip install transformers datasets accelerate evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
Installing collected packages: evaluate
Successfully installed evaluate-0.4.6
Note: you may need to restart the kernel to use updated packages.


In [17]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import Dataset
import torch
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# --- Load the processed CSV ---
file_path = "../data/processed/full_2k.csv"
df = pd.read_csv(file_path)

# Encode every distinct Category value as an integer id in a new 'label' column.
le = LabelEncoder()
df['label'] = le.fit_transform(df['Category'])
num_classes = len(le.classes_)

# Stage 1: hold out 10% of all rows as the test split (stratified, fixed seed).
X_temp, X_test, y_temp, y_test = train_test_split(
    df['Description'],
    df['label'],
    test_size=0.1,
    stratify=df['label'],
    random_state=13,
)

# Stage 2: carve the validation split out of the remaining rows.
# 0.1111111 of the remaining ~90% is roughly 10% of the full data.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.1111111,
    stratify=y_temp,
    random_state=13,
)

# Bundle each split into a two-column frame: 'text' and 'label'.
train_df, val_df, test_df = (
    pd.DataFrame({'text': texts, 'label': labels})
    for texts, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

In [18]:
# Checkpoint identifier; the same name is reused when the classifier is loaded.
model_name = "distilbert-base-uncased"
# Tokenizer that belongs to that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

def tokenize(batch):
    """Tokenize the ``"text"`` entries of a batch with the module-level ``tokenizer``.

    The tokenizer is called with truncation enabled, ``padding="max_length"``
    and ``max_length=256``.

    Args:
        batch: Mapping with a ``"text"`` entry holding the raw strings.

    Returns:
        Whatever the tokenizer call returns for those strings.
    """
    texts = batch["text"]
    encoding = tokenizer(
        texts,
        max_length=256,
        padding="max_length",
        truncation=True,
    )
    return encoding

def _prepare_split(frame):
    """Convert one split DataFrame into a tokenized, torch-formatted HF Dataset.

    Args:
        frame: DataFrame with 'text' and 'label' columns.

    Returns:
        Dataset whose formatted output exposes ``input_ids``,
        ``attention_mask`` and ``label`` as torch tensors.
    """
    ds = Dataset.from_pandas(frame)
    # Tokenize in 1000-row batches rather than one batch spanning the whole
    # split. `tokenize` pads every row to a fixed max_length, so the batch
    # size does not change the encoded result, only the peak memory use.
    ds = ds.map(tokenize, batched=True, batch_size=1000)
    # Only these columns are returned (as torch tensors) when rows are read.
    ds.set_format("torch", columns=["input_ids", "attention_mask", "label"])
    return ds


# One shared pipeline for all three splits instead of three copied blocks.
train_ds = _prepare_split(train_df)
val_ds   = _prepare_split(val_df)
test_ds  = _prepare_split(test_df)

Map:   0%|          | 0/27543 [00:00<?, ? examples/s]

Map:   0%|          | 0/3443 [00:00<?, ? examples/s]

Map:   0%|          | 0/3443 [00:00<?, ? examples/s]

In [19]:
from transformers import AutoModelForSequenceClassification

# Attach the human-readable category names to the model config so saved
# checkpoints and downstream inference report real labels rather than the
# generic LABEL_<n> placeholders. The ids follow the LabelEncoder ordering
# that produced the 'label' column.
id2label = {idx: str(name) for idx, name in enumerate(le.classes_)}
label2id = {name: idx for idx, name in id2label.items()}

# Load the pretrained checkpoint with a classification head sized for
# num_classes outputs.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
    id2label=id2label,
    label2id=label2id,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
from transformers import TrainingArguments, Trainer
from evaluate import load

# Load the "accuracy" metric via evaluate.load; compute_metrics below calls metric.compute().
metric = load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level accuracy ``metric``.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        dict: ``{"accuracy": value}``, where the value is the ``"accuracy"``
        entry returned by ``metric.compute``.
    """
    scores, references = eval_pred
    # The index of the largest score along the last axis is the predicted class id.
    predicted_ids = scores.argmax(-1)
    outcome = metric.compute(predictions=predicted_ids, references=references)
    return {"accuracy": outcome["accuracy"]}

# Fine-tuning settings gathered in one mapping so they can be read at a glance.
_training_config = {
    # Where checkpoints and logs are written.
    "output_dir": "./bert_runs",
    "logging_dir": "./logs",
    # Evaluate and checkpoint once per epoch.
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    # Optimisation settings.
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "num_train_epochs": 5,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 32,
    # Model selection is keyed on the "accuracy" value from compute_metrics.
    "load_best_model_at_end": True,
    "metric_for_best_model": "accuracy",
}

training_args = TrainingArguments(**_training_config)

# Wire the model, data splits and metric function into a Trainer.
# The tokenizer is passed as `processing_class`: the older `tokenizer=`
# keyword is deprecated in Trainer.__init__ and emits a FutureWarning.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [22]:
# Fine-tune using the splits and schedule configured on the trainer.
trainer.train()

# Score the held-out test split and report its accuracy.
results = trainer.evaluate(eval_dataset=test_ds)
print(f"Test accuracy: {results['eval_accuracy']:.4f}")

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [12]:
import transformers
# Show which transformers version the running kernel has imported.
print(transformers.__version__)

4.56.2


In [9]:
pip install -U transformers

Collecting transformers
  Downloading transformers-4.57.0-py3-none-any.whl.metadata (41 kB)
Downloading transformers-4.57.0-py3-none-any.whl (12.0 MB)
   ---------------------------------------- 0.0/12.0 MB ? eta -:--:--
   ----------------- ---------------------- 5.2/12.0 MB 39.7 MB/s eta 0:00:01
   ---------------------------------------- 12.0/12.0 MB 39.6 MB/s  0:00:00
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.56.2
    Uninstalling transformers-4.56.2:
      Successfully uninstalled transformers-4.56.2
Successfully installed transformers-4.57.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
from transformers import TrainingArguments
# Print the TrainingArguments signature and docs to check which keyword names
# the installed version accepts.
# NOTE(review): exploratory lookup with very long output; consider removing it
# from the final notebook.
help(TrainingArguments)

Help on class TrainingArguments in module transformers.training_args:

class TrainingArguments(builtins.object)
 |  TrainingArguments(
 |      output_dir: Optional[str] = None,
 |      overwrite_output_dir: bool = False,
 |      do_train: bool = False,
 |      do_eval: bool = False,
 |      do_predict: bool = False,
 |      eval_strategy: Union[transformers.trainer_utils.IntervalStrategy, str] = 'no',
 |      prediction_loss_only: bool = False,
 |      per_device_train_batch_size: int = 8,
 |      per_device_eval_batch_size: int = 8,
 |      per_gpu_train_batch_size: Optional[int] = None,
 |      per_gpu_eval_batch_size: Optional[int] = None,
 |      gradient_accumulation_steps: int = 1,
 |      eval_accumulation_steps: Optional[int] = None,
 |      eval_delay: Optional[float] = 0,
 |      torch_empty_cache_steps: Optional[int] = None,
 |      learning_rate: float = 5e-05,
 |      weight_decay: float = 0.0,
 |      adam_beta1: float = 0.9,
 |      adam_beta2: float = 0.999,
 |      ada