In [1]:
pip install transformers datasets torch scikit-learn pandas numpy

Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_cusparse_cu12-12

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict

df = pd.read_csv("/kaggle/input/dataset-2/balanced_transformed_intent_dataset.csv")

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)

dataset = DatasetDict({
    "train": Dataset.from_pandas(train_df),
    "validation": Dataset.from_pandas(val_df),
    "test": Dataset.from_pandas(test_df)
})

In [3]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder
intents = df["intent"].unique()
label_encoder = LabelEncoder()
label_encoder.fit(intents)

intent_to_texts = {}
for intent in intents:
    intent_to_texts[intent] = df[df['intent'] == intent]['input_text'].tolist()

df['intent_encoded'] = label_encoder.transform(df['intent'])

encoder_data = {
    'label_encoder': label_encoder,
    'intent_to_texts': intent_to_texts,
    'encoded_queries': df[['input_text', 'intent_encoded']].to_dict('records')
}

with open("text_intent_encoder.pkl", "wb") as f:
    pickle.dump(encoder_data, f)
with open("text_intent_encoder.pkl", "rb") as f:
    loaded_data = pickle.load(f)
    
print("Saved encoder can map these intents:", loaded_data['label_encoder'].classes_)
print("Example mapping for first intent:", 
      loaded_data['intent_to_texts'][loaded_data['label_encoder'].classes_[0]][:1])

Saved encoder can map these intents: ['bug_identification' 'code_conversion' 'code_generation' 'code_summary'
 'design_explanation' 'function_explanation' 'generate_docstring' 'misc'
 'refactor_suggestion' 'schema_inference' 'visualization']
Example mapping for first intent: ['Can you generate a brief explanation for the following Python 3 code def _make_eof_intr(): global _EOF, _INTR if (_EOF is not None) and (_INTR is not None): return # inherit EOF and INTR definitions from controlling process. try: from termios import VEOF, VINTR fd = None for name in \'stdin\', \'stdout\': stream = getattr(sys, \'__%s__\' % name, None) if stream is None or not hasattr(stream, \'fileno\'): continue try: fd = stream.fileno() except ValueError: continue if fd is None: # no fd, raise ValueError to fallback on CEOF, CINTR raise ValueError("No stream has a fileno") intr = ord(termios.tcgetattr(fd)[6][VINTR]) eof = ord(termios.tcgetattr(fd)[6][VEOF]) except (ImportError, OSError, IOError, ValueError, ter

In [4]:
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    return tokenizer(
        examples["input_text"],
        padding="max_length",
        truncation=True,   
        max_length=512     
    )

tokenized_dataset = dataset.map(tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/7999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [5]:
def encode_labels(examples):
    return {"labels": label_encoder.transform(examples["intent"])}

tokenized_dataset = tokenized_dataset.map(encode_labels, batched=True)

Map:   0%|          | 0/7999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [6]:
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

In [7]:
from transformers import RobertaForSequenceClassification, TrainingArguments, Trainer

model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=len(label_encoder.classes_)  
)

2025-07-03 15:38:17.297518: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751557097.507400      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751557097.565890      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
from transformers import (
    RobertaForSequenceClassification, 
    TrainingArguments, 
    Trainer,
    EarlyStoppingCallback
)
import numpy as np
from sklearn.metrics import accuracy_score
import torch
total_steps = len(tokenized_dataset["train"]) * 5  # epochs * train samples
warmup_steps = int(0.1 * total_steps)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir="./intent_classifier",
    eval_strategy="epoch",  # Changed from evaluation_strategy
    save_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=1,
    num_train_epochs=5,
    save_total_limit=3,
    logging_dir="./logs",
    logging_steps=50,
    learning_rate=3e-5,
    weight_decay=0.01,
    warmup_steps=warmup_steps,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    fp16=True,
    gradient_checkpointing=False,
    report_to="tensorboard",
    optim="adamw_torch",
    seed=42
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
    callbacks=[
        EarlyStoppingCallback(
            early_stopping_patience=2, 
            early_stopping_threshold=0.01  
        )
    ]
)
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
1,2.1166,1.91954,0.338
2,1.0743,0.823074,0.861
3,0.2463,0.245317,0.937
4,0.1461,0.137916,0.968
5,0.0685,0.097634,0.983




TrainOutput(global_step=1250, training_loss=0.9100631748199463, metrics={'train_runtime': 2152.5171, 'train_samples_per_second': 18.581, 'train_steps_per_second': 0.581, 'total_flos': 1.052397700529664e+16, 'train_loss': 0.9100631748199463, 'epoch': 5.0})

In [9]:
if "test" in tokenized_dataset:  # Safety check
    results = trainer.evaluate(tokenized_dataset["test"], metric_key_prefix="test")
    print(f"\nFinal Test Accuracy: {results['test_accuracy']:.4f}")
    print("Full Test Results:", {k: v for k, v in results.items() if k.startswith('test_')})




early stopping required metric_for_best_model, but did not find eval_accuracy so early stopping is disabled



Final Test Accuracy: 0.9820
Full Test Results: {'test_loss': 0.11043448746204376, 'test_accuracy': 0.982, 'test_runtime': 15.307, 'test_samples_per_second': 65.33, 'test_steps_per_second': 1.045}
