## 0- Fine-tuning BERT for multi-class classification with Turkish language datasets

In [1]:
import pandas as pd 
import kagglehub
import os
import torch
import numpy as np
from transformers import BertTokenizerFast 
from transformers import BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from torch.utils.data import Dataset
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Early stopping callback
from transformers.integrations import TensorBoardCallback
from transformers import EarlyStoppingCallback

# Set seed for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)


  from .autonotebook import tqdm as notebook_tqdm


## 1- Load the dataset

In [2]:
if "TTC4900.csv" not in os.listdir():
 !wget  https://raw.githubusercontent.com/savasy/TurkishTextClassification/master/TTC4900.csv
else:
   print("Already there !")

data= pd.read_csv("TTC4900.csv")
data=data.sample(frac=1.0, random_state=42)
data.head(5)
print(f"data.shape={data.shape}")

Already there !
data.shape=(4900, 2)


In [3]:
labels=["teknoloji","ekonomi","sağlık","siyaset","kültür","spor","dünya"]
NUM_LABELS= len(labels)
id2label={i:l for i,l in enumerate(labels)}
label2id={l:i for i,l in enumerate(labels)}
print(f"label2id: {label2id}")
# Convert the category labels to integers
data["labels"]=data.category.map(lambda x: label2id[x.strip()])
data.head()

SIZE= data.shape[0]

train_texts= list(data.text[:SIZE//2])
val_texts=   list(data.text[SIZE//2:(3*SIZE)//4 ])
test_texts=  list(data.text[(3*SIZE)//4:])

train_labels= list(data.labels[:SIZE//2])
val_labels=   list(data.labels[SIZE//2:(3*SIZE)//4])
test_labels=  list(data.labels[(3*SIZE)//4:])
print(f"len(train_texts): {len(train_texts)}")
print(f"len(train_labels): {len(train_labels)}")

print(f"len(val_texts): {len(val_texts)}")  
print(f"len(val_labels): {len(val_labels)}")

print(f"len(test_texts): {len(test_texts)}")
print(f"len(test_labels): {len(test_labels)}")

label2id: {'teknoloji': 0, 'ekonomi': 1, 'saglik': 2, 'siyaset': 3, 'kultur': 4, 'spor': 5, 'dunya': 6}
len(train_texts): 2450
len(train_labels): 2450
len(val_texts): 1225
len(val_labels): 1225
len(test_texts): 1225
len(test_labels): 1225


## 2- call the model and tokenizer

In [4]:
device= "cuda" if torch.cuda.is_available() else "cpu"
tokenizer= BertTokenizerFast.from_pretrained("dbmdz/bert-base-turkish-uncased", max_length=512)
model= BertForSequenceClassification.from_pretrained("dbmdz/bert-base-turkish-uncased",
                                                    num_labels=NUM_LABELS,
                                                    id2label=id2label,
                                                    label2id=label2id).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 3- Tokenize datasets

In [5]:
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings  = tokenizer(val_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

## 4- Create a customized dataset

In [6]:
class MyDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() if isinstance(val[idx], torch.Tensor) else torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    def __len__(self):
        return len(self.labels)

train_dataset = MyDataset(train_encodings, train_labels)
val_dataset = MyDataset(val_encodings, val_labels)
test_dataset = MyDataset(test_encodings, test_labels)

print(f"Training dataset length: {len(train_dataset)}")
print(f"Validation dataset length: {len(val_dataset)}")
print(f"Test dataset length: {len(test_dataset)}")


Training dataset length: 2450
Validation dataset length: 1225
Test dataset length: 1225


## 5- Fine-tuning the model

In [None]:
# Create output directories
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f'./multi_class_results_{timestamp}'
log_dir = f'./multi_class_logs_{timestamp}'
os.makedirs(output_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)


# Function to calculate the metrics

def compute_metrics(pred): 
    labels = pred.label_ids 
    preds = pred.predictions.argmax(-1) 
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro') 
    acc = accuracy_score(labels, preds) 
    return { 
        'accuracy': acc, 
        'f1': f1, 
        'precision': precision, 
        'recall': recall 
    } 


training_args = TrainingArguments(
    output_dir=output_dir,  # output directory for model predictions and checkpoints
    do_eval=True,  # whether to evaluate during training
    do_train=True,  # whether to train the model
    num_train_epochs=15,  # total number of training epochs
    per_device_train_batch_size=16,  # reduced batch size to prevent CUDA OOM errors
    per_device_eval_batch_size=16,  # batch size for evaluation
    warmup_ratio=0.1,  # ratio of warmup steps - more flexible than fixed steps
    weight_decay=0.01,  # strength of weight decay
    logging_dir=log_dir,  # directory to save logs
    save_strategy='steps',  # save the model after each epoch
    evaluation_strategy='steps',  # evaluate the model after each epoch
    logging_strategy='steps',  # log steps instead of epochs for more frequent updates
    report_to='tensorboard',  # report logs to TensorBoard
    logging_steps=100,  # how often to log the training loss
    fp16=True if torch.cuda.is_available() else False,  # whether to use mixed precision training
    load_best_model_at_end=True,  # load the best model when finished training
    metric_for_best_model='f1',  # use F1 score to determine best model
    greater_is_better=True,  # higher F1 is better
    seed=seed,
    dataloader_drop_last=True,  # drop last incomplete batch
    save_total_limit=3,  # limit the total amount of checkpoints saved
)

trainer = Trainer(
    # the pre-trained model that will be fine-tuned 
    model=model,
     # training arguments that we defined above                        
    args=training_args,                 
    train_dataset=train_dataset,         
    eval_dataset=val_dataset,            
    compute_metrics= compute_metrics,
    callbacks=[
        TensorBoardCallback()
    ]
)
# Train the model
print("Starting training...")
results = trainer.train()
print("Training completed!")
print(results)


You are adding a <class 'transformers.integrations.integration_utils.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is
:DefaultFlowCallback
TensorBoardCallback


Starting training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
100,0.0685,0.396809,0.921053,0.921138,0.921829,0.921075
200,0.0636,0.478505,0.922697,0.92216,0.924307,0.923181
300,0.0799,0.470653,0.913651,0.914176,0.916587,0.912955
400,0.0594,0.526829,0.92023,0.919942,0.920081,0.920243
500,0.0635,0.569343,0.912829,0.911568,0.911818,0.912862
600,0.0581,0.535571,0.92023,0.920028,0.921627,0.919855
700,0.033,0.548331,0.922697,0.922558,0.92309,0.922524
800,0.0121,0.530762,0.928454,0.928033,0.928026,0.928313
900,0.0105,0.627388,0.922697,0.922774,0.923572,0.922213
1000,0.0106,0.597101,0.92352,0.92324,0.924202,0.923102


Training completed!
TrainOutput(global_step=2295, training_loss=0.0209259257694475, metrics={'train_runtime': 936.1082, 'train_samples_per_second': 39.258, 'train_steps_per_second': 2.452, 'total_flos': 9661871683584000.0, 'train_loss': 0.0209259257694475, 'epoch': 15.0})


## 6- Evaluate on test set

In [12]:
# Evaluate on test set
print("Evaluating on test set...")
test_results = trainer.evaluate(test_dataset)
print("Test results:", test_results)

Evaluating on test set...


Test results: {'eval_loss': 0.5825170874595642, 'eval_accuracy': 0.9292763157894737, 'eval_f1': 0.9284400955797399, 'eval_precision': 0.9282520751154196, 'eval_recall': 0.929484226441632, 'eval_runtime': 6.5818, 'eval_samples_per_second': 186.118, 'eval_steps_per_second': 11.699, 'epoch': 15.0}


## 7- Save the final model


In [18]:
# Save the final model
trainer.save_model(f"{output_dir}/final_multi_class_model")
print(f"Final model saved to {output_dir}/final_multi_class_model")

# Example of using the model for inference a new sentence
def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    prediction = torch.argmax(probabilities, dim=-1).item()
    
    return id2label[prediction]

Final model saved to ./multi_class_results_20250319_142541/final_multi_class_model


## 8- Run the model for inference

In [20]:
test_texts = [
        "Fenerbahçeli futbolcular kısa paslarla hazırlık çalışması yaptılar", # spor
        "Türkiye’de mali istikrarı sağlamak ve yatırımları artırmak için yeni politikalar geliştirilmelidir.", # ekonomi
        "Yapay zeka ve otomasyon, üretim sektöründe verimliliği artırarak maliyetleri düşürüyor.", # teknoloji
        "Küresel ısınma, dünyanın ekosistemlerini ve iklim dengesini tehdit eden en büyük sorunlardan biridir.", # dünya
        "Koronavirüs salgınında günlük vaka sayısı 50.000'in üzerine çıktı.", # sağlık
        "Türkiye'nin en büyük sorunu olan terör, son yıllarda büyük oranda azaldı.", # siyaset
        "Türkiye'nin kültürel zenginlikleri, dünya genelinde büyük ilgi görüyor." # kültür
    ]
test_texts_labels = ["spor", "ekonomi", "teknoloji", "dünya", "sağlık", "siyaset", "kültür"]
for index, text in enumerate(test_texts):
    prediction = predict_sentiment(text)
    print(f"Text: {text}")
    print(f"Prediction: {prediction}  -  True label: {test_texts_labels[index]}")
    print("-" * 50)


Text: Fenerbahçeli futbolcular kısa paslarla hazırlık çalışması yaptılar
Prediction: spor  -  True label: spor
--------------------------------------------------
Text: Türkiye’de mali istikrarı sağlamak ve yatırımları artırmak için yeni politikalar geliştirilmelidir.
Prediction: ekonomi  -  True label: ekonomi
--------------------------------------------------
Text: Yapay zeka ve otomasyon, üretim sektöründe verimliliği artırarak maliyetleri düşürüyor.
Prediction: teknoloji  -  True label: teknoloji
--------------------------------------------------
Text: Küresel ısınma, dünyanın ekosistemlerini ve iklim dengesini tehdit eden en büyük sorunlardan biridir.
Prediction: dunya  -  True label: dünya
--------------------------------------------------
Text: Koronavirüs salgınında günlük vaka sayısı 50.000'in üzerine çıktı.
Prediction: saglik  -  True label: sağlık
--------------------------------------------------
Text: Türkiye'nin en büyük sorunu olan terör, son yıllarda büyük oranda azaldı.