# distilbert-base-cased

In [None]:
# Gerekli kütüphaneleri yükleyin
!pip install evaluate

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForQuestionAnswering, AdamW
from transformers import get_scheduler
from tqdm import tqdm
import time
import evaluate
import matplotlib.pyplot as plt

# Load the training and test CSV files.
train_file = "/kaggle/input/enelpi-q-a/questions_and_answers.csv"
test_file = "/kaggle/input/enelpi-q-a/test_questions_and_answers.csv"

train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)

# Optional down-sampling hook: the .head(...) calls are commented out, so the
# complete train and test frames are used as-is.
small_train_data = train_data#.head(1000)
small_test_data = test_data#.head(100)

# Dataset sınıfını tanımlayın
class QADataset(Dataset):
    """Extractive-QA dataset: tokenized (question, context) pairs with span labels.

    Every row of ``data`` must provide ``question``, ``context`` and ``answer``
    columns. The answer is located in the context by substring search and
    converted to start/end token indices using the tokenizer's offset mapping.
    """

    def __init__(self, data, tokenizer, max_len=512):
        """Store the data source and tokenization settings.

        Args:
            data: DataFrame-like object read positionally through ``.iloc``.
            tokenizer: Callable tokenizer that supports
                ``return_offsets_mapping`` and whose result exposes
                ``sequence_ids()`` (Hugging Face "fast" tokenizers).
            max_len: Length every encoded example is truncated/padded to.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode row ``idx`` and attach its answer-span labels.

        Returns:
            dict: The tokenizer outputs (batch dimension removed) plus
            ``start_positions`` and ``end_positions``. Both labels are 0 when
            the answer cannot be located among the encoded context tokens
            (answer missing from the context, or truncated away).
        """
        row = self.data.iloc[idx]
        question = row["question"]
        context = row["context"]
        answer = row["answer"]

        inputs = self.tokenizer(
            question,
            context,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
            return_offsets_mapping=True,  # needed to map characters to tokens
        )

        # Plain Python pairs are much cheaper to scan than tensor elements.
        offsets = inputs.pop("offset_mapping").squeeze(0).tolist()
        # Per token: None for special/padding tokens, 0 for question tokens,
        # 1 for context tokens.
        sequence_ids = inputs.sequence_ids(0)

        answer_start = context.find(answer)
        answer_end = answer_start + len(answer)

        start_token_idx = None
        end_token_idx = None

        if answer_start >= 0:
            for token_idx, (tok_start, tok_end) in enumerate(offsets):
                # Offsets are relative to each token's own text, so only
                # context tokens may be compared with positions inside
                # ``context``; otherwise a question token could be labelled.
                if sequence_ids[token_idx] != 1:
                    continue
                if tok_start <= answer_start < tok_end:
                    start_token_idx = token_idx
                if tok_start < answer_end <= tok_end:
                    end_token_idx = token_idx

        # Fall back to position 0 when the span is not fully visible.
        if start_token_idx is None or end_token_idx is None:
            start_token_idx = 0
            end_token_idx = 0

        inputs["start_positions"] = torch.tensor(start_token_idx, dtype=torch.long)
        inputs["end_positions"] = torch.tensor(end_token_idx, dtype=torch.long)

        return {key: val.squeeze(0) for key, val in inputs.items()}

# Load the model and tokenizer.
model_name = "distilbert-base-cased"

# A DistilBERT checkpoint must not be loaded through the BERT-specific classes:
# the architectures differ, so the pretrained weights would not map onto the
# model. The Auto* classes pick the matching architecture for the checkpoint.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Build the datasets and data loaders.
train_dataset = QADataset(small_train_data, tokenizer)
test_dataset = QADataset(small_test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Optimizer and linear learning-rate schedule (no warm-up) over all steps.
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Use the GPU when one is available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Eğitim fonksiyonu
def train(model, train_loader, optimizer, lr_scheduler, device):
    """Fine-tune ``model`` and record the mean train/validation loss per epoch.

    Reads the module-level ``num_epochs`` (number of passes) and
    ``test_loader`` (used for the per-epoch validation loss).

    Args:
        model: Model called as ``model(**batch)`` whose output exposes ``loss``.
        train_loader: Iterable of training batches (dicts of tensors).
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Learning-rate scheduler stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(train_loss_values, val_loss_values)``, one entry per epoch.
    """
    train_loss_values = []
    val_loss_values = []

    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the validation pass below puts
        # the model in eval mode, which would otherwise persist (dropout off)
        # for all remaining epochs.
        model.train()
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            epoch_loss += loss.item()
        train_loss_values.append(epoch_loss / len(train_loader))

        # Validation loss on the held-out loader, without gradient tracking.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                batch = {key: val.to(device) for key, val in batch.items()}
                outputs = model(**batch)
                val_loss += outputs.loss.item()
        val_loss_values.append(val_loss / len(test_loader))

    return train_loss_values, val_loss_values

# Değerlendirme fonksiyonu
from evaluate import load
import torch

# Load the F1 and exact-match metrics. Only metric_em is used by
# evaluate_model; its metric_f1 call is commented out.
metric_f1 = load("f1")
metric_em = load("exact_match")


from sklearn.metrics import f1_score
from collections import Counter

def calculate_f1_score(str1, str2):
    """Return the whitespace-token overlap F1 between two strings.

    Tokens are compared as multisets, so a repeated word only counts as many
    times as it appears in both strings. Returns 0 when there is no overlap
    or either string has no tokens.
    """
    first_words = str1.split()
    second_words = str2.split()

    # Multiset intersection keeps the minimum count of every shared token.
    overlap = sum((Counter(first_words) & Counter(second_words)).values())

    precision = overlap / len(second_words) if second_words else 0
    recall = overlap / len(first_words) if first_words else 0

    denominator = precision + recall
    if denominator <= 0:
        return 0
    return 2 * (precision * recall) / denominator


from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference_text, candidate_text):
    """Compute the sentence-level BLEU score between two strings.

    Args:
        reference_text (str): Reference (ground-truth) text.
        candidate_text (str): Text produced by the model (hypothesis).

    Returns:
        The score returned by ``sentence_bleu`` with ``method1`` smoothing.
    """
    # Whitespace tokenization; sentence_bleu expects a list of references.
    reference_tokens = reference_text.split()
    candidate_tokens = candidate_text.split()

    # Smoothing avoids a hard zero when a higher-order n-gram has no match.
    smoothing = SmoothingFunction().method1
    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing)

import traceback


def evaluate_model(model, test_loader, device, tokenizer):
    """Score the model's predicted answer spans against the labelled spans.

    For every example the predicted span (argmax of the start and end logits)
    and the labelled span are decoded back to text and compared with
    token-overlap F1, exact match and BLEU.

    Args:
        model: Question-answering model whose output exposes ``start_logits``
            and ``end_logits``.
        test_loader: Iterable of batches (dicts of tensors) containing
            ``input_ids``, ``start_positions`` and ``end_positions``.
        device: Device the batch tensors are moved to.
        tokenizer: Tokenizer used to decode token ids back to text.

    Returns:
        tuple: ``(avg_f1, avg_em, avg_bleu)``; each is 0 when nothing was scored.
    """
    model.eval()
    f1_scores = []
    em_scores = []
    bleu_scores = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)

            # Most likely start and end token for every example in the batch.
            start_pred = torch.argmax(outputs.start_logits, dim=-1)
            end_pred = torch.argmax(outputs.end_logits, dim=-1)

            for i, (start_tensor, end_tensor) in enumerate(zip(start_pred, end_pred)):
                start_position = start_tensor.item()
                end_position = end_tensor.item()

                # Decode the predicted span back to text.
                input_ids = batch['input_ids'][i].cpu().numpy()
                pred_text = tokenizer.decode(input_ids[start_position:end_position + 1], skip_special_tokens=True)

                # Decode the labelled span the same way.
                ref_start = batch["start_positions"][i].item()
                ref_end = batch["end_positions"][i].item()
                ref_text = tokenizer.decode(input_ids[ref_start:ref_end + 1], skip_special_tokens=True)

                # Without a decodable reference there is nothing to compare to.
                if not ref_text:
                    continue

                # An empty prediction is a miss, not a skip: dropping it would
                # remove failures from the averages and inflate every score.
                if not pred_text:
                    f1_scores.append(0.0)
                    em_scores.append(0.0)
                    bleu_scores.append(0.0)
                    continue

                try:
                    f1 = calculate_f1_score(pred_text, ref_text)
                    em = metric_em.compute(predictions=[pred_text], references=[ref_text])
                    # calculate_bleu takes (reference, candidate); BLEU is not
                    # symmetric, so the argument order matters.
                    bleu_score_tmp = calculate_bleu(ref_text, pred_text)
                except ValueError as e:
                    print(f"ValueError for prediction: {pred_text} and reference: {ref_text} -> {e}")
                    traceback.print_exc()
                    continue

                f1_scores.append(f1)
                em_scores.append(em["exact_match"])
                bleu_scores.append(bleu_score_tmp)

    # Average every metric over the scored examples.
    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
    avg_em = sum(em_scores) / len(em_scores) if em_scores else 0
    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

    return avg_f1, avg_em, avg_bleu

# Eğitim süresi ve validation süresi hesaplama
def main(is_train=True):
    """Train (or reload) the QA model, then evaluate it and print the metrics.

    Works on the module-level ``model``, ``train_loader``, ``test_loader``,
    ``optimizer``, ``lr_scheduler``, ``device``, ``tokenizer``, ``model_name``
    and ``num_epochs``.

    Args:
        is_train: When true, fine-tune the model, save the loss plot and the
            weights; otherwise load previously saved weights from disk.
    """
    checkpoint_path = f"model_{model_name.replace('/', '_')}.pth"

    if is_train:
        train_start = time.time()
        train_loss_values, val_loss_values = train(model, train_loader, optimizer, lr_scheduler, device)
        train_end = time.time()
        print(f"Training Duration: {train_end - train_start} seconds.")

        # Plot the per-epoch training and validation loss.
        epochs = range(1, num_epochs + 1)
        plt.plot(epochs, train_loss_values, label='Train Loss')
        plt.plot(epochs, val_loss_values, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'loss_plot_{model_name.replace("/", "_")}.png', dpi=300)
        plt.show()

        # Persist the fine-tuned weights.
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Model kaydedildi: {checkpoint_path}")
    else:
        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine instead of failing while deserializing.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.to(device)
        print(f"Model yüklendi: {checkpoint_path}")

    # Evaluate on the test loader and time the run.
    test_start = time.time()
    avg_f1, avg_em, avg_bleu = evaluate_model(model, test_loader, device, tokenizer)
    test_end = time.time()
    print(f"Test Duration: {test_end - test_start} seconds.")

    # Report the results.
    print(f"Test F1 Skoru: {avg_f1:.4f}")
    print(f"Test Exact Match (EM) Skoru: {avg_em:.4f}")
    print(f"Test BLEU Skoru: {avg_bleu:.4f}")

# Start the training or evaluation run.
is_train = True  # True: fine-tune the model; False: load the saved weights instead
main(is_train)

# dbmdz/bert-base-turkish-cased

In [None]:
# Gerekli kütüphaneleri yükleyin
!pip install evaluate

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForQuestionAnswering, AdamW
from transformers import get_scheduler
from tqdm import tqdm
import time
import evaluate
import matplotlib.pyplot as plt

# Load the training and test CSV files.
train_file = "/kaggle/input/enelpi-q-a/questions_and_answers.csv"
test_file = "/kaggle/input/enelpi-q-a/test_questions_and_answers.csv"

train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)

# Optional down-sampling hook: the .head(...) calls are commented out, so the
# complete train and test frames are used as-is.
small_train_data = train_data#.head(1000)
small_test_data = test_data#.head(100)

# Dataset sınıfını tanımlayın
class QADataset(Dataset):
    """Extractive-QA dataset: tokenized (question, context) pairs with span labels.

    Every row of ``data`` must provide ``question``, ``context`` and ``answer``
    columns. The answer is located in the context by substring search and
    converted to start/end token indices using the tokenizer's offset mapping.
    """

    def __init__(self, data, tokenizer, max_len=512):
        """Store the data source and tokenization settings.

        Args:
            data: DataFrame-like object read positionally through ``.iloc``.
            tokenizer: Callable tokenizer that supports
                ``return_offsets_mapping`` and whose result exposes
                ``sequence_ids()`` (Hugging Face "fast" tokenizers).
            max_len: Length every encoded example is truncated/padded to.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode row ``idx`` and attach its answer-span labels.

        Returns:
            dict: The tokenizer outputs (batch dimension removed) plus
            ``start_positions`` and ``end_positions``. Both labels are 0 when
            the answer cannot be located among the encoded context tokens
            (answer missing from the context, or truncated away).
        """
        row = self.data.iloc[idx]
        question = row["question"]
        context = row["context"]
        answer = row["answer"]

        inputs = self.tokenizer(
            question,
            context,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
            return_offsets_mapping=True,  # needed to map characters to tokens
        )

        # Plain Python pairs are much cheaper to scan than tensor elements.
        offsets = inputs.pop("offset_mapping").squeeze(0).tolist()
        # Per token: None for special/padding tokens, 0 for question tokens,
        # 1 for context tokens.
        sequence_ids = inputs.sequence_ids(0)

        answer_start = context.find(answer)
        answer_end = answer_start + len(answer)

        start_token_idx = None
        end_token_idx = None

        if answer_start >= 0:
            for token_idx, (tok_start, tok_end) in enumerate(offsets):
                # Offsets are relative to each token's own text, so only
                # context tokens may be compared with positions inside
                # ``context``; otherwise a question token could be labelled.
                if sequence_ids[token_idx] != 1:
                    continue
                if tok_start <= answer_start < tok_end:
                    start_token_idx = token_idx
                if tok_start < answer_end <= tok_end:
                    end_token_idx = token_idx

        # Fall back to position 0 when the span is not fully visible.
        if start_token_idx is None or end_token_idx is None:
            start_token_idx = 0
            end_token_idx = 0

        inputs["start_positions"] = torch.tensor(start_token_idx, dtype=torch.long)
        inputs["end_positions"] = torch.tensor(end_token_idx, dtype=torch.long)

        return {key: val.squeeze(0) for key, val in inputs.items()}

# Load the tokenizer and the model for the chosen checkpoint.
model_name = "dbmdz/bert-base-turkish-cased"

tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

# Wrap both splits in datasets and batch them.
train_dataset = QADataset(small_train_data, tokenizer)
test_dataset = QADataset(small_test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Optimizer plus a linear learning-rate schedule (no warm-up) over all steps.
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = len(train_loader) * num_epochs
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Eğitim fonksiyonu
def train(model, train_loader, optimizer, lr_scheduler, device):
    """Fine-tune ``model`` and record the mean train/validation loss per epoch.

    Reads the module-level ``num_epochs`` (number of passes) and
    ``test_loader`` (used for the per-epoch validation loss).

    Args:
        model: Model called as ``model(**batch)`` whose output exposes ``loss``.
        train_loader: Iterable of training batches (dicts of tensors).
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Learning-rate scheduler stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(train_loss_values, val_loss_values)``, one entry per epoch.
    """
    train_loss_values = []
    val_loss_values = []

    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the validation pass below puts
        # the model in eval mode, which would otherwise persist (dropout off)
        # for all remaining epochs.
        model.train()
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            epoch_loss += loss.item()
        train_loss_values.append(epoch_loss / len(train_loader))

        # Validation loss on the held-out loader, without gradient tracking.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                batch = {key: val.to(device) for key, val in batch.items()}
                outputs = model(**batch)
                val_loss += outputs.loss.item()
        val_loss_values.append(val_loss / len(test_loader))

    return train_loss_values, val_loss_values

# Değerlendirme fonksiyonu
from evaluate import load
import torch

# Load the F1 and exact-match metrics. Only metric_em is used by
# evaluate_model; its metric_f1 call is commented out.
metric_f1 = load("f1")
metric_em = load("exact_match")


from sklearn.metrics import f1_score
from collections import Counter

def calculate_f1_score(str1, str2):
    """Return the whitespace-token overlap F1 between two strings.

    Tokens are compared as multisets, so a repeated word only counts as many
    times as it appears in both strings. Returns 0 when there is no overlap
    or either string has no tokens.
    """
    first_words = str1.split()
    second_words = str2.split()

    # Multiset intersection keeps the minimum count of every shared token.
    overlap = sum((Counter(first_words) & Counter(second_words)).values())

    precision = overlap / len(second_words) if second_words else 0
    recall = overlap / len(first_words) if first_words else 0

    denominator = precision + recall
    if denominator <= 0:
        return 0
    return 2 * (precision * recall) / denominator


from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference_text, candidate_text):
    """Compute the sentence-level BLEU score between two strings.

    Args:
        reference_text (str): Reference (ground-truth) text.
        candidate_text (str): Text produced by the model (hypothesis).

    Returns:
        The score returned by ``sentence_bleu`` with ``method1`` smoothing.
    """
    # Whitespace tokenization; sentence_bleu expects a list of references.
    reference_tokens = reference_text.split()
    candidate_tokens = candidate_text.split()

    # Smoothing avoids a hard zero when a higher-order n-gram has no match.
    smoothing = SmoothingFunction().method1
    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing)

import traceback


def evaluate_model(model, test_loader, device, tokenizer):
    """Score the model's predicted answer spans against the labelled spans.

    For every example the predicted span (argmax of the start and end logits)
    and the labelled span are decoded back to text and compared with
    token-overlap F1, exact match and BLEU.

    Args:
        model: Question-answering model whose output exposes ``start_logits``
            and ``end_logits``.
        test_loader: Iterable of batches (dicts of tensors) containing
            ``input_ids``, ``start_positions`` and ``end_positions``.
        device: Device the batch tensors are moved to.
        tokenizer: Tokenizer used to decode token ids back to text.

    Returns:
        tuple: ``(avg_f1, avg_em, avg_bleu)``; each is 0 when nothing was scored.
    """
    model.eval()
    f1_scores = []
    em_scores = []
    bleu_scores = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)

            # Most likely start and end token for every example in the batch.
            start_pred = torch.argmax(outputs.start_logits, dim=-1)
            end_pred = torch.argmax(outputs.end_logits, dim=-1)

            for i, (start_tensor, end_tensor) in enumerate(zip(start_pred, end_pred)):
                start_position = start_tensor.item()
                end_position = end_tensor.item()

                # Decode the predicted span back to text.
                input_ids = batch['input_ids'][i].cpu().numpy()
                pred_text = tokenizer.decode(input_ids[start_position:end_position + 1], skip_special_tokens=True)

                # Decode the labelled span the same way.
                ref_start = batch["start_positions"][i].item()
                ref_end = batch["end_positions"][i].item()
                ref_text = tokenizer.decode(input_ids[ref_start:ref_end + 1], skip_special_tokens=True)

                # Without a decodable reference there is nothing to compare to.
                if not ref_text:
                    continue

                # An empty prediction is a miss, not a skip: dropping it would
                # remove failures from the averages and inflate every score.
                if not pred_text:
                    f1_scores.append(0.0)
                    em_scores.append(0.0)
                    bleu_scores.append(0.0)
                    continue

                try:
                    f1 = calculate_f1_score(pred_text, ref_text)
                    em = metric_em.compute(predictions=[pred_text], references=[ref_text])
                    # calculate_bleu takes (reference, candidate); BLEU is not
                    # symmetric, so the argument order matters.
                    bleu_score_tmp = calculate_bleu(ref_text, pred_text)
                except ValueError as e:
                    print(f"ValueError for prediction: {pred_text} and reference: {ref_text} -> {e}")
                    traceback.print_exc()
                    continue

                f1_scores.append(f1)
                em_scores.append(em["exact_match"])
                bleu_scores.append(bleu_score_tmp)

    # Average every metric over the scored examples.
    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
    avg_em = sum(em_scores) / len(em_scores) if em_scores else 0
    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

    return avg_f1, avg_em, avg_bleu

# Eğitim süresi ve validation süresi hesaplama
def main(is_train=True):
    """Train (or reload) the QA model, then evaluate it and print the metrics.

    Works on the module-level ``model``, ``train_loader``, ``test_loader``,
    ``optimizer``, ``lr_scheduler``, ``device``, ``tokenizer``, ``model_name``
    and ``num_epochs``.

    Args:
        is_train: When true, fine-tune the model, save the loss plot and the
            weights; otherwise load previously saved weights from disk.
    """
    checkpoint_path = f"model_{model_name.replace('/', '_')}.pth"

    if is_train:
        train_start = time.time()
        train_loss_values, val_loss_values = train(model, train_loader, optimizer, lr_scheduler, device)
        train_end = time.time()
        print(f"Training Duration: {train_end - train_start} seconds.")

        # Plot the per-epoch training and validation loss.
        epochs = range(1, num_epochs + 1)
        plt.plot(epochs, train_loss_values, label='Train Loss')
        plt.plot(epochs, val_loss_values, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'loss_plot_{model_name.replace("/", "_")}.png', dpi=300)
        plt.show()

        # Persist the fine-tuned weights.
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Model kaydedildi: {checkpoint_path}")
    else:
        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine instead of failing while deserializing.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.to(device)
        print(f"Model yüklendi: {checkpoint_path}")

    # Evaluate on the test loader and time the run.
    test_start = time.time()
    avg_f1, avg_em, avg_bleu = evaluate_model(model, test_loader, device, tokenizer)
    test_end = time.time()
    print(f"Test Duration: {test_end - test_start} seconds.")

    # Report the results.
    print(f"Test F1 Skoru: {avg_f1:.4f}")
    print(f"Test Exact Match (EM) Skoru: {avg_em:.4f}")
    print(f"Test BLEU Skoru: {avg_bleu:.4f}")

# Start the training or evaluation run.
is_train = True  # True: fine-tune the model; False: load the saved weights instead
main(is_train)

# Inference BERTurk

In [None]:
import torch
from transformers import BertTokenizerFast, BertForQuestionAnswering

# Checkpoint name and the file holding its fine-tuned weights.
model_name = "dbmdz/bert-base-turkish-cased"
model_path = f"model_{model_name.replace('/', '_')}.pth"

# Pick the device once: GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the tokenizer and model, then restore the fine-tuned weights.
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model.eval()

# Move the model to the selected device.
model.to(device)

# Kullanıcıdan soru ve bağlam alıp cevap üreten fonksiyon
def answer_question(question, context):
    """Return the answer span the model extracts from ``context`` for ``question``.

    Uses the module-level ``tokenizer``, ``model`` and ``device``. The span
    runs from the argmax of the start logits to the argmax of the end logits
    (inclusive) and is decoded without special tokens.
    """
    encoded = tokenizer.encode_plus(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Put the input tensors on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        prediction = model(**encoded)

    # Most likely start and end token positions.
    span_start = torch.argmax(prediction.start_logits)
    span_end = torch.argmax(prediction.end_logits)

    # Slice the predicted token ids out of the single input row and decode.
    span_ids = encoded["input_ids"][0][span_start: span_end + 1]
    return tokenizer.decode(span_ids, skip_special_tokens=True)

# Interactive loop: read a context and a question, print the model's answer.
# Entering 'q' (any case) at either prompt ends the loop.
while True:
    context = input("Lütfen bağlamı girin (çıkmak için 'q' yazın): ")
    if context.lower() == 'q':
        print("Çıkılıyor...")
        break
    
    question = input("Lütfen soruyu girin: ")
    if question.lower() == 'q':
        print("Çıkılıyor...")
        break

    answer = answer_question(question, context)
    print(f"Cevap: {answer}\n")

# xlm-roberta-base

In [None]:
# Gerekli kütüphaneleri yükleyin
!pip install evaluate

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForQuestionAnswering, AdamW
from transformers import get_scheduler
from tqdm import tqdm
import time
import evaluate
import matplotlib.pyplot as plt

# Load the training and test CSV files.
train_file = "/kaggle/input/enelpi-q-a/questions_and_answers.csv"
test_file = "/kaggle/input/enelpi-q-a/test_questions_and_answers.csv"

train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)

# Optional down-sampling hook: the .head(...) calls are commented out, so the
# complete train and test frames are used as-is.
small_train_data = train_data#.head(200)
small_test_data = test_data#.head(10)

# Dataset sınıfını tanımlayın
class QADataset(Dataset):
    """Extractive-QA dataset: tokenized (question, context) pairs with span labels.

    Every row of ``data`` must provide ``question``, ``context`` and ``answer``
    columns. The answer is located in the context by substring search and
    converted to start/end token indices using the tokenizer's offset mapping.
    """

    def __init__(self, data, tokenizer, max_len=512):
        """Store the data source and tokenization settings.

        Args:
            data: DataFrame-like object read positionally through ``.iloc``.
            tokenizer: Callable tokenizer that supports
                ``return_offsets_mapping`` and whose result exposes
                ``sequence_ids()`` (Hugging Face "fast" tokenizers).
            max_len: Length every encoded example is truncated/padded to.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode row ``idx`` and attach its answer-span labels.

        Returns:
            dict: The tokenizer outputs (batch dimension removed) plus
            ``start_positions`` and ``end_positions``. Both labels are 0 when
            the answer cannot be located among the encoded context tokens
            (answer missing from the context, or truncated away).
        """
        row = self.data.iloc[idx]
        question = row["question"]
        context = row["context"]
        answer = row["answer"]

        inputs = self.tokenizer(
            question,
            context,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
            return_offsets_mapping=True,  # needed to map characters to tokens
        )

        # Plain Python pairs are much cheaper to scan than tensor elements.
        offsets = inputs.pop("offset_mapping").squeeze(0).tolist()
        # Per token: None for special/padding tokens, 0 for question tokens,
        # 1 for context tokens.
        sequence_ids = inputs.sequence_ids(0)

        answer_start = context.find(answer)
        answer_end = answer_start + len(answer)

        start_token_idx = None
        end_token_idx = None

        if answer_start >= 0:
            for token_idx, (tok_start, tok_end) in enumerate(offsets):
                # Offsets are relative to each token's own text, so only
                # context tokens may be compared with positions inside
                # ``context``; otherwise a question token could be labelled.
                if sequence_ids[token_idx] != 1:
                    continue
                if tok_start <= answer_start < tok_end:
                    start_token_idx = token_idx
                if tok_start < answer_end <= tok_end:
                    end_token_idx = token_idx

        # Fall back to position 0 when the span is not fully visible.
        if start_token_idx is None or end_token_idx is None:
            start_token_idx = 0
            end_token_idx = 0

        inputs["start_positions"] = torch.tensor(start_token_idx, dtype=torch.long)
        inputs["end_positions"] = torch.tensor(end_token_idx, dtype=torch.long)

        return {key: val.squeeze(0) for key, val in inputs.items()}

# Checkpoint to fine-tune.
model_name = "FacebookAI/xlm-roberta-base"

from transformers import AutoTokenizer
from transformers import XLMRobertaForQuestionAnswering

# Load the XLM-RoBERTa question-answering model and its tokenizer.
model = XLMRobertaForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap both splits in datasets and batch them.
train_dataset = QADataset(small_train_data, tokenizer)
test_dataset = QADataset(small_test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Optimizer plus a linear learning-rate schedule (no warm-up) over all steps.
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = len(train_loader) * num_epochs
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Eğitim fonksiyonu
def train(model, train_loader, optimizer, lr_scheduler, device):
    """Fine-tune ``model`` and record the mean train/validation loss per epoch.

    Reads the module-level ``num_epochs`` (number of passes) and
    ``test_loader`` (used for the per-epoch validation loss).

    Args:
        model: Model called as ``model(**batch)`` whose output exposes ``loss``.
        train_loader: Iterable of training batches (dicts of tensors).
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Learning-rate scheduler stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        tuple: ``(train_loss_values, val_loss_values)``, one entry per epoch.
    """
    train_loss_values = []
    val_loss_values = []

    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the validation pass below puts
        # the model in eval mode, which would otherwise persist (dropout off)
        # for all remaining epochs.
        model.train()
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            epoch_loss += loss.item()
        train_loss_values.append(epoch_loss / len(train_loader))

        # Validation loss on the held-out loader, without gradient tracking.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                batch = {key: val.to(device) for key, val in batch.items()}
                outputs = model(**batch)
                val_loss += outputs.loss.item()
        val_loss_values.append(val_loss / len(test_loader))

    return train_loss_values, val_loss_values

# Değerlendirme fonksiyonu
from evaluate import load
import torch

# Load the F1 and exact-match metrics. Only metric_em is used by
# evaluate_model; its metric_f1 call is commented out.
metric_f1 = load("f1")
metric_em = load("exact_match")


from sklearn.metrics import f1_score
from collections import Counter

def calculate_f1_score(str1, str2):
    """Return the whitespace-token overlap F1 between two strings.

    Tokens are compared as multisets, so a repeated word only counts as many
    times as it appears in both strings. Returns 0 when there is no overlap
    or either string has no tokens.
    """
    first_words = str1.split()
    second_words = str2.split()

    # Multiset intersection keeps the minimum count of every shared token.
    overlap = sum((Counter(first_words) & Counter(second_words)).values())

    precision = overlap / len(second_words) if second_words else 0
    recall = overlap / len(first_words) if first_words else 0

    denominator = precision + recall
    if denominator <= 0:
        return 0
    return 2 * (precision * recall) / denominator


from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference_text, candidate_text):
    """Compute the sentence-level BLEU score between two strings.

    Args:
        reference_text (str): Reference (ground-truth) text.
        candidate_text (str): Text produced by the model (hypothesis).

    Returns:
        The score returned by ``sentence_bleu`` with ``method1`` smoothing.
    """
    # Whitespace tokenization; sentence_bleu expects a list of references.
    reference_tokens = reference_text.split()
    candidate_tokens = candidate_text.split()

    # Smoothing avoids a hard zero when a higher-order n-gram has no match.
    smoothing = SmoothingFunction().method1
    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing)

import traceback


def evaluate_model(model, test_loader, device, tokenizer):
    """Score the model's predicted answer spans against the labelled spans.

    For every example the predicted span (argmax of the start and end logits)
    and the labelled span are decoded back to text and compared with
    token-overlap F1, exact match and BLEU.

    Args:
        model: Question-answering model whose output exposes ``start_logits``
            and ``end_logits``.
        test_loader: Iterable of batches (dicts of tensors) containing
            ``input_ids``, ``start_positions`` and ``end_positions``.
        device: Device the batch tensors are moved to.
        tokenizer: Tokenizer used to decode token ids back to text.

    Returns:
        tuple: ``(avg_f1, avg_em, avg_bleu)``; each is 0 when nothing was scored.
    """
    model.eval()
    f1_scores = []
    em_scores = []
    bleu_scores = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)

            # Most likely start and end token for every example in the batch.
            start_pred = torch.argmax(outputs.start_logits, dim=-1)
            end_pred = torch.argmax(outputs.end_logits, dim=-1)

            for i, (start_tensor, end_tensor) in enumerate(zip(start_pred, end_pred)):
                start_position = start_tensor.item()
                end_position = end_tensor.item()

                # Decode the predicted span back to text.
                input_ids = batch['input_ids'][i].cpu().numpy()
                pred_text = tokenizer.decode(input_ids[start_position:end_position + 1], skip_special_tokens=True)

                # Decode the labelled span the same way.
                ref_start = batch["start_positions"][i].item()
                ref_end = batch["end_positions"][i].item()
                ref_text = tokenizer.decode(input_ids[ref_start:ref_end + 1], skip_special_tokens=True)

                # Without a decodable reference there is nothing to compare to.
                if not ref_text:
                    continue

                # An empty prediction is a miss, not a skip: dropping it would
                # remove failures from the averages and inflate every score.
                if not pred_text:
                    f1_scores.append(0.0)
                    em_scores.append(0.0)
                    bleu_scores.append(0.0)
                    continue

                try:
                    f1 = calculate_f1_score(pred_text, ref_text)
                    em = metric_em.compute(predictions=[pred_text], references=[ref_text])
                    # calculate_bleu takes (reference, candidate); BLEU is not
                    # symmetric, so the argument order matters.
                    bleu_score_tmp = calculate_bleu(ref_text, pred_text)
                except ValueError as e:
                    print(f"ValueError for prediction: {pred_text} and reference: {ref_text} -> {e}")
                    traceback.print_exc()
                    continue

                f1_scores.append(f1)
                em_scores.append(em["exact_match"])
                bleu_scores.append(bleu_score_tmp)

    # Average every metric over the scored examples.
    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
    avg_em = sum(em_scores) / len(em_scores) if em_scores else 0
    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

    return avg_f1, avg_em, avg_bleu

# Eğitim süresi ve validation süresi hesaplama
def main(is_train=True):
    """Train (or reload) the model, then evaluate it on the test set.

    Relies on the module-level ``model``, ``train_loader``, ``test_loader``,
    ``optimizer``, ``lr_scheduler``, ``device``, ``tokenizer`` and
    ``model_name``.

    Args:
        is_train: When True, fine-tune the model with ``train``, plot and save
            the per-epoch losses and save the weights. When False, load the
            weights previously saved under the same file name instead.

    Returns:
        None. Durations and the test F1 / EM / BLEU scores are printed.
    """
    safe_name = model_name.replace("/", "_")
    weights_path = f"model_{safe_name}.pth"

    if is_train:
        train_start = time.time()
        train_loss_values, val_loss_values = train(model, train_loader, optimizer, lr_scheduler, device)
        train_end = time.time()
        print(f"Training duration: {train_end - train_start}")

        # Plot the training and validation loss curves; the x axis follows the
        # number of recorded epochs so it always matches the y values.
        epochs = range(1, len(train_loss_values) + 1)
        plt.plot(epochs, train_loss_values, label='Train Loss')
        plt.plot(epochs, val_loss_values, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'loss_plot_{safe_name}.png', dpi=300)
        plt.show()

        # Save the fine-tuned weights
        torch.save(model.state_dict(), weights_path)
        print(f"Model kaydedildi: {weights_path}")
    else:
        # Load the saved weights without training. map_location remaps the
        # tensors onto the current device, so weights saved on a GPU can be
        # loaded on a CPU-only machine.
        model.load_state_dict(torch.load(weights_path, map_location=device))
        model.to(device)
        print(f"Model yüklendi: {weights_path}")

    # Run the test step
    test_start = time.time()
    test_f1, test_em, test_bleu = evaluate_model(model, test_loader, device, tokenizer)
    test_end = time.time()
    print(f"Test duration: {test_end - test_start}")

    # Print the results
    print(f"Test F1 Skoru: {test_f1:.4f}")
    print(f"Test Exact Match (EM) Skoru: {test_em:.4f}")
    print(f"Test BLEU Skoru: {test_bleu:.4f}")

# Start the training or evaluation run
is_train = True  # True: fine-tune, then evaluate; False: load the saved weights and only evaluate
main(is_train)

# dbmdz/electra-base-turkish-cased-discriminator

In [None]:
# Gerekli kütüphaneleri yükleyin
!pip install evaluate

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForQuestionAnswering, AdamW
from transformers import get_scheduler
from tqdm import tqdm
import time
import evaluate
import matplotlib.pyplot as plt

# Load the data files
train_file = "/kaggle/input/enelpi-q-a/questions_and_answers.csv"
test_file = "/kaggle/input/enelpi-q-a/test_questions_and_answers.csv"

train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)

# Optional down-sampling hook for quick experiments. The .head(...) calls are
# commented out, so the full training and test sets are used as-is.
small_train_data = train_data#.head(1000)
small_test_data = test_data#.head(100)

# Dataset sınıfını tanımlayın
class QADataset(Dataset):
    """Extractive-QA dataset: tokenized (question, context) pairs with answer-span labels.

    Each row of ``data`` must provide ``question``, ``context`` and ``answer``
    columns. The tokenizer must be a "fast" tokenizer, because character
    offsets and sequence ids are needed to locate the answer tokens.

    Args:
        data: DataFrame holding the QA rows.
        tokenizer: Fast tokenizer called as ``tokenizer(question, context, ...)``.
        max_len: Length every encoded pair is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_len=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return the encoded pair for row ``idx`` plus ``start_positions``/``end_positions``.

        The positions are token indices of the first occurrence of the answer
        inside the context. Both are 0 when the answer cannot be located
        (not present in the context, or truncated away).
        """
        row = self.data.iloc[idx]
        question = row["question"]
        context = row["context"]
        answer = row["answer"]

        # Tokenize the pair; offsets map every token back to character spans
        inputs = self.tokenizer(
            question,
            context,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
            return_offsets_mapping=True,
        )

        offset_mapping = inputs.pop("offset_mapping").squeeze(0).tolist()
        # Sequence id 1 marks context tokens (0 = question, None = special/padding)
        sequence_ids = inputs.sequence_ids(0)

        start_char_idx = context.find(answer)
        end_char_idx = start_char_idx + len(answer)

        start_token_idx = None
        end_token_idx = None

        if start_char_idx >= 0:
            for token_idx, (start, end) in enumerate(offset_mapping):
                # Question-token offsets are relative to the question string,
                # so comparing them with context character positions would
                # produce bogus matches; only context tokens are eligible.
                if sequence_ids[token_idx] != 1:
                    continue
                if start <= start_char_idx < end:
                    start_token_idx = token_idx
                if start < end_char_idx <= end:
                    end_token_idx = token_idx

        # Fall back to position 0 when either end of the span is missing
        if start_token_idx is None or end_token_idx is None:
            start_token_idx = 0
            end_token_idx = 0

        inputs["start_positions"] = torch.tensor(start_token_idx, dtype=torch.long)
        inputs["end_positions"] = torch.tensor(end_token_idx, dtype=torch.long)

        # Drop the batch dimension added by return_tensors="pt"
        return {key: val.squeeze(0) for key, val in inputs.items()}

# Load the model and tokenizer.
# The checkpoint is an ELECTRA discriminator, so let the Auto* factories pick
# the matching tokenizer/model classes from its config instead of forcing the
# BERT classes onto a non-BERT checkpoint.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

model_name = "dbmdz/electra-base-turkish-cased-discriminator"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Build the datasets and DataLoaders
train_dataset = QADataset(small_train_data, tokenizer)
test_dataset = QADataset(small_test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Optimizer and learning-rate schedule (linear decay, no warm-up)
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Use the GPU when one is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Eğitim fonksiyonu
def train(model, train_loader, optimizer, lr_scheduler, device):
    """Fine-tune ``model`` and record the mean train/validation loss per epoch.

    Runs for the module-level ``num_epochs`` and uses the module-level
    ``test_loader`` as the validation set.

    Args:
        model: Model whose forward pass returns an object with a ``loss``.
        train_loader: Iterable of batches (dicts of tensors) for training.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per batch, after the optimizer.
        device: Device every batch tensor is moved to.

    Returns:
        Tuple ``(train_loss_values, val_loss_values)``, one mean loss per epoch.
    """
    train_loss_values = []
    val_loss_values = []

    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the validation pass at the end
        # of the previous epoch switched the model to eval mode, which would
        # otherwise leave dropout disabled for all later epochs.
        model.train()
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            epoch_loss += loss.item()
        train_loss_values.append(epoch_loss / len(train_loader))

        # Validation loss for this epoch
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                batch = {key: val.to(device) for key, val in batch.items()}
                outputs = model(**batch)
                val_loss += outputs.loss.item()
        val_loss_values.append(val_loss / len(test_loader))

    return train_loss_values, val_loss_values

# Değerlendirme fonksiyonu
from evaluate import load
import torch

# Load the F1 and exact-match metrics through evaluate.load
# NOTE(review): metric_f1 is only referenced from a commented-out line in
# evaluate_model below; the F1 actually reported comes from calculate_f1_score.
metric_f1 = load("f1")
metric_em = load("exact_match")


from sklearn.metrics import f1_score
from collections import Counter

def calculate_f1_score(str1, str2):
    """Return the whitespace-token overlap F1 score between two strings.

    Tokens are obtained with ``str.split()``. A token repeated in both strings
    counts as often as it occurs in both. The score is symmetric in its
    arguments, so it does not matter which string is the prediction.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The F1 score, or 0 when the strings share no tokens (including when
        either string has no tokens at all).
    """
    first_tokens = str1.split()
    second_tokens = str2.split()

    # Multiset intersection of the two token bags
    overlap = sum((Counter(first_tokens) & Counter(second_tokens)).values())

    precision = overlap / len(second_tokens) if second_tokens else 0
    recall = overlap / len(first_tokens) if first_tokens else 0

    # No overlap at all: avoid dividing by zero
    if precision + recall <= 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)


from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference_text, candidate_text):
    """Compute the smoothed sentence-level BLEU score between two strings.

    Both strings are tokenized on whitespace before scoring.

    Args:
        reference_text (str): Reference (ground-truth) text.
        candidate_text (str): Text produced by the model (hypothesis).

    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    candidate_tokens = candidate_text.split()
    # sentence_bleu expects a list of references, each one a list of tokens
    reference_token_lists = [reference_text.split()]

    # method1 smoothing keeps short texts from collapsing to zero
    return sentence_bleu(
        reference_token_lists,
        candidate_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

import traceback


def evaluate_model(model, test_loader, device, tokenizer):
    """Score the model's predicted answer spans against the reference spans.

    For every sample the argmax start/end logits select the predicted token
    span, which is decoded and compared with the decoded reference span.
    Relies on the module-level ``metric_em``, ``calculate_f1_score`` and
    ``calculate_bleu``.

    Samples whose reference text decodes to an empty string are skipped
    because there is nothing to compare against. An empty prediction for a
    non-empty reference is counted as a miss (0 for every metric) instead of
    being dropped, so unanswered samples lower the averages.

    Args:
        model: QA model returning ``start_logits`` and ``end_logits``.
        test_loader: Iterable of batches containing ``input_ids``,
            ``start_positions`` and ``end_positions``.
        device: Device every batch tensor is moved to.
        tokenizer: Tokenizer used to decode token-id spans back to text.

    Returns:
        Tuple ``(avg_f1, avg_em, avg_bleu)``; each is 0 when nothing was scored.
    """
    model.eval()
    f1_scores = []
    em_scores = []
    bleu_scores = []

    # Evaluate the test set
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            batch = {key: val.to(device) for key, val in batch.items()}
            outputs = model(**batch)

            # Most likely start and end token for every sample in the batch
            start_pred = torch.argmax(outputs.start_logits, dim=-1).tolist()
            end_pred = torch.argmax(outputs.end_logits, dim=-1).tolist()
            ref_starts = batch["start_positions"].tolist()
            ref_ends = batch["end_positions"].tolist()
            batch_input_ids = batch['input_ids'].cpu().numpy()

            for input_ids, start_position, end_position, ref_start, ref_end in zip(
                batch_input_ids, start_pred, end_pred, ref_starts, ref_ends
            ):
                # Decode the predicted and the reference spans
                pred_text = tokenizer.decode(input_ids[start_position:end_position + 1], skip_special_tokens=True)
                ref_text = tokenizer.decode(input_ids[ref_start:ref_end + 1], skip_special_tokens=True)

                # No usable reference: nothing to score against
                if not ref_text:
                    continue

                # An empty prediction is a wrong answer, not a missing sample
                if not pred_text:
                    f1_scores.append(0)
                    em_scores.append(0)
                    bleu_scores.append(0)
                    continue

                try:
                    f1 = calculate_f1_score(pred_text, ref_text)
                    em = metric_em.compute(predictions=[pred_text], references=[ref_text])
                    # calculate_bleu takes (reference, candidate), in that order
                    bleu_score_tmp = calculate_bleu(ref_text, pred_text)

                    f1_scores.append(f1)
                    em_scores.append(em["exact_match"])
                    bleu_scores.append(bleu_score_tmp)
                except ValueError as e:
                    print(f"ValueError for prediction: {pred_text} and reference: {ref_text} -> {e}")
                    traceback.print_exc()
                    continue

    # Average the per-sample scores
    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
    avg_em = sum(em_scores) / len(em_scores) if em_scores else 0
    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

    return avg_f1, avg_em, avg_bleu

# Eğitim süresi ve validation süresi hesaplama
def main(is_train=True):
    """Train (or reload) the model, then evaluate it on the test set.

    Relies on the module-level ``model``, ``train_loader``, ``test_loader``,
    ``optimizer``, ``lr_scheduler``, ``device``, ``tokenizer`` and
    ``model_name``.

    Args:
        is_train: When True, fine-tune the model with ``train``, plot and save
            the per-epoch losses and save the weights. When False, load the
            weights previously saved under the same file name instead.

    Returns:
        None. Durations and the test F1 / EM / BLEU scores are printed.
    """
    safe_name = model_name.replace("/", "_")
    weights_path = f"model_{safe_name}.pth"

    if is_train:
        train_start = time.time()
        train_loss_values, val_loss_values = train(model, train_loader, optimizer, lr_scheduler, device)
        train_end = time.time()
        print(f"Training duration: {train_end - train_start}")

        # Plot the training and validation loss curves; the x axis follows the
        # number of recorded epochs so it always matches the y values.
        epochs = range(1, len(train_loss_values) + 1)
        plt.plot(epochs, train_loss_values, label='Train Loss')
        plt.plot(epochs, val_loss_values, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'loss_plot_{safe_name}.png', dpi=300)
        plt.show()

        # Save the fine-tuned weights
        torch.save(model.state_dict(), weights_path)
        print(f"Model kaydedildi: {weights_path}")
    else:
        # Load the saved weights without training. map_location remaps the
        # tensors onto the current device, so weights saved on a GPU can be
        # loaded on a CPU-only machine.
        model.load_state_dict(torch.load(weights_path, map_location=device))
        model.to(device)
        print(f"Model yüklendi: {weights_path}")

    # Run the test step
    test_start = time.time()
    test_f1, test_em, test_bleu = evaluate_model(model, test_loader, device, tokenizer)
    test_end = time.time()
    # This interval measures evaluation, so it is reported as test time
    print(f"Test duration: {test_end - test_start}")

    # Print the results
    print(f"Test F1 Skoru: {test_f1:.4f}")
    print(f"Test Exact Match (EM) Skoru: {test_em:.4f}")
    print(f"Test BLEU Skoru: {test_bleu:.4f}")

# Start the training or evaluation run
is_train = True  # True: fine-tune, then evaluate; False: load the saved weights and only evaluate
main(is_train)

# google-t5/t5-small

In [None]:
!pip install evaluate

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
import time
import numpy as np
import evaluate
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


# Load the data
train_file = "/kaggle/input/enelpi-q-a/questions_and_answers.csv"
test_file = "/kaggle/input/enelpi-q-a/test_questions_and_answers.csv"
train_df = pd.read_csv(train_file)
test_df = pd.read_csv(test_file)

# Model name
model_name = "google-t5/t5-small"

# Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Build the model input by joining context and question with a single space
train_df["input_text"] = train_df["context"] + " " + train_df["question"]
test_df["input_text"] = test_df["context"] + " " + test_df["question"]

# Optional sub-sampling hook; the .sample(...) calls are commented out, so
# both assignments are no-ops and the full frames are used.
train_df = train_df#.sample(100, random_state=42)  # 100 examples for training
test_df = test_df#.sample(20, random_state=42)    # 20 examples for testing

# Özel Dataset sınıfı
class QADataset(Dataset):
    """Seq2seq QA dataset: ``input_text`` rows encoded as inputs, ``answer`` as labels.

    Args:
        dataframe: DataFrame with ``input_text`` and ``answer`` columns.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors with a leading batch dimension.
        max_len: Length both source and target are padded/truncated to.
    """

    def __init__(self, dataframe, tokenizer, max_len=512):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize one text to a fixed-length tensor encoding."""
        return self.tokenizer(
            text,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

    def __getitem__(self, index):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one row."""
        record = self.data.iloc[index]
        source = self._encode(record["input_text"])
        target = self._encode(record["answer"])

        # NOTE(review): labels keep the tokenizer's padding ids. Presumably the
        # padded positions therefore contribute to the training loss; masking
        # them with -100 would also require compute_metrics to map -100 back
        # before decoding. Confirm before changing.
        return {
            "input_ids": source["input_ids"].squeeze(0),
            "attention_mask": source["attention_mask"].squeeze(0),
            "labels": target["input_ids"].squeeze(0),
        }

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from collections import Counter

# BLEU skoru hesaplama
def calculate_bleu(reference_text, candidate_text):
    """Return the smoothed sentence-level BLEU score of a tokenized candidate.

    Both arguments are forwarded to ``sentence_bleu`` unchanged. The caller in
    ``compute_metrics`` passes a list of reference token lists as
    ``reference_text`` and a list of tokens as ``candidate_text``.
    """
    return sentence_bleu(
        reference_text,
        candidate_text,
        smoothing_function=SmoothingFunction().method1,
    )

# F1 skoru hesaplama
def calculate_f1_score(preds, labels):
    """Return the mean token-overlap F1 over paired predictions and references.

    Args:
        preds: Sequence of predictions, each a list of tokens.
        labels: Sequence of references, each wrapped in a one-element list
            (``[[token, ...]]``); only the first reference is scored.

    Returns:
        ``np.mean`` of the per-pair F1 scores; a pair with no common token
        scores 0.
    """
    def pair_f1(pred_tokens, reference_tokens):
        # Multiset overlap between prediction and reference tokens
        overlap = sum((Counter(pred_tokens) & Counter(reference_tokens)).values())
        if overlap == 0:
            return 0
        precision = overlap / len(pred_tokens)
        recall = overlap / len(reference_tokens)
        return 2 * precision * recall / (precision + recall)

    return np.mean([pair_f1(pred, label[0]) for pred, label in zip(preds, labels)])

# Değerlendirme metrikleri
def compute_metrics(pred):
    """Compute token-level F1, exact match and BLEU for a prediction batch.

    Uses the module-level ``tokenizer`` to decode ids and the module-level
    ``calculate_f1_score`` / ``calculate_bleu`` helpers for scoring.

    Args:
        pred: Object exposing ``predictions`` (generated token ids, or a tuple
            whose first element holds them) and ``label_ids``.

    Returns:
        Dict with the keys ``f1``, ``exact_match`` and ``bleu_score``.
    """
    labels_ids = pred.label_ids
    pred_ids = pred.predictions
    # When predictions arrive as a tuple, the token ids are its first element
    if isinstance(pred_ids, tuple):
        pred_ids = pred_ids[0]

    # -100 is the ignore index used to mask/pad labels and predictions; it is
    # not a valid token id, so map it back to the pad token before decoding.
    pad_id = tokenizer.pad_token_id
    pred_ids = np.where(pred_ids != -100, pred_ids, pad_id)
    labels_ids = np.where(labels_ids != -100, labels_ids, pad_id)

    # Decode to text and split into whitespace tokens; every label is wrapped
    # in a list because the scoring helpers expect a list of references.
    decoded_preds = [text.split() for text in tokenizer.batch_decode(pred_ids, skip_special_tokens=True)]
    decoded_labels = [[text.split()] for text in tokenizer.batch_decode(labels_ids, skip_special_tokens=True)]

    # F1 score
    f1 = calculate_f1_score(decoded_preds, decoded_labels)

    # Exact match (EM) score
    exact_match = np.mean([
        int(" ".join(pred_tokens) == " ".join(label[0]))
        for pred_tokens, label in zip(decoded_preds, decoded_labels)
    ])

    # BLEU score
    bleu_score = np.mean([
        calculate_bleu(label, pred_tokens)
        for label, pred_tokens in zip(decoded_labels, decoded_preds)
    ])

    return {
        "f1": f1,
        "exact_match": exact_match,
        "bleu_score": bleu_score
    }

# Build the datasets and DataLoaders
train_dataset = QADataset(train_df, tokenizer)
test_dataset = QADataset(test_df, tokenizer)

# NOTE(review): the Trainer below receives the datasets directly; these two
# loaders are not referenced by the rest of the visible code.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Training settings
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    save_total_limit=2,
    predict_with_generate=True,
    logging_dir="./logs",
    logging_steps=10,
    do_train=True,
    do_eval=True,
    optim="adamw_torch",
    report_to="none",  # WANDB disabled
)

# Trainer setup; the test set doubles as the evaluation set
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Flag selecting between a training run and an evaluation-only run
is_train = True  # Set to False to skip training.

if is_train:
    # Training
    train_start = time.time()
    trainer.train()
    training_time = time.time() - train_start
    print(f"Eğitim süresi: {training_time:.2f} saniye")

    # Save the whole model object (not just its state dict)
    torch.save(model, f'model_{model_name.replace("/", "_")}.pth')

    # Training and validation loss plot
    train_loss = trainer.state.log_history
    train_logs = [x for x in train_loss if "loss" in x]
    eval_logs = [x for x in train_loss if "eval_loss" in x]
    train_losses = [x["loss"] for x in train_logs]
    eval_losses = [x["eval_loss"] for x in eval_logs]

    # Training loss is logged every `logging_steps` steps while validation loss
    # is logged once per epoch, so the two series have different lengths.
    # Plot both against the epoch recorded in each log entry so they share a
    # single, correctly labelled x axis (fall back to the entry index if a log
    # entry carries no epoch).
    train_epochs = [x.get("epoch", i) for i, x in enumerate(train_logs, start=1)]
    eval_epochs = [x.get("epoch", i) for i, x in enumerate(eval_logs, start=1)]

    #plt.figure(figsize=(10, 6))
    plt.plot(train_epochs, train_losses, label="Training Loss")
    plt.plot(eval_epochs, eval_losses, label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    #plt.title("Eğitim ve Doğrulama Kayıpları")
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'./loss_plot_{model_name.replace("/", "_")}.png', dpi=300)
    plt.show()

else:
    print("Model eğitimi atlandı.")

# Evaluate with the Trainer (its eval_dataset is the test set) and time it
test_start = time.time()
test_results = trainer.evaluate()
test_time = time.time() - test_start

# Single-sample inference time. The timer starts before the sample is fetched,
# so it also covers tokenizing that sample in QADataset.__getitem__.
start_time = time.time()
sample = test_dataset[0]
input_ids = sample["input_ids"].unsqueeze(0).to(model.device)
attention_mask = sample["attention_mask"].unsqueeze(0).to(model.device)

generated_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask)
inference_time = time.time() - start_time

# Timings
print(f"Test süresi: {test_time:.2f} saniye")
print(f"Inference süresi: {inference_time:.2f} saniye")

# Test metrics (keys as read from the dict returned by trainer.evaluate())
print(f"Test F1 Skoru: {test_results['eval_f1']:.4f}")
print(f"Test Exact Match (EM) Skoru: {test_results['eval_exact_match']:.4f}")
print(f"Test BLEU Skoru: {test_results['eval_bleu_score']:.4f}")