In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Kütüphaneler**






In [2]:
import re
import pandas as pd
import numpy as np

#Text Preprocessor Libraries
from nltk.tokenize import word_tokenize
import nltk
from nltk.corpus import stopwords
from zemberek.morphology import TurkishMorphology

nltk.download('punkt_tab') 

# Word Embedding Modeli
from gensim.models import Word2Vec, FastText
from sklearn.feature_extraction.text import TfidfVectorizer


from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Model performance metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# BERT libraries
from transformers import AutoTokenizer, AutoModel
import torch

# LSTM libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer

# CNN libraries
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten

# Class Weights
from sklearn.utils.class_weight import compute_class_weight

from torch.utils.data import DataLoader, TensorDataset
from torch import nn



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# Text Preprocessing

In [None]:
class TextPreprocessor:
    '''
    Türkçe metin ön işleme sınıfı.
    Bu sınıf, Türkçe metinler üzerinde çeşitli ön işleme adımları uygular:
    - Noktalama işaretlerini kaldırma
    - Sayıları kaldırma
    - Özel karakterleri temizleme
    - Stopword'leri kaldırma
    - Kelimeleri köklerine ayırma
    - Kısa kelimeleri filtreleme
    '''
    def __init__(self):
        # Türkçe stopword'leri yükle
        self.stop_words = set(stopwords.words('turkish'))
        # Zemberek kütüphanesi ile Türkçe morfolojik analiz oluştur
        self.morphology = TurkishMorphology.create_with_defaults()

    def find_root(self, word):
        '''Bir kelimenin kökünü bulur. Eğer analiz sonucu yoksa orijinal kelimeyi döndürür.'''
        results = self.morphology.analyze(word)
        for result in results:
            return result.get_s85tem()
        return word

    def lemmatize_text(self, text):
        '''Metindeki tüm kelimelerin köklerini çıkarır.'''
        words = text.split()
        lemmatized_words = [self.find_root(word) for word in words]
        return " ".join(lemmatized_words)

    def remove_punctuation(self, text):
        '''Noktalama işaretlerini metinden kaldırır.'''
        return re.sub(r'[\W]', ' ', text)

    def remove_numbers(self, text):
        '''Metindeki sayıları kaldırır.'''
        return re.sub(r'\d+', '', text)

    def remove_special_characters(self, text):
        '''Özel karakterleri metinden kaldırır.'''
        return re.sub(r'_x000D_', '', text)

    def remove_stopwords(self, text):
        '''Türkçe stopword'leri metinden kaldırır.'''
        return " ".join(word.lower() for word in text.split() if word.lower() not in self.stop_words)

    def remove_short_words(self, text):
        '''2 karakterden kısa kelimeleri metinden kaldırır.'''
        return " ".join(word for word in text.split() if len(word) > 2)

    def preprocess(self, text):
        '''
        Tüm metin ön işleme adımlarını sırasıyla uygular:
        - Özel karakterleri kaldırma
        - Sayıları kaldırma
        - Noktalama işaretlerini kaldırma
        - Stopword'leri kaldırma
        - Lemmatizasyon
        - Kısa kelimeleri kaldırma
        '''
        text = self.remove_special_characters(text)
        text = self.remove_numbers(text)
        text = self.remove_punctuation(text)
        text = self.remove_stopwords(text)
        text = self.lemmatize_text(text)
        text = self.remove_short_words(text)
        return text

# Word Representation

In [3]:
class WordRepresentation:
    '''
    Kelime temsil modellerini oluşturmak için sınıf:
    - Word2Vec
    - FastText
    - TF-IDF
    - BERT Embedding
    - T5 Embedding
    '''
    def word2vec(self, sentences, **params):
        """Word2Vec modelini oluştur ve eğit.
        sentences = Giriş 
        """
        model = Word2Vec(
            sentences=sentences, 
            vector_size=200, # Varsayılan embedding boyutu
            window=5, # Varsayılan pencere boyutu
            min_count=2, # Minimum kelime frekansı
            workers=4, # Paralel iş parçacığı sayısı
            epochs=10
        )
        return model

    def fasttext(self, sentences, **params):
        """FastText modelini oluştur ve eğit."""
        
        model = FastText(
            sentences=sentences,
            vector_size=params.get('vector_size', 200),  # Varsayılan embedding boyutu
            window=params.get('window', 5),  # Varsayılan pencere boyutu
            min_count=params.get('min_count', 2),  # Minimum kelime frekansı
            workers=params.get('workers', 4),  # Paralel iş parçacığı sayısı
            epochs = params.get('epochs', 5)
        )
        return model


    def tfidf(self, sentences, **params):
        """TF-IDF vektörleştiricisini oluştur ve uygula."""
        vectorizer = TfidfVectorizer(**params)
        tfidf_matrix = vectorizer.fit_transform(sentences)
        return vectorizer, tfidf_matrix

    def T5Transformer(self, sentences, batch_size=32, **params):
        model_name = "google/mt5-small"
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name).to(device)  # Modeli GPU'ya taşı

        embeddings = []  # Embedding listesi

        # Batch işlemi
        for i in range(0, len(sentences), batch_size): # batch boyutunu start stop fonksiyonu ile ayarla
            batch = sentences[i:i + batch_size]  # Batch'i al
            # Tokenizer ile batch'i işleme
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=params.get('max_length', 512)  # Uzunluk sınırı
            ).to(device)  # Girişleri GPU'ya taşı

            with torch.no_grad(): 
                outputs = model.encoder(**inputs)  # Sadece encoder kısmını çalıştır
                batch_embeddings = outputs.last_hidden_state.mean(dim=1)  # Cümle için ortalama embedding al
            embeddings.append(batch_embeddings.cpu().numpy())  # Batch embedding'leri CPU'ya taşı ve kaydet

        # Tüm batch'leri birleştir
        return np.vstack(embeddings)


    def BertEmbedding(self, sentences, batch_size=32, **params):
        model_name = "dbmdz/bert-base-turkish-cased"
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Model ve tokenizer yükleniyor
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name).to(device)

        # Tüm cümleler için embedding hesaplama
        all_embeddings = []
        for i in range(0, len(sentences), batch_size):
            batch_sentences = sentences[i:i + batch_size]
            inputs = tokenizer(
                batch_sentences,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512 # Uzunluk sınırı
            ).to(device)

            with torch.no_grad():
                outputs = model(**inputs)
                embeddings = outputs.last_hidden_state.mean(dim=1)  # Cümle için ortalama embedding al
                all_embeddings.append(embeddings.cpu().numpy()) # Batch embedding'leri CPU'ya taşı ve kaydet

        return np.vstack(all_embeddings)  # Batch sonuçlarını birleştir



# Deep Learning

In [4]:
class DeepLearning:
    def LSTM(self, vectors, labels, embedding_dim=200, hidden_size=256, epochs=10, batch_size=16, dropout=0.3, learning_rate=0.001):
        '''
        LSTM modeli oluştur ve %80 eğitim, %20 test ile eğit (Keras kullanılarak).

        Args:
            vectors: Giriş embedding vektörleri (ör. Word2Vec, FastText, TF-IDF, Transformer).
            labels: Sınıf etiketleri.
            embedding_dim: Embedding boyutu.
            hidden_size: LSTM gizli birim boyutu.
            epochs: Eğitim epoch sayısı.
            batch_size: Batch boyutu.
            learning_rate: Öğrenme oranı.

        Returns:
            Test metriklerini içeren bir sözlük.
        '''

        vectors = vectors.reshape((vectors.shape[0], 1 , vectors.shape[1]))
        output_size = len(np.unique(labels))
        binary_classification = (output_size == 2)

        X_train, X_test, y_train, y_test = train_test_split(
            vectors, labels,
            test_size = 0.2,
            random_state = 42,
            stratify=labels
        )

        # Dengesiz veride sinif agirliklarinin duzenlenmesi
        class_weights = compute_class_weight('balanced',
            classes=np.unique(y_train),
            y=y_train
        )
        class_weights = {i: weight for i, weight in enumerate(class_weights)}
        print(f"Sınıf Ağırlıkları: {class_weights}")


        model = Sequential()
        model.add(LSTM(hidden_size, return_sequences = False, input_shape=(1, embedding_dim)))
        model.add(Dropout(dropout))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(dropout))
        model.add(Dense(3, activation='softmax' if not binary_classification else 'sigmoid'))

        loss = 'sparse_categorical_crossentropy' if not binary_classification else 'binary_crossentropy'
        model.compile(
            loss=loss,
            optimizer=Adam(learning_rate=learning_rate),
            metrics=['accuracy']
        )

        # Modeli eğit
        model.fit(
            np.array(X_train),
            np.array(y_train),
            epochs=epochs,
            batch_size=batch_size,
            class_weight=class_weights,
            verbose=1
        )

        # Test seti tahmini
        y_pred = model.predict(np.array(X_test), verbose=1)
        y_pred = np.argmax(y_pred, axis=1) if not binary_classification else (y_pred > 0.5).astype(int)

        # Performans metriklerini hesapla
        average_type = 'weighted'
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, average=average_type),
            'recall': recall_score(y_test, y_pred, average=average_type),
            'f1_score': f1_score(y_test, y_pred, average=average_type)
        }

        print("Test Sonuçları:")
        for metric, value in metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

        return metrics

    def CNN(self, vectors, labels, embedding_dim=200, num_filters=64, kernel_size=5, pool_size=2,
            dense_units=64, dropout_rate=0.2, epochs=7, batch_size=16, learning_rate=0.001):
        '''
        CNN modeli oluştur ve %80 eğitim, %20 test ile eğit (Keras kullanılarak).

        Args:
            vectors: Giriş embedding vektörleri (ör. Word2Vec, FastText, GloVe, TF-IDF, Transformer).
            labels: Sınıf etiketleri.
            embedding_dim: Embedding boyutu.
            num_filters: Convolutional layer filtre sayısı.
            kernel_size: Kernel boyutu.
            pool_size: Max pooling boyutu.
            dense_units: Fully connected layer'daki nöron sayısı.
            dropout_rate: Dropout oranı.
            epochs: Eğitim epoch sayısı.
            batch_size: Batch boyutu.
            learning_rate: Öğrenme oranı.

        Returns:
            Test metriklerini içeren bir sözlük.
        '''

        # Veriyi uygun şekle getirme
        vectors = vectors.reshape((vectors.shape[0], vectors.shape[1], 1))
        output_size = len(np.unique(labels))
        binary_classification = (output_size == 2)

        # Eğitim ve test seti ayırma
        X_train, X_test, y_train, y_test = train_test_split(
            vectors, labels,
            test_size=0.2,
            random_state=42,
            stratify=labels
        )

        # Dengesiz veride sınıf ağırlıklarının düzenlenmesi
        class_weights = compute_class_weight('balanced',
            classes=np.unique(y_train),
            y=y_train
        )
        class_weights = {i: weight for i, weight in enumerate(class_weights)}
        print(f"Sınıf Ağırlıkları: {class_weights}")

        # Model oluşturma
        model = Sequential()
        model.add(Conv1D(num_filters, kernel_size, activation='relu', input_shape=(embedding_dim, 1)))
        model.add(MaxPooling1D(pool_size))
        model.add(Flatten())
        model.add(Dense(dense_units, activation='relu'))
        model.add(Dropout(dropout_rate))
        model.add(Dense(output_size, activation='softmax' if not binary_classification else 'sigmoid'))

        loss = 'sparse_categorical_crossentropy' if not binary_classification else 'binary_crossentropy'
        model.compile(
            loss=loss,
            optimizer=Adam(learning_rate=learning_rate),
            metrics=['accuracy']
        )

        # Modeli eğit
        model.fit(
            np.array(X_train),
            np.array(y_train),
            epochs=epochs,
            batch_size=batch_size,
            class_weight=class_weights,
            verbose=1
        )

        # Test seti tahmini
        y_pred = model.predict(np.array(X_test), verbose=1)
        y_pred = np.argmax(y_pred, axis=1) if not binary_classification else (y_pred > 0.5).astype(int)

        # Performans metriklerini hesapla
        average_type = 'weighted'
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, average=average_type),
            'recall': recall_score(y_test, y_pred, average=average_type),
            'f1_score': f1_score(y_test, y_pred, average=average_type)
        }

        print("Test Sonuçları:")
        for metric, value in metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

        return metrics


    def CapsuleNetwork(self, embeddings, labels, batch_size=16, epochs=10, learning_rate=0.001):
        """
        Capsule Network tabanlı bir sınıflandırıcı: Embedding vektörlerini alır ve kendi içinde sınıflama yapar.

        Args:
            embeddings: Giriş embedding vektörleri (numpy array formatında).
            labels: Sınıf etiketleri (numpy array ya da liste).
            batch_size: Batch boyutu.
            epochs: Eğitim epoch sayısı.
            learning_rate: Öğrenme oranı.

        Returns:
            Test metriklerini içeren bir sözlük.
        """




        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Eğitim ve test setlerini ayırma
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=42, stratify=labels
        )

        # Sınıf ağırlıklarını hesaplama
        class_weights = compute_class_weight(
            class_weight="balanced",
            classes=torch.unique(torch.tensor(labels)).cpu().numpy(),
            y=labels
        )
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

        # TensorDataset ve DataLoader
        train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=batch_size)

        # Capsule Network Model
        class CapsuleLayer(nn.Module):
            def __init__(self, input_dim, output_dim, num_capsules, routing_iterations):
                """
                input_dim: Giriş veri boyutu (özellik sayısı)
                output_dim: Her kapsülün çıkış boyutu
                num_capsules: Kapsül sayısı (kaç tane kapsül çıkışı olacağı)
                routing_iterations: Dinamik yönlendirme (routing) iterasyon sayısı
                """
                super(CapsuleLayer, self).__init__()
                self.num_capsules = num_capsules  # Kapsül sayısını sakla
                self.routing_iterations = routing_iterations  # Yönlendirme tekrar sayısını sakla
        
                # Ağırlık matrisi (W) oluşturuluyor
                self.W = nn.Parameter(torch.randn(1, input_dim, num_capsules * output_dim))

            def forward(self, x):
                """
                x: Giriş verisi (batch_size, input_dim)
                """
                batch_size = x.size(0)  # Batch boyutunu al
        
                # Kapsüllere bağlanacak ağırlık matrisini uygula
                x = torch.matmul(x.unsqueeze(1), self.W).squeeze(1)
        
                # Tensor boyutunu ayarla (batch_size, num_capsules, output_dim)
                x = x.view(batch_size, self.num_capsules, -1)
        
                # L2 norm ile vektörlerin uzunluğunu normalize et
                x = x / torch.norm(x, dim=-1, keepdim=True)
        
                return x

        class CapsuleNetworkModel(nn.Module):
            def __init__(self, input_dim, num_classes):
                """
                input_dim: Modelin giriş boyutu
                num_classes: Çıkış katmanındaki sınıf sayısı (kaç sınıfa sınıflandırma yapılacağı)
                """
                super(CapsuleNetworkModel, self).__init__()
        
                # Kapsül Katmanı oluşturuluyor (10 kapsül, her biri 16 boyutlu çıkış üretiyor)
                self.capsule = CapsuleLayer(input_dim, 16, num_capsules=10, routing_iterations=3)
        
                # Tam bağlı (Dense) katman ekleniyor
                self.fc = nn.Linear(16 * 10, num_classes)  # 10 kapsül * 16 boyut -> num_classes

            def forward(self, x):
                """
                x: Giriş verisi (batch_size, input_dim)
                """
                x = self.capsule(x)  # Kapsül katmanından geçir
        
                # Düzleştirme işlemi (Flatten)
                x = x.view(x.size(0), -1)
        
                # Tam bağlı katmandan geçirerek sınıflandırma yap
                x = self.fc(x)
        
                return x

        num_labels = len(set(labels))
        binary_classification = (num_labels == 2)

        model = CapsuleNetworkModel(embeddings.shape[1], 1 if binary_classification else num_labels).to(device)

        # Optimizasyon ve loss fonksiyonu
        if binary_classification:
            loss_fn = nn.BCEWithLogitsLoss()
        else:
            loss_fn = nn.CrossEntropyLoss(weight=class_weights)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Eğitim döngüsü
        model.train()
        for epoch in range(epochs):
            total_loss = 0
            for batch in train_loader:
                batch_embeddings, batch_labels = [b.to(device) for b in batch]
                optimizer.zero_grad()
                outputs = model(batch_embeddings).squeeze()
                loss = loss_fn(outputs, batch_labels.float() if binary_classification else batch_labels)
                total_loss += loss.item()
                loss.backward()
                optimizer.step()
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

        # Değerlendirme
        model.eval()
        y_pred = []
        y_true = []
        with torch.no_grad():
            for batch in test_loader:
                batch_embeddings, batch_labels = [b.to(device) for b in batch]
                outputs = model(batch_embeddings).squeeze()
                if binary_classification:
                    predictions = (torch.sigmoid(outputs) > 0.5).cpu().numpy()
                else:
                    predictions = torch.argmax(outputs, dim=1).cpu().numpy()
                y_pred.extend(predictions)
                y_true.extend(batch_labels.cpu().numpy())

        # Performans metriklerini hesapla
        average_type = "binary" if binary_classification else "weighted"
        metrics = {
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, average=average_type),
            "recall": recall_score(y_true, y_pred, average=average_type),
            "f1_score": f1_score(y_true, y_pred, average=average_type)
        }

        print("Test Sonuçları:")
        for metric, value in metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

        return metrics

# Transformer Classifier

In [5]:
class TransformerClassifier:
    def BERT(self, embeddings, labels, model_name="dbmdz/bert-base-turkish-cased", batch_size=16, epochs=5, learning_rate=0.001):
        """
        BERT tabanlı sınıflandırma modeli oluşturur ve eğitir.

        Args:
            embeddings: Gömme vektörleri (numpy array formatında).
            labels: Sınıf etiketleri (numpy array ya da liste).
            model_name: Kullanılacak BERT modelinin adı.
            batch_size: Batch boyutu.
            epochs: Eğitim epoch sayısı.
            learning_rate: Öğrenme oranı.

        Returns:
            Test metriklerini içeren bir sözlük.
        """


        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print(f"{model_name} modeli ile sınıflandırma modeli eğitiliyor...")

        # Eğitim ve test setlerini ayırma
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=42, stratify=labels
        )

        # Sınıf ağırlıklarını hesaplama
        class_weights = compute_class_weight(
            class_weight='balanced',
            classes=torch.unique(torch.tensor(labels)).cpu().numpy(),
            y=labels
        )
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

        # TensorDataset ve DataLoader
        train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=batch_size)


        num_labels = len(set(labels))
        binary_classification = (num_labels == 2)
        model = nn.Sequential(
            nn.Linear(embeddings.shape[1], 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1 if binary_classification else num_labels),
            nn.Sigmoid() if binary_classification else nn.Softmax(dim=1)
        ).to(device)

        # Optimizasyon ve loss fonksiyonu
        if binary_classification:
            loss_fn = nn.BCELoss()
        else:
            loss_fn = nn.CrossEntropyLoss(weight=class_weights)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Eğitim döngüsü
        model.train()
        for epoch in range(epochs):
            total_loss = 0
            for batch in train_loader:
                batch_embeddings, batch_labels = [b.to(device) for b in batch]
                optimizer.zero_grad()
                outputs = model(batch_embeddings).squeeze()
                loss = loss_fn(outputs, batch_labels.float() if binary_classification else batch_labels)
                total_loss += loss.item()
                loss.backward()
                optimizer.step()
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

        # Değerlendirme
        model.eval()
        y_pred = []
        y_true = []
        with torch.no_grad():
            for batch in test_loader:
                batch_embeddings, batch_labels = [b.to(device) for b in batch]
                outputs = model(batch_embeddings).squeeze()
                if binary_classification:
                    predictions = (outputs > 0.5).cpu().numpy()
                else:
                    predictions = torch.argmax(outputs, dim=1).cpu().numpy()
                y_pred.extend(predictions)
                y_true.extend(batch_labels.cpu().numpy())

        # Performans metriklerini hesapla
        average_type = 'binary' if binary_classification else 'weighted'
        metrics = {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred, average=average_type),
            'recall': recall_score(y_true, y_pred, average=average_type),
            'f1_score': f1_score(y_true, y_pred, average=average_type)
        }

        print("Test Sonuçları:")
        for metric, value in metrics.items():
            print(f"{metric.capitalize()}: {value:.4f}")

        return metrics


    def T5(self, embeddings, labels, model_name="google/mt5-small", batch_size=16, epochs=5, learning_rate=0.001):
            """
            T5 tabanlı sınıflandırma modeli oluşturur ve eğitir.

            Args:
                embeddings: Gömme vektörleri (numpy array formatında).
                labels: Sınıf etiketleri (numpy array ya da liste).
                model_name: Kullanılacak T5 modelinin adı.
                batch_size: Batch boyutu.
                epochs: Eğitim epoch sayısı.
                learning_rate: Öğrenme oranı.

            Returns:
                Test metriklerini içeren bir sözlük.
            """

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            print(f"{model_name} modeli ile sınıflandırma modeli eğitiliyor...")

            # Eğitim ve test setlerini ayırma
            X_train, X_test, y_train, y_test = train_test_split(
                embeddings, labels, test_size=0.2, random_state=42, stratify=labels
            )

            # Sınıf ağırlıklarını hesaplama
            class_weights = compute_class_weight(
                class_weight='balanced',
                classes=torch.unique(torch.tensor(labels)).cpu().numpy(),
                y=labels
            )
            class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

            # TensorDataset ve DataLoader
            train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
            test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_data, batch_size=batch_size)

            num_labels = len(set(labels))
            binary_classification = (num_labels == 2)
            model = nn.Sequential(
                nn.Linear(embeddings.shape[1], 128),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(128, 1 if binary_classification else num_labels),
                nn.Sigmoid() if binary_classification else nn.Softmax(dim=1)
            ).to(device)

            # Optimizasyon ve loss fonksiyonu
            if binary_classification:
                loss_fn = nn.BCELoss()
            else:
                loss_fn = nn.CrossEntropyLoss(weight=class_weights)
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

            # Eğitim döngüsü
            model.train()
            for epoch in range(epochs):
                total_loss = 0
                for batch in train_loader:
                    batch_embeddings, batch_labels = [b.to(device) for b in batch]
                    optimizer.zero_grad()
                    outputs = model(batch_embeddings).squeeze()
                    loss = loss_fn(outputs, batch_labels.float() if binary_classification else batch_labels)
                    total_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

            # Değerlendirme
            model.eval()
            y_pred = []
            y_true = []
            with torch.no_grad():
                for batch in test_loader:
                    batch_embeddings, batch_labels = [b.to(device) for b in batch]
                    outputs = model(batch_embeddings).squeeze()
                    if binary_classification:
                        predictions = (outputs > 0.5).cpu().numpy()
                    else:
                        predictions = torch.argmax(outputs, dim=1).cpu().numpy()
                    y_pred.extend(predictions)
                    y_true.extend(batch_labels.cpu().numpy())

            # Performans metriklerini hesapla
            average_type = 'binary' if binary_classification else 'weighted'
            metrics = {
                'accuracy': accuracy_score(y_true, y_pred),
                'precision': precision_score(y_true, y_pred, average=average_type),
                'recall': recall_score(y_true, y_pred, average=average_type),
                'f1_score': f1_score(y_true, y_pred, average=average_type)
            }

            print("Test Sonuçları:")
            for metric, value in metrics.items():
                print(f"{metric.capitalize()}: {value:.4f}")

            return metrics

# Training

**Veri Yükleme**


In [6]:
# Veri yükleme
data = pd.read_excel("/content/orj_aigen_paraphrased.xlsx") # Verinin yüklenmesi
data = data.dropna(subset=["label", "oz"]) # boş satır kontrolü
preprocessor = TextPreprocessor() # Metin ön işleme sınıfının değişkene atanması
data["oz"] = data["oz"].apply(preprocessor.preprocess) # metin ön işleme adımının çalıştırılması 
tokenized_sentences = [word_tokenize(sentence) for sentence in data["oz"].astype(str)] # Metinlerin tokenize edilmesi
labels = data["label"].to_numpy()
sentences = data["oz"].astype(str).tolist() # Transformer modelleri için cümleler tokenize edilmeden liste halinde verilmiştir. Modellerde auto tokenizer bulunmaktadır.

**Sınıfların Yüklenmesi**

In [7]:
wr = WordRepresentation() # Kelime temsil yöntemleri sınıfının değişkene atanması 
dl = DeepLearning() # Derin öğrenme algoritmaları sınıfının değişkene atanması
tcr = TransformerClassifier() # Transformer yapılı sınıflayıcıların sınıfının değişkene atanması

**Word2Vec Model**

In [8]:
w2v_model = wr.word2vec(tokenized_sentences)

# Embedding vektörlerini oluşturma
embedding_dim = 200
w2v_vectors = np.array([
  np.mean([w2v_model.wv[word] for word in sentence if word in w2v_model.wv] or [np.zeros(embedding_dim)], axis=0)
  for sentence in tokenized_sentences
])


**Fast-Text Model**

In [9]:
fasttext_model = wr.fasttext(tokenized_sentences)

embedding_dim = 200
ft_vectors = np.array([
    np.mean([fasttext_model.wv[word] for word in sentence if word in fasttext_model.wv] or [np.zeros(embedding_dim)], axis=0)
    for sentence in tokenized_sentences
])

**TF-IDF Model**

In [10]:
tfidf_vectorizer, tfidf_matrix = wr.tfidf(
    data["oz"],
    max_features=30000,  # Özellik sınırı 30000
    max_df=0.9,         # Çok sık geçen kelimeleri filtrele (ör. 90%'dan fazla geçenler)
    min_df=2,           # Çok nadir geçen kelimeleri filtrele (ör. 2'den az geçenler)
)

# TF-IDF matrisini dense (yoğun) formata çevir
tfidf_vectors = tfidf_matrix.toarray()

**Bert Embedding**

In [11]:
# Cihaz ayarı
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU varsa kullanılır, yoksa CPU

# BERT embedding işlemi
wr = WordRepresentation()
bert_embeddings = wr.BertEmbedding(
    sentences=sentences,
    device=device  # Cihaz bilgisi ekleniyor
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


**T5 Embedding**


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t5_embeddings = wr.T5Transformer(sentences=sentences, device=device, batch_size=64)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


**Word2Vec - LSTM Classifier**

In [19]:
metrics = dl.LSTM(w2v_vectors, labels, dropout=0.2)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}
Epoch 1/10


  super().__init__(**kwargs)


[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6025 - loss: 0.7559
Epoch 2/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7202 - loss: 0.5794
Epoch 3/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7511 - loss: 0.5381
Epoch 4/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7685 - loss: 0.5104
Epoch 5/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7763 - loss: 0.4889
Epoch 6/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7870 - loss: 0.4664
Epoch 7/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7950 - loss: 0.4479
Epoch 8/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8009 - loss: 0.4381
Epoch 9/10
[1m2746/2746[0m [32m

**FastText - LSTM Classifier**

In [22]:
metrics = dl.LSTM(ft_vectors, labels, dropout=0.1)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}
Epoch 1/10


  super().__init__(**kwargs)


[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6049 - loss: 0.7795
Epoch 2/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7122 - loss: 0.6103
Epoch 3/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7366 - loss: 0.5646
Epoch 4/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7518 - loss: 0.5347
Epoch 5/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7684 - loss: 0.5026
Epoch 6/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7807 - loss: 0.4792
Epoch 7/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7849 - loss: 0.4652
Epoch 8/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7982 - loss: 0.4445
Epoch 9/10
[1m2746/2746[0m [32m

**TF-IDF - LSTM Classifier**

In [23]:
metrics = dl.LSTM(
    tfidf_vectors, labels, embedding_dim=tfidf_vectors.shape[1], hidden_size=256, epochs=10
)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(**kwargs)


Epoch 1/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 26ms/step - accuracy: 0.7303 - loss: 0.6374
Epoch 2/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.8918 - loss: 0.2872
Epoch 3/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.9266 - loss: 0.1953
Epoch 4/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.9394 - loss: 0.1506
Epoch 5/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.9499 - loss: 0.1154
Epoch 6/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.9600 - loss: 0.0932
Epoch 7/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - accuracy: 0.9641 - loss: 0.0835
Epoch 8/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 26ms/step - accuracy: 0.9689 - loss: 0.0734
Epoch 9/

**Bert Embedding - LSTM Classifier**

In [16]:
metrics = dl.LSTM(
    vectors=torch.tensor(bert_embeddings, device=device).cpu().numpy(),  # Embeddingleri CPU'ya alıyoruz
    labels=labels, hidden_size=256, epochs=10, batch_size=16, dropout=0.3, learning_rate=0.001)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(**kwargs)


Epoch 1/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6466 - loss: 0.6958
Epoch 2/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7374 - loss: 0.5340
Epoch 3/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7581 - loss: 0.5078
Epoch 4/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7530 - loss: 0.5214
Epoch 5/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.6613 - loss: 0.6788
Epoch 6/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7689 - loss: 0.4895
Epoch 7/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7846 - loss: 0.4667
Epoch 8/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7941 - loss: 0.4536
Epoch 9/10
[1m2746/274

**T5 Embedding - LSTM Classifier**

In [17]:
metrics = dl.LSTM(
    vectors=t5_embeddings,
    labels=labels,
    embedding_dim=t5_embeddings.shape[1],
    hidden_size=256,
    epochs=10,
    batch_size=16
)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(**kwargs)


Epoch 1/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6379 - loss: 0.6713
Epoch 2/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7071 - loss: 0.5201
Epoch 3/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7332 - loss: 0.4963
Epoch 4/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7421 - loss: 0.4778
Epoch 5/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7535 - loss: 0.4647
Epoch 6/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7667 - loss: 0.4546
Epoch 7/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7759 - loss: 0.4437
Epoch 8/10
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7826 - loss: 0.4360
Epoch 9/10
[1m2746/274

**Word2Vec - CNN Classifier**

In [24]:
metrics = dl.CNN(w2v_vectors, labels, dropout_rate=0.3, epochs=7)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}
Epoch 1/7


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.5849 - loss: 0.8166
Epoch 2/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6548 - loss: 0.6551
Epoch 3/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6803 - loss: 0.6226
Epoch 4/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7057 - loss: 0.5950
Epoch 5/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7156 - loss: 0.5770
Epoch 6/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7232 - loss: 0.5725
Epoch 7/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7247 - loss: 0.5568
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Test Sonuçları:
Accuracy: 0.7464
Precision: 0.8029
Recall: 0.7464
F1_score: 0.7568


**FastText - CNN Classifier**

In [19]:
metrics = dl.CNN(ft_vectors, labels)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}
Epoch 1/7


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.5801 - loss: 0.8257
Epoch 2/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6406 - loss: 0.6799
Epoch 3/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6616 - loss: 0.6483
Epoch 4/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6787 - loss: 0.6215
Epoch 5/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6917 - loss: 0.6080
Epoch 6/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6986 - loss: 0.5991
Epoch 7/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7017 - loss: 0.5868
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Test Sonuçları:
Accuracy: 0.7155
Precision: 0.7796
Recall: 0.7155
F1_score: 0.7244


**TFIDF - CNN Classifier**

In [25]:
metrics = dl.CNN(tfidf_vectors, labels, embedding_dim=tfidf_vectors.shape[1])

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 28ms/step - accuracy: 0.6972 - loss: 0.6914
Epoch 2/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 28ms/step - accuracy: 0.8761 - loss: 0.3161
Epoch 3/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 28ms/step - accuracy: 0.9172 - loss: 0.2208
Epoch 4/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 28ms/step - accuracy: 0.9434 - loss: 0.1613
Epoch 5/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 28ms/step - accuracy: 0.9521 - loss: 0.1391
Epoch 6/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 28ms/step - accuracy: 0.9598 - loss: 0.1192
Epoch 7/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 28ms/step - accuracy: 0.9640 - loss: 0.1091
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
Test Sonuçları:
Accuracy: 0.8865
Precision: 0.8869
Recall: 0.886

**BERT Embedding - CNN Classifier**

In [31]:
metrics = dl.CNN(
    vectors=torch.tensor(bert_embeddings, device=device).cpu().numpy(),  # Embeddingleri CPU'ya alıyoruz
    labels=labels, epochs=7, batch_size=16, embedding_dim=bert_embeddings.shape[1], learning_rate=0.001)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.6295 - loss: 0.7385
Epoch 2/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7131 - loss: 0.5673
Epoch 3/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7338 - loss: 0.5284
Epoch 4/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.7500 - loss: 0.5052
Epoch 5/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7685 - loss: 0.4871
Epoch 6/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7788 - loss: 0.4687
Epoch 7/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7820 - loss: 0.4620
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Test Sonuçları:
Accuracy: 0.7306
Precision: 0.8293
Recall: 0.7306
F1_score: 0.

**T5 Embedding - CNN Classifier**

In [32]:
metrics = dl.LSTM(
    vectors=t5_embeddings,
    labels=labels,
    embedding_dim=t5_embeddings.shape[1],
    epochs=7,
    batch_size=16
)

Sınıf Ağırlıkları: {0: 3.666833583708897, 1: 0.7333299951596481, 2: 0.7333299951596481}


  super().__init__(**kwargs)


Epoch 1/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6361 - loss: 0.6742
Epoch 2/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7088 - loss: 0.5197
Epoch 3/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7338 - loss: 0.4949
Epoch 4/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7487 - loss: 0.4690
Epoch 5/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7600 - loss: 0.4627
Epoch 6/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7719 - loss: 0.4533
Epoch 7/7
[1m2746/2746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.7750 - loss: 0.4509
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Test Sonuçları:
Accuracy: 0.8142
Precision: 0.8403
Recall: 0.8142
F1_score: 0

**Word2Vec - CapsuleNetwork Classifier**

In [33]:
metrics = dl.CapsuleNetwork(w2v_vectors, labels)

Epoch 1/10, Loss: 0.7482
Epoch 2/10, Loss: 0.6417
Epoch 3/10, Loss: 0.6083
Epoch 4/10, Loss: 0.5858
Epoch 5/10, Loss: 0.5689
Epoch 6/10, Loss: 0.5534
Epoch 7/10, Loss: 0.5436
Epoch 8/10, Loss: 0.5327
Epoch 9/10, Loss: 0.5223
Epoch 10/10, Loss: 0.5151
Test Sonuçları:
Accuracy: 0.7765
Precision: 0.8057
Recall: 0.7765
F1_score: 0.7853


**FastText - CapsuleNetwork Classifier**

In [34]:
metrics = dl.CapsuleNetwork(ft_vectors, labels)

Epoch 1/10, Loss: 0.7814
Epoch 2/10, Loss: 0.6744
Epoch 3/10, Loss: 0.6385
Epoch 4/10, Loss: 0.6130
Epoch 5/10, Loss: 0.5971
Epoch 6/10, Loss: 0.5814
Epoch 7/10, Loss: 0.5706
Epoch 8/10, Loss: 0.5581
Epoch 9/10, Loss: 0.5494
Epoch 10/10, Loss: 0.5394
Test Sonuçları:
Accuracy: 0.7760
Precision: 0.8058
Recall: 0.7760
F1_score: 0.7845


**TFIDF - CapsuleNetwork Classifier**

In [36]:
metrics = dl.CapsuleNetwork(tfidf_vectors, labels)

Epoch 1/10, Loss: 0.6781
Epoch 2/10, Loss: 0.3796
Epoch 3/10, Loss: 0.2751
Epoch 4/10, Loss: 0.2135
Epoch 5/10, Loss: 0.1718
Epoch 6/10, Loss: 0.1401
Epoch 7/10, Loss: 0.1141
Epoch 8/10, Loss: 0.0955
Epoch 9/10, Loss: 0.0788
Epoch 10/10, Loss: 0.0665
Test Sonuçları:
Accuracy: 0.8607
Precision: 0.8614
Recall: 0.8607
F1_score: 0.8610


**BERT Embedding - CapsuleNetwork Classifier**

In [41]:
metrics = dl.CapsuleNetwork(
    embeddings=torch.tensor(bert_embeddings, device=device).cpu().numpy(),  # Embeddingleri CPU'ya alıyoruz
    labels=labels)

Epoch 1/10, Loss: 0.6118
Epoch 2/10, Loss: 0.4919
Epoch 3/10, Loss: 0.4568
Epoch 4/10, Loss: 0.4348
Epoch 5/10, Loss: 0.4181
Epoch 6/10, Loss: 0.4051
Epoch 7/10, Loss: 0.3946
Epoch 8/10, Loss: 0.3853
Epoch 9/10, Loss: 0.3751
Epoch 10/10, Loss: 0.3618
Test Sonuçları:
Accuracy: 0.8582
Precision: 0.8655
Recall: 0.8582
F1_score: 0.8608


**T5 Embedding - CapsuleNetwork Classifier**

In [42]:
metrics = dl.CapsuleNetwork(
    embeddings=t5_embeddings,
    labels=labels,
)

Epoch 1/10, Loss: 0.6173
Epoch 2/10, Loss: 0.5048
Epoch 3/10, Loss: 0.4742
Epoch 4/10, Loss: 0.4574
Epoch 5/10, Loss: 0.4418
Epoch 6/10, Loss: 0.4297
Epoch 7/10, Loss: 0.4203
Epoch 8/10, Loss: 0.4138
Epoch 9/10, Loss: 0.4082
Epoch 10/10, Loss: 0.4053
Test Sonuçları:
Accuracy: 0.8531
Precision: 0.8506
Recall: 0.8531
F1_score: 0.8514


**Word2Vec - BERT Classifier**

In [44]:
metrics = tcr.BERT(w2v_vectors, labels, epochs=10)

dbmdz/bert-base-turkish-cased modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/10, Loss: 0.8944
Epoch 2/10, Loss: 0.8367
Epoch 3/10, Loss: 0.8208
Epoch 4/10, Loss: 0.8100
Epoch 5/10, Loss: 0.8010
Epoch 6/10, Loss: 0.7957
Epoch 7/10, Loss: 0.7901
Epoch 8/10, Loss: 0.7866
Epoch 9/10, Loss: 0.7828
Epoch 10/10, Loss: 0.7792
Test Sonuçları:
Accuracy: 0.7724
Precision: 0.8101
Recall: 0.7724
F1_score: 0.7822


**FastText - BERT Classifier**

In [45]:
metrics = tcr.BERT(ft_vectors, labels, epochs=10)

dbmdz/bert-base-turkish-cased modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/10, Loss: 0.9063
Epoch 2/10, Loss: 0.8601
Epoch 3/10, Loss: 0.8432
Epoch 4/10, Loss: 0.8322
Epoch 5/10, Loss: 0.8245
Epoch 6/10, Loss: 0.8175
Epoch 7/10, Loss: 0.8117
Epoch 8/10, Loss: 0.8066
Epoch 9/10, Loss: 0.8030
Epoch 10/10, Loss: 0.7990
Test Sonuçları:
Accuracy: 0.7597
Precision: 0.8028
Recall: 0.7597
F1_score: 0.7698


**TF IDF - BERT Classifier**

In [53]:
metrics = tcr.BERT(tfidf_vectors, labels, epochs=7)

dbmdz/bert-base-turkish-cased modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/7, Loss: 0.7999
Epoch 2/7, Loss: 0.6759
Epoch 3/7, Loss: 0.6443
Epoch 4/7, Loss: 0.6257
Epoch 5/7, Loss: 0.6153
Epoch 6/7, Loss: 0.6060
Epoch 7/7, Loss: 0.6012
Test Sonuçları:
Accuracy: 0.8675
Precision: 0.8740
Recall: 0.8675
F1_score: 0.8701


**BERT - BERT Classifier**

In [51]:
metrics = tcr.BERT(bert_embeddings, labels, epochs=5)

dbmdz/bert-base-turkish-cased modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/5, Loss: 0.8389
Epoch 2/5, Loss: 0.7919
Epoch 3/5, Loss: 0.7785
Epoch 4/5, Loss: 0.7702
Epoch 5/5, Loss: 0.7667
Test Sonuçları:
Accuracy: 0.7982
Precision: 0.8390
Recall: 0.7982
F1_score: 0.8076


**T5 - BERT Classifier**

In [55]:
metrics = tcr.BERT(t5_embeddings, labels, epochs=5)

dbmdz/bert-base-turkish-cased modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/5, Loss: 0.8547
Epoch 2/5, Loss: 0.8080
Epoch 3/5, Loss: 0.7931
Epoch 4/5, Loss: 0.7863
Epoch 5/5, Loss: 0.7784
Test Sonuçları:
Accuracy: 0.8001
Precision: 0.8207
Recall: 0.8001
F1_score: 0.8080


**Word2Vec - T5 Classifier**

In [59]:
metrics = tcr.T5(w2v_vectors, labels, epochs=10)

google/mt5-small modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/10, Loss: 0.8962
Epoch 2/10, Loss: 0.8405
Epoch 3/10, Loss: 0.8215
Epoch 4/10, Loss: 0.8111
Epoch 5/10, Loss: 0.8063
Epoch 6/10, Loss: 0.7950
Epoch 7/10, Loss: 0.7924
Epoch 8/10, Loss: 0.7866
Epoch 9/10, Loss: 0.7840
Epoch 10/10, Loss: 0.7799
Test Sonuçları:
Accuracy: 0.8099
Precision: 0.8177
Recall: 0.8099
F1_score: 0.8113


**FastText - T5 Classifier**

In [63]:
metrics = tcr.T5(ft_vectors, labels, epochs=5)

google/mt5-small modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/5, Loss: 0.9114
Epoch 2/5, Loss: 0.8619
Epoch 3/5, Loss: 0.8417
Epoch 4/5, Loss: 0.8316
Epoch 5/5, Loss: 0.8241
Test Sonuçları:
Accuracy: 0.7704
Precision: 0.7867
Recall: 0.7704
F1_score: 0.7758


**TF IDF - T5 Classifier**

In [77]:
metrics = tcr.T5(tfidf_vectors, labels, epochs=5)

google/mt5-small modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/5, Loss: 0.7986
Epoch 2/5, Loss: 0.6768
Epoch 3/5, Loss: 0.6441
Epoch 4/5, Loss: 0.6261
Epoch 5/5, Loss: 0.6154
Test Sonuçları:
Accuracy: 0.8725
Precision: 0.8740
Recall: 0.8725
F1_score: 0.8731


**BERT Embedding - T5 Classifier**

In [74]:
metrics = tcr.T5(bert_embeddings, labels,epochs=9)

google/mt5-small modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/9, Loss: 0.8383
Epoch 2/9, Loss: 0.7933
Epoch 3/9, Loss: 0.7788
Epoch 4/9, Loss: 0.7714
Epoch 5/9, Loss: 0.7664
Epoch 6/9, Loss: 0.7607
Epoch 7/9, Loss: 0.7591
Epoch 8/9, Loss: 0.7522
Epoch 9/9, Loss: 0.7505
Test Sonuçları:
Accuracy: 0.8318
Precision: 0.8459
Recall: 0.8318
F1_score: 0.8343


**T5 Embedding - T5 Classifier**

In [67]:
metrics = tcr.T5(t5_embeddings, labels, epochs=7)

google/mt5-small modeli ile sınıflandırma modeli eğitiliyor...
Epoch 1/10, Loss: 0.8574
Epoch 2/10, Loss: 0.8081
Epoch 3/10, Loss: 0.7957
Epoch 4/10, Loss: 0.7866
Epoch 5/10, Loss: 0.7791
Epoch 6/10, Loss: 0.7768
Epoch 7/10, Loss: 0.7705
Epoch 8/10, Loss: 0.7674
Epoch 9/10, Loss: 0.7639
Epoch 10/10, Loss: 0.7604
Test Sonuçları:
Accuracy: 0.7446
Precision: 0.8237
Recall: 0.7446
F1_score: 0.7637
