In [1]:
# Colab-only step: prompts for a file upload. The cell output below shows
# guncel_duygu_veri_seti.csv being saved, which is the file the training cell reads.
from google.colab import files
uploaded = files.upload()

Saving guncel_duygu_veri_seti.csv to guncel_duygu_veri_seti.csv


In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from torch.optim import AdamW  # 🔄 AdamW artık buradan geliyor
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import os
import pickle

# Custom PyTorch Dataset class
class SongDataset(Dataset):
    """Pairs texts with integer labels and tokenizes each item on access.

    Args:
        texts: Indexable collection of strings.
        labels: Indexable collection of class ids, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors='pt')`` that
            returns a mapping holding ``'input_ids'`` and ``'attention_mask'``.
        max_length: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=256):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Number of samples, taken from ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it with its label as tensors."""
        tokenizer_kwargs = dict(
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        encoded = self.tokenizer(self.texts[idx], **tokenizer_kwargs)

        # squeeze(0) removes the leading dimension of each tokenizer output tensor
        sample = {
            field: encoded[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# ✅ Read and prepare the data
#
# Stratified k-fold fine-tuning of a BERT sequence classifier on song lyrics.
# For every fold a fresh model is loaded, trained for NUM_EPOCHS epochs and
# evaluated on the held-out split; per-fold and mean accuracy are printed.
#
# NOTE(review): this cell writes only results/label_encoder.pkl. The later cells
# read saved_model/bert_model.pth and saved_model/bert_labels.pkl, which nothing
# in this cell produces — confirm where those files come from.

# Hyper-parameters kept in one place so the epoch loop, the LR schedule and the
# log messages cannot drift apart.
SEED = 42
NUM_EPOCHS = 5
BATCH_SIZE = 8
LEARNING_RATE = 2e-5

df = pd.read_csv("guncel_duygu_veri_seti.csv")
df = df[['clean_lyrics', 'mood']].dropna()

# Encode the labels: build the categorical once so codes and names share one source
mood_categorical = df["mood"].astype("category")
labels = mood_categorical.cat.codes
label_names = mood_categorical.cat.categories.tolist()
label_ids = list(range(len(label_names)))

# Persist the label names
os.makedirs("results", exist_ok=True)
with open("results/label_encoder.pkl", "wb") as f:
    pickle.dump(label_names, f)

# Tokenizer and dataset
model_name = "dbmdz/bert-base-turkish-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
dataset = SongDataset(df["clean_lyrics"].tolist(), labels.tolist(), tokenizer)

# K-fold settings
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=SEED)
fold_accuracies = []

# The device does not change between folds, so pick it once
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train/evaluate loop
for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(labels)), labels)):
    print(f"\n===== Fold {fold + 1}/{k_folds} =====")

    # Seed torch per fold so the new classifier head, dropout and the
    # DataLoader shuffle order are repeatable (GPU kernels may still introduce
    # small non-deterministic differences).
    torch.manual_seed(SEED + fold)

    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE)

    # A fresh model per fold so no weights leak from one fold into the next
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(label_names))
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, eps=1e-8)
    # Linear decay without warm-up over every optimizer step of this fold
    scheduler = get_linear_schedule_with_warmup(optimizer, 0, len(train_loader) * NUM_EPOCHS)

    # Training
    for epoch in range(NUM_EPOCHS):
        print(f"\nEpoch {epoch + 1}/{NUM_EPOCHS}")
        model.train()
        total_loss = 0
        for batch in train_loader:
            b_input_ids = batch['input_ids'].to(device)
            b_mask = batch['attention_mask'].to(device)
            b_labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(b_input_ids, attention_mask=b_mask, labels=b_labels)
            loss = outputs.loss
            total_loss += loss.item()
            loss.backward()
            # Clip the gradient norm to 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        print(f"Training Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            b_input_ids = batch['input_ids'].to(device)
            b_mask = batch['attention_mask'].to(device)
            outputs = model(b_input_ids, attention_mask=b_mask)
            logits = outputs.logits
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            # Labels are only compared on the host, so they never need to visit the device
            true_labels.extend(batch['labels'].numpy())

    acc = accuracy_score(true_labels, preds)
    fold_accuracies.append(acc)
    print(f"Validation Accuracy: {acc:.4f}")
    # Passing `labels` explicitly keeps target_names aligned even if a class is
    # absent from this fold's targets and predictions.
    print(classification_report(true_labels, preds, labels=label_ids, target_names=label_names))

# Results
print("\n===== Sonuçlar =====")
print(f"Fold başına doğruluklar: {fold_accuracies}")
print(f"Ortalama doğruluk: {np.mean(fold_accuracies):.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]


===== Fold 1/5 =====


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/5
Training Loss: 0.8443

Epoch 2/5
Training Loss: 0.6141

Epoch 3/5
Training Loss: 0.4356

Epoch 4/5
Training Loss: 0.2918

Epoch 5/5
Training Loss: 0.1949
Validation Accuracy: 0.7000
              precision    recall  f1-score   support

       mutlu       0.64      0.66      0.65       214
        öfke       0.87      0.70      0.78       216
       üzgün       0.64      0.74      0.68       220

    accuracy                           0.70       650
   macro avg       0.72      0.70      0.70       650
weighted avg       0.72      0.70      0.70       650


===== Fold 2/5 =====


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/5
Training Loss: 0.8805

Epoch 2/5
Training Loss: 0.6335

Epoch 3/5
Training Loss: 0.4510

Epoch 4/5
Training Loss: 0.3099

Epoch 5/5
Training Loss: 0.2051
Validation Accuracy: 0.6831
              precision    recall  f1-score   support

       mutlu       0.65      0.68      0.66       214
        öfke       0.82      0.70      0.75       216
       üzgün       0.61      0.67      0.64       220

    accuracy                           0.68       650
   macro avg       0.69      0.68      0.69       650
weighted avg       0.69      0.68      0.69       650


===== Fold 3/5 =====


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/5
Training Loss: 0.8859

Epoch 2/5
Training Loss: 0.6421

Epoch 3/5
Training Loss: 0.4468

Epoch 4/5
Training Loss: 0.2967

Epoch 5/5
Training Loss: 0.1950
Validation Accuracy: 0.7231
              precision    recall  f1-score   support

       mutlu       0.66      0.71      0.68       214
        öfke       0.88      0.72      0.79       216
       üzgün       0.67      0.75      0.71       220

    accuracy                           0.72       650
   macro avg       0.74      0.72      0.73       650
weighted avg       0.74      0.72      0.73       650


===== Fold 4/5 =====


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/5
Training Loss: 0.9531

Epoch 2/5
Training Loss: 0.7594

Epoch 3/5
Training Loss: 0.5832

Epoch 4/5
Training Loss: 0.4300

Epoch 5/5
Training Loss: 0.3071


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7369
              precision    recall  f1-score   support

       mutlu       0.72      0.65      0.68       215
        öfke       0.86      0.74      0.80       216
       üzgün       0.66      0.82      0.73       219

    accuracy                           0.74       650
   macro avg       0.75      0.74      0.74       650
weighted avg       0.75      0.74      0.74       650


===== Fold 5/5 =====

Epoch 1/5
Training Loss: 0.8863

Epoch 2/5
Training Loss: 0.6453

Epoch 3/5
Training Loss: 0.4335

Epoch 4/5
Training Loss: 0.2903

Epoch 5/5
Training Loss: 0.1804
Validation Accuracy: 0.7257
              precision    recall  f1-score   support

       mutlu       0.67      0.76      0.71       215
        öfke       0.87      0.70      0.77       215
       üzgün       0.68      0.72      0.70       219

    accuracy                           0.73       649
   macro avg       0.74      0.73      0.73       649
weighted avg       0.74      0.73      0.73       

In [13]:
# Display the module tree of whatever `model` currently refers to in the kernel.
# NOTE(review): this cell's execution count (13) is out of sequence with its
# neighbours, so which cell last assigned `model` is not certain — verify.
model


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(32000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [23]:
import torch
import pickle
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Convert the raw training artifacts (state dict + pickled label names) in
# `model_path` into a Hugging Face `save_pretrained` directory.

# 🔹 Folder that holds the artifacts and receives the converted model
model_path = "saved_model"
# Base checkpoint, named once so the model and tokenizer cannot disagree
base_checkpoint = "dbmdz/bert-base-turkish-cased"

# 🔹 Read the label file
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# open files that you produced yourself.
with open(f"{model_path}/bert_labels.pkl", "rb") as f:
    label_names = pickle.load(f)

id2label = dict(enumerate(label_names))
label2id = {label: i for i, label in id2label.items()}

# 🔹 Instantiate the Hugging Face model
# The classification head is freshly initialised here; load_state_dict below
# (strict by default) replaces every parameter or raises on a key mismatch.
model = AutoModelForSequenceClassification.from_pretrained(
    base_checkpoint,
    num_labels=len(label_names),
    id2label=id2label,
    label2id=label2id
)

# 🔹 Load the state dict from the .pth file
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot run code at load time.
state_dict = torch.load(f"{model_path}/bert_model.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

# 🔹 Use the Hugging Face tokenizer instead of tokenizer.pkl
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

# 🔹 Save in Hugging Face format
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

print("✅ Model ve tokenizer başarıyla Hugging Face formatında kaydedildi.")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model ve tokenizer başarıyla Hugging Face formatında kaydedildi.


In [4]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.nn.functional import softmax
import numpy as np
import pickle

# Rebuild the fine-tuned classifier, tokenizer and label list for inference.

# 🔹 Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 🔹 Load the label names first so the classifier head is sized from them
# rather than from a hard-coded class count.
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# open files that you produced yourself.
with open("saved_model/bert_labels.pkl", "rb") as f:
    label_names = pickle.load(f)

# 🔹 Reload the model
model_name = "dbmdz/bert-base-turkish-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(label_names))
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot run code at load time.
model.load_state_dict(torch.load("saved_model/bert_model.pth", map_location=device, weights_only=True))
model.to(device)
model.eval()

# 🔹 Load the tokenizer
# NOTE(security): same pickle caveat as for the labels.
# NOTE(review): the conversion cell in this notebook uses
# AutoTokenizer.from_pretrained instead of this pickle — consider doing the same here.
with open("saved_model/bert_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# 🔍 Prediction function
def predict_emotion_kfold(lyrics):
    """Print the per-label probabilities and the top label for ``lyrics``.

    Relies on the module-level ``tokenizer``, ``model``, ``device`` and
    ``label_names``. Nothing is returned; the result is only printed.

    Args:
        lyrics: Text handed to the tokenizer.
    """
    encoded = tokenizer(
        lyrics,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=256,
    )
    # Every tokenizer output has to sit on the same device as the model
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**inputs).logits
        # Row 0 is the only row: a single text was passed in
        probs = softmax(logits, dim=-1).cpu().numpy()[0]

    print("\n🎤 Tahmin Olasılıkları:")
    for position in range(min(len(label_names), len(probs))):
        label, prob = label_names[position], probs[position]
        print(f"{label}: {prob:.3f}")

    best_index = np.argmax(probs)
    predicted = label_names[best_index]
    print(f"\n✅ Tahmin Edilen Duygu: {predicted}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Spot check with anger-themed lyrics (the text itself mentions "öfke")
predict_emotion_kfold("""
Bana yaptıklarını unutmadım!
Yaktın geçtin, ben artık yokum!
İçimde öfke, dışımda suskunluk!
""")



🎤 Tahmin Olasılıkları:
mutlu: 0.004
öfke: 0.952
üzgün: 0.044

✅ Tahmin Edilen Duygu: öfke


In [6]:
# Spot check with lyrics about loneliness and a broken heart
predict_emotion_kfold("""
Sensizlik çöktü gecelere
Bir başıma kaldım yine
Kalbim kırık dökük bir şehir
Sesin çınlıyor boşlukta
""")



🎤 Tahmin Olasılıkları:
mutlu: 0.001
öfke: 0.001
üzgün: 0.997

✅ Tahmin Edilen Duygu: üzgün


In [7]:
# Spot check with cheerful lyrics
predict_emotion_kfold("""
Güneş yine doğdu içime
Seninle her şey daha güzel
Dans edelim rüzgarla birlikte
Hayat bugün çok neşeli
""")



🎤 Tahmin Olasılıkları:
mutlu: 0.998
öfke: 0.001
üzgün: 0.001

✅ Tahmin Edilen Duygu: mutlu


In [22]:
# 🔽 Zip the model folder
# The listing printed below shows the archive containing both bert_model.pth and
# model.safetensors, plus the tokenizer and label files.
!zip -r saved_model.zip saved_model

# 📎 Create the download link in Google Colab
from google.colab import files
files.download("saved_model.zip")


  adding: saved_model/ (stored 0%)
  adding: saved_model/model.safetensors (deflated 7%)
  adding: saved_model/vocab.txt (deflated 53%)
  adding: saved_model/bert_tokenizer.pkl (deflated 58%)
  adding: saved_model/tokenizer_config.json (deflated 75%)
  adding: saved_model/bert_labels.pkl (deflated 2%)
  adding: saved_model/config.json (deflated 51%)
  adding: saved_model/tokenizer.json (deflated 70%)
  adding: saved_model/bert_model.pth (deflated 7%)
  adding: saved_model/special_tokens_map.json (deflated 42%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Same input as the earlier cheerful-lyrics spot check; this cell has no
# execution count, i.e. it was not run in the saved notebook.
predict_emotion_kfold("""
Güneş yine doğdu içime
Seninle her şey daha güzel
Dans edelim rüzgarla birlikte
Hayat bugün çok neşeli
""")
