In [1]:
import sqlite3
import pandas as pd

conn = sqlite3.connect(r"C:\Users\kutayd\sqlite\lvbelc5_lyrics.db")
query = "SELECT * FROM lyrics"
df = pd.read_sql_query(query, conn)

df.head()

Unnamed: 0,id,song_title,artist,featured_artists,album,release_year,lyrics
0,1,BEMBEYAZ KEFENDEN,Lvbel C5,,C5MODE,2022,"Bembeyaz kefenden Polo bu\nGözlük Prada, şapka..."
1,2,BANA GÖNDER!,Lvbel C5,,,2023,Bana gönder\nSarışın yeşil gözlü kızların heps...
2,3,İZLEDİ MAHALLE,Lvbel C5,,,2023,"Lvbel, Lvbel C5 okey\nAuh, woah\nDedi bana ""Ba..."
3,4,İbrahim Tilaver,Lvbel C5,,,2022,"Alaaddin'e verdim dumanlar, bebeğim\nKafasında..."
4,5,AH YALAN DÜNYA,Lvbel C5,,#1,2024,"Kızım, bana ne?\nN'apıyo'san yap kızım, bana n..."


In [2]:
!pip install nltk



In [3]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('turkish')) # loading turkish stopword list into a python set

def clean_lyrics(text):
    if not isinstance(text, str):
        return ""
    text = text.lower()  
    text = re.sub(r'[^a-zçğıöşü\s]', '', text)  
    tokens = text.split() # splits into individual words
    filtered = [word for word in tokens if word not in stop_words] # filter loop to remove turkish stopwords
    return ' '.join(filtered)

df['cleaned_lyrics'] = df['lyrics'].apply(clean_lyrics) # applying the cleaning function into the lyrics column

df[['song_title', 'cleaned_lyrics']].head()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kutayd\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kutayd\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,song_title,cleaned_lyrics
0,BEMBEYAZ KEFENDEN,bembeyaz kefenden polo gözlük prada şapkamsa g...
1,BANA GÖNDER!,bana gönder sarışın yeşil gözlü kızların hepsi...
2,İZLEDİ MAHALLE,lvbel lvbel c okey auh woah dedi bana baba kıy...
3,İbrahim Tilaver,alaaddine verdim dumanlar bebeğim kafasından d...
4,AH YALAN DÜNYA,kızım bana napıyosan yap kızım bana benim hiçb...


In [4]:
!pip install transformers torch



## Load tokenizer and model

In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

model_name = "savasy/bert-base-turkish-sentiment-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


## Define Sentiment Prediction Function

In [6]:
def predict_sentiment(text):
    if not isinstance(text, str) or text.strip() == "":
        return "unknown"

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=1)
        pred_class = torch.argmax(probs, dim=1).item()

    # 0 = negative, 1 = neutral, 2 = positive
    label_map = {0: "negative", 1: "neutral", 2: "positive"}
    return label_map[pred_class]

## Apply to All Lyrics

In [8]:
df['predicted_sentiment'] = df['lyrics'].apply(predict_sentiment)
df[['song_title', 'predicted_sentiment']].head(40)

Unnamed: 0,song_title,predicted_sentiment
0,BEMBEYAZ KEFENDEN,neutral
1,BANA GÖNDER!,neutral
2,İZLEDİ MAHALLE,neutral
3,İbrahim Tilaver,neutral
4,AH YALAN DÜNYA,neutral
5,PRENSES,neutral
6,ÇÖZEMEZLER,neutral
7,JET BABA,neutral
8,behzat ç,neutral
9,MERMER,neutral


# Lvbel C5 Lyrics Analysis: Can AI Understand Turkish Rap?

This project started as a sentiment analysis experiment on the lyrics of Turkish rapper **Lvbel C5**.

I wanted to explore:
- How his lyrical themes evolved over the years
- Whether his tone became more positive (money, girls, luxury) or stayed dark (streets, struggle, police)

---

##  But, Sentiment Analysis Flopped

I tried:
-  Lexicon-based sentiment scoring (manually defined positive/negative Turkish words)
-  HuggingFace BERT model (`savasy/bert-base-turkish-sentiment-cased`)

###  What happened?
- BERT predicted `"neutral"` for nearly everything
- It couldn’t handle slang, metaphors, or sarcasm common in rap
- Long lyrics got **truncated**, losing emotional weight