In [6]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# CSV file that holds the dataset used throughout this notebook.
path = 'datasets/turkish_dataset/turkce_cumleler_kokler_corrected_50k.csv'

# Read the CSV and discard the 'id' and 'text' columns in one chain.
df = pd.read_csv(path).drop(columns=['id', 'text'])

# Model input is the 'roots' column; the target is the 'label' column.
# NOTE(review): both Series are taken from the full frame, *before* the
# 2000-row cut below, so that cut only changes `df` and not X or y.
# Confirm whether training on every row is the intent.
X, y = df['roots'], df['label']

# Keep only the first 2000 rows in `df` and show them as the cell output.
df = df.head(2000)
df

Unnamed: 0,label,roots
0,0,gercekten hikâye izlemek yeni yilmak giricem
1,0,cok baklava
2,0,1 sn dukel ataturk karma e m baslattigi siyasî...
3,0,konfederasyon sosyal politika bakanlik istisar...
4,1,hakem tarih yazmak kisi 32 on alanyaspor kaybe...
...,...,...
1995,0,gu biri istanbul tekrar kazanabilme ihtimal se...
1996,1,haka altun telefon bas care beklemek numara si...
1997,1,dalyarak salas nolacak
1998,1,serefsiz kopek masum askermiydi darbe yapmak


In [None]:
# Hold out 10% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Replace missing entries with empty strings in both splits. np.where returns
# NumPy arrays, so X_train / X_test are arrays (not Series) from here on.
X_train, X_test = (
    np.where(pd.isna(part), '', part) for part in (X_train, X_test)
)

# Learn the TF-IDF vocabulary and weights on the training texts only, then
# reuse the fitted vectorizer to transform the test texts.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# Create the logistic-regression model (max_iter=200) and train it on the
# TF-IDF training matrix; fit() returns the estimator, so the call is chained.
logreg_model = LogisticRegression(max_iter=200).fit(X_train_tfidf, y_train)

# Predict labels for the held-out test matrix
y_pred = logreg_model.predict(X_test_tfidf)

# Evaluate the model: overall accuracy first, then the per-class report
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Pin the row/column order to the classes the classifier was trained on.
# Without `labels`, confusion_matrix only includes labels that occur in
# y_test or y_pred, so its shape could disagree with the tick labels passed
# as display_labels below whenever a trained class is absent from both.
cm = confusion_matrix(y_test, y_pred, labels=logreg_model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=logreg_model.classes_)

# plot() creates the figure and stores its Axes on disp.ax_; set the title on
# that Axes explicitly instead of relying on pyplot's implicit "current axes".
disp.plot(cmap=plt.cm.Blues)
disp.ax_.set_title("Confusion Matrix")
plt.show()