# In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Load the dataset from the working directory.
df = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

# Missing sleep-disorder entries become the explicit category 'None'.
# NOTE(review): presumably pandas parses the literal text "None" in the CSV
# as a missing value, which is why it is restored here -- confirm.
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

# Split the "systolic/diastolic" text into two integer columns, then drop the
# original text column and the row identifier.
bp_parts = df['Blood Pressure'].str.split('/', expand=True).astype(int)
df[['BP_Systolic', 'BP_Diastolic']] = bp_parts
df = df.drop(['Blood Pressure', 'Person ID'], axis=1)

# Replace each categorical column with integer codes. The single encoder is
# re-fitted for every column, so after the loop `le` only remembers the
# classes of the last column ('Sleep Disorder').
categorical_columns = ('Gender', 'Occupation', 'BMI Category', 'Sleep Disorder')
le = LabelEncoder()
for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

# Pairwise correlation heatmap over the columns of df.
# NOTE(review): assumes every remaining column is numeric after the
# encoding step above -- confirm.
_, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=corr_ax)
corr_ax.set_title("Özellikler Arası Korelasyon Matrisi")
plt.show()

# Bar chart of how many rows fall into each encoded sleep-disorder class.
_, count_ax = plt.subplots(figsize=(6, 4))
sns.countplot(x='Sleep Disorder', data=df, ax=count_ax)
count_ax.set_title("Uyku Bozukluğu Dağılımı (0: Insomnia, 1: None, 2: Sleep Apnea)")
plt.show()

# Features are every column except the target; the target is the encoded
# sleep-disorder class.
X = df.drop(columns=['Sleep Disorder'])
y = df['Sleep Disorder']

# Hold out 20% of the rows for evaluation. `stratify=y` keeps the class
# proportions the same in the train and test parts, so the reported metrics
# are not skewed by an unlucky draw of a minority class. The fixed seed keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Candidate classifiers, keyed by the display name used in the report.
# Logistic Regression and KNN are sensitive to feature magnitude (KNN uses
# raw distances, and the logistic solver converges poorly on unscaled data),
# so each is wrapped in a pipeline that standardizes the features first. The
# scaler is fitted on the training data only, inside `fit`, which avoids
# leaking test-set statistics. Random Forest is left unwrapped because
# tree splits do not depend on feature scale.
models = {
    "Logistic Regression": make_pipeline(
        StandardScaler(), LogisticRegression(max_iter=2000)
    ),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "KNN": make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5)),
}

# Fit every model on the training split and print its test-set accuracy.
print("MODEL SONUÇLARI:")
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Doğruluk Oranı (Accuracy): %{acc*100:.2f}")

# Take the already-fitted Random Forest and predict again on the test split
# (the loop variable y_pred holds the predictions of whichever model ran last).
rf_model = models["Random Forest"]
y_pred_rf = rf_model.predict(X_test)

print("\nRandom Forest Detaylı Raporu:")
print(classification_report(y_test, y_pred_rf))

# Display names in encoded-class order (0: Insomnia, 1: None, 2: Sleep Apnea).
class_names = ['Insomnia', 'None (Sağlıklı)', 'Sleep Apnea']

# Compute the confusion matrix once. Passing `labels` explicitly pins the
# matrix to one row/column per class, so it always lines up with the tick
# labels below even if some class is missing from the test data.
cm = confusion_matrix(y_test, y_pred_rf, labels=list(range(len(class_names))))

# Plain version: axes show the integer class codes.
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix (Tahmin vs Gerçek)")
plt.ylabel("Gerçek Değer")
plt.xlabel("Tahmin Edilen")
plt.show()

# Annotated version: axes show the class names, with cell borders.
plt.figure(figsize=(7, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    linewidths=1,
    linecolor='black',
    xticklabels=class_names,
    yticklabels=class_names
)
plt.title('Random Forest Hata Matrisi (Confusion Matrix)', fontsize=14)
plt.xlabel('Tahmin Edilen Sınıf', fontsize=12)
plt.ylabel('Gerçek Sınıf', fontsize=12)
plt.show()
