In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import joblib
import os

In [2]:
os.makedirs("../models", exist_ok=True)

In [3]:
df = pd.read_csv("../data/data.csv", encoding='ISO-8859-9', sep=';')
print(f"Veri boyutu: {df.shape}")

Veri boyutu: (1267, 24)


In [4]:
# 3. Ön İşleme
# - Virgülleri noktaya çevir
for col in ['KTAS duration_min', 'BT', 'SBP', 'DBP', 'HR', 'RR', 'Saturation']:
    df[col] = df[col].astype(str).str.replace(',', '.')

In [6]:
# - Sayısal dönüşüm
num_cols = ['KTAS duration_min', 'BT', 'SBP', 'DBP', 'HR', 'RR', 'Saturation']
for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

In [7]:
# Eksik değer doldurma (ortalama)
df[num_cols] = df[num_cols].fillna(df[num_cols].mean())

In [8]:
y = df['KTAS_expert']  # Hedef değişken
X = df.drop(['KTAS_expert', 'KTAS_RN', 'Diagnosis in ED'], axis=1)  # Gereksiz sütunları çıkar

In [10]:
# Kategorik kolonlar
categorical_cols = ['Chief_complain']
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))

In [11]:
# 5. Eğitim/Test Ayrımı
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# 6. Modeller
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, class_weight='balanced'),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
}

results = {}

In [14]:
# Hatalı değerleri NaN yap
df.replace("#BOŞ!", np.nan, inplace=True)

# Virgül yerine nokta
for col in ['KTAS duration_min', 'BT', 'SBP', 'DBP', 'HR', 'RR', 'Saturation']:
    df[col] = df[col].astype(str).str.replace(',', '.')

# Sayısal dönüşüm
num_cols = ['KTAS duration_min', 'BT', 'SBP', 'DBP', 'HR', 'RR', 'Saturation']
for col in num_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Eksik değerleri ortalama ile doldur
df[num_cols] = df[num_cols].fillna(df[num_cols].mean())


In [15]:
for name, model in models.items():
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("clf", model)
    ])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    results[name] = {"accuracy": acc, "f1": f1}
    print(f"{name} -> Accuracy: {acc:.4f}, F1: {f1:.4f}")

ValueError: could not convert string to float: '#BOŞ!'