In [None]:
# Kütüphaneler
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Load the tab-separated data set and give its columns short names.
# NOTE(review): read_csv uses the first line of the file as a header by
# default, and those labels are then overwritten below. If the file has no
# header row, the first record is lost -- confirm against the file.
column_names = ['Np', 'Pg', 'Dbp', 'Tst', '2Si', 'Bmi', 'Dpf', 'Age', 'Outcome']
data = pd.read_csv('veri-seti.txt', delimiter='\t')
data.columns = column_names

# Print the number of missing values found in each column.
missing_per_column = data.isna().sum()
print(missing_per_column)

# Separate the target column ('Outcome') from the feature columns.
y = data['Outcome']
X = data.drop(columns='Outcome')

# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible between runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Standardize the features. The scaler is fitted on the training split only,
# so no test-set statistics influence the transformation. The unscaled
# training data is kept under its own name because the grid search below has
# to re-fit a scaler inside every cross-validation fold.
X_train_raw = X_train
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test)

# Candidate models. The tree-based estimators are seeded so that repeated
# runs produce the same models and scores (the split above is seeded too).
RANDOM_STATE = 42
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'SVM': SVC(),
}

# Hyperparameter grid for each model, keyed by the same names as `models`.
param_grids = {
    'Logistic Regression': {'C': [0.1, 1, 10, 100]},
    'Decision Tree': {'max_depth': [3, 5, 7, 10]},
    'Random Forest': {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7, 10]},
    'SVM': {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf']},
}

# Tune every model with 5-fold cross-validated grid search.
# The search runs over a scaler + classifier pipeline fed with the UNSCALED
# training data, so the scaler is re-fitted on the training part of each fold.
# Searching on pre-scaled data would let every validation fold contribute to
# the scaling statistics and make the cross-validation scores optimistic.
best_models = {}
for model_name, model in models.items():
    print(f"Training {model_name}...")
    pipeline = Pipeline([('scaler', StandardScaler()), ('clf', model)])
    # Pipeline parameters are addressed as '<step name>__<parameter>'.
    pipeline_grid = {
        f'clf__{param}': values
        for param, values in param_grids[model_name].items()
    }
    grid_search = GridSearchCV(pipeline, pipeline_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train_raw, y_train)

    # The refitted best pipeline scales with statistics from the whole training
    # split -- the same statistics `scaler` holds -- so its classifier step can
    # be used directly on the already-scaled X_train / X_test.
    best_models[model_name] = grid_search.best_estimator_.named_steps['clf']

    # Report the parameters under their plain names (without the step prefix).
    best_params = {
        param.split('__', 1)[1]: value
        for param, value in grid_search.best_params_.items()
    }
    print(f"Best params for {model_name}: {best_params}")
    print(f"Best cross-validation accuracy for {model_name}: {grid_search.best_score_}")

# Evaluate each tuned model on the held-out (scaled) test set.
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    print(f"Results for {model_name}:")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")


Np         0
Pg         0
Dbp        0
Tst        0
2Si        0
Bmi        0
Dpf        0
Age        0
Outcome    0
dtype: int64
Training Logistic Regression...
Best params for Logistic Regression: {'C': 10}
Best cross-validation accuracy for Logistic Regression: 0.7655071304811408
Training Decision Tree...
Best params for Decision Tree: {'max_depth': 3}
Best cross-validation accuracy for Decision Tree: 0.7573903771824604
Training Random Forest...
