In [7]:
# 📁 model_training_pca_gridsearch.ipynb
# Dataset1 HOG + SVM + PCA kombinasyonu icin hiperparametre optimizasyonu

import os
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from src.method1.evaluate import evaluate_model
import joblib
import sys

# Proje köküne geç
proj_root = "/Users/nurefsanolfaz/yap470_project/xray_anomaly_method1"
os.chdir(proj_root)
sys.path.insert(0, proj_root)

print("Çalışma dizini:", os.getcwd())

# === Dataset1 veya Dataset2'yi sec ===
FEATURE_DIR = "features/dataset1/hog"  # veya "features/dataset2/hog"
MODEL_SAVE_PATH = "models/dataset1/gridsearch_best_svm.pkl"  # uygun klasorle degistir

# === Ozellikleri yukle ===
X_train = np.load(os.path.join(FEATURE_DIR, "X_train.npy"))
y_train = np.load(os.path.join(FEATURE_DIR, "y_train.npy"))
X_val   = np.load(os.path.join(FEATURE_DIR, "X_val.npy"))
y_val   = np.load(os.path.join(FEATURE_DIR, "y_val.npy"))
X_test  = np.load(os.path.join(FEATURE_DIR, "X_test.npy"))
y_test  = np.load(os.path.join(FEATURE_DIR, "y_test.npy"))

# === Pipeline tanimi ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', SVC())
])

# === Hiperparametre araligi ===
param_grid = {
    'pca__n_components': [100, 200, 300],
    'model__C': [0.1, 1, 10],
    'model__kernel': ['rbf', 'linear'],
    'model__gamma': ['scale', 'auto']
}

# === GridSearchCV ===
grid = GridSearchCV(pipe, param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(X_train, y_train)

# === En iyi sonuc ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test seti uzerinde degerlendirme ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_train, y_train, X_val, y_val, X_test, y_test)

# === Modeli kaydet ===
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")


Çalışma dizini: /Users/nurefsanolfaz/yap470_project/xray_anomaly_method1
Fitting 3 folds for each of 36 candidates, totalling 108 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=   9.3s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=   9.3s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=   9.8s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  13.3s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  13.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  13.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=100; total time=   5.7s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=100; total time=   5.5s
[CV] END model__C=0.1, mode

In [8]:
# Dataset1'de Haralick + SVM + PCA + class_weight=balanced icin hiperparametre optimizasyonu


# === Dataset1 icin Haralick ozelliklerini yukle ===
FEATURE_DIR = "features/dataset1/haralick"
MODEL_SAVE_PATH = "models/dataset1/gridsearch_haralick_svm_balanced.pkl"

X_train = np.load(os.path.join(FEATURE_DIR, "X_train.npy"))
y_train = np.load(os.path.join(FEATURE_DIR, "y_train.npy"))
X_val   = np.load(os.path.join(FEATURE_DIR, "X_val.npy"))
y_val   = np.load(os.path.join(FEATURE_DIR, "y_val.npy"))
X_test  = np.load(os.path.join(FEATURE_DIR, "X_test.npy"))
y_test  = np.load(os.path.join(FEATURE_DIR, "y_test.npy"))

# === Pipeline tanimi ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', SVC(class_weight='balanced'))  # balanced eklendi
])

# === Hiperparametre araligi ===
param_grid = {
    'pca__n_components': [10, 13],  # Haralick ozellikleri dusuk boyutlu oldugu icin
    'model__C': [0.1, 1, 10],
    'model__kernel': ['rbf', 'linear'],
    'model__gamma': ['scale', 'auto']
}

# === GridSearchCV ===
grid = GridSearchCV(pipe, param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(X_train, y_train)

# === En iyi sonuc ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test seti uzerinde degerlendirme ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_train, y_train, X_val, y_val, X_test, y_test)

# === Modeli kaydet ===
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")


Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=10; total time=   0.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=10; total time=   0.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=13; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=13; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=10; total time=   0.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=13; total time=   0.2s


In [9]:
# Dataset1'de Haralick + SVM + PCA + class_weight=balanced + 5-fold CV icin hiperparametre optimizasyonu

from sklearn.model_selection import StratifiedKFold


# === Dataset1 icin Haralick ozelliklerini yukle ===
FEATURE_DIR = "features/dataset1/haralick"
MODEL_SAVE_PATH = "models/dataset1/gridsearch_haralick_svm_balanced_5fold.pkl"

X_train = np.load(os.path.join(FEATURE_DIR, "X_train.npy"))
y_train = np.load(os.path.join(FEATURE_DIR, "y_train.npy"))
X_val   = np.load(os.path.join(FEATURE_DIR, "X_val.npy"))
y_val   = np.load(os.path.join(FEATURE_DIR, "y_val.npy"))
X_test  = np.load(os.path.join(FEATURE_DIR, "X_test.npy"))
y_test  = np.load(os.path.join(FEATURE_DIR, "y_test.npy"))

# === Train + Val birlestir ===
X_combined = np.concatenate([X_train, X_val], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# === Pipeline tanimi ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', SVC(class_weight='balanced'))
])

# === Hiperparametre araligi ===
param_grid = {
    'pca__n_components': [10, 13],
    'model__C': [0.1, 1, 10],
    'model__kernel': ['rbf', 'linear'],
    'model__gamma': ['scale', 'auto']
}

# === Stratified 5-fold Cross Validation ===
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=skf, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

# === En iyi sonuc ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test seti uzerinde degerlendirme ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_combined, y_combined, X_val, y_val, X_test, y_test)

# === Modeli kaydet ===
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=13; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=10; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=13; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=13; total time=   0.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=linear, pca__n_components=10; total time=   0.2s
[CV] END

In [10]:
# 📁 model_training_pca_gridsearch.ipynb
# Dataset1'de Combined (HOG + LBP + Haralick) + SVM + PCA + class_weight=balanced + 5-fold CV icin optimize edilmiş versiyon

import os
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from src.method1.evaluate import evaluate_model
import joblib

# === Combined ozelliklerini yukle ===
X_hog = np.load("features/dataset1/hog/X_train.npy")
X_lbp = np.load("features/dataset1/lbp/X_train.npy")
X_har = np.load("features/dataset1/haralick/X_train.npy")

X_train_combined = np.hstack([X_hog, X_lbp, X_har])

X_hog_val = np.load("features/dataset1/hog/X_val.npy")
X_lbp_val = np.load("features/dataset1/lbp/X_val.npy")
X_har_val = np.load("features/dataset1/haralick/X_val.npy")
X_val_combined = np.hstack([X_hog_val, X_lbp_val, X_har_val])

X_hog_test = np.load("features/dataset1/hog/X_test.npy")
X_lbp_test = np.load("features/dataset1/lbp/X_test.npy")
X_har_test = np.load("features/dataset1/haralick/X_test.npy")
X_test_combined = np.hstack([X_hog_test, X_lbp_test, X_har_test])

# Etiketleri yukle
y_train = np.load("features/dataset1/hog/y_train.npy")
y_val = np.load("features/dataset1/hog/y_val.npy")
y_test = np.load("features/dataset1/hog/y_test.npy")

# === Train + Val birlestir ===
X_combined = np.concatenate([X_train_combined, X_val_combined], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# === Pipeline tanimi ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', SVC(class_weight='balanced'))
])

# === Hiperparametre araligi ===
param_grid = {
    'pca__n_components': [100, 200, 300],
    'model__C': [0.1, 1, 10],
    'model__kernel': ['rbf'],
    'model__gamma': ['scale']
}

# === Stratified 5-fold Cross Validation ===
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=skf, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

# === En iyi sonuc ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test seti uzerinde degerlendirme ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)

# === Modeli kaydet ===
MODEL_SAVE_PATH = "models/dataset1/gridsearch_combined_svm_balanced_5fold.pkl"
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  11.9s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  12.1s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  12.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  12.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  13.1s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  15.7s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  16.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  16.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  10.4s
[CV]

In [12]:
# 📁 model_training_pca_gridsearch.ipynb
# Dataset1'de Combined (HOG + LBP + Haralick) + SVM + PCA + class_weight=balanced + 5-fold CV icin optimize edilmiş versiyon

import os
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from src.method1.evaluate import evaluate_model
import joblib

# === Combined ozelliklerini yukle ===
X_hog = np.load("features/dataset1/hog/X_train.npy")
X_lbp = np.load("features/dataset1/lbp/X_train.npy")
X_har = np.load("features/dataset1/haralick/X_train.npy")

X_train_combined = np.hstack([X_hog, X_lbp, X_har])

X_hog_val = np.load("features/dataset1/hog/X_val.npy")
X_lbp_val = np.load("features/dataset1/lbp/X_val.npy")
X_har_val = np.load("features/dataset1/haralick/X_val.npy")
X_val_combined = np.hstack([X_hog_val, X_lbp_val, X_har_val])

X_hog_test = np.load("features/dataset1/hog/X_test.npy")
X_lbp_test = np.load("features/dataset1/lbp/X_test.npy")
X_har_test = np.load("features/dataset1/haralick/X_test.npy")
X_test_combined = np.hstack([X_hog_test, X_lbp_test, X_har_test])

# Etiketleri yukle
y_train = np.load("features/dataset1/hog/y_train.npy")
y_val = np.load("features/dataset1/hog/y_val.npy")
y_test = np.load("features/dataset1/hog/y_test.npy")

# === Train + Val birlestir ===
X_combined = np.concatenate([X_train_combined, X_val_combined], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# === Pipeline tanimi ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', SVC(class_weight='balanced'))
])

# === Hiperparametre araligi ===
param_grid = {
    'pca__n_components': [100, 200, 300],
    'model__C': [0.1, 1, 10],
    'model__kernel': ['rbf'],
    'model__gamma': ['scale']
}

# === Stratified 5-fold Cross Validation ===
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=skf, scoring='f1_macro', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

# === En iyi sonuc ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test seti uzerinde degerlendirme ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)

# === Modeli kaydet ===
MODEL_SAVE_PATH = "models/dataset1/gridsearch_combined_svm_balanced_5fold.pkl"
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  10.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  11.1s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  11.2s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  11.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time=  12.7s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  15.5s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  16.4s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  16.9s
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=200; total time=  10.4s
[CV]

In [15]:
import os
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from src.method1.evaluate import evaluate_model
import joblib

# === Combined öznitelikleri yükle ===
X_hog = np.load("features/dataset1/hog/X_train.npy")
X_lbp = np.load("features/dataset1/lbp/X_train.npy")
X_har = np.load("features/dataset1/haralick/X_train.npy")
X_train_combined = np.hstack([X_hog, X_lbp, X_har])

X_hog_val = np.load("features/dataset1/hog/X_val.npy")
X_lbp_val = np.load("features/dataset1/lbp/X_val.npy")
X_har_val = np.load("features/dataset1/haralick/X_val.npy")
X_val_combined = np.hstack([X_hog_val, X_lbp_val, X_har_val])

X_hog_test = np.load("features/dataset1/hog/X_test.npy")
X_lbp_test = np.load("features/dataset1/lbp/X_test.npy")
X_har_test = np.load("features/dataset1/haralick/X_test.npy")
X_test_combined = np.hstack([X_hog_test, X_lbp_test, X_har_test])

# === Etiketleri yükle ===
y_train = np.load("features/dataset1/hog/y_train.npy")
y_val   = np.load("features/dataset1/hog/y_val.npy")
y_test  = np.load("features/dataset1/hog/y_test.npy")

# === Train + Val birleştir ===
X_combined = np.concatenate([X_train_combined, X_val_combined], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# === Pipeline tanımı ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('select', SelectKBest(score_func=f_classif)),
    ('pca', PCA()),
    ('model', MLPClassifier(max_iter=500, random_state=42))
])

# === Hiperparametre aralıkları ===
param_grid = {
    'select__k': [300, 500, 800],
    'pca__n_components': [50, 100, 200],
    'model__hidden_layer_sizes': [(64,), (128,), (128, 64)],
    'model__alpha': [0.0001, 0.001],
    'model__learning_rate_init': [0.001, 0.01]
}

# === 5-fold Stratified CV ===
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=skf, scoring='f1_macro', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

# === Sonuçlar ===
print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Test set değerlendirmesi ===
best_model = grid.best_estimator_
evaluate_model(best_model, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)

# === Modeli kaydet ===
MODEL_SAVE_PATH = "models/dataset1/gridsearch_combined_mlp_selectkbest_pca.pkl"
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_SAVE_PATH)
print(f"\n[INFO] Model kaydedildi: {MODEL_SAVE_PATH}")

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=500; total time=   9.2s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=500; total time=   9.3s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=500; total time=   9.4s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=300; total time=   9.4s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=300; total time=   9.5s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=50, select__k=300; total time=   9.5s
[CV] END model__alpha=0.0

In [17]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from src.method1.evaluate import evaluate_model

# Pipeline'ı yeniden tanımla
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=3000)),
    ('model', SVC(C=10, kernel='rbf', gamma='scale', class_weight='balanced'))
])

# Eğit ve değerlendir
pipe.fit(X_combined, y_combined)

evaluate_model(pipe, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)


[INFO] Model eğitiliyor...
[INFO] Validation Accuracy: 1.0000

[INFO] Test set classification report:
              precision    recall  f1-score   support

      NORMAL       0.98      0.39      0.56       234
   PNEUMONIA       0.73      0.99      0.84       390

    accuracy                           0.77       624
   macro avg       0.86      0.69      0.70       624
weighted avg       0.82      0.77      0.74       624



In [18]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from src.method1.evaluate import evaluate_model

# Pipeline'ı yeniden tanımla
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', SVC(C=10, kernel='rbf', gamma='scale', class_weight='balanced'))
])

# Eğit ve değerlendir
pipe.fit(X_combined, y_combined)

evaluate_model(pipe, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)


[INFO] Model eğitiliyor...
[INFO] Validation Accuracy: 1.0000

[INFO] Test set classification report:
              precision    recall  f1-score   support

      NORMAL       0.98      0.40      0.57       234
   PNEUMONIA       0.73      0.99      0.84       390

    accuracy                           0.77       624
   macro avg       0.86      0.70      0.70       624
weighted avg       0.83      0.77      0.74       624



In [24]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# Pipeline tanımı
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('select', SelectKBest(score_func=f_classif, k=3000)),  # ilk olarak en anlamlı 3000 özniteliği seç
    ('pca', PCA(n_components=500)),                         # ardından PCA ile 500 bileşene indir
    ('model', SVC(kernel='rbf', C=10, gamma='scale', class_weight='balanced'))
])

# Eğit ve değerlendir
pipe.fit(X_combined, y_combined)

evaluate_model(pipe, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)


[INFO] Model eğitiliyor...
[INFO] Validation Accuracy: 1.0000

[INFO] Test set classification report:
              precision    recall  f1-score   support

      NORMAL       0.95      0.32      0.48       234
   PNEUMONIA       0.71      0.99      0.83       390

    accuracy                           0.74       624
   macro avg       0.83      0.66      0.65       624
weighted avg       0.80      0.74      0.70       624



In [26]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from src.method1.evaluate import evaluate_model

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=5000)),
    ('model', SVC(
        kernel='rbf',
        C=10,
        gamma='scale',
        class_weight='balanced'
    ))
])

# Eğit ve değerlendir
pipe.fit(X_combined, y_combined)
evaluate_model(pipe, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)


[INFO] Model eğitiliyor...
[INFO] Validation Accuracy: 1.0000

[INFO] Test set classification report:
              precision    recall  f1-score   support

      NORMAL       0.98      0.39      0.56       234
   PNEUMONIA       0.73      0.99      0.84       390

    accuracy                           0.77       624
   macro avg       0.86      0.69      0.70       624
weighted avg       0.82      0.77      0.74       624



In [28]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
import joblib

# === Pipeline tanımı ===
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', SVC(kernel='rbf', C=10, gamma='scale', class_weight='balanced'))
])

# === Eğit ve değerlendir ===
pipe.fit(X_combined, y_combined)
evaluate_model(pipe, X_combined, y_combined, X_val_combined, y_val, X_test_combined, y_test)

# === Modeli kaydet ===
joblib.dump(pipe, 'models/dataset1/final_svm_selectk2000.pkl')
print("[INFO] PCA’sız final model kaydedildi: models/dataset1/final_svm_selectk2000.pkl")


[INFO] Model eğitiliyor...
[INFO] Validation Accuracy: 1.0000

[INFO] Test set classification report:
              precision    recall  f1-score   support

      NORMAL       0.98      0.40      0.57       234
   PNEUMONIA       0.73      0.99      0.84       390

    accuracy                           0.77       624
   macro avg       0.86      0.70      0.70       624
weighted avg       0.83      0.77      0.74       624

[INFO] PCA’sız final model kaydedildi: models/dataset1/final_svm_selectk2000.pkl


In [39]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_combined_enc = le.fit_transform(y_combined)
y_val_enc = le.transform(y_val)
y_test_enc = le.transform(y_test)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('select', SelectKBest(score_func=f_classif)),
    ('pca', PCA()),
    ('model', XGBClassifier(eval_metric='mlogloss', random_state=42))
])

param_grid = {
    'select__k': [800, 1000, 1500],
    'pca__n_components': [100, 200, 300],
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5],
    'model__learning_rate': [0.01, 0.1],
    'model__subsample': [0.8, 1.0]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=2)
grid.fit(X_combined, y_combined_enc)

print("\n[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_combined, y_combined_enc, X_val_combined, y_val_enc, X_test_combined, y_test_enc)

import joblib
joblib.dump(best_model, "models/dataset1/final_xgb_selectk_pca_optimized.pkl")
joblib.dump(le, "models/dataset1/final_xgb_label_encoder.pkl")

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k=800; total time=   6.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k=1000; total time=   5.9s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k=1000; total time=   6.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k=800; total time=   6.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k=800; total time=   6.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8, pca__n_components=100, select__k

['models/dataset1/final_xgb_label_encoder.pkl']

In [4]:
import os, sys
# Proje köküne geç
proj_root = "/Users/nurefsanolfaz/yap470_project/xray_anomaly_method1"
os.chdir(proj_root)
sys.path.insert(0, proj_root)

print("Çalışma dizini:", os.getcwd())

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from src.method1.evaluate import evaluate_model
import joblib
import numpy as np
from sklearn.decomposition import PCA



DATASET = 'dataset1' 
FEATURE_DIR = f"features/{DATASET}/combined"
MODEL_PATH = f"models/{DATASET}/final_knn_combined.pkl"

X_train = np.load(f"{FEATURE_DIR}/X_train.npy")
X_val   = np.load(f"{FEATURE_DIR}/X_val.npy")
X_test  = np.load(f"{FEATURE_DIR}/X_test.npy")
y_train = np.load(f"{FEATURE_DIR}/y_train.npy")
y_val   = np.load(f"{FEATURE_DIR}/y_val.npy")
y_test  = np.load(f"{FEATURE_DIR}/y_test.npy")

# Train + Val birleşimi
X_combined = np.concatenate([X_train, X_val], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_combined = le.fit_transform(y_combined)
y_val= le.transform(y_val)
y_test = le.transform(y_test)

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=200)),
    ('model', KNeighborsClassifier())
])

param_grid = {
    'model__n_neighbors': [3, 5, 7, 9],
    'model__weights': ['uniform', 'distance'],
    'model__metric': ['euclidean']
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='f1_macro', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

print("[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

best_model = grid.best_estimator_

evaluate_model(best_model, X_combined, y_combined, X_val, y_val, X_test, y_test)

# Kaydet
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_PATH)
print(f"[INFO] Final KNN modeli kaydedildi: {MODEL_PATH}")

Çalışma dizini: /Users/nurefsanolfaz/yap470_project/xray_anomaly_method1
Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  25.6s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  25.7s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  25.9s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  26.0s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  26.0s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  26.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  26.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  26.3s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=d