In [2]:
import numpy as np
import os, sys
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report
import joblib

# Switch to the project root so the relative feature/model paths below resolve.
# The location can be overridden with the XRAY_PROJECT_ROOT environment
# variable; the original hard-coded path stays as the default.
proj_root = os.environ.get(
    "XRAY_PROJECT_ROOT",
    "/Users/nurefsanolfaz/yap470_project/xray_anomaly_method1",
)
os.chdir(proj_root)
sys.path.insert(0, proj_root)

print("Çalışma dizini:", os.getcwd())

# === Load the pre-computed "combined" feature splits ===
X_train = np.load("features/dataset2/combined/X_train.npy")
X_val   = np.load("features/dataset2/combined/X_val.npy")
X_test  = np.load("features/dataset2/combined/X_test.npy")

y_train = np.load("features/dataset2/combined/y_train.npy")
y_val   = np.load("features/dataset2/combined/y_val.npy")
y_test  = np.load("features/dataset2/combined/y_test.npy")

# === Encode the labels as integers ===
# The encoder is fitted on train+val (the data the grid search is trained on)
# rather than on train alone, so a class that only occurs in the validation
# split no longer makes le.transform(y_val) raise. When both splits contain the
# same classes the resulting encoding is identical to fitting on y_train.
le = LabelEncoder()
le.fit(np.concatenate([y_train, y_val], axis=0))
y_train = le.transform(y_train)
y_val   = le.transform(y_val)
y_test  = le.transform(y_test)

# === Merge train + val: cross-validation below replaces the fixed val split ===
X_combined = np.concatenate([X_train, X_val], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# === Pipeline: standardise -> PCA -> SVM ===
# Scaler and PCA live inside the pipeline, so they are re-fitted on the training
# part of every CV fold.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA()),  # n_components is chosen by the grid search
    ('model', SVC(class_weight='balanced'))
])

# === Grid-search parameters (3 x 3 x 1 x 1 = 9 candidates) ===
param_grid = {
    'pca__n_components': [100, 300, 500],
    'model__C': [0.1, 1, 10],
    'model__gamma': ['scale'],
    'model__kernel': ['rbf']
}

# === GridSearchCV with seeded, shuffled, stratified 5-fold CV ===
# NOTE(review): candidates are ranked by plain accuracy although the SVM uses
# class_weight='balanced'; confirm accuracy is the intended selection metric.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='accuracy', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

# === Report the best CV result ===
print("[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Evaluate on the held-out test set ===
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
print("\n[INFO] Test classification report:")
# target_names has to be a sequence of strings; le.classes_ keeps the dtype of
# the stored labels, so convert explicitly to stay safe with numeric labels.
print(classification_report(y_test, y_pred, target_names=[str(c) for c in le.classes_]))

# === Persist the refitted best pipeline ===
os.makedirs("models/dataset2", exist_ok=True)
joblib.dump(best_model, "models/dataset2/final_svm_combined.pkl")
print("[INFO] SVM modeli kaydedildi: final_svm_combined.pkl")

Çalışma dizini: /Users/nurefsanolfaz/yap470_project/xray_anomaly_method1
Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time= 1.5min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time= 1.5min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time= 1.5min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time= 1.5min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=100; total time= 1.6min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=300; total time= 2.9min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=300; total time= 2.9min
[CV] END model__C=0.1, model__gamma=scale, model__kernel=rbf, pca__n_components=300; total time= 2.9min
[CV] END model__C=1, model__gamma=s

In [3]:
import os
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import classification_report
import joblib

# === Load the pre-computed "combined" feature splits ===
# The paths are relative. NOTE(review): this cell does not change directory
# itself, so it presumably relies on the working directory already being the
# project root -- confirm an earlier cell performs the chdir.
X_train, X_val, X_test = (
    np.load(feature_file)
    for feature_file in (
        "features/dataset2/combined/X_train.npy",
        "features/dataset2/combined/X_val.npy",
        "features/dataset2/combined/X_test.npy",
    )
)
y_train, y_val, y_test = (
    np.load(label_file)
    for label_file in (
        "features/dataset2/combined/y_train.npy",
        "features/dataset2/combined/y_val.npy",
        "features/dataset2/combined/y_test.npy",
    )
)

# === Merge train + val; the cross-validation below takes over the val role ===
X_combined = np.concatenate((X_train, X_val))
y_combined = np.concatenate((y_train, y_val))

# === Pipeline: standardise -> PCA -> MLP ===
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('model', MLPClassifier(random_state=42, max_iter=300)),
])

# === Hyper-parameter grid (3 x 3 x 2 x 2 = 36 candidates) ===
param_grid = dict(
    pca__n_components=[100, 200, 300],
    model__hidden_layer_sizes=[(128,), (64,), (128, 64)],
    model__alpha=[0.0001, 0.001],
    model__learning_rate_init=[0.001, 0.01],
)

# === Grid search scored by accuracy over seeded stratified 5-fold CV ===
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    verbose=2,
)

# === Fit ===
grid.fit(X_combined, y_combined)

# === Report the best CV result ===
print("[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

# === Performance of the refitted best pipeline on the test set ===
y_pred = grid.best_estimator_.predict(X_test)
print("\n[INFO] Test classification report:\n")
print(classification_report(y_test, y_pred))

# === Persist the model, creating the target directory when needed ===
MODEL_PATH = "models/dataset2/final_mlp_combined.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(grid.best_estimator_, MODEL_PATH)
print(f"\n[INFO] MLP modeli kaydedildi: {MODEL_PATH}")

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.2min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.2min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.3min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.3min




[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.5min




[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.4min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.5min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.5min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.01, pca__n_components=100; total time=  40.1s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time=  59.4s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.3min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=300; total time= 1.3min
[CV] END model__alpha=0.0001



[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  57.0s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  49.6s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  54.5s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.01, pca__n_components=300; total time= 1.0min




[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  48.3s




[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  42.2s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time=  53.0s
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.3min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=300; total time= 1.1min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=300; total time= 1.2min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.4min
[CV] END model__alpha=0.0001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.4min
[CV] END model__alpha=0.0001, mode



[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.2min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.1min




[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.0min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time=  50.6s




[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time=  46.8s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time=  56.3s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=100; total time= 1.0min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.2min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time=  59.9s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=300; total time=  50.7s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(128,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.1min
[CV] END model__alpha=0.001, model



[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  54.9s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  48.7s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  57.5s




[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  35.7s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=100; total time=  47.6s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time=  59.3s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time=  55.8s
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.1min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.0min
[CV] END model__alpha=0.001, model__hidden_layer_sizes=(64,), model__learning_rate_init=0.001, pca__n_components=200; total time= 1.1min
[CV] END model__alpha=0.001, model__hidde

In [2]:
import numpy as np
import os, sys
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA

from sklearn.model_selection import StratifiedKFold

# Switch to the project root so the relative feature/model paths below resolve.
# The location can be overridden with the XRAY_PROJECT_ROOT environment
# variable; the original hard-coded path stays as the default.
proj_root = os.environ.get(
    "XRAY_PROJECT_ROOT",
    "/Users/nurefsanolfaz/yap470_project/xray_anomaly_method1",
)
os.chdir(proj_root)
sys.path.insert(0, proj_root)

print("Çalışma dizini:", os.getcwd())

# Dataset2 - HOG features.
# NOTE(review): the features are read from the "hog" directory, yet the model
# is saved below as "final_xgb_combined.pkl". One of the two looks mislabelled;
# confirm which feature set is intended before reusing the saved model.
X_train = np.load("features/dataset2/hog/X_train.npy")
X_val   = np.load("features/dataset2/hog/X_val.npy")
X_test  = np.load("features/dataset2/hog/X_test.npy")

y_train = np.load("features/dataset2/hog/y_train.npy")
y_val   = np.load("features/dataset2/hog/y_val.npy")
y_test  = np.load("features/dataset2/hog/y_test.npy")

# Merge train + val; the cross-validation below takes over the val role.
X_combined = np.concatenate([X_train, X_val])
y_combined = np.concatenate([y_train, y_val])

# Encode the labels as integers (fitted on train+val, applied to test).
le = LabelEncoder()
y_combined = le.fit_transform(y_combined)
y_test = le.transform(y_test)

# Pipeline: standardise -> PCA (fixed at 200 components) -> XGBoost.
# random_state is set explicitly because subsample=0.8 makes training
# stochastic; this keeps reruns reproducible.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=200)),
    ('model', XGBClassifier(eval_metric='mlogloss', random_state=42))
])

# 2 x 2 x 1 x 1 = 4 candidates.
param_grid = {
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5],
    'model__learning_rate': [0.01],
    'model__subsample': [0.8]
}

# Use the same seeded, shuffled, stratified 5-fold split as the other model
# cells instead of GridSearchCV's default unshuffled folds, so the CV scores
# are comparable across models.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='f1_macro', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

print("[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

best_model = grid.best_estimator_

# Evaluate the refitted best pipeline on the held-out test set.
y_pred = best_model.predict(X_test)
print("\n[INFO] Test classification report:\n")
print(classification_report(y_test, y_pred))

# Save the model. Create the target directory first so the dump cannot fail
# with FileNotFoundError after the whole grid search has already run.
MODEL_PATH = "models/dataset2/final_xgb_combined.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_PATH)
print(f"[INFO] XGBoost modeli kaydedildi: {MODEL_PATH}")

Çalışma dizini: /Users/nurefsanolfaz/yap470_project/xray_anomaly_method1
Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8; total time= 1.1min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8; total time= 1.2min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8; total time= 1.3min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8; total time= 1.3min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, model__subsample=0.8; total time= 1.3min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=200, model__subsample=0.8; total time= 1.4min
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=200, model__subsample=0.8; total time= 1.5min
[CV] END mode

In [1]:
import os, sys

# Switch to the project root before the remaining imports: the relative paths
# below resolve against it and the project-local import (src.method1.evaluate)
# needs it on sys.path. The location can be overridden with the
# XRAY_PROJECT_ROOT environment variable; the original hard-coded path stays as
# the default.
proj_root = os.environ.get(
    "XRAY_PROJECT_ROOT",
    "/Users/nurefsanolfaz/yap470_project/xray_anomaly_method1",
)
os.chdir(proj_root)
sys.path.insert(0, proj_root)

print("Çalışma dizini:", os.getcwd())

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from src.method1.evaluate import evaluate_model
import joblib
import numpy as np
from sklearn.decomposition import PCA

# Dataset / feature-set selection; the feature and model paths derive from it.
DATASET = 'dataset2'
FEATURE_DIR = f"features/{DATASET}/combined"
MODEL_PATH = f"models/{DATASET}/final_knn_combined.pkl"

X_train = np.load(f"{FEATURE_DIR}/X_train.npy")
X_val   = np.load(f"{FEATURE_DIR}/X_val.npy")
X_test  = np.load(f"{FEATURE_DIR}/X_test.npy")
y_train = np.load(f"{FEATURE_DIR}/y_train.npy")
y_val   = np.load(f"{FEATURE_DIR}/y_val.npy")
y_test  = np.load(f"{FEATURE_DIR}/y_test.npy")

# Merge train + val; the cross-validation below takes over the val role.
X_combined = np.concatenate([X_train, X_val], axis=0)
y_combined = np.concatenate([y_train, y_val], axis=0)

# Encode the labels as integers (fitted on train+val, applied to val and test).
le = LabelEncoder()
y_combined = le.fit_transform(y_combined)
y_val = le.transform(y_val)
y_test = le.transform(y_test)

# Pipeline: standardise -> PCA (fixed at 200 components) -> k-NN.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=200)),
    ('model', KNeighborsClassifier())
])

# 4 x 2 x 1 = 8 candidates.
param_grid = {
    'model__n_neighbors': [3, 5, 7, 9],
    'model__weights': ['uniform', 'distance'],
    'model__metric': ['euclidean']
}

# Macro-F1 over seeded, shuffled, stratified 5-fold CV.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='f1_macro', verbose=2, n_jobs=-1)
grid.fit(X_combined, y_combined)

print("[INFO] En iyi skor:", grid.best_score_)
print("[INFO] En iyi parametreler:", grid.best_params_)

best_model = grid.best_estimator_

# NOTE(review): best_model was refitted on X_combined, which already contains
# X_val, so any validation metrics evaluate_model derives from X_val/y_val are
# computed on data the model has seen and will look optimistic. Only the test
# metrics are a clean estimate. The call is left unchanged because the helper's
# signature is not visible here -- confirm how it uses the validation split.
evaluate_model(best_model, X_combined, y_combined, X_val, y_val, X_test, y_test)

# Save the model, creating the target directory when needed.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(best_model, MODEL_PATH)
print(f"[INFO] Final KNN modeli kaydedildi: {MODEL_PATH}")

Çalışma dizini: /Users/nurefsanolfaz/yap470_project/xray_anomaly_method1
Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  57.6s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  57.7s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  58.0s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  58.8s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time=  59.0s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  59.7s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=distance; total time=  59.8s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform; total time= 1.0min
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=d