In [1]:
# ---------------------------------------
# Gerekli Kütüphaneler
# ---------------------------------------
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import nibabel as nib
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.nn import CrossEntropyLoss
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from collections import Counter
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import torchvision.models.video as video_models
from torch.amp import autocast, GradScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
import joblib
import pandas as pd

# ---------------------------------------
# CUDA ve CUDNN Ayarları
# ---------------------------------------
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
print(f"Using device: {DEVICE}")

# ---------------------------------------
# 1) Dataset Dizini ve Sınıf Etiketleri
# ---------------------------------------
DATA_DIR = '/kaggle/input/adniunpreprocessed'  # Ham NIfTI klasörünüz
classes = {'CN': 0, 'MCI': 1, 'AD': 2}

# ---------------------------------------
# 2) Dosya Toplama ve Filtreleme
# ---------------------------------------
pattern = os.path.join(DATA_DIR, '**', '*.nii*')
all_files = glob.glob(pattern, recursive=True)
valid_paths, valid_labels = [], []

for fp in all_files:
    parent = os.path.basename(os.path.dirname(fp))
    if parent not in classes:
        continue
    if os.path.getsize(fp) == 0:
        continue
    valid_paths.append(fp)
    valid_labels.append(classes[parent])

# İkinci filtre: sadece açılabilen NIfTI
clean_paths, clean_labels = [], []
for path, label in zip(valid_paths, valid_labels):
    try:
        _ = nib.load(path)
        clean_paths.append(path)
        clean_labels.append(label)
    except:
        continue

valid_paths  = clean_paths
valid_labels = clean_labels
print(f"Total sample sayısı: {len(valid_paths)}")
print("Sınıf dağılımı:", {c: valid_labels.count(classes[c]) for c in classes})

# ---------------------------------------
# 3) Train/Test Split
# ---------------------------------------
paths_train, paths_test, labels_train, labels_test = train_test_split(
    valid_paths, valid_labels, test_size=0.20, stratify=valid_labels, random_state=42
)
print(f"Train / Test sayıları: {len(paths_train)}/{len(paths_test)}")

# ---------------------------------------
# 4) Model Sınıfı
# ---------------------------------------
class InceptionModule3D(nn.Module):
    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()
        
        # 1x1 conv branch
        self.branch1 = nn.Conv3d(in_channels, ch1x1, kernel_size=1)
        
        # 1x1 -> 3x3 conv branch
        self.branch2 = nn.Sequential(
            nn.Conv3d(in_channels, ch3x3red, kernel_size=1),
            nn.Conv3d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )
        
        # 1x1 -> 5x5 conv branch
        self.branch3 = nn.Sequential(
            nn.Conv3d(in_channels, ch5x5red, kernel_size=1),
            nn.Conv3d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        )
        
        # 3x3 pool -> 1x1 conv branch
        self.branch4 = nn.Sequential(
            nn.MaxPool3d(kernel_size=3, stride=1, padding=1),
            nn.Conv3d(in_channels, pool_proj, kernel_size=1)
        )
        
        self.bn = nn.BatchNorm3d(ch1x1 + ch3x3 + ch5x5 + pool_proj)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        
        outputs = [branch1, branch2, branch3, branch4]
        return self.relu(self.bn(torch.cat(outputs, 1)))

class Inception3DClassifier(nn.Module):
    def __init__(self, num_classes=3):
        super().__init__()
        
        # Initial conv layer
        self.conv1 = nn.Sequential(
            nn.Conv3d(1, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
        )
        
        # Inception modules
        self.inception3a = InceptionModule3D(64, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionModule3D(256, 128, 128, 192, 32, 96, 64)
        
        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
        
        self.inception4a = InceptionModule3D(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionModule3D(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionModule3D(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionModule3D(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionModule3D(528, 256, 160, 320, 32, 128, 128)
        
        self.maxpool2 = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
        
        self.inception5a = InceptionModule3D(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionModule3D(832, 384, 192, 384, 48, 128, 128)
        
        # Global average pooling and classifier
        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv1(x)
        
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool(x)
        
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool2(x)
        
        x = self.inception5a(x)
        x = self.inception5b(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        
        return x
    
    def extract_features(self, x):
        x = self.conv1(x)
        
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool(x)
        
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool2(x)
        
        x = self.inception5a(x)
        x = self.inception5b(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return x

# ---------------------------------------
# 5) Dataset Sınıfı
# ---------------------------------------
class ADNI_ResNet3D_Dataset(Dataset):
    def __init__(self, paths, labels, 
                 target_depth=64, target_height=112, target_width=112,
                 is_training=True):
        self.paths = paths
        self.labels = labels
        self.d = target_depth
        self.h = target_height
        self.w = target_width
        self.is_training = is_training

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        # 1) Load NIfTI
        path = self.paths[idx]
        img = nib.load(path).get_fdata().astype(np.float32)

        # 2) Robust normalization (%1-%99 persentil arasını [0,1])
        p1, p99 = np.percentile(img, (1, 99))
        img = np.clip(img, p1, p99)
        img = (img - p1) / (p99 - p1 + 1e-6)

        # 3) Center-crop veya pad 3D hacim → target (D x H x W)
        D, H, W = img.shape
        
        # Depth (z ekseni) orta bölgeden target_depth al
        cd = D // 2
        hd = self.d // 2
        start_d = max(cd - hd, 0)
        end_d = start_d + self.d
        if end_d > D:
            end_d = D
            start_d = D - self.d
        patch = img[start_d:end_d, :, :]

        # Yükseklik ve genişlik için merkezden al veya pad
        ch = H // 2
        hh = self.h // 2
        start_h = max(ch - hh, 0)
        end_h = start_h + self.h
        if end_h > H:
            end_h = H
            start_h = H - self.h
        patch = patch[:, start_h:end_h, :]

        cw = W // 2
        hw = self.w // 2
        start_w = max(cw - hw, 0)
        end_w = start_w + self.w
        if end_w > W:
            end_w = W
            start_w = W - self.w
        patch = patch[:, :, start_w:end_w]

        # Eğer patch boyutları eksikse pad et
        pd, ph, pw = patch.shape
        if pd != self.d or ph != self.h or pw != self.w:
            padded = np.zeros((self.d, self.h, self.w), dtype=np.float32)
            sd = (self.d - pd) // 2
            sh = (self.h - ph) // 2
            sw = (self.w - pw) // 2
            padded[sd:sd+pd, sh:sh+ph, sw:sw+pw] = patch
            patch = padded

        # 4) Tensor'a dönüştür ve kanal ekseni ekle: (1, D, H, W)
        img_tensor = torch.from_numpy(patch).unsqueeze(0)

        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return img_tensor, label

# ---------------------------------------
# 6) Dataset ve DataLoader
# ---------------------------------------
train_ds = ADNI_ResNet3D_Dataset(
    paths_train, labels_train,
    target_depth=64, target_height=112, target_width=112,
    is_training=True
)

test_ds = ADNI_ResNet3D_Dataset(
    paths_test, labels_test,
    target_depth=64, target_height=112, target_width=112,
    is_training=False
)

batch_size = 2
train_loader = DataLoader(
    train_ds, batch_size=batch_size,
    shuffle=True, num_workers=2,
    pin_memory=True, prefetch_factor=2
)

test_loader = DataLoader(
    test_ds, batch_size=batch_size,
    shuffle=False, num_workers=2,
    pin_memory=True, prefetch_factor=2
)

# ---------------------------------------
# 8) Özellik Çıkarma ve ML Algoritmaları
# ---------------------------------------
def extract_features_and_train_ml(model, data_loader, device):
    model.eval()
    all_features = []
    all_labels = []
    
    # Özellik çıkarma sürecini optimize et
    with torch.no_grad():
        for imgs, labels in tqdm(data_loader, desc="Özellik Çıkarma"):
            imgs = imgs.to(device)
            features = model.extract_features(imgs)
            all_features.append(features.cpu().numpy())
            all_labels.append(labels.numpy())
    
    X = np.vstack(all_features)
    y = np.concatenate(all_labels)
    
    # Veri ölçeklendirme
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    # Sınıf ağırlıklarını hesapla
    class_counts = np.bincount(y)
    class_weights = len(y) / (len(np.unique(y)) * class_counts)
    sample_weights = class_weights[y]
    
    # ML Algoritmaları ve hiperparametreler
    ml_models = {
        'Random Forest': {
            'model': RandomForestClassifier(random_state=42, class_weight='balanced'),
            'params': {
                'n_estimators': [100, 200],  # Azaltıldı
                'max_depth': [10, 15],
                'min_samples_split': [2]
            }
        },
        'Gradient Boosting': {
            'model': GradientBoostingClassifier(random_state=42),
            'params': {
                'n_estimators': [100, 200],  # Azaltıldı
                'learning_rate': [0.1],
                'max_depth': [3, 5]
            }
        },
        'SVM': {
            'model': SVC(probability=True, random_state=42, class_weight='balanced'),
            'params': {
                'C': [1, 10],
                'kernel': ['rbf'],
                'gamma': ['scale']
            }
        },
        'KNN': {
            'model': KNeighborsClassifier(weights='distance'),
            'params': {
                'n_neighbors': [3, 5],
                'metric': ['euclidean']
            }
        },
        'Logistic Regression': {
            'model': LogisticRegression(
                max_iter=1000,  # Azaltıldı
                solver='saga',
                random_state=42,
                class_weight='balanced',
                n_jobs=1
            ),
            'params': {
                'C': [1, 10],
                'penalty': ['l2']
            }
        },
        'Naive Bayes': {
            'model': GaussianNB(),
            'params': {}
        },
        'Decision Tree': {
            'model': DecisionTreeClassifier(random_state=42, class_weight='balanced'),
            'params': {
                'max_depth': [5, 10],
                'min_samples_split': [2],
                'min_samples_leaf': [1]
            }
        },
        'Neural Network': {
            'model': MLPClassifier(
                max_iter=1000,  # Azaltıldı
                random_state=42,
                solver='adam',
                early_stopping=True
            ),
            'params': {
                'hidden_layer_sizes': [(100,)],
                'alpha': [0.001],
                'learning_rate': ['adaptive']
            }
        }
    }
    
    # Save the scaler for later use
    try:
        joblib.dump(scaler, 'feature_scaler.joblib')
    except Exception as e:
        print(f"Scaler kaydedilemedi: {str(e)}")
    
    results = {}
    
    # Cross-validation için StratifiedKFold - kat sayısı azaltıldı
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    
    # Her model için eğitim ve değerlendirme
    for name, model_info in ml_models.items():
        print(f"\nEğitiliyor: {name}")
        
        try:
            # Hiperparametre optimizasyonu
            if model_info['params']:
                grid_search = GridSearchCV(
                    model_info['model'],
                    model_info['params'],
                    cv=skf,
                    scoring='f1_weighted',
                    n_jobs=1
                )
                if name in ['KNN', 'Neural Network']:
                    grid_search.fit(X, y)
                else:
                    grid_search.fit(X, y, sample_weight=sample_weights)
                best_model = grid_search.best_estimator_
                print(f"En iyi parametreler: {grid_search.best_params_}")
            else:
                best_model = model_info['model']
                if name in ['KNN', 'Neural Network']:
                    best_model.fit(X, y)
                else:
                    best_model.fit(X, y, sample_weight=sample_weights)
            
            # Cross-validation sonuçları
            cv_scores = []
            cv_precision = []
            cv_recall = []
            cv_f1 = []
            
            for train_idx, val_idx in skf.split(X, y):
                X_train, X_val = X[train_idx], X[val_idx]
                y_train, y_val = y[train_idx], y[val_idx]
                sample_weights_train = sample_weights[train_idx]
                
                if name in ['KNN', 'Neural Network']:
                    best_model.fit(X_train, y_train)
                else:
                    best_model.fit(X_train, y_train, sample_weight=sample_weights_train)
                y_pred = best_model.predict(X_val)
                
                cv_scores.append(accuracy_score(y_val, y_pred))
                precision, recall, f1, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
                cv_precision.append(precision)
                cv_recall.append(recall)
                cv_f1.append(f1)
            
            # Modeli kaydet
            try:
                joblib.dump(best_model, f'{name.lower().replace(" ", "_")}_model.joblib')
            except Exception as e:
                print(f"Model kaydedilemedi: {str(e)}")
            
            # Sonuçları kaydet
            results[name] = {
                'CV Accuracy': np.mean(cv_scores),
                'CV Accuracy Std': np.std(cv_scores),
                'CV Precision': np.mean(cv_precision),
                'CV Recall': np.mean(cv_recall),
                'CV F1': np.mean(cv_f1),
                'Best Model': best_model
            }
            
            print(f"{name} - CV Accuracy: {results[name]['CV Accuracy']:.4f} ± {results[name]['CV Accuracy Std']:.4f}")
            print(f"CV Precision: {results[name]['CV Precision']:.4f}")
            print(f"CV Recall: {results[name]['CV Recall']:.4f}")
            print(f"CV F1: {results[name]['CV F1']:.4f}")
            
        except Exception as e:
            print(f"{name} modeli eğitilirken hata oluştu: {str(e)}")
            continue
    
    # Sonuçları DataFrame'e dönüştür ve kaydet
    results_df = pd.DataFrame({
        'Model': list(results.keys()),
        'CV Accuracy': [results[name]['CV Accuracy'] for name in results.keys()],
        'CV Accuracy Std': [results[name]['CV Accuracy Std'] for name in results.keys()],
        'CV Precision': [results[name]['CV Precision'] for name in results.keys()],
        'CV Recall': [results[name]['CV Recall'] for name in results.keys()],
        'CV F1': [results[name]['CV F1'] for name in results.keys()]
    })
    
    try:
        results_df.to_csv('ml_results.csv', index=False)
    except Exception as e:
        print(f"Sonuçlar kaydedilemedi: {str(e)}")
    
    # Confusion matrix görselleştirme
    for name in results.keys():
        try:
            best_model = results[name]['Best Model']
            y_pred = best_model.predict(X)
            cm = confusion_matrix(y, y_pred)
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title(f'Confusion Matrix - {name}')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            plt.savefig(f'{name.lower().replace(" ", "_")}_confusion_matrix.png')
            plt.close()
        except Exception as e:
            print(f"{name} için confusion matrix oluşturulamadı: {str(e)}")
            continue
    
    return results_df

# ---------------------------------------
# 9) Model Eğitimi ve ML
# ---------------------------------------
def train_and_extract_features():
    model = Inception3DClassifier(num_classes=3).to(DEVICE)
    
    # Sadece 1 epoch için eğitim
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    cls_loss_fn = CrossEntropyLoss()
    
    # 1 epoch eğitim
    model.train()
    for imgs, labels in tqdm(train_loader, desc="1 Epoch Eğitim"):
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)
        
        optimizer.zero_grad()
        logits = model(imgs)
        loss = cls_loss_fn(logits, labels)
        loss.backward()
        optimizer.step()
    
    # Özellik çıkarma ve ML eğitimi
    results_df = extract_features_and_train_ml(model, train_loader, DEVICE)
    print("\nML Algoritmaları Sonuçları:")
    print(results_df)
    
    return results_df

# Eğitimi başlat
if __name__ == "__main__":
    results = train_and_extract_features()

Using device: cuda
Total sample sayısı: 2182
Sınıf dağılımı: {'CN': 748, 'MCI': 981, 'AD': 453}
Train / Test sayıları: 1745/437


1 Epoch Eğitim: 100%|██████████| 873/873 [10:58<00:00,  1.33it/s]
Özellik Çıkarma: 100%|██████████| 873/873 [10:48<00:00,  1.35it/s]



Eğitiliyor: Random Forest
En iyi parametreler: {'max_depth': 15, 'min_samples_split': 2, 'n_estimators': 100}
Random Forest - CV Accuracy: 0.5616 ± 0.0153
CV Precision: 0.5638
CV Recall: 0.5616
CV F1: 0.5579

Eğitiliyor: Gradient Boosting
En iyi parametreler: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}
Gradient Boosting - CV Accuracy: 0.5851 ± 0.0035
CV Precision: 0.5833
CV Recall: 0.5851
CV F1: 0.5809

Eğitiliyor: SVM
En iyi parametreler: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
SVM - CV Accuracy: 0.4172 ± 0.0134
CV Precision: 0.5141
CV Recall: 0.4172
CV F1: 0.3870

Eğitiliyor: KNN
En iyi parametreler: {'metric': 'euclidean', 'n_neighbors': 5}
KNN - CV Accuracy: 0.5381 ± 0.0046
CV Precision: 0.5364
CV Recall: 0.5381
CV F1: 0.5338

Eğitiliyor: Logistic Regression




En iyi parametreler: {'C': 10, 'penalty': 'l2'}




Logistic Regression - CV Accuracy: 0.4894 ± 0.0120
CV Precision: 0.5737
CV Recall: 0.4894
CV F1: 0.4767

Eğitiliyor: Naive Bayes
Naive Bayes - CV Accuracy: 0.3633 ± 0.0277
CV Precision: 0.4059
CV Recall: 0.3633
CV F1: 0.3360

Eğitiliyor: Decision Tree
En iyi parametreler: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}
Decision Tree - CV Accuracy: 0.4814 ± 0.0221
CV Precision: 0.5253
CV Recall: 0.4814
CV F1: 0.4878

Eğitiliyor: Neural Network
En iyi parametreler: {'alpha': 0.001, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive'}
Neural Network - CV Accuracy: 0.5083 ± 0.0155
CV Precision: 0.4965
CV Recall: 0.5083
CV F1: 0.4744

ML Algoritmaları Sonuçları:
                 Model  CV Accuracy  CV Accuracy Std  CV Precision  CV Recall  \
0        Random Forest     0.561616         0.015307      0.563788   0.561616   
1    Gradient Boosting     0.585100         0.003508      0.583307   0.585100   
2                  SVM     0.417202         0.013375      0.514109  