In [1]:
# Load images and labels
import os
from PIL import Image

import numpy as np
import matplotlib.pyplot as plt

# # Define the input directory
input_dir = 'C:/Users/odys_/Desktop/ML_winows_2/archive_small'

# Initialize lists to store images and labels
images = []
labels = []

# Loop through each folder (class name) in the input directory
for category in os.listdir(input_dir):
    category_path = os.path.join(input_dir, category)
    if os.path.isdir(category_path):  # Ensure it's a directory
        for file_name in os.listdir(category_path):
            if file_name.endswith('.jpeg') or file_name.endswith('.png'):  # Check for valid image files
                img_path = os.path.join(category_path, file_name)
                try:
                    # Open and preprocess the image
                    img = Image.open(img_path)
                    # img = img.resize((128, 128))  # Resize to 128x128
                    img_array = np.array(img)
                    
                    # Append the image and its label
                    images.append(img_array)
                    labels.append(category)  # Use the folder name as the label
                except Exception as e:
                    print(f"Error loading image {img_path}: {e}")

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Print some information about the loaded data
print(f"Loaded {len(images)} images.")
print(f"Image shape: {images[0].shape} ")
print(f"Labels: {np.unique(labels)}")


Loaded 600 images.
Image shape: (64, 64) 
Labels: ['AbdomenCT' 'BreastMRI' 'Hand' 'HeadCT']


In [2]:
from sklearn.model_selection import train_test_split

# Set random_state for reproducibility
random_state = 42

# Split into train and test only
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.3, random_state=random_state, stratify=labels
)

print(f"Shape Train set: {X_train.shape}, labels: {y_train.shape}")
print(f"Shape Test set: {X_test.shape}, labels:{y_test.shape}")

Shape Train set: (420, 64, 64), labels: (420,)
Shape Test set: (180, 64, 64), labels:(180,)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Μετατροπή ετικετών σε αριθμούς
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Μετατροπή των εικόνων σε PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)  # Προσθήκη καναλιού (1 για grayscale)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)

# Δημιουργία TensorDataset και DataLoader
train_data = TensorDataset(X_train_tensor, torch.tensor(y_train_encoded, dtype=torch.long))
test_data = TensorDataset(X_test_tensor, torch.tensor(y_test_encoded, dtype=torch.long))

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Ορισμός του CNN μοντέλου
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1: κανάλι εικόνας (grayscale)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 8 * 8, 512)  # Προσαρμογή ανάλογα με το μέγεθος των εικόνων
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(2)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.relu(self.conv3(x))
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # Flatten το tensor
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Ορισμός του μοντέλου, κριτηρίου και optimizer
model = CNN(num_classes=len(np.unique(y_train)))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Εκπαίδευση του μοντέλου
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

# Αξιολόγηση του μοντέλου στο Test Set
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy: {test_accuracy:.2f}%")

Epoch 1/20, Loss: 8.8333, Accuracy: 68.33%
Epoch 2/20, Loss: 0.0199, Accuracy: 99.29%
Epoch 3/20, Loss: 0.0014, Accuracy: 100.00%
Epoch 4/20, Loss: 0.0001, Accuracy: 100.00%
Epoch 5/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 6/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 7/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 8/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 9/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 10/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 11/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 12/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 13/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 14/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 15/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 16/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 17/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 18/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 19/20, Loss: 0.0000, Accuracy: 100.00%
Epoch 20/20, Loss: 0.0000, Accuracy: 100.00%
Test Accuracy: 99.44%


In [5]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

def objective(trial):
    # Hyperparameters to optimize
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    n_filters1 = trial.suggest_categorical('n_filters1', [16, 32, 64])
    n_filters2 = trial.suggest_categorical('n_filters2', [32, 64, 128])
    n_filters3 = trial.suggest_categorical('n_filters3', [64, 128, 256])
    fc_size = trial.suggest_categorical('fc_size', [128, 256, 512])
    dropout_rate = trial.suggest_uniform('dropout', 0.2, 0.5)

    # DataLoader with new batch size
    train_data = TensorDataset(X_train_tensor, torch.tensor(y_train_encoded, dtype=torch.long))
    test_data = TensorDataset(X_test_tensor, torch.tensor(y_test_encoded, dtype=torch.long))
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Define model with trial hyperparameters
    class CNN(nn.Module):
        def __init__(self, num_classes):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(1, n_filters1, kernel_size=3, padding=1)
            self.conv2 = nn.Conv2d(n_filters1, n_filters2, kernel_size=3, padding=1)
            self.conv3 = nn.Conv2d(n_filters2, n_filters3, kernel_size=3, padding=1)
            self.maxpool = nn.MaxPool2d(2)
            self.relu = nn.ReLU()
            self.dropout = nn.Dropout(dropout_rate)
            # Calculate the size after convolutions and pooling
            self._to_linear = n_filters3 * 8 * 8  # Adjust if your image size is not 64x64
            self.fc1 = nn.Linear(self._to_linear, fc_size)
            self.fc2 = nn.Linear(fc_size, num_classes)

        def forward(self, x):
            x = self.relu(self.conv1(x))
            x = self.maxpool(x)
            x = self.relu(self.conv2(x))
            x = self.maxpool(x)
            x = self.relu(self.conv3(x))
            x = self.maxpool(x)
            x = x.view(x.size(0), -1)
            x = self.dropout(self.relu(self.fc1(x)))
            x = self.fc2(x)
            return x

    model = CNN(num_classes=len(np.unique(y_train)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop (fewer epochs for speed)
    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
    test_accuracy = 100 * test_correct / test_total
    return -test_accuracy  # Optuna minimizes, so return negative accuracy

# Run Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

print("Best hyperparameters:", study.best_trial.params)
print("Best test accuracy: {:.2f}%".format(-study.best_value))

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-06-01 23:51:16,286] A new study created in memory with name: no-name-3f9b5556-0e80-4a17-aad2-03ad7a5ecba5
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout', 0.2, 0.5)
[I 2025-06-01 23:52:28,500] Trial 0 finished with value: -99.44444444444444 and parameters: {'lr': 0.0006488487548052437, 'batch_size': 64, 'n_filters1': 64, 'n_filters2': 64, 'n_filters3': 128, 'fc_size': 512, 'dropout': 0.21010390781363764}. Best is trial 0 with value: -99.44444444444444.
[I 2025-06-01 23:53:32,968] Trial 1 finished with value: -97.22222222222223 and parameters: {'lr': 0.0013914606644257297, 'batch_size': 64, 'n_filters1': 64, 'n_filters2': 64, 'n_filters3': 128, 'fc_size': 128, 'dropout': 0.2702861390068038}. Best is trial 0 with value: -99.44444444444444.
[I 2025-06-01 23:53:53,642] Trial 2 finished with value: -100.0 and parameters: {'lr': 0.00026217275565833665, 'batch_size': 16, 'n_filters1': 

Best hyperparameters: {'lr': 0.00026217275565833665, 'batch_size': 16, 'n_filters1': 16, 'n_filters2': 32, 'n_filters3': 64, 'fc_size': 512, 'dropout': 0.45944441481938225}
Best test accuracy: 100.00%
