## Imports

In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset

## Read data

In [5]:
# Image directories for the two classes.
# The dataset root can be overridden through the COLON_IMAGE_DIR environment
# variable so the notebook also runs on machines with a different layout; when
# the variable is unset, the original local paths are used unchanged.
data_root = os.environ.get(
    "COLON_IMAGE_DIR",
    "/Users/I550949/Desktop/Master Inforamtik/1.Fachsemester/ML/lung_colon_image_set/colon_image_sets",
)
path_cancer = os.path.join(data_root, "colon_aca")  # images later labelled as cancer
path_no_cancer = os.path.join(data_root, "colon_n")  # images later labelled as no cancer

# Read image paths: every `step`-th JPEG of a directory
def load_images(path, label, step=10):
    """Return ``(file_path, label)`` pairs for every ``step``-th ``.jpeg`` file in ``path``.

    Files are filtered by extension *before* sub-sampling. Stray non-image
    entries in the directory (for example ``.DS_Store``) therefore neither
    shift the sampling positions nor cause a sampled slot to be dropped.

    Args:
        path: Directory containing the image files.
        label: Class label attached to every returned path.
        step: Keep one image out of every ``step`` images, in sorted filename
            order. Defaults to 10.

    Returns:
        List of ``(os.path.join(path, filename), label)`` tuples.

    Raises:
        ValueError: If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError(f"step must be >= 1, got {step}")
    # Sort only the JPEG files so the sampling index refers to images, not to
    # arbitrary directory entries.
    jpeg_files = sorted(name for name in os.listdir(path) if name.endswith(".jpeg"))
    return [(os.path.join(path, name), label) for name in jpeg_files[::step]]

# Load the sampled (path, label) lists for both classes
cancer_images = load_images(path_cancer, label=1)  # label 1: cancer
no_cancer_images = load_images(path_no_cancer, label=0)  # label 0: no cancer

# Merge both classes into a single list of samples
dataset = [*cancer_images, *no_cancer_images]

In [6]:
# Dataset class
class CancerDataset(Dataset):
    """Map-style dataset over a list of ``(image_path, label)`` pairs.

    Images are opened lazily on access, converted to RGB and, if a transform
    was supplied, passed through it.
    """

    def __init__(self, dataset, transform=None):
        """Store the sample list and the optional per-image transform."""
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        """Return the number of ``(image_path, label)`` entries."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The label is returned as a ``torch.long`` tensor; the image is the
        transformed RGB image, or the plain RGB PIL image when no transform
        is configured.
        """
        path, target = self.dataset[idx]
        rgb_image = Image.open(path).convert("RGB")
        sample = self.transform(rgb_image) if self.transform else rgb_image
        return sample, torch.tensor(target, dtype=torch.long)

# Preprocessing pipeline: resize to 224x224, convert to a tensor and normalize
# every channel with mean 0.5 / std 0.5 (no data augmentation is applied here).
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [7]:
# Wrap the (path, label) list in a Dataset that applies the preprocessing
cancer_dataset = CancerDataset(dataset, transform=transform)

# One index per sample; the split is done on indices so that both subsets
# share the same underlying dataset object
dataset_size = len(cancer_dataset)
indices = list(range(dataset_size))

# Stratified 80/20 train/test split: passing the class labels via `stratify`
# preserves the class proportions in both subsets, so the reported accuracy is
# not skewed by a chance imbalance. `random_state` makes the split repeatable.
sample_labels = [sample_label for _, sample_label in dataset]
train_indices, test_indices = train_test_split(
    indices, test_size=0.2, random_state=42, stratify=sample_labels
)

# Subsets for training and testing
train_dataset = Subset(cancer_dataset, train_indices)
test_dataset = Subset(cancer_dataset, test_indices)

# Invariant: the split must partition the dataset without losing samples
assert len(train_dataset) + len(test_dataset) == dataset_size

# DataLoaders: shuffle only the training data
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Optional: report the subset sizes
print(f"Train Dataset Size: {len(train_dataset)}")
print(f"Test Dataset Size: {len(test_dataset)}")

Train Dataset Size: 800
Test Dataset Size: 200


## Model

In [8]:
# CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Architecture: two blocks of (3x3 convolution -> ReLU -> 2x2 max-pooling)
    followed by a 128-unit hidden linear layer and a linear output layer that
    produces one raw score (logit) per class.

    Args:
        num_classes: Number of output scores. Defaults to 2.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 224,
            which yields the previous hard-coded ``64 * 56 * 56`` flattened size.

    Raises:
        ValueError: If the input is too small to survive the two poolings.
    """

    def __init__(self, num_classes=2, input_size=224):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # The 3x3 / padding-1 convolutions keep the spatial size; each of the
        # two 2x2 poolings halves it (rounding down).
        flat_features = 64 * (height // 2 // 2) * (width // 2 // 2)
        if flat_features == 0:
            raise ValueError(
                f"input_size {input_size!r} is too small: it must be at least 4 in each dimension"
            )
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images ``x``."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Seed PyTorch's global random number generator before the model is created so
# that the weight initialization (and anything else drawing from the global
# RNG afterwards) is repeatable across "Restart Kernel -> Run All".
torch.manual_seed(42)

# Instantiate the model
model = SimpleCNN()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Training

In [9]:
# Train for a fixed number of epochs and report the mean batch loss per epoch
epochs = 4
for epoch in range(epochs):
    batch_losses = []
    for batch_images, batch_labels in train_dataloader:
        # Gradients accumulate by default, so clear them before each step
        optimizer.zero_grad()
        loss = criterion(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    running_loss = sum(batch_losses, 0.0)
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_dataloader)}")

Epoch 1, Loss: 1.4120253086090089
Epoch 2, Loss: 0.5450680872797966
Epoch 3, Loss: 0.3363558980822563
Epoch 4, Loss: 0.08733604282140732


## Evaluation

In [10]:
# Evaluate the classification accuracy on the test set
# Switch the model to evaluation mode so that layers which behave differently
# during training (e.g. dropout, batch normalization) use inference behavior.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_dataloader:
        outputs = model(images)
        # Predicted class = index of the highest score per sample
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")

Accuracy: 84.0%
