# In [5]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from PIL import Image

# Constants
# Root directory of the dataset; every entry listed in it is treated as a
# class folder and its name becomes the class label.
DATA_PATH = "Dataset_BUSI_with_GT"
# Target size handed to PIL's ``Image.resize`` for every loaded image.
IMG_SIZE = (224, 224)
# [train, val] fractions; only the train fraction (index 0) is passed to
# train_test_split.
DATA_SPLIT = [0.7, 0.3]  # [train, val]
# Batch size shared by the training and validation DataLoaders.
BATCH_SIZE = 32
# Number of iterations of the training loop.
NUM_EPOCHS = 10
# NOTE(review): not referenced in the visible part of the file; presumably the
# path the trained weights are written to — confirm against the training code.
MODEL_SAVE_PATH = "breast_ultrasound_model.pth"

# Custom Dataset
class CustomDataset(Dataset):
    """Index-aligned pairing of samples and labels.

    Indexing yields a dict with the keys ``'image'`` and ``'label'``. When a
    truthy ``transform`` was supplied, it is called with that dict and its
    return value is handed back instead.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        # The dataset length follows the sample container, not the labels.
        return len(self.data)

    def __getitem__(self, idx):
        item = dict(image=self.data[idx], label=self.labels[idx])
        return self.transform(item) if self.transform else item

# Load Data
def load_data(class_name, file_names):
    """Load the images of one class folder as grayscale arrays.

    Each name in ``file_names`` is resolved under ``DATA_PATH/class_name``,
    converted to single-channel ("L") mode, resized to ``IMG_SIZE`` and
    stored as a NumPy array. Ground-truth mask files are skipped.

    Args:
        class_name: Name of the class sub-folder; also used as the label of
            every image loaded from it.
        file_names: File names (not paths) located inside that folder.

    Returns:
        A ``(class_data, class_labels)`` tuple of two equally long lists:
        the image arrays and, for each of them, ``class_name``.
    """
    class_data = []
    class_labels = []
    for fname in file_names:
        # BUSI names masks "<image>_mask.png", and additional masks for the
        # same image get a numeric suffix ("<image>_mask_1.png"). Matching
        # only the "_mask.png" suffix lets those extra masks through as
        # training images, so look for the marker anywhere in the name.
        if "_mask" in fname:
            continue
        image_path = os.path.join(DATA_PATH, class_name, fname)
        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it as soon as the pixels have been extracted.
        with Image.open(image_path) as image:
            pixels = np.asarray(image.convert("L").resize(IMG_SIZE))
        class_data.append(pixels)
        class_labels.append(class_name)
    return class_data, class_labels

# Load data into memory
# Only sub-directories are class folders. A stray file in DATA_PATH (e.g.
# ".DS_Store") would make the nested os.listdir fail and would be counted as
# a class wherever len(class_folders) is used. Sorting removes the dependency
# on the filesystem's enumeration order.
class_folders = sorted(
    entry
    for entry in os.listdir(DATA_PATH)
    if os.path.isdir(os.path.join(DATA_PATH, entry))
)
class_folders_contents = {
    folder_name: sorted(os.listdir(os.path.join(DATA_PATH, folder_name)))
    for folder_name in class_folders
}

# Accumulate the images and their class-name labels across all class folders.
data = []
labels = []
for class_name, files in class_folders_contents.items():
    class_data, class_labels = load_data(class_name, files)
    data.extend(class_data)
    labels.extend(class_labels)

# Fail with a clear message instead of an obscure indexing error further down.
if not data:
    raise RuntimeError(f"No images found under {DATA_PATH!r}")

# Stack the per-image arrays into one array and insert a channel axis at
# position 1, then turn the label list into an array as well.
data = np.array(data)
data = data[:, None, :, :]
labels = np.array(labels)

# One-hot encode the class-name labels; the fitted binarizer is kept so the
# class order remains available afterwards.
label_binarizer = LabelBinarizer()
label_binarizer.fit(labels)
one_hot_labels = label_binarizer.transform(labels)

# Split the dataset into training and validation sets.
# Stratifying on the class names keeps the class proportions equal in both
# parts; the validation fraction is the complement of DATA_SPLIT[0].
trainX, valX, trainY, valY = train_test_split(
    data, one_hot_labels, train_size=DATA_SPLIT[0], stratify=labels
)

# Convert data to PyTorch tensors.
# torch.from_numpy keeps the NumPy dtype. The images are 8-bit grayscale
# arrays, and nn.Conv2d (float32 weights) rejects uint8 input, so the image
# tensors are cast to float32 here. Values are NOT rescaled.
# NOTE(review): pixel values stay in 0..255 — normalise here or in the
# training loop if that is not already done there.
trainX_torch = torch.from_numpy(trainX).float()
valX_torch = torch.from_numpy(valX).float()
# NOTE(review): the labels remain one-hot integer arrays. nn.CrossEntropyLoss
# needs class indices (e.g. argmax over dim 1) or floating-point class
# probabilities — confirm the training loop converts them.
trainY_torch = torch.from_numpy(trainY)
valY_torch = torch.from_numpy(valY)

# Create PyTorch datasets and dataloaders.
train_dataset = CustomDataset(trainX_torch, trainY_torch)
val_dataset = CustomDataset(valX_torch, valY_torch)

# Only the training data is reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Define the CNN model architecture
class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Every stage applies a 3x3 convolution (padding 1), a ReLU and a 2x2
    max-pool, so the spatial size is halved three times while the channel
    count grows 1 -> 16 -> 32 -> 64. The flattened features pass through a
    128-unit hidden layer with dropout (p=0.5) and a final linear layer that
    returns ``num_classes`` raw scores; no softmax is applied.

    ``fc1`` is sized for ``64 * 28 * 28`` features, i.e. for 224x224 inputs
    (224 / 2**3 == 28).
    """

    def __init__(self, num_classes):
        super().__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 28 * 28, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        # conv -> ReLU -> pool, three times in a row.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.maxpool(self.relu(conv(x)))
        # Collapse everything except the batch dimension.
        features = x.view(x.size(0), -1)
        hidden = self.relu(self.fc1(features))
        return self.fc2(self.dropout(hidden))

# Set up the compute device, loss, network and optimizer.
# CUDA is used when available, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
criterion = nn.CrossEntropyLoss()
# One output score per entry of class_folders.
model = SimpleCNN(num_classes=len(class_folders))
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training loop
for epoch in range(NUM_EPOCHS):
    # train() has to be CALLED: the bare attribute reference `model.train`
    # is a no-op expression and never switches the module into training
    # mode. This matters because the model contains a Dropout layer, whose
    # behaviour differs between training and evaluation mode.
    model.train()
