<a href="https://colab.research.google.com/github/jwalker2124/bme3053c/blob/main/BME3053c_FinalUpdated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
"""!pip install --upgrade --force-reinstall numpy
!pip install --upgrade --force-reinstall torch torchvision torchaudio
!pip install -U --force-reinstall numpy==1.26.4 # Installs NumPy 1.26.4
!pip install -U torch torchvision --no-deps # Installs torch and torchvision without upgrading dependencies, like numpy
!pip install -U scikit-image # Install scikit-image, it may need rebuilding if it relies on numpy<2
"""
# Check numpy version to confirm
# (the pip pinning commands earlier in this cell are wrapped in a string literal, so they do not run)
import numpy
print(numpy.__version__) # Should print 1.26.4 (the value shown in the recorded output)


# Colab-specific module; presumably unavailable when the notebook runs outside Google Colab.
from google.colab import drive
# Filesystem helpers used to walk the image and mask folders.
import os
# PyTorch data utilities, torchvision preprocessing and PIL image loading.
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
# Core PyTorch plus the layer (nn) and functional (F) namespaces used by the model.
import torch
import torch.nn as nn
import torch.nn.functional as F
# R^2 metric reported once per epoch by the training loop.
from sklearn.metrics import r2_score

# Mounts Google Drive at /content/drive so the dataset paths used later resolve.
drive.mount('/content/drive')

1.26.4
Mounted at /content/drive


In [3]:

def _list_visible_files(directory):
    """Return the sorted names of the regular, non-hidden files in ``directory``."""
    # os.listdir returns entries in arbitrary order, so sort for reproducibility.
    # Hidden entries (.DS_Store) and sub-directories (.ipynb_checkpoints) are
    # not images/masks and must not be scanned or counted.
    return sorted(
        name
        for name in os.listdir(directory)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(directory, name))
    )


def generate_labels(image_dir, mask_dir):
    """
    Generate labels (cell counts) based on the number of masks per image.

    Directory listings are sorted so the returned order is the same on every
    run, and stray entries (hidden files, sub-directories such as
    ``.ipynb_checkpoints``) are ignored instead of crashing the scan or
    inflating a count.

    Args:
        image_dir (str): Path to the directory containing images.
        mask_dir (str): Path to the directory containing mask folders for each image.
            The masks of ``<id>.png`` are looked up in ``<mask_dir>/<id>/masks``.

    Returns:
        list of tuples: [(image_path, cell_count), ...] sorted by image file name.

    Raises:
        FileNotFoundError: If ``image_dir`` or the ``masks`` folder of an image
            does not exist.
    """
    data = []
    for image_id in _list_visible_files(image_dir):
        image_path = os.path.join(image_dir, image_id)
        mask_path = os.path.join(mask_dir, image_id.removesuffix('.png'), "masks")
        # One mask file per cell, so the number of mask files is the label.
        cell_count = len(_list_visible_files(mask_path))
        data.append((image_path, cell_count))
    return data

# Dataset locations on the mounted Drive: a flat folder of images and a folder
# holding one sub-folder (with a "masks" directory) per image.
train_images = "/content/drive/MyDrive/BME3053c_Final/data/stage1_train_images"
data_dir = "/content/drive/MyDrive/BME3053c_Final/data/stage1_train"

# Build the (image_path, cell_count) pairs used as regression targets.
labels = generate_labels(image_dir=train_images, mask_dir=data_dir)


In [4]:
class CellCountDataset(torch.utils.data.Dataset):
    """Dataset of (image, cell count) pairs backed by image files on disk.

    Args:
        data: Indexable collection of ``(image_path, cell_count)`` pairs.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, data, transform=None):
        self.data, self.transform = data, transform

    def __len__(self):
        # One sample per (path, count) pair.
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, count)`` with the count as a float32 tensor."""
        path, count = self.data[idx]
        # Force three channels regardless of how the file is stored.
        sample = Image.open(path).convert("RGB")
        sample = self.transform(sample) if self.transform else sample
        return sample, torch.tensor(count, dtype=torch.float32)

# Preprocessing pipeline: resize every image to 128x128, then convert it to a tensor.
transform = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

# Wrap the labelled pairs in a dataset and serve them as shuffled mini-batches of 16.
dataset = CellCountDataset(data=labels, transform=transform)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=16,
    shuffle=True,
)


In [6]:
class CellCountNet(nn.Module):
    """Small CNN that regresses one cell count per RGB image.

    Architecture: two (3x3 conv -> ReLU -> 2x2 max-pool) stages producing 16 and
    then 32 channels, followed by a 128-unit hidden linear layer and a single
    linear output unit.

    Args:
        input_size (int or tuple): Spatial size of the input images, either one
            int for square images or a ``(height, width)`` pair. Defaults to
            128, which reproduces the original ``32 * 32 * 32`` flattened size.

    Raises:
        ValueError: If either dimension is smaller than 4, because the two
            pooling stages would leave no spatial positions.
    """

    def __init__(self, input_size=128):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(
                f"input_size must be at least 4x4, got {height}x{width}"
            )

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convs keep H and W; each of the two 2x2 pools halves
        # them (rounding down), so the feature map is 32 x (H // 4) x (W // 4).
        flat_features = 32 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to ``(batch, 1)`` predicted counts."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten for FC layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # Regression output (no activation)
        return x


In [9]:
from torch.optim.lr_scheduler import StepLR

# Model, loss, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CellCountNet().to(device)
criterion = nn.MSELoss()  # NOTE: not used below; the loop computes a count-weighted squared error by hand
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

#scheduler = StepLR(optimizer, step_size=5, gamma=0.95)  # Reduce LR by 5% every 5 epochs
epsilon = 1e-6  # Small value to prevent division by zero
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0  # Sum (not mean) of the per-batch weighted losses

    # Predictions/targets of the whole epoch, collected for the R^2 score
    all_preds = []
    all_counts = []

    for images, counts in dataloader:
        images = images.to(device)
        counts = counts.to(device)

        optimizer.zero_grad()
        # The model returns (batch, 1). Drop only that trailing dimension:
        # a bare squeeze() would also drop the batch dimension when the last
        # batch holds a single image, and the 0-d result cannot be extended
        # into the lists below.
        outputs = model(images).squeeze(1)

        # Compute individual losses
        loss_per_sample = (outputs - counts) ** 2  # Squared error for each sample

        # Compute weights (inverse proportional to count)
        weights = 1.0 / (counts + epsilon)

        # Apply weights to the loss
        weighted_loss = (weights * loss_per_sample).mean()

        # Backpropagation
        weighted_loss.backward()
        optimizer.step()

        epoch_loss += weighted_loss.item()

        all_preds.extend(outputs.detach().cpu().numpy())
        all_counts.extend(counts.cpu().numpy())

    #scheduler.step()
    r2 = r2_score(all_counts, all_preds)

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, R^2: {r2: .4f}")

Epoch 1/100, Loss: 901.4296, R^2: -0.2974
Epoch 2/100, Loss: 912.8829, R^2: -0.2901
Epoch 3/100, Loss: 811.0158, R^2: -0.2689
Epoch 4/100, Loss: 840.2483, R^2: -0.2510
Epoch 5/100, Loss: 780.0918, R^2: -0.2160
Epoch 6/100, Loss: 800.1796, R^2: -0.1922
Epoch 7/100, Loss: 751.5945, R^2: -0.1747
Epoch 8/100, Loss: 854.0558, R^2: -0.2300
Epoch 9/100, Loss: 790.6206, R^2: -0.2146
Epoch 10/100, Loss: 697.3278, R^2: -0.1010
Epoch 11/100, Loss: 736.5576, R^2: -0.0560
Epoch 12/100, Loss: 738.4486, R^2: -0.0812
Epoch 13/100, Loss: 602.7681, R^2:  0.0164
Epoch 14/100, Loss: 479.5314, R^2:  0.1915
Epoch 15/100, Loss: 398.7224, R^2:  0.3085
Epoch 16/100, Loss: 376.5245, R^2:  0.3651
Epoch 17/100, Loss: 324.9423, R^2:  0.4226
Epoch 18/100, Loss: 343.5312, R^2:  0.4347
Epoch 19/100, Loss: 320.1661, R^2:  0.4574
Epoch 20/100, Loss: 295.6901, R^2:  0.4754
Epoch 21/100, Loss: 272.9540, R^2:  0.5104
Epoch 22/100, Loss: 249.9719, R^2:  0.5426
Epoch 23/100, Loss: 244.9340, R^2:  0.5448
Epoch 24/100, Loss: 

In [1]:
import matplotlib.pyplot as plt

# Evaluate and visualize
# Requires `model`, `device`, `dataloader` and `dataset` from the earlier cells,
# so those cells must have been run in this kernel first.
model.eval()
num_samples = 5  # Number of samples to display
sample_images = []  # To store images for visualization
true_counts = []    # To store ground truth counts
predicted_counts = []  # To store predicted counts
absolute_errors = []  # To store absolute errors

with torch.no_grad():
    total_count_error = 0
    # NOTE: `dataloader` wraps the training data, so this is a training-set
    # error, not a held-out test/validation score.
    # A single pass both accumulates the error and keeps the first few samples.
    for images, counts in dataloader:
        images = images.to(device)
        counts = counts.to(device)
        # squeeze(1) keeps the batch dimension even for a batch of one image;
        # a bare squeeze() would return a 0-d tensor that cannot be indexed.
        outputs = model(images).squeeze(1)  # Model predictions
        total_count_error += torch.sum(torch.abs(outputs - counts)).item()

        # Store sample data until `num_samples` images have been collected
        remaining = num_samples - len(sample_images)
        for i in range(min(len(images), remaining)):
            true_value = counts[i].item()
            predicted_value = outputs[i].item()
            sample_images.append(images[i].cpu().permute(1, 2, 0).numpy())  # CHW tensor -> HWC image
            true_counts.append(true_value)
            predicted_counts.append(predicted_value)
            absolute_errors.append(abs(predicted_value - true_value))

    print(f"Average absolute count error: {total_count_error / len(dataloader.dataset):.2f}")

# Plot sample images with counts and errors
plt.figure(figsize=(15, 5))
for i in range(len(sample_images)):
    plt.subplot(1, len(sample_images), i + 1)
    plt.imshow(sample_images[i])
    plt.axis("off")
    plt.title(
        f"True: {int(true_counts[i])}\n"
        f"Pred: {predicted_counts[i]:.1f}\n"
        f"Abs Error: {absolute_errors[i]:.1f}"
    )

plt.tight_layout()
plt.show()


NameError: name 'model' is not defined