In [2]:
# Mount Google Drive so the '/content/drive/...' paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')
# IPython shell escape (not plain Python): print the current working directory.
!pwd

Mounted at /content/drive
/content


In [5]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from scipy.ndimage import gaussian_filter
from tqdm import tqdm
import matplotlib.pyplot as plt
import scipy.io as sio
from PIL import Image
import torch.nn.functional as F

# Training split: input images, ground-truth .mat annotations, cached density maps.
train_images_dir = '/content/drive/MyDrive/images'
train_gt_dir = '/content/drive/MyDrive/ground_truth'
train_dm_dir = '/content/drive/MyDrive/density_maps'

# Test split, laid out the same way as the training split.
test_images_dir = '/content/drive/MyDrive/images1'
test_gt_dir = '/content/drive/MyDrive/ground_truth1'
test_dm_dir = '/content/drive/MyDrive/density_maps1'

# Output locations (figures, checkpoints) and the optional pre-trained weights file.
viz_dir = '/content/drive/MyDrive/cctrans_visualizations'
MODEL_SAVE_DIR = '/content/drive/MyDrive/cctrans_checkpoints'
PRETRAINED_WEIGHTS_PATH = '/content/drive/MyDrive/64_256_upsampler.pt'

# Make sure every directory this script writes into exists before anything runs.
for _writable_dir in (train_dm_dir, test_dm_dir, viz_dir, MODEL_SAVE_DIR):
    os.makedirs(_writable_dir, exist_ok=True)

class CrowdDataset(Dataset):
    """Crowd-counting dataset yielding ``(image, density_map, count)`` samples.

    Each image in ``images_dir`` is paired with a ``GT_<name>.mat`` annotation
    file in ``gt_dir``.  Density maps are generated from the annotated points
    on first access and cached as ``<name>.npy`` files in ``dm_dir``.

    Args:
        images_dir: Directory containing ``.jpg`` / ``.png`` images.
        gt_dir: Directory containing the ``.mat`` point annotations.
        dm_dir: Directory used to cache generated density maps.
        image_size: Target ``(height, width)`` every sample is resized to.
    """

    def __init__(self, images_dir, gt_dir, dm_dir, image_size=(256, 256)):
        self.images_dir = images_dir
        self.gt_dir = gt_dir
        self.dm_dir = dm_dir
        # Stored as a tuple so comparisons against ndarray.shape work even if
        # the caller passed a list.
        self.image_size = tuple(image_size)
        self.image_files = sorted(
            f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png'))
        )
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, density_map_tensor, crowd_count)`` for ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_name = self.image_files[idx]
        stem = os.path.splitext(img_name)[0]
        img_path = os.path.join(self.images_dir, img_name)

        # 'IMG_<n>[...]' maps to 'GT_IMG_<n>.mat'; anything else to 'GT_<stem>.mat'.
        stem_parts = stem.split('_')
        if len(stem_parts) > 1 and stem_parts[0] == 'IMG':
            gt_name = f'GT_IMG_{stem_parts[1]}.mat'
        else:
            gt_name = f'GT_{stem}.mat'
        gt_path = os.path.join(self.gt_dir, gt_name)
        dm_path = os.path.join(self.dm_dir, f'{stem}.npy')

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_size = image.shape[:2]

        # image_size is (height, width) but cv2.resize expects (width, height).
        target_h, target_w = self.image_size
        image_resized = cv2.resize(image, (target_w, target_h))
        image_tensor = self.transform(Image.fromarray(image_resized))

        if os.path.exists(dm_path):
            density_map = np.load(dm_path)
            if density_map.shape[:2] != self.image_size:
                cached_sum = float(np.sum(density_map))
                density_map = cv2.resize(
                    density_map, (target_w, target_h), interpolation=cv2.INTER_LINEAR
                )
                # Interpolation changes the total mass; rescale so the map
                # still integrates to the cached head count.
                resized_sum = float(np.sum(density_map))
                if resized_sum > 0:
                    density_map = density_map * (cached_sum / resized_sum)
            crowd_count = float(np.sum(density_map))
        else:
            points, gt_loaded = self._load_points(gt_path)
            if points.size > 0:
                points_resized = points.copy()
                points_resized[:, 0] *= target_w / original_size[1]
                points_resized[:, 1] *= target_h / original_size[0]
            else:
                points_resized = points
            crowd_count = float(points.shape[0])

            density_map = self.create_density_map(points_resized, target_size=self.image_size)
            # Never cache a map built from a failed annotation load, otherwise a
            # transient error would permanently turn the sample into "0 people".
            if gt_loaded:
                np.save(dm_path, density_map)

        density_map_tensor = torch.from_numpy(density_map).float().unsqueeze(0)
        return image_tensor, density_map_tensor, crowd_count

    @staticmethod
    def _load_points(gt_path):
        """Read the annotated ``(x, y)`` points from a ``.mat`` file.

        Returns:
            ``(points, ok)`` where ``points`` is a float array with one row per
            annotation and ``ok`` is False when the file could not be
            read or did not have the expected layout (``points`` is then empty).
        """
        import warnings

        empty = np.empty((0, 2), dtype=np.float64)
        try:
            gt_data = sio.loadmat(gt_path)
            points = np.asarray(gt_data['image_info'][0, 0][0][0][0], dtype=np.float64)
        except Exception as err:  # best-effort: a bad annotation must not abort training
            warnings.warn(
                f'Could not load ground truth {gt_path}: {err!r}; treating sample as empty.'
            )
            return empty, False

        if points.size > 0 and (points.ndim != 2 or points.shape[1] < 2):
            warnings.warn(
                f'Unexpected annotation shape {points.shape} in {gt_path}; '
                'treating sample as empty.'
            )
            return empty, False
        return points, True

    def create_density_map(self, points, target_size, sigma=8.0):
        """Build a Gaussian-smoothed density map from point annotations.

        Args:
            points: Array whose rows start with ``(x, y)`` pixel coordinates,
                already expressed in the ``target_size`` frame.
            target_size: ``(height, width)`` of the map to create.
            sigma: Standard deviation of the Gaussian kernel, in pixels.

        Returns:
            A ``float32`` array of shape ``target_size`` that sums to
            ``points.shape[0]`` whenever at least one point lies inside the map.
        """
        h, w = target_size
        density_map = np.zeros((h, w), dtype=np.float32)
        num_points = points.shape[0]
        if num_points == 0:
            return density_map

        xs = points[:, 0].astype(np.int64)
        ys = points[:, 1].astype(np.int64)
        inside = (xs >= 0) & (xs < w) & (ys >= 0) & (ys < h)
        # np.add.at accumulates repeated indices, so several heads falling on
        # the same pixel each contribute instead of overwriting one another.
        np.add.at(density_map, (ys[inside], xs[inside]), 1.0)

        density_map = gaussian_filter(density_map, sigma=sigma, mode='constant')
        # mode='constant' loses mass near the borders (and out-of-frame points
        # were dropped above); renormalise so the map sums to the full count.
        current_sum = np.sum(density_map)
        if current_sum > 0:
            density_map = density_map / current_sum * num_points
        return density_map

class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> batch-norm -> ReLU stages.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the second stage.
        mid_channels: Channels between the two stages; falls back to
            ``out_channels`` when omitted (or otherwise falsy).
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels or out_channels
        stages = []
        # Layers are created in the same order as they run, so the Sequential
        # indices are 0..5: conv, bn, relu, conv, bn, relu.
        for src, dst in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                nn.Conv2d(src, dst, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(dst),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both convolution stages."""
        return self.double_conv(x)

class Down(nn.Module):
    """Encoder step: 2x2 max-pooling followed by a ``DoubleConv``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the ``DoubleConv``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and apply the convolution block."""
        downsampled = self.maxpool_conv(x)
        return downsampled

class Up(nn.Module):
    """Decoder step: upsample, pad to the skip tensor's size, concatenate, convolve.

    Args:
        in_channels: Channels the ``DoubleConv`` receives after concatenation.
        out_channels: Channels produced by the ``DoubleConv``.
        bilinear: When truthy, upsample with ``nn.Upsample`` (bilinear);
            otherwise use a stride-2 ``nn.ConvTranspose2d`` that halves the
            channel count.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        half = in_channels // 2
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half)

    def forward(self, x1, x2):
        """Upsample ``x1``, align it with skip tensor ``x2`` and fuse them."""
        x1 = self.up(x1)
        # Size gap between the skip connection and the upsampled tensor.
        gap_h = x2.shape[2] - x1.shape[2]
        gap_w = x2.shape[3] - x1.shape[3]
        left, top = gap_w // 2, gap_h // 2
        # F.pad order for the last two dims: (left, right, top, bottom).
        x1 = F.pad(x1, [left, gap_w - left, top, gap_h - top])
        fused = torch.cat([x2, x1], dim=1)
        return self.conv(fused)

class OutConv(nn.Module):
    """Final 1x1 convolution mapping features to the requested output channels.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels of the produced map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
        )

    def forward(self, x):
        """Apply the 1x1 projection to ``x``."""
        projected = self.conv(x)
        return projected

class UNet(nn.Module):
    """Four-level encoder/decoder network with skip connections.

    Args:
        n_channels: Channels of the input image.
        n_classes: Channels of the output map.
        bilinear: Use bilinear upsampling in the decoder instead of
            transposed convolutions.

    With ``bilinear=True`` the upsampling layer keeps the channel count, so
    the bottleneck and decoder widths are halved (``factor = 2``) to make the
    concatenated skip tensors match what each ``Up`` block expects.  The
    default ``bilinear=False`` layout is unchanged.
    """

    def __init__(self, n_channels, n_classes, bilinear=False):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # nn.Upsample does not reduce channels the way ConvTranspose2d does, so
        # the layers feeding each Up block must emit half as many channels.
        factor = 2 if bilinear else 1

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        """Return the ``n_classes``-channel map predicted for ``x``."""
        # Encoder: keep every level's output for the skip connections.
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        # Decoder: fuse each upsampled tensor with the matching encoder level.
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        return self.outc(x)

class CrowdDiff(nn.Module):
    """Density-map regressor: a ``UNet`` whose ReLU-clamped output is summed into a count.

    Args:
        img_channels: Channels of the input image.
        output_channels: Channels of the predicted density map.
        time_embed_dim: Accepted but not used by this implementation.

    NOTE(review): ``counting_branch`` is constructed but never called in
    ``forward``.  It is kept because removing it would change the keys of
    saved state dicts.
    """

    def __init__(self, img_channels=3, output_channels=1, time_embed_dim=256):
        super().__init__()
        self.denoising_unet = UNet(n_channels=img_channels, n_classes=output_channels)
        head_layers = [
            nn.Conv2d(output_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, kernel_size=1),
        ]
        self.counting_branch = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return ``(density_map, predicted_count)`` for the batch ``x``."""
        raw_map = self.denoising_unet(x)
        # Densities cannot be negative, so clamp before integrating.
        density_map = F.relu(raw_map)
        # Sum over every non-batch dimension: one count per sample.
        predicted_count = density_map.sum(dim=(1, 2, 3))
        return density_map, predicted_count

def train_one_epoch(model, dataloader, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    The loss is the sum of squared errors between predicted and ground-truth
    density maps, divided by the batch size.

    Returns:
        The per-batch losses averaged over ``len(dataloader)``.
    """
    model.train()
    criterion = nn.MSELoss(reduction='sum')
    batch_losses = []

    for images, gt_dms, _ in tqdm(dataloader, desc="Training"):
        images, gt_dms = images.to(device), gt_dms.to(device)

        optimizer.zero_grad()
        pred_dm, _ = model(images)

        # Bring the target to the prediction's spatial size when they differ.
        pred_hw = pred_dm.shape[2:]
        if pred_hw != gt_dms.shape[2:]:
            gt_dms = F.interpolate(gt_dms, size=pred_hw, mode='bilinear', align_corners=False)

        loss = criterion(pred_dm, gt_dms) / images.shape[0]
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

def evaluate(model, dataloader, device):
    """Compute count errors of ``model`` over ``dataloader``.

    Returns:
        ``(mae, rmse)``: mean absolute error and root-mean-squared error of
        the predicted counts, averaged over the samples actually evaluated.
        Both are ``nan`` when the loader yields no samples.
    """
    model.eval()
    abs_err_sum, sq_err_sum, num_samples = 0.0, 0.0, 0
    with torch.no_grad():
        for images, _, gt_counts in tqdm(dataloader, desc="Evaluating"):
            images = images.to(device)
            _, pred_counts = model(images)
            # Move/cast the targets once, and force the same shape so a stray
            # extra dimension cannot silently broadcast into a (B, B) matrix.
            gt_counts = gt_counts.to(device=device, dtype=pred_counts.dtype)
            diff = pred_counts - gt_counts.reshape(pred_counts.shape)
            abs_err_sum += diff.abs().sum().item()
            sq_err_sum += (diff ** 2).sum().item()
            # Count what was really seen: len(dataloader.dataset) is wrong when
            # the loader uses drop_last or a sampler over a subset.
            num_samples += pred_counts.shape[0]

    if num_samples == 0:
        return float('nan'), float('nan')
    return abs_err_sum / num_samples, (sq_err_sum / num_samples) ** 0.5

def visualize_predictions(model, dataloader, device, num_images=5):
    """Save side-by-side figures of input image, ground-truth map and prediction.

    One figure is written to ``viz_dir`` as ``vis_<i>.png`` for each of the
    first ``num_images`` batches; only the first sample of a batch is drawn.
    """
    model.eval()

    # Invert the dataset's Normalize transform so images display with
    # their original colours.
    norm_mean = (0.485, 0.456, 0.406)
    norm_std = (0.229, 0.224, 0.225)
    inv_normalize = transforms.Normalize(
        mean=[-m / s for m, s in zip(norm_mean, norm_std)],
        std=[1 / s for s in norm_std],
    )

    with torch.no_grad():
        for i, (images, gt_dm, _) in enumerate(dataloader):
            if i >= num_images:
                break

            images = images.to(device)
            pred_dm, _ = model(images)

            first_image = inv_normalize(images[0].cpu())
            img_np = first_image.permute(1, 2, 0).numpy().clip(0, 1)

            # Match the ground truth to the prediction's spatial size if needed.
            gt = gt_dm.to(device)
            pred_hw = pred_dm.shape[2:]
            if pred_hw != gt.shape[2:]:
                gt = F.interpolate(gt, size=pred_hw, mode='bilinear', align_corners=False)

            gt_np = gt[0].squeeze().cpu().numpy()
            pred_np = pred_dm[0].squeeze().cpu().numpy()

            fig, axes = plt.subplots(1, 3, figsize=(18, 6))
            axes[0].imshow(img_np)
            axes[1].imshow(gt_np, cmap='jet')
            axes[2].imshow(pred_np, cmap='jet')
            for panel in axes:
                panel.axis('off')

            plt.savefig(os.path.join(viz_dir, f'vis_{i}.png'))
            plt.close(fig)

# Script entry point: build the datasets, optionally warm-start from pre-trained
# weights, train while tracking the best test MAE, then visualise predictions
# made by the best checkpoint.
if __name__ == '__main__':
    NUM_EPOCHS = 20
    best_ckpt_path = os.path.join(MODEL_SAVE_DIR, 'best.pth')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_dataset = CrowdDataset(train_images_dir, train_gt_dir, train_dm_dir)
    test_dataset = CrowdDataset(test_images_dir, test_gt_dir, test_dm_dir)
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = CrowdDiff(img_channels=3, output_channels=1).to(device)
    if os.path.exists(PRETRAINED_WEIGHTS_PATH):
        state = torch.load(PRETRAINED_WEIGHTS_PATH, map_location=device)
        model_dict = model.state_dict()
        # Keep only entries whose name and shape match this model; the shape
        # guard also skips non-tensor metadata stored in the checkpoint.
        pretrained = {
            k: v for k, v in state.items()
            if k in model_dict and hasattr(v, 'shape') and v.shape == model_dict[k].shape
        }
        model_dict.update(pretrained)
        model.load_state_dict(model_dict)
        # Make a no-op warm start visible instead of silently training from scratch.
        print(f"Loaded {len(pretrained)}/{len(model_dict)} tensors from {PRETRAINED_WEIGHTS_PATH}")
        if not pretrained:
            print("Warning: no pre-trained tensors matched the model; training from scratch.")

    optimizer = optim.AdamW(model.parameters(), lr=1e-5)
    best_mae = float('inf')
    saved_best = False
    for epoch in range(NUM_EPOCHS):
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
        loss = train_one_epoch(model, train_loader, optimizer, device)
        print(f"Loss: {loss:.4f}")
        mae, rmse = evaluate(model, test_loader, device)
        print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}")
        if mae < best_mae:
            best_mae = mae
            torch.save(model.state_dict(), best_ckpt_path)
            saved_best = True

    # The last epoch is not necessarily the best one, so restore the best
    # checkpoint written during this run before producing the figures.
    if saved_best:
        model.load_state_dict(torch.load(best_ckpt_path, map_location=device))
        print(f"Restored best checkpoint (MAE: {best_mae:.2f}) for visualization")
    visualize_predictions(model, test_loader, device)


Attempting to load pre-trained weights from /content/drive/MyDrive/64_256_upsampler.pt

--- State Dictionary Loading Summary ---
----------------------------------------


--- Epoch 1/20 ---


Training: 100%|██████████| 100/100 [02:20<00:00,  1.41s/it]


Average Training Loss: 6054.9362


Evaluating: 100%|██████████| 316/316 [03:10<00:00,  1.66it/s]


Evaluation after epoch 1: MAE = 5231.85, RMSE = 5267.91
Saved best model with MAE: 5231.85

--- Epoch 2/20 ---


Training: 100%|██████████| 100/100 [00:29<00:00,  3.41it/s]


Average Training Loss: 205.8007


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.56it/s]


Evaluation after epoch 2: MAE = 576.33, RMSE = 590.17
Saved best model with MAE: 576.33

--- Epoch 3/20 ---


Training: 100%|██████████| 100/100 [00:32<00:00,  3.05it/s]


Average Training Loss: 43.7598


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.72it/s]


Evaluation after epoch 3: MAE = 116.98, RMSE = 131.51
Saved best model with MAE: 116.98

--- Epoch 4/20 ---


Training: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]


Average Training Loss: 25.5400


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.53it/s]


Evaluation after epoch 4: MAE = 67.66, RMSE = 91.99
Saved best model with MAE: 67.66

--- Epoch 5/20 ---


Training: 100%|██████████| 100/100 [00:31<00:00,  3.15it/s]


Average Training Loss: 20.1546


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.30it/s]


Evaluation after epoch 5: MAE = 70.87, RMSE = 109.81

--- Epoch 6/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.47it/s]


Average Training Loss: 17.8188


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.24it/s]


Evaluation after epoch 6: MAE = 74.26, RMSE = 114.03

--- Epoch 7/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.46it/s]


Average Training Loss: 16.2063


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.32it/s]


Evaluation after epoch 7: MAE = 83.15, RMSE = 122.79

--- Epoch 8/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.47it/s]


Average Training Loss: 15.5387


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.18it/s]


Evaluation after epoch 8: MAE = 91.82, RMSE = 130.20

--- Epoch 9/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.47it/s]


Average Training Loss: 14.7486


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.22it/s]


Evaluation after epoch 9: MAE = 100.35, RMSE = 136.91

--- Epoch 10/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.46it/s]


Average Training Loss: 14.6452


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.98it/s]


Evaluation after epoch 10: MAE = 98.26, RMSE = 135.30

--- Epoch 11/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.45it/s]


Average Training Loss: 14.1444


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.55it/s]


Evaluation after epoch 11: MAE = 104.75, RMSE = 140.46

--- Epoch 12/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.47it/s]


Average Training Loss: 13.8696


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.56it/s]


Evaluation after epoch 12: MAE = 109.90, RMSE = 144.51

--- Epoch 13/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.46it/s]


Average Training Loss: 14.4803


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.93it/s]


Evaluation after epoch 13: MAE = 104.63, RMSE = 140.33

--- Epoch 14/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.48it/s]


Average Training Loss: 13.9236


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 29.36it/s]


Evaluation after epoch 14: MAE = 107.95, RMSE = 142.79

--- Epoch 15/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.48it/s]


Average Training Loss: 13.6047


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.15it/s]


Evaluation after epoch 15: MAE = 113.31, RMSE = 147.06

--- Epoch 16/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.49it/s]


Average Training Loss: 13.4131


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.60it/s]


Evaluation after epoch 16: MAE = 114.07, RMSE = 147.71

--- Epoch 17/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]


Average Training Loss: 13.3334


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.58it/s]


Evaluation after epoch 17: MAE = 117.28, RMSE = 150.28

--- Epoch 18/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]


Average Training Loss: 13.2437


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.13it/s]


Evaluation after epoch 18: MAE = 117.02, RMSE = 150.13

--- Epoch 19/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]


Average Training Loss: 13.1935


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.76it/s]


Evaluation after epoch 19: MAE = 118.58, RMSE = 151.35

--- Epoch 20/20 ---


Training: 100%|██████████| 100/100 [00:28<00:00,  3.49it/s]


Average Training Loss: 13.1575


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.57it/s]


Evaluation after epoch 20: MAE = 118.99, RMSE = 151.71

--- Final Evaluation ---
Loading best model for final evaluation.


Evaluating: 100%|██████████| 316/316 [00:10<00:00, 30.48it/s]


Final Test MAE (Best Model): 67.66
Final Test RMSE (Best Model): 91.99

--- Generating Visualizations ---
Visualizations saved to: /content/drive/MyDrive/cctrans_visualizations
