<H2> Import Libraries and Set Device </H2>


In [1]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import os

# Dataset location. Set the HURON_DATASET_PATH environment variable to point
# the notebook at a different directory without editing this cell; when the
# variable is unset, the original local path is used.
dataset_path = os.environ.get("HURON_DATASET_PATH", r'D:\Huron_Unlabeled_Data')

# Set device for computation: CUDA when a GPU is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

<h2> Unlabeled Dataset Class </h2>


In [2]:
class Unlabeled_Dataset(Dataset):
    """Dataset of unlabeled images read from a list of file paths.

    Each item is the image at the given path converted to RGB and, when a
    transform is supplied, passed through that transform. A sample that
    cannot be loaded or transformed is reported on stdout and replaced by an
    all-zero tensor so iteration can continue.

    Args:
        image_files: Sequence of image file paths.
        transform: Optional callable applied to each loaded PIL image.
        fallback_shape: Optional shape of the zero tensor returned for a
            sample that fails to load. When None, the shape is inferred by
            running ``transform`` on a blank image so the placeholder matches
            the successfully loaded samples; if that is not possible the
            shape is (3, 512, 512).
    """

    # Placeholder shape used when nothing better can be determined.
    _DEFAULT_FALLBACK_SHAPE = (3, 512, 512)

    def __init__(self, image_files, transform=None, fallback_shape=None):
        self.image_files = image_files
        self.transform = transform
        self.fallback_shape = fallback_shape

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_files)

    def _placeholder(self):
        """Build the all-zero tensor that stands in for an unreadable sample."""
        if self.fallback_shape is not None:
            return torch.zeros(tuple(self.fallback_shape))

        if self.transform:
            # Run the transform on a blank image to learn the shape/dtype it
            # produces; a placeholder of a different shape than the real
            # samples cannot be stacked into a batch with them.
            try:
                probe = self.transform(Image.new('RGB', (512, 512)))
                if torch.is_tensor(probe):
                    return torch.zeros_like(probe)
            except Exception:
                # Best effort only: if the transform itself is what failed,
                # fall through to the default shape below.
                pass

        return torch.zeros(*self._DEFAULT_FALLBACK_SHAPE)

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image at position ``idx``.

        On any error while opening, converting or transforming the image, a
        message is printed and a zero tensor is returned instead.
        """
        img_path = self.image_files[idx]
        try:
            # The context manager closes the underlying file as soon as the
            # pixel data has been copied out by convert().
            with Image.open(img_path) as img:
                image = img.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return self._placeholder()  # Return zero tensor if loading fails


<H2> Data Transformation and Loading </H2>

In [3]:
# Preprocessing applied to every image: resize to 224x224, then convert to a
# PyTorch tensor with pixel values in [0, 1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Recursively list all image files in the dataset. Sorting makes the sample
# order independent of the order in which the filesystem lists directories.
image_extensions = ('.png',)
image_files = sorted(
    os.path.join(root, filename)
    for root, _dirs, files in os.walk(dataset_path)
    for filename in files
    if filename.lower().endswith(image_extensions)
)

# os.walk yields nothing for a missing directory, so without this check a
# wrong path would silently produce an empty dataset.
if not image_files:
    raise FileNotFoundError(
        f"No files with extensions {image_extensions} found under {dataset_path!r}"
    )

dataset = Unlabeled_Dataset(image_files, transform=transform)

# Pinned host memory only helps host-to-GPU copies; enabling it without CUDA
# just triggers a warning.
loader = DataLoader(
    dataset,
    batch_size=16,
    num_workers=0,
    pin_memory=torch.cuda.is_available(),
)

<H2> Computing the Mean and Standard Deviation of the Unlabeled Dataset </H2>


In [4]:
import torch
from tqdm import tqdm

# Running per-channel totals over every pixel of every image. They are kept in
# float64 because the sums span a very large number of pixels and the variance
# below is a difference of two nearly equal quantities, which magnifies any
# rounding error accumulated in float32.
channel_sum = torch.zeros(3, dtype=torch.float64, device=device)
channel_squared_sum = torch.zeros(3, dtype=torch.float64, device=device)
num_pixels = 0

# Calculate mean and std with a progress bar.
# Note: samples that failed to load arrive from Unlabeled_Dataset as all-zero
# tensors and are included in these totals like any other image.
for images in tqdm(loader, desc="Calculating mean and std"):
    images = images.to(device)

    # Reduce over batch, height and width, leaving one value per channel.
    channel_sum += images.sum(dim=[0, 2, 3], dtype=torch.float64)
    channel_squared_sum += (images ** 2).sum(dim=[0, 2, 3], dtype=torch.float64)
    num_pixels += images.size(0) * images.size(2) * images.size(3)

if num_pixels == 0:
    raise RuntimeError("No images were processed; cannot compute mean and std.")

# Per-channel mean and population std: Var[x] = E[x^2] - E[x]^2.
mean = channel_sum / num_pixels
# Clamp at zero: rounding can leave a tiny negative variance for a
# (near-)constant channel, which would turn the square root into NaN.
variance = (channel_squared_sum / num_pixels - mean ** 2).clamp(min=0)

# Hand the results back as float32 tensors, matching the image dtype.
std = variance.sqrt().float()
mean = mean.float()

print(f"Calculated Mean: {mean}, Calculated Std: {std}")


Calculating mean and std: 100%|██████████| 1022/1022 [04:18<00:00,  3.96it/s]

Calculated Mean: tensor([0.8786, 0.8474, 0.8732], device='cuda:0'), Calculated Std: tensor([0.2504, 0.2687, 0.2513], device='cuda:0')





<H2> Apply Transformations to Prepare Data for the SSL Encoder </H2>