In [None]:
!pip install torch torchvision timm




## Q1) --> Design steps

### --> TASKS -->

Three types of visual recognition/classification tasks are studied in the following
sections. The first is based on the concept of “symmetry” (section 4), the second
on “counting” (section 5), and the last on “grouping or conformance behavior” (section 6). The first two tasks include several sub-tasks, which may require additional learning of concepts such as “uniformity” or “grouping”. 

All tasks are designed as binary classification problems.

### --> DATASET GENERATION -->

We create synthetic data sets for these image recognition problems. All images are
generated at a size of 200 × 200.

### --> NETWORK CONFIG -->

To handle the binary tasks and these synthetic images, we adapt the Inception-v4
model by changing the input size and replacing the original softmax output layer with
one hidden fully connected layer of 1024 nodes (with ReLU activation) plus one new
softmax layer of 2 nodes. 

We also ensure all previous layers are frozen to retain the learned features from ImageNet, except the new layers (which will be trained).

In [None]:
import os
import sys
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
from PIL import Image
import timm
from tqdm import tqdm
from torch.optim import RMSprop
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import precision_score, recall_score

class ImageFolderDataset(Dataset):
    """Image dataset read from a ``root_dir/<label>/<image>`` directory tree.

    Every visible sub-directory of ``root_dir`` is treated as one class, and
    its name must be the integer class label (for example ``0`` and ``1``).
    Files whose names end in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive)
    become samples; everything else is ignored.

    Args:
        root_dir: Directory that contains one sub-directory per class.
        transform: Optional callable applied to the RGB PIL image of each
            sample before it is returned.

    Raises:
        ValueError: If a class directory name cannot be parsed as an integer.
    """

    # File-name suffixes (compared in lower case) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Only visible sub-directories are classes. Stray files and hidden
        # folders (e.g. ``.DS_Store``, ``.ipynb_checkpoints``) would otherwise
        # reach ``int()`` below and abort construction.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(root_dir, entry))
        )
        try:
            self.class_to_idx = {cls: int(cls) for cls in self.classes}
        except ValueError as err:
            raise ValueError(
                f"Class directories under {root_dir!r} must be named with "
                f"integer labels, found: {self.classes}"
            ) from err
        print(self.class_to_idx)

        self.images = []
        self.labels = []

        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]

            # os.listdir order is arbitrary; sorting keeps sample indices (and
            # therefore any seeded split of this dataset) reproducible.
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.images.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_idx)

    def __len__(self):
        """Return the number of image files that were discovered."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened with PIL, converted to RGB and passed through
        ``transform`` when one was supplied.
        """
        img_path = self.images[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

class GaussianNoise:
    """Transform that adds Gaussian noise to a tensor and clips it to [0, 1].

    Args:
        mean: Offset added to the sampled noise.
        std: Scale applied to the standard-normal samples.
    """

    def __init__(self, mean=0., std=0.1):
        self.mean, self.std = mean, std

    def __call__(self, img):
        """Return ``img`` plus freshly sampled noise, clamped to [0, 1]."""
        # One standard-normal sample per element, then scale and shift.
        perturbation = torch.randn(img.size())
        perturbation = perturbation * self.std + self.mean
        return (img + perturbation).clamp(0., 1.)

class SaltPepperNoise:
    """Transform that sets random tensor elements to 1 (salt) or 0 (pepper).

    Args:
        prob: Combined corruption probability; each of salt and pepper is
            drawn independently with probability ``prob / 2`` per element.
    """

    def __init__(self, prob=0.004):
        self.prob = prob

    def __call__(self, img):
        """Return a corrupted copy of ``img``; the input is left untouched."""
        per_kind = self.prob / 2
        salt_mask = torch.rand(img.size()) < per_kind
        pepper_mask = torch.rand(img.size()) < per_kind

        corrupted = img.clone()
        corrupted[salt_mask] = 1
        # Pepper is written last, so it wins where both masks are set.
        corrupted[pepper_mask] = 0
        return corrupted

class RandomNoise:
    """Transform that randomly applies Gaussian or salt-and-pepper noise.

    A first uniform draw decides whether any noise is applied (only when the
    draw exceeds 0.4); a second draw then picks Gaussian noise (draw above
    0.5) or salt-and-pepper noise.

    Args:
        gaussian_std: ``std`` passed to :class:`GaussianNoise`.
        sp_prob: ``prob`` passed to :class:`SaltPepperNoise`.
    """

    def __init__(self, gaussian_std=0.1, sp_prob=0.004):
        self.gaussian = GaussianNoise(std=gaussian_std)
        self.salt_pepper = SaltPepperNoise(prob=sp_prob)

    def __call__(self, img):
        """Return ``img`` unchanged or with one of the two noise types."""
        # Guard clause: no second draw is consumed when noise is skipped.
        if np.random.rand() <= 0.4:
            return img

        use_gaussian = np.random.rand() > 0.5
        noise_fn = self.gaussian if use_gaussian else self.salt_pepper
        return noise_fn(img)

def build_model(num_classes=2, pretrained=True, freeze_backbone=True):
    """Create an Inception-v4 classifier with a 1024-unit hidden head.

    The backbone's own classification layer is replaced by
    ``Linear(in_features, 1024) -> ReLU -> Linear(1024, num_classes)``.

    Args:
        num_classes: Number of output classes.
        pretrained: Forwarded to ``timm.create_model``.
        freeze_backbone: When true (default), every backbone parameter gets
            ``requires_grad = False`` so that only the new head is trained.
            Pass ``False`` to fine-tune the whole network.

    Returns:
        The model. Its output is raw logits, which is what
        ``nn.CrossEntropyLoss`` expects; apply ``softmax`` to the output
        explicitly if probabilities are needed.

    Note:
        The head is stored under the backbone's classifier attribute, so the
        ``state_dict`` keys of the head carry that attribute's name. Code that
        reloads a checkpoint must rebuild the same head in the same slot.
    """
    model = timm.create_model('inception_v4', pretrained=pretrained, num_classes=num_classes)
    original_head = model.get_classifier()
    in_features = original_head.in_features

    if freeze_backbone:
        # Freeze before the new head exists so the head stays trainable.
        # This only stops gradient updates; BatchNorm running statistics
        # still change while the model is in train() mode.
        for param in model.parameters():
            param.requires_grad = False

    # No Softmax layer: nn.CrossEntropyLoss applies log-softmax itself, and a
    # second softmax in front of it would squash the gradients.
    head = nn.Sequential(
        nn.Linear(in_features, 1024),
        nn.ReLU(),
        nn.Linear(1024, num_classes),
    )

    # The forward pass calls the module returned by get_classifier(). Find the
    # attribute that holds it rather than assuming it is named `classifier`;
    # assigning to an unused attribute would leave the new head out of the
    # forward pass entirely.
    head_attr = next(
        (name for name, child in model.named_children() if child is original_head),
        'classifier',
    )
    setattr(model, head_attr, head)
    return model

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; switched to train mode here.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy_percent, precision, recall)`` where the
        loss is averaged per sample and precision/recall are computed with
        ``average='binary'`` over all predictions of the epoch.
    """
    model.train()

    running_loss = 0
    n_correct = 0
    n_seen = 0
    seen_targets, seen_preds = [], []

    progress = tqdm(dataloader, desc='Training')
    for batch_inputs, batch_targets in progress:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        scores = model(batch_inputs)
        batch_loss = criterion(scores, batch_targets)
        batch_loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average is per sample.
        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value * batch_inputs.size(0)

        batch_preds = scores.max(1)[1]
        n_seen += batch_targets.size(0)
        n_correct += batch_preds.eq(batch_targets).sum().item()

        seen_targets.extend(batch_targets.cpu().numpy())
        seen_preds.extend(batch_preds.cpu().numpy())

        progress.set_postfix({'loss': batch_loss_value, 'acc': 100.*n_correct/n_seen})

    epoch_precision = precision_score(seen_targets, seen_preds, average='binary')
    epoch_recall = recall_score(seen_targets, seen_preds, average='binary')

    return running_loss / n_seen, 100.*n_correct/n_seen, epoch_precision, epoch_recall

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to eval mode here.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy_percent, precision, recall)`` where the
        loss is averaged per sample and precision/recall are computed with
        ``average='binary'``.
    """
    model.eval()

    running_loss = 0
    n_correct = 0
    n_seen = 0
    seen_targets, seen_preds = [], []

    with torch.no_grad():
        for batch_inputs, batch_targets in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            scores = model(batch_inputs)
            batch_loss = criterion(scores, batch_targets)

            # Weight the batch-mean loss by the batch size.
            running_loss += batch_loss.item() * batch_inputs.size(0)

            batch_preds = scores.max(1)[1]
            n_seen += batch_targets.size(0)
            n_correct += batch_preds.eq(batch_targets).sum().item()

            seen_targets.extend(batch_targets.cpu().numpy())
            seen_preds.extend(batch_preds.cpu().numpy())

    epoch_precision = precision_score(seen_targets, seen_preds, average='binary')
    epoch_recall = recall_score(seen_targets, seen_preds, average='binary')

    return running_loss / n_seen, 100.*n_correct/n_seen, epoch_precision, epoch_recall

def train_model(
    data_dir,
    valid_split=0.2,
    resume_model_path=None,
    enable_image_noise=False,
    num_epochs=70,
    batch_size=40,
    learning_rate=0.0001
):
    """Train the Inception-v4 classifier on an image folder and save checkpoints.

    The images under ``data_dir`` are split once (seed 42) into a training and
    a validation subset. The best model by validation accuracy is written to
    ``models/incep4_best.pth`` and the last model to ``models/incep4_final.pth``.
    Training stops early once the learning rate drops below ``1e-6``.

    Args:
        data_dir: Root directory passed to ``ImageFolderDataset``.
        valid_split: Fraction of the images held out for validation.
        resume_model_path: Optional ``state_dict`` checkpoint to start from.
        enable_image_noise: Add ``RandomNoise`` to the training transforms.
        num_epochs: Maximum number of epochs.
        batch_size: Batch size of both data loaders.
        learning_rate: Initial RMSprop learning rate.

    Returns:
        Dict with ``best_val_acc`` (percent), ``best_model_path`` (``None`` if
        no epoch improved on 0% accuracy) and ``final_model_path``.

    Raises:
        ValueError: If the split would leave the training or validation
            subset empty.
    """
    import copy
    from torch.utils.data import Subset

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    train_steps = [
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
    ]
    if enable_image_noise:
        # Appended only when requested: an identity ``Lambda`` placeholder is
        # a no-op and a lambda cannot be pickled for worker processes on
        # platforms that spawn them.
        train_steps.append(RandomNoise())
    train_steps += [
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(5),
        normalize,
    ]
    train_transforms = transforms.Compose(train_steps)

    valid_transforms = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        normalize,
    ])

    full_dataset = ImageFolderDataset(data_dir, transform=train_transforms)

    valid_size = int(valid_split * len(full_dataset))
    train_size = len(full_dataset) - valid_size
    if train_size <= 0 or valid_size <= 0:
        raise ValueError(
            f"valid_split={valid_split} on {len(full_dataset)} images leaves "
            f"{train_size} training and {valid_size} validation samples; "
            "both subsets must be non-empty."
        )

    train_dataset, valid_subset = random_split(
        full_dataset,
        [train_size, valid_size],
        generator=torch.Generator().manual_seed(42)
    )

    # Both subsets returned by random_split reference the SAME dataset object,
    # so changing ``.dataset.transform`` on one of them would also strip the
    # augmentation from the training subset. Give validation its own shallow
    # copy (the file lists are shared, only the transform differs).
    valid_view = copy.copy(full_dataset)
    valid_view.transform = valid_transforms
    valid_dataset = Subset(valid_view, valid_subset.indices)

    print(f"Total images: {len(full_dataset)}")
    print(f"Training images: {len(train_dataset)}")
    print(f"Validation images: {len(valid_dataset)}")

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    model = build_model(num_classes=2)
    model = model.to(device)

    if resume_model_path is not None:
        print(f'Resuming from checkpoint: {resume_model_path}')
        # map_location lets a checkpoint saved on GPU be resumed on CPU;
        # weights_only restricts unpickling to tensors/containers.
        state_dict = torch.load(resume_model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)

    criterion = nn.CrossEntropyLoss()
    optimizer = RMSprop(model.parameters(), lr=learning_rate)
    # The scheduler's ``verbose`` flag is deprecated in recent PyTorch
    # releases; learning-rate reductions are reported manually below.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=5)

    os.makedirs('models', exist_ok=True)

    best_val_acc = 0
    best_model_path = None

    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')

        train_loss, train_acc, train_precision, train_recall = train_epoch(model, train_loader, criterion, optimizer, device)
        print(f'Training Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%, Precision: {train_precision:.2f}, Recall: {train_recall:.2f}')

        val_loss, val_acc, val_precision, val_recall = validate(model, valid_loader, criterion, device)
        print(f'Validation Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%, Precision: {val_precision:.2f}, Recall: {val_recall:.2f}')

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f'Reducing learning rate from {lr_before:.4e} to {lr_after:.4e}')

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_path = 'models/incep4_best.pth'
            torch.save(model.state_dict(), best_model_path)
            print(f'Saved best model with validation accuracy: {val_acc:.2f}%')

        if lr_after < 1e-6:
            print('Learning rate too small. Stopping training.')
            break

    final_path = 'models/incep4_final.pth'
    torch.save(model.state_dict(), final_path)
    print(f'Saved final model to {final_path}')

    return {
        'best_val_acc': best_val_acc,
        'best_model_path': best_model_path,
        'final_model_path': final_path
    }

if __name__ == "__main__":
    # Experiment banner shown at the top of the cell output.
    print("Global Symmetry A1,B1")

    # Settings of this run, collected in one place and forwarded unchanged
    # to train_model().
    run_config = {
        'data_dir': '/content/ds1/train',
        'valid_split': 0.2,
        'enable_image_noise': True,
        'num_epochs': 5,
        'batch_size': 40,
        'learning_rate': 0.0001,
    }
    results = train_model(**run_config)


Global Symmetry A1,B1
Using device: cuda
{'0': 0, '1': 1}
Total images: 8000
Training images: 6400
Validation images: 1600


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/171M [00:00<?, ?B/s]




Epoch 1/5


Training: 100%|██████████| 160/160 [03:00<00:00,  1.13s/it, loss=0.0049, acc=93.8]

Training Loss: 0.1302, Acc: 93.84%, Precision: 0.95, Recall: 0.93





Validation Loss: 0.0234, Acc: 99.31%, Precision: 0.99, Recall: 1.00
Saved best model with validation accuracy: 99.31%

Epoch 2/5


Training: 100%|██████████| 160/160 [03:01<00:00,  1.14s/it, loss=0.00286, acc=99.6]

Training Loss: 0.0150, Acc: 99.58%, Precision: 1.00, Recall: 1.00





Validation Loss: 0.0039, Acc: 99.81%, Precision: 1.00, Recall: 1.00
Saved best model with validation accuracy: 99.81%

Epoch 3/5


Training: 100%|██████████| 160/160 [03:01<00:00,  1.13s/it, loss=0.192, acc=99.8]

Training Loss: 0.0089, Acc: 99.77%, Precision: 1.00, Recall: 1.00





Validation Loss: 0.0442, Acc: 98.50%, Precision: 0.98, Recall: 0.99

Epoch 4/5


Training: 100%|██████████| 160/160 [03:03<00:00,  1.15s/it, loss=0.000117, acc=99.9]

Training Loss: 0.0049, Acc: 99.89%, Precision: 1.00, Recall: 1.00





Validation Loss: 0.0007, Acc: 100.00%, Precision: 1.00, Recall: 1.00
Saved best model with validation accuracy: 100.00%

Epoch 5/5


Training: 100%|██████████| 160/160 [03:02<00:00,  1.14s/it, loss=0.063, acc=99.5]

Training Loss: 0.0156, Acc: 99.55%, Precision: 0.99, Recall: 1.00





Validation Loss: 0.0192, Acc: 99.44%, Precision: 0.99, Recall: 1.00
Saved final model to models/incep4_final.pth


In [None]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score
from PIL import Image
import os
import timm
from torch.utils.data import Dataset
import numpy as np
import torch.nn as nn
class ImageFolderDataset(Dataset):
    """Image dataset read from a ``root_dir/<label>/<image>`` directory tree.

    Every visible sub-directory of ``root_dir`` is treated as one class, and
    its name must be the integer class label (for example ``0`` and ``1``).
    Files whose names end in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive)
    become samples; everything else is ignored.

    Args:
        root_dir: Directory that contains one sub-directory per class.
        transform: Optional callable applied to the RGB PIL image of each
            sample before it is returned.

    Raises:
        ValueError: If a class directory name cannot be parsed as an integer.
    """

    # File-name suffixes (compared in lower case) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Only visible sub-directories are classes. Stray files and hidden
        # folders (e.g. ``.DS_Store``, ``.ipynb_checkpoints``) would otherwise
        # reach ``int()`` below and abort construction.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(root_dir, entry))
        )
        try:
            self.class_to_idx = {cls: int(cls) for cls in self.classes}
        except ValueError as err:
            raise ValueError(
                f"Class directories under {root_dir!r} must be named with "
                f"integer labels, found: {self.classes}"
            ) from err

        self.images = []
        self.labels = []

        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]

            # os.listdir order is arbitrary; sorting keeps the evaluation
            # order of the samples stable between runs.
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.images.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_idx)

    def __len__(self):
        """Return the number of image files that were discovered."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened with PIL, converted to RGB and passed through
        ``transform`` when one was supplied.
        """
        img_path = self.images[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

def load_model(num_classes=2, model_path='models/incep4_best.pth'):
    """Rebuild the Inception-v4 classifier and load trained weights into it.

    Two checkpoint layouts are accepted:

    * The 1024-unit head (``Linear -> ReLU -> Linear``) occupies the
      backbone's classifier slot, i.e. the checkpoint has keys of the form
      ``<slot>.0.weight``. The same head is rebuilt in that slot.
    * The classifier slot holds a single linear layer and the checkpoint
      additionally carries ``classifier.*`` entries for a module stored under
      a different attribute than the slot. The forward pass never calls that
      module, so those entries are dropped and predictions are unaffected.

    Args:
        num_classes: Number of output classes of the saved model.
        model_path: Path to a ``state_dict`` checkpoint.

    Returns:
        The model on CPU, in eval mode. Its output is raw class scores.
    """
    model = timm.create_model('inception_v4', pretrained=False, num_classes=num_classes)
    backbone_head = model.get_classifier()

    # Name of the attribute whose module the forward pass actually calls.
    head_attr = next(
        (name for name, child in model.named_children() if child is backbone_head),
        'classifier',
    )

    # map_location keeps GPU-saved checkpoints loadable on CPU-only machines;
    # weights_only restricts unpickling to tensors/containers and avoids the
    # FutureWarning that torch.load emits otherwise.
    state_dict = torch.load(model_path, map_location='cpu', weights_only=True)

    if f'{head_attr}.0.weight' in state_dict:
        hidden_head = nn.Sequential(
            nn.Linear(backbone_head.in_features, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes),
        )
        setattr(model, head_attr, hidden_head)
    elif head_attr != 'classifier':
        # Weights saved under `classifier.*` belong to a module outside the
        # forward path; strict loading would reject them as unexpected keys.
        state_dict = {
            key: value for key, value in state_dict.items()
            if not key.startswith('classifier.')
        }

    model.load_state_dict(state_dict)
    model.eval()
    return model

def test_model(model, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` and print accuracy/precision/recall.

    Args:
        model: Network to evaluate; moved to ``device`` and set to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device used for inference.

    Returns:
        None. The three metrics are printed with two decimals; precision and
        recall use ``average='binary'``.
    """
    model.to(device)
    model.eval()

    true_labels, predicted_labels = [], []

    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            scores = model(batch_images)
            # Predicted class = index of the largest score per row.
            batch_preds = scores.max(1)[1]

            true_labels.extend(batch_labels.cpu().numpy())
            predicted_labels.extend(batch_preds.cpu().numpy())

    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='binary')
    recall = recall_score(true_labels, predicted_labels, average='binary')

    print(f'Test Accuracy: {accuracy:.2f}')
    print(f'Test Precision: {precision:.2f}')
    print(f'Test Recall: {recall:.2f}')

if __name__ == "__main__":
    # Experiment banner shown at the top of the cell output.
    print("Global Symmetry D(A1)")

    # Directory holding the evaluation images, one sub-folder per label.
    test_dir = '/content/ds1/train_break'

    # Deterministic preprocessing only: resize, tensor conversion and
    # per-channel normalisation, no augmentation.
    imagenet_normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    test_transforms = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        imagenet_normalize,
    ])

    test_dataset = ImageFolderDataset(root_dir=test_dir, transform=test_transforms)
    test_loader = DataLoader(
        test_dataset,
        batch_size=40,
        shuffle=False,
        num_workers=2,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model(num_classes=2, model_path='models/incep4_best.pth')
    test_model(model, test_loader, device)


Global Symmetry D(A1)


  model.load_state_dict(torch.load(model_path))


Test Accuracy: 0.78
Test Precision: 0.97
Test Recall: 0.59


Acknowledgment:  
    This assignment is collaboratively done by:   
    Keerthana - 210290  
    Meghana - 210073  
    Madhuri - 210568  
    Shobhit Sharma - 210992
(after taking assistance of Danish)