### Task 2

In [50]:
# Import necessary libraries
import torch
import os
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from collections import defaultdict

print("All Necessary Libraries Imported")

All Necessary Libraries Imported


In [51]:
# Loading Models from task 1 to use f10 in this task
# NOTE(review): torch.load relies on pickle by default, so only load this
# checkpoint from a trusted location. The file layout is not defined in this
# notebook; the recorded output below shows a list with 10 entries, and the
# last entry is what later gets passed to Task 2 as the starting model.
models_task1 = torch.load("/content/drive/MyDrive/models_task1.pth")
# Quick sanity check of the container type and the number of stored models.
print(type(models_task1))
print(len(models_task1))

<class 'list'>
10


  models_task1 = torch.load("/content/drive/MyDrive/models_task1.pth")


In [52]:
# Initialize device
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Device Selected {device}")

Device Selected cuda


In [53]:
# Define transformations
# Each image is resized to a fixed 200x200, converted to a tensor, and then
# normalized per channel with the mean/std values given below.
_preprocessing_steps = [
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocessing_steps)

# Custom Dataset
class CustomImageDataset(Dataset):
    """Dataset wrapper around a dict holding raw images and optional targets.

    Args:
        data: Mapping with a ``'data'`` entry (an indexable collection whose
            items are passed to ``Image.fromarray``) and, optionally, a
            ``'targets'`` entry that is converted to a tensor.
        transform: Optional callable applied to each PIL image.
    """

    def __init__(self, data, transform=None):
        has_targets = 'targets' in data
        self.images = data['data']
        self.labels = torch.tensor(data['targets']) if has_targets else None
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; the label is ``-1`` when no targets were given."""
        picture = Image.fromarray(self.images[idx])
        if self.transform:
            picture = self.transform(picture)
        if self.labels is None:
            # Unlabeled data: use -1 as a placeholder so batches can still be collated.
            return picture, -1
        return picture, self.labels[idx]

# Feature extraction function
def extract_features(data, feature_extractor, batch_size=32):
    """Run ``feature_extractor`` over ``data`` and collect flattened outputs.

    Args:
        data: Dict accepted by ``CustomImageDataset`` (``'data'`` plus
            optional ``'targets'``).
        feature_extractor: Callable applied to each image batch after the
            batch has been moved to the module-level ``device``.
        batch_size: Number of images per forward pass.

    Returns:
        Tuple ``(features, labels)``: the per-batch outputs flattened to one
        row per image, moved to the CPU and concatenated, and the
        concatenated labels in the same (unshuffled) order.
    """
    loader = DataLoader(
        CustomImageDataset(data, transform=transform),
        batch_size=batch_size,
        shuffle=False,  # keep the dataset order so features line up with labels
    )

    feature_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            outputs = feature_extractor(batch_images)
            flattened = outputs.view(batch_images.size(0), -1)  # one row per image
            feature_chunks.append(flattened.cpu())
            label_chunks.append(batch_labels)

    all_features = torch.cat(feature_chunks)
    all_labels = torch.cat(label_chunks)
    return all_features, all_labels

# Initialize feature extractor
def initialize_feature_extractor():
    """Build a torchvision ConvNeXt-Base with IMAGENET1K_V1 weights.

    Returns:
        The model, moved to the module-level ``device`` and switched to
        evaluation mode.
    """
    pretrained_weights = models.ConvNeXt_Base_Weights.IMAGENET1K_V1
    backbone = models.convnext_base(weights=pretrained_weights)
    backbone = backbone.to(device)
    backbone.eval()
    return backbone

In [54]:
import time

# Build the feature extractor once; it is reused for every dataset below.
print("Initializing feature extractor...")
feature_extractor = initialize_feature_extractor()
print("Feature extractor initialized!\n")

# In-memory caches keyed by 0-based dataset index -> (features, targets).
train_features_cache = {}
eval_features_cache = {}

# Training files for D11..D20 (files 1..10 of part_two_dataset).
data_paths_task2 = [f"/content/drive/MyDrive/dataset/part_two_dataset/train_data/{i+1}_train_data.tar.pth" for i in range(10)]
# Held-out files: the 10 from part_one_dataset followed by the 10 from part_two_dataset.
eval_paths_task2 = [
    f"/content/drive/MyDrive/dataset/part_one_dataset/eval_data/{i+1}_eval_data.tar.pth" for i in range(10)
] + [
    f"/content/drive/MyDrive/dataset/part_two_dataset/eval_data/{i+1}_eval_data.tar.pth" for i in range(10)
]  # 20 heldout datasets

# Extract and cache features for every training dataset.
print("Caching features for training datasets...\n")
for i, train_path in enumerate(data_paths_task2):
    print(f"Processing training dataset D{i+11}...")
    current_data = torch.load(train_path)
    current_features, current_targets = extract_features(current_data, feature_extractor, batch_size=256)
    train_features_cache[i] = (current_features, current_targets)  # features and targets for D{i+11}


# Extract and cache features for every held-out dataset.
print("Caching features for evaluation datasets...\n")
for j, eval_path in enumerate(eval_paths_task2):
    print(f"Processing evaluation dataset D̂{j+1}...")
    eval_data = torch.load(eval_path)
    eval_features, eval_targets = extract_features(eval_data, feature_extractor, batch_size=256)
    eval_features_cache[j] = (eval_features, eval_targets)

# Persist both caches so later cells can reload them instead of recomputing.
print("Saving cached features to disk...")
torch.save(train_features_cache, "/content/drive/MyDrive/train_features_cache2.pth")
torch.save(eval_features_cache, "/content/drive/MyDrive/eval_features_cache2.pth")
print("Cached features saved successfully!")

Initializing feature extractor...
Feature extractor initialized!

Caching features for training datasets...

Processing training dataset D11...


  current_data = torch.load(data_paths_task2[i])


Processing training dataset D12...
Processing training dataset D13...
Processing training dataset D14...
Processing training dataset D15...
Processing training dataset D16...
Processing training dataset D17...
Processing training dataset D18...
Processing training dataset D19...
Processing training dataset D20...
Caching features for evaluation datasets...

Processing evaluation dataset D̂1...


  eval_data = torch.load(eval_paths_task2[j])


Processing evaluation dataset D̂2...
Processing evaluation dataset D̂3...
Processing evaluation dataset D̂4...
Processing evaluation dataset D̂5...
Processing evaluation dataset D̂6...
Processing evaluation dataset D̂7...
Processing evaluation dataset D̂8...
Processing evaluation dataset D̂9...
Processing evaluation dataset D̂10...
Processing evaluation dataset D̂11...
Processing evaluation dataset D̂12...
Processing evaluation dataset D̂13...
Processing evaluation dataset D̂14...
Processing evaluation dataset D̂15...
Processing evaluation dataset D̂16...
Processing evaluation dataset D̂17...
Processing evaluation dataset D̂18...
Processing evaluation dataset D̂19...
Processing evaluation dataset D̂20...
Saving cached features to disk...
Cached features saved successfully!


In [56]:
# Load cached features
# These are the same two files the feature-caching cell writes with torch.save:
# dicts mapping a 0-based dataset index to a (features, targets) tuple.
# NOTE(review): torch.load relies on pickle by default; only load trusted files.
train_features_cache = torch.load("/content/drive/MyDrive/train_features_cache2.pth")
eval_features_cache = torch.load("/content/drive/MyDrive/eval_features_cache2.pth")

# Learning with Prototypes (LWP) model
def train_lwp(features, labels):
    """Compute one prototype (mean feature vector) per class.

    Args:
        features: Tensor with one row per sample.
        labels: 1-D tensor with one class id per row of ``features``.

    Returns:
        Dict mapping each distinct label (as a Python scalar) to the mean of
        the rows of ``features`` carrying that label.
    """
    return {
        cls.item(): features[labels == cls].mean(dim=0)
        for cls in torch.unique(labels)
    }

# Predict using Learning with Prototypes (Batch Processing)
def predict_lwp(features, class_means):
    """Assign every row of ``features`` to the class with the nearest prototype.

    Args:
        features: 2-D tensor with one row per sample.
        class_means: Dict mapping class id -> prototype tensor.

    Returns:
        1-D tensor of class ids, one per row of ``features``.
    """
    # Class ids and prototypes are taken from the same dict, so position k in
    # one corresponds to position k in the other.
    class_ids = torch.tensor(list(class_means.keys()))
    prototypes = torch.stack(list(class_means.values()))

    # Pairwise distances (torch.cdist default p-norm), then nearest prototype per sample.
    nearest = torch.cdist(features, prototypes).argmin(dim=1)
    return class_ids[nearest]

  train_features_cache = torch.load("/content/drive/MyDrive/train_features_cache2.pth")
  eval_features_cache = torch.load("/content/drive/MyDrive/eval_features_cache2.pth")


In [73]:
# Train Learning with Prototypes on all datasets for Task 2
def task_2_cached(data_paths, heldout_paths, initial_model, train_features_cache, eval_features_cache):
    """Sequentially adapt an LwP model on ten datasets using pseudo-labels.

    For each round ``i`` in 0..9 the latest model labels the cached features of
    that round's dataset, per-class prototypes are recomputed from those
    pseudo-labels, and each is averaged (equal weight) with the previous
    prototype of the same class. The resulting model is then evaluated on
    held-out datasets ``0 .. i+10`` from ``eval_features_cache``.

    Args:
        data_paths: Unused; kept so existing callers keep working.
        heldout_paths: Unused; kept so existing callers keep working.
        initial_model: Dict mapping class id -> prototype tensor used to
            pseudo-label the first dataset.
        train_features_cache: Mapping ``i -> (features, labels)`` for
            ``i`` in 0..9. Only the features are used here.
        eval_features_cache: Mapping ``j -> (features, labels)`` for
            ``j`` in 0..19.

    Returns:
        Tuple ``(models_task2, accuracy_matrix)``. ``models_task2`` is
        ``[initial_model]`` followed by the ten updated models (plain dicts).
        ``accuracy_matrix`` has ten rows; row ``i`` holds ``i + 11``
        accuracies in percent.
    """
    print("Starting Task 2 with cached features...\n")
    models_task2 = [initial_model]  # Start with the initial model
    accuracy_matrix = []

    for i in range(10):
        # Only the features are needed; the stored labels are never used for training here.
        train_features, _ = train_features_cache[i]
        previous_model = models_task2[-1]

        # Predict labels for D{i+11} using the last model
        pseudo_labels = predict_lwp(train_features, previous_model)

        # Recompute prototypes from the pseudo-labelled features
        updated_class_means = train_lwp(train_features, pseudo_labels)

        # Merge into a plain dict so that looking up an unknown class raises
        # instead of silently yielding an empty tensor.
        new_model = {}
        for cls, new_mean in updated_class_means.items():
            if cls in previous_model:
                # Equal-weight average of the old and the new prototype
                new_model[cls] = (previous_model[cls] + new_mean) / 2
            else:
                new_model[cls] = new_mean
        # Classes that received no pseudo-labels in this round keep their
        # previous prototype; otherwise they would vanish from every later model.
        for cls, old_mean in previous_model.items():
            if cls not in new_model:
                new_model[cls] = old_mean
        models_task2.append(new_model)

        # Evaluate the model on all heldout datasets D̂1 to D̂{i+11} using cached evaluation features
        print(f"Evaluating model f{i+11} on datasets...")
        row_accuracies = []
        for j in range(i + 11):
            eval_features, eval_labels = eval_features_cache[j]
            predictions = predict_lwp(eval_features, new_model)
            accuracy = (predictions == eval_labels).float().mean().item() * 100
            row_accuracies.append(accuracy)
            print(f"Evaluation on D̂{j+1}: Accuracy = {accuracy:.2f}%")
        accuracy_matrix.append(row_accuracies)
        print(f"Model f{i+11} evaluation completed. Current accuracy matrix row: {row_accuracies}")

    # Print accuracy matrix
    print("\nTask 2 Accuracy Matrix:")
    print("     " + "  ".join([f"D̂{i+1}" for i in range(20)]))
    for i, row in enumerate(accuracy_matrix):
        print(f"f{i+11}: " + "  ".join([f"{acc:.2f}%" for acc in row]))

    print("\nTask 2 completed successfully!")
    return models_task2, accuracy_matrix


In [74]:
# Run Task 2 using the initial model and cached data
# The last Task 1 model is the starting point for the pseudo-label updates.
models_task2, accuracy_matrix_task2 = task_2_cached(
    data_paths=data_paths_task2,
    heldout_paths=eval_paths_task2,
    initial_model=models_task1[-1],
    train_features_cache=train_features_cache,
    eval_features_cache=eval_features_cache,
)

# Save the Task 2 models
torch.save(models_task2, "/content/drive/MyDrive/models_task2.pth")

Starting Task 2 with cached features...

Evaluating model f11 on datasets...
Evaluation on D̂1: Accuracy = 84.76%
Evaluation on D̂2: Accuracy = 86.24%
Evaluation on D̂3: Accuracy = 84.96%
Evaluation on D̂4: Accuracy = 86.20%
Evaluation on D̂5: Accuracy = 85.40%
Evaluation on D̂6: Accuracy = 85.40%
Evaluation on D̂7: Accuracy = 85.92%
Evaluation on D̂8: Accuracy = 85.48%
Evaluation on D̂9: Accuracy = 85.60%
Evaluation on D̂10: Accuracy = 85.56%
Evaluation on D̂11: Accuracy = 72.76%
Model f11 evaluation completed. Current accuracy matrix row: [84.7599983215332, 86.23999953269958, 84.96000170707703, 86.19999885559082, 85.39999723434448, 85.39999723434448, 85.92000007629395, 85.47999858856201, 85.6000006198883, 85.55999994277954, 72.75999784469604]
Evaluating model f12 on datasets...
Evaluation on D̂1: Accuracy = 82.96%
Evaluation on D̂2: Accuracy = 83.80%
Evaluation on D̂3: Accuracy = 83.16%
Evaluation on D̂4: Accuracy = 84.48%
Evaluation on D̂5: Accuracy = 83.68%
Evaluation on D̂6: Accur