## Group 36 - Task 2

### Paths to change
1. In cell 4, set `train_paths` and `eval_paths` to the correct dataset locations.
2. In cell 6, set `train_paths`, `eval_paths`, and the paths to `features_cache2.pth`, `eval_features_cache2.pth`, and `f10.pth` to the correct locations.

The files `features_cache2.pth` and `eval_features_cache2.pth` are generated by the feature-extraction step in cell 4.

We have provided links to these files so they can be used directly, which makes it possible to skip cell 4.

In [1]:
import torch
import numpy as np
from torchvision.models import efficientnet_b3
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight
from torchvision.models import efficientnet_b3
from PIL import Image


print("All packages imported successfully!")
import warnings

# Hide UserWarning messages whose originating module matches the DataLoader module.
warnings.filterwarnings("ignore", category=UserWarning, module="torch.utils.data.dataloader")

# Hide the FutureWarning that `torch.load` emits about `weights_only=False`.
warnings.filterwarnings("ignore", category=FutureWarning, message=r"You are using `torch.load` with `weights_only=False`.*")


All packages imported successfully!


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [3]:
# Feature Extractor - EfficientNet_B3

from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights

# EfficientNet feature extractor
def initialize_feature_extractor():
    """Build an EfficientNet-B3 with IMAGENET1K_V1 weights, ready for inference.

    Returns:
        The model, switched to evaluation mode and moved to the module-level
        ``device``.
    """
    pretrained_weights = EfficientNet_B3_Weights.IMAGENET1K_V1
    # `eval()` and `to()` both return the module itself, so the calls can be chained.
    return efficientnet_b3(weights=pretrained_weights).eval().to(device)

def get_transform():
    """Return the preprocessing pipeline applied to each image before inference.

    The pipeline resizes to 224x224, converts the image to a tensor, and then
    normalizes each channel with the fixed mean/std constants below.
    """
    resize = transforms.Resize((224, 224))
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return transforms.Compose([resize, to_tensor, normalize])

# Extract features using EfficientNet
def extract_features(data, feature_extractor, batch_size=32):
    """Run ``feature_extractor`` over every image in ``data`` and flatten the outputs.

    Args:
        data: Mapping with a 'data' entry holding the images (each item must be
            accepted by ``Image.fromarray``) and, optionally, a 'targets' entry
            holding the labels.
        feature_extractor: Callable model applied to each preprocessed batch.
        batch_size: Number of images per forward pass.

    Returns:
        A ``(features, labels)`` pair. ``features`` is a NumPy array with one
        flattened row per image, in the same order as ``data['data']``.
        ``labels`` is a NumPy array of the targets, or ``None`` when ``data``
        has no 'targets' entry.
    """
    transform = get_transform()
    labels = torch.tensor(data['targets']) if 'targets' in data else None

    # Preprocess every image up front and stack them into a single tensor.
    transformed_images = torch.stack(
        [transform(Image.fromarray(img)) for img in data['data']]
    )

    if labels is not None:
        dataset = TensorDataset(transformed_images, labels)  # images and labels
    else:
        dataset = TensorDataset(transformed_images)  # images only

    # shuffle=False keeps the output rows aligned with the input order.
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    features = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Extracting Features "):
            if labels is not None:
                batch_images, batch_labels = batch
                all_labels.append(batch_labels)
            else:
                batch_images = batch[0]

            # Run the model exactly once per batch and reuse the result for both
            # the batch-size lookup and the flattened output.
            outputs = feature_extractor(batch_images.to(device))
            flattened = outputs.view(outputs.size(0), -1)
            features.append(flattened.cpu().numpy())

    features = np.concatenate(features)
    all_labels = np.concatenate(all_labels) if labels is not None else None
    return features, all_labels


In [None]:
# Feature extraction: writes two files, features_cache2.pth and eval_features_cache2.pth.
# Both files are loaded again later in this notebook.
#
# The extracted features are already provided, so this cell can be skipped.

features_cache, eval_features_cache = {}, {}

train_paths = [f"dataset/part_two_dataset/train_data/{i}_train_data.tar.pth" for i in range(1, 11)]
eval_paths = (
    [f"dataset/part_one_dataset/eval_data/{i}_eval_data.tar.pth" for i in range(1, 11)]
    + [f"dataset/part_two_dataset/eval_data/{i}_eval_data.tar.pth" for i in range(1, 11)]
)

feature_extractor = initialize_feature_extractor()

# Training sets: cache index i is announced as dataset D(i+11).
for i in range(10):
    print(f"Precomputing features for training dataset D{i+11} : ")
    if i in features_cache:
        continue  # already computed
    current_data = torch.load(train_paths[i])
    current_features, current_targets = extract_features(current_data, feature_extractor)
    features_cache[i] = (current_features, current_targets)

# Evaluation sets: cache index j is announced as dataset D(j+1).
for j in range(20):
    print(f"Precomputing features for Eval dataset D{j+1} : ")
    if j in eval_features_cache:
        continue  # already computed
    eval_data = torch.load(eval_paths[j])
    eval_features, eval_targets = extract_features(eval_data, feature_extractor)
    eval_features_cache[j] = (eval_features, eval_targets)

# Persist both caches so later cells can load them instead of recomputing.
torch.save(features_cache, "features_cache2.pth")
torch.save(eval_features_cache, "eval_features_cache2.pth")

Precomputing features for training dataset D11 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.67it/s]


Precomputing features for training dataset D12 : 


Extracting Features : 100%|██████████| 79/79 [00:12<00:00,  6.14it/s]


Precomputing features for training dataset D13 : 


Extracting Features : 100%|██████████| 79/79 [00:12<00:00,  6.08it/s]


Precomputing features for training dataset D14 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  6.05it/s]


Precomputing features for training dataset D15 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  6.02it/s]


Precomputing features for training dataset D16 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.94it/s]


Precomputing features for training dataset D17 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.98it/s]


Precomputing features for training dataset D18 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.89it/s]


Precomputing features for training dataset D19 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.90it/s]


Precomputing features for training dataset D20 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.84it/s]


Precomputing features for Eval dataset D1 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.77it/s]


Precomputing features for Eval dataset D2 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.80it/s]


Precomputing features for Eval dataset D3 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.73it/s]


Precomputing features for Eval dataset D4 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.79it/s]


Precomputing features for Eval dataset D5 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.82it/s]


Precomputing features for Eval dataset D6 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.76it/s]


Precomputing features for Eval dataset D7 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.74it/s]


Precomputing features for Eval dataset D8 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.83it/s]


Precomputing features for Eval dataset D9 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.82it/s]


Precomputing features for Eval dataset D10 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.79it/s]


Precomputing features for Eval dataset D11 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.76it/s]


Precomputing features for Eval dataset D12 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.75it/s]


Precomputing features for Eval dataset D13 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.79it/s]


Precomputing features for Eval dataset D14 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.81it/s]


Precomputing features for Eval dataset D15 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.80it/s]


Precomputing features for Eval dataset D16 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.82it/s]


Precomputing features for Eval dataset D17 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.82it/s]


Precomputing features for Eval dataset D18 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.77it/s]


Precomputing features for Eval dataset D19 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.77it/s]


Precomputing features for Eval dataset D20 : 


Extracting Features : 100%|██████████| 79/79 [00:13<00:00,  5.83it/s]


In [4]:
def update_lwp(features, labels, last_model=None):
    """Update per-class mean prototypes with a new batch of labelled features.

    Args:
        features: 2-D NumPy array with one feature vector per row.
        labels: 1-D array-like of class labels, one per row of ``features``.
        last_model: Optional ``(prototypes, class_counts)`` pair from a previous
            call. ``prototypes`` maps class -> mean vector and ``class_counts``
            maps class -> number of samples behind that mean. When ``None``,
            the update starts from empty dictionaries.

    Returns:
        A new ``(prototypes, class_counts)`` pair. The dictionaries inside
        ``last_model`` are not modified, so earlier models kept by the caller
        remain valid snapshots.
    """
    if last_model is None:
        prototypes, class_counts = {}, {}
    else:
        # Copy the dicts so the previous model is not mutated in place. A shallow
        # copy is enough because prototype arrays are replaced below, never
        # edited element-wise.
        prototypes = dict(last_model[0])
        class_counts = dict(last_model[1])

    # Accept plain lists as well as arrays for the element-wise comparison below.
    labels = np.asarray(labels)

    for cls in np.unique(labels):
        class_features = features[labels == cls]
        new_count = len(class_features)
        new_mean = class_features.mean(axis=0)

        if cls in prototypes:
            # Combine the old and new means, weighted by their sample counts.
            old_count = class_counts[cls]
            total_count = old_count + new_count
            prototypes[cls] = (prototypes[cls] * old_count + new_mean * new_count) / total_count
            class_counts[cls] = total_count
        else:
            # First time this class is seen: its prototype is just the batch mean.
            prototypes[cls] = new_mean
            class_counts[cls] = new_count

    return prototypes, class_counts




# Predict using the Nearest Mean Classifier
def predict_lwp(features, prototypes):
    """Assign each feature vector the class of its nearest prototype.

    Args:
        features: 2-D NumPy array with one feature vector per row.
        prototypes: Non-empty mapping of class label -> prototype vector.

    Returns:
        1-D NumPy array with, for each row of ``features``, the key of the
        prototype at the smallest Euclidean distance.

    Raises:
        ValueError: If ``prototypes`` is empty.
    """
    if not prototypes:
        raise ValueError("prototypes must contain at least one class")

    # Keys and values are read from the same dict, so their orders line up.
    class_labels = np.array(list(prototypes.keys()))
    distances = np.stack(
        [np.linalg.norm(features - proto, axis=1) for proto in prototypes.values()],
        axis=1,
    )
    # Translate the column index of the nearest prototype back to its class label,
    # so the result is correct even when keys are not 0..K-1 in insertion order.
    return class_labels[np.argmin(distances, axis=1)]

# Self-training process
def task2(train_paths, eval_paths,start_model,features_cache,eval_features_cache):
    """Update the prototype model over ten cached datasets and record accuracies.

    For each ``i`` in 0..9, the current prototypes label ``features_cache[i]``,
    the model is updated with those predicted labels, and the updated model is
    scored on ``eval_features_cache[0]`` through ``eval_features_cache[i + 10]``.

    Args:
        train_paths: Accepted for interface compatibility; not read in the body.
        eval_paths: Accepted for interface compatibility; not read in the body.
        start_model: ``(prototypes, class_counts)`` pair used as the initial model.
        features_cache: Indexable by 0..9, yielding ``(features, targets)`` pairs.
        eval_features_cache: Indexable by 0..19, yielding ``(features, targets)`` pairs.

    Returns:
        A 10x20 NumPy array where entry ``[i, j]`` is the accuracy of the model
        after update ``i`` on evaluation set ``j``. Entries that are never
        evaluated stay at zero.
    """
    accuracies = np.zeros((10, 20))
    prototypes, class_counts = start_model
    models = [start_model]

    for i in range(10):
        print(f"Working for f{i+11}")

        # The cached targets are ignored: the update uses the model's own predictions.
        current_features, _ = features_cache[i]
        pseudo_labels = predict_lwp(current_features, prototypes)

        prototypes, class_counts = update_lwp(current_features, pseudo_labels, models[-1])
        models.append((prototypes, class_counts))

        # Score the updated model on the evaluation sets with index 0 .. i+10.
        num_eval_sets = i + 11
        for j in range(num_eval_sets):
            eval_features, eval_targets = eval_features_cache[j]
            accuracy = accuracy_score(eval_targets, predict_lwp(eval_features, prototypes))
            accuracies[i, j] = accuracy
            print(f"Evaluation with Eval {j+1} for F {i+11}: {accuracy} ")
        print("\n")

    return accuracies


In [5]:
# Dataset locations (handed to task2 below)
train_paths = [f"dataset/part_two_dataset/train_data/{i}_train_data.tar.pth" for i in range(1, 11)]
eval_paths = (
    [f"dataset/part_one_dataset/eval_data/{i}_eval_data.tar.pth" for i in range(1, 11)]
    + [f"dataset/part_two_dataset/eval_data/{i}_eval_data.tar.pth" for i in range(1, 11)]
)

# Starting model and the precomputed feature caches
f10 = torch.load("f10.pth")
features_cache = torch.load("features_cache2.pth")
eval_features_cache = torch.load("eval_features_cache2.pth")

# Run Task 2
accuracies = task2(train_paths, eval_paths,f10,features_cache,eval_features_cache)

# Build the accuracy table: rows are models f11..f20, columns are datasets D1..D20.
import pandas as pd
accuracy_df = pd.DataFrame(
    accuracies,
    index=[f"f{i}" for i in range(11, 21)],
    columns=[f"D{i}" for i in range(1, 21)],
)
# Blank out cells whose magnitude is at most 1e-4 so they print as empty strings.
accuracy_df = accuracy_df.where(accuracy_df.abs() > 1e-4, other=pd.NA).fillna("")

print("\n")
pd.set_option("display.colheader_justify", "center")  # center the column headers
pd.set_option("display.width", None)  # do not limit the display width
pd.set_option("display.float_format", "{: .4f}".format)  # four decimal places
print(accuracy_df)


Working for f11
Evaluation with Eval 1 for F 11: 0.8696 
Evaluation with Eval 2 for F 11: 0.8792 
Evaluation with Eval 3 for F 11: 0.8712 
Evaluation with Eval 4 for F 11: 0.8772 
Evaluation with Eval 5 for F 11: 0.8824 
Evaluation with Eval 6 for F 11: 0.8752 
Evaluation with Eval 7 for F 11: 0.8716 
Evaluation with Eval 8 for F 11: 0.872 
Evaluation with Eval 9 for F 11: 0.8712 
Evaluation with Eval 10 for F 11: 0.886 
Evaluation with Eval 11 for F 11: 0.724 


Working for f12
Evaluation with Eval 1 for F 12: 0.8656 
Evaluation with Eval 2 for F 12: 0.878 
Evaluation with Eval 3 for F 12: 0.8676 
Evaluation with Eval 4 for F 12: 0.8772 
Evaluation with Eval 5 for F 12: 0.8792 
Evaluation with Eval 6 for F 12: 0.8736 
Evaluation with Eval 7 for F 12: 0.8684 
Evaluation with Eval 8 for F 12: 0.87 
Evaluation with Eval 9 for F 12: 0.8676 
Evaluation with Eval 10 for F 12: 0.882 
Evaluation with Eval 11 for F 12: 0.7212 
Evaluation with Eval 12 for F 12: 0.4752 


Working for f13
Evaluat