In [10]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import json
import os
from tqdm import tqdm # Optional: for a progress bar
import re # For parsing the label string

In [11]:
# Root directory handed to torchvision's ImageFolder (one sub-folder per class).
DATASET_PATH = "../TestDataSet"
# JSON file holding a list of label strings such as "401: accordion".
# The entries must be in the SAME ORDER as ImageFolder's class order, because
# entry i is used as the ImageNet index for ImageFolder class i.
DATASET_SPECIFIC_JSON_PATH = "../TestDataSet/labels_list.json"
# Weights identifier passed to torchvision.models.resnet34(weights=...).
MODEL_WEIGHTS = 'IMAGENET1K_V1'
# DataLoader batch size.
BATCH_SIZE = 32
# DataLoader worker count; 0 keeps data loading in the main process.
NUM_WORKERS = 0

In [12]:
# --- Device Setup ---
# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# --- Load Pre-trained Model ---
# Build ResNet-34 with the configured weights, move it to the chosen device and
# switch it to inference mode. Both .to() and .eval() return the module itself,
# so the chained form leaves `model` bound to the same object as before.
print(f"Loading ResNet-34 model with {MODEL_WEIGHTS} weights...")
model = torchvision.models.resnet34(weights=MODEL_WEIGHTS).to(device).eval()
print("Model loaded successfully.")

Using device: cuda
Loading ResNet-34 model with IMAGENET1K_V1 weights...
Model loaded successfully.


In [13]:
# Per-channel mean / std used for input normalisation (the usual ImageNet RGB
# statistics). Kept as NumPy arrays under their original names.
mean_norms = np.array([0.485, 0.456, 0.406])
std_norms = np.array([0.229, 0.224, 0.225])

# Evaluation-time preprocessing: convert the image to a tensor, then normalise
# each channel. No resize or crop step is applied, so images reach the model at
# their stored resolution.
eval_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_norms, std=std_norms),
    ]
)

In [14]:
# --- Load Dataset using ImageFolder ---
# ImageFolder sorts the class sub-folders alphabetically and assigns them the
# indices 0, 1, 2, ... in that order; those indices are NOT ImageNet indices.
print(f"Loading dataset from: {DATASET_PATH}")
if not os.path.isdir(DATASET_PATH):
    raise FileNotFoundError(f"Dataset directory not found at '{DATASET_PATH}'.")

dataset = torchvision.datasets.ImageFolder(
    root=DATASET_PATH,
    transform=eval_transforms
)

# shuffle=False keeps evaluation order deterministic.
# Page-locked (pinned) host memory only helps host-to-GPU copies; requesting it
# on a CPU-only machine does nothing useful and makes PyTorch emit a warning, so
# it is enabled only when CUDA is available (same condition used to pick the device).
test_loader = DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=torch.cuda.is_available()
)

num_imagefolder_classes = len(dataset.classes)
print(f"Dataset loaded: {len(dataset)} images found in {num_imagefolder_classes} classes.")
print("ImageFolder assigned indices based on this alphabetical folder order:", dataset.classes) # Display the order


Loading dataset from: ../TestDataSet
Dataset loaded: 500 images found in 100 classes.
ImageFolder assigned indices based on this alphabetical folder order: ['n02672831', 'n02676566', 'n02687172', 'n02690373', 'n02692877', 'n02699494', 'n02701002', 'n02704792', 'n02708093', 'n02727426', 'n02730930', 'n02747177', 'n02749479', 'n02769748', 'n02776631', 'n02777292', 'n02782093', 'n02783161', 'n02786058', 'n02787622', 'n02788148', 'n02790996', 'n02791124', 'n02791270', 'n02793495', 'n02794156', 'n02795169', 'n02797295', 'n02799071', 'n02802426', 'n02804414', 'n02804610', 'n02807133', 'n02808304', 'n02808440', 'n02814533', 'n02814860', 'n02815834', 'n02817516', 'n02823428', 'n02823750', 'n02825657', 'n02834397', 'n02835271', 'n02837789', 'n02840245', 'n02841315', 'n02843684', 'n02859443', 'n02860847', 'n02865351', 'n02869837', 'n02870880', 'n02871525', 'n02877765', 'n02879718', 'n02883205', 'n02892201', 'n02892767', 'n02894605', 'n02895154', 'n02906734', 'n02909870', 'n02910353', 'n02916936'

In [15]:
# --- Build the ImageFolder-index -> ImageNet-index mapping ---
# ImageFolder numbers the classes 0..N-1, while the model predicts ImageNet
# class indices. The JSON file is expected to be a list of strings such as
# "401: accordion"; entry i is taken as the ImageNet index of ImageFolder class i.
# That ordering is an assumption this cell cannot verify on its own.
#
# Fatal problems raise an exception rather than calling exit(): in a Jupyter
# kernel, exit() shuts the kernel down instead of reporting an error in the cell.
print(f"Loading dataset-specific labels from: {DATASET_SPECIFIC_JSON_PATH}")
imagefolder_idx_to_imagenet_idx = {}

# Only the file read/decode is guarded; later validation errors propagate as-is.
try:
    with open(DATASET_SPECIFIC_JSON_PATH, 'r', encoding='utf-8') as f:
        # Assuming the JSON is a list of strings like "401: accordion"
        ordered_labels_from_json = json.load(f)
except FileNotFoundError:
    print("\n--- ERROR ---")
    print(f"Dataset-specific label file '{DATASET_SPECIFIC_JSON_PATH}' not found.")
    print("Please ensure this file exists and the path is correct.")
    print("-------------")
    raise
except Exception as e:
    print(f"An error occurred while loading or processing {DATASET_SPECIFIC_JSON_PATH}: {e}")
    raise

if not isinstance(ordered_labels_from_json, list):
    raise ValueError("Expected the JSON file to contain a list of labels.")

num_json_labels = len(ordered_labels_from_json)

# A count mismatch means the positional pairing cannot be fully right; this is
# kept as a loud warning (best effort), not a hard failure.
if num_json_labels != num_imagefolder_classes:
    print("\n--- WARNING ---")
    print("Mismatch in the number of classes!")
    print(f"ImageFolder found {num_imagefolder_classes} subdirectories (classes) in '{DATASET_PATH}'.")
    print(f"The JSON file '{DATASET_SPECIFIC_JSON_PATH}' contains {num_json_labels} label entries.")
    print("The 'same order' assumption requires these numbers to match.")
    print("Proceeding, but the mapping might be incorrect.")
    print("-------------")

print("Building mapping assuming ImageFolder order matches JSON order...")
parse_errors = 0
# zip() stops at the shorter of the two sequences, so only positions present on
# both sides are paired.
for i, label_str in zip(range(num_imagefolder_classes), ordered_labels_from_json):
    # The ImageNet index is the leading integer before the first colon.
    # Non-string entries are treated as unparseable instead of raising.
    match = re.match(r"\s*(\d+)\s*:", label_str) if isinstance(label_str, str) else None
    if match is None:
        print(f"Error parsing label at index {i} from JSON ('{label_str}'): "
              f"Could not parse index from label string: '{label_str}'")
        parse_errors += 1
        continue
    imagefolder_idx_to_imagenet_idx[i] = int(match.group(1))

# Check for an empty mapping FIRST: when every entry fails to parse there are
# parse errors *and* no mappings, and evaluation cannot proceed at all.
if not imagefolder_idx_to_imagenet_idx:
    raise RuntimeError("Could not build any mappings. Check JSON format and parsing logic.")

if parse_errors > 0:
    print(f"Warning: Encountered {parse_errors} errors while parsing labels from JSON.")
    print("Evaluation might be inaccurate.")
else:
    print("Mapping built successfully based on order correspondence.")


Loading dataset-specific labels from: ../TestDataSet/labels_list.json
Building mapping assuming ImageFolder order matches JSON order...
Mapping built successfully based on order correspondence.


In [16]:
# --- Evaluation Loop ---
# Runs the model over test_loader and accumulates top-1 / top-5 hit counts.
# Labels coming out of the loader are ImageFolder indices; each one is translated
# to its ImageNet index through imagefolder_idx_to_imagenet_idx before comparing
# with the model's predictions.
top1_correct = 0
top5_correct = 0
total_samples = 0
# Samples whose ImageFolder class has no mapping entry. They are excluded from
# the accuracy denominator, so they are counted and reported rather than being
# dropped silently.
skipped_samples = 0

print("Starting evaluation...")
with torch.no_grad():
    for inputs, imagefolder_labels in tqdm(test_loader, desc="Evaluating Batches"):
        inputs = inputs.to(device)
        # imagefolder_labels contains the indices assigned by ImageFolder (0, 1, 2...)

        # Positions within the batch that have a mapping, and their ImageNet labels
        valid_indices_batch = []
        true_imagenet_labels_list = []

        for i, label_idx in enumerate(imagefolder_labels.tolist()):
            imagenet_idx = imagefolder_idx_to_imagenet_idx.get(label_idx)
            if imagenet_idx is None:
                skipped_samples += 1
                continue
            valid_indices_batch.append(i)
            true_imagenet_labels_list.append(imagenet_idx)

        # If no valid samples in this batch, skip
        if not valid_indices_batch:
            continue

        # Select only the inputs corresponding to valid labels
        valid_inputs = inputs[valid_indices_batch]
        true_imagenet_labels = torch.tensor(true_imagenet_labels_list, dtype=torch.long, device=device)

        # Indices of the 5 highest-scoring classes per sample, best first
        outputs = model(valid_inputs)
        _, topk_preds = torch.topk(outputs, 5, dim=1)

        # Update counts based on the number of *valid* samples processed in this batch
        batch_valid_samples = true_imagenet_labels.size(0)
        total_samples += batch_valid_samples

        # Top-1 Accuracy: the best prediction equals the true label
        top1_correct += torch.eq(topk_preds[:, 0], true_imagenet_labels).sum().item()

        # Top-5 Accuracy: the true label appears anywhere among the 5 predictions.
        # top-k indices are distinct, so each sample contributes at most one hit.
        top5_hits = torch.eq(topk_preds, true_imagenet_labels.unsqueeze(1)).any(dim=1)
        top5_correct += top5_hits.sum().item()

if skipped_samples > 0:
    print(f"Warning: skipped {skipped_samples} samples whose ImageFolder index has no ImageNet mapping.")


Starting evaluation...


Evaluating Batches: 100%|██████████| 16/16 [00:01<00:00, 11.65it/s]


In [17]:
# --- Calculate and Report Accuracy ---
# Convert the hit counters into percentages; with nothing evaluated, report 0
# for both metrics instead of dividing by zero.
if total_samples > 0:
    top1_accuracy = (top1_correct / total_samples) * 100
    top5_accuracy = (top5_correct / total_samples) * 100
else:
    print("\nNo samples were processed successfully. Check JSON path, format, parsing, and folder structure.")
    top1_accuracy = top5_accuracy = 0

# Assemble the whole report first, then emit it in a single print call.
report_lines = [
    "\n--- Evaluation Results ---",
    f"Total Images Evaluated: {total_samples}",
    f"Top-1 Accuracy: {top1_accuracy:.2f}% ({top1_correct}/{total_samples})",
    f"Top-5 Accuracy: {top5_accuracy:.2f}% ({top5_correct}/{total_samples})",
    "\nNOTE: Accuracy relies on the assumption that the alphabetical order of folders",
    f"in '{DATASET_PATH}' exactly matches the order of labels in '{DATASET_SPECIFIC_JSON_PATH}'.",
]
print("\n".join(report_lines))


--- Evaluation Results ---
Total Images Evaluated: 500
Top-1 Accuracy: 76.00% (380/500)
Top-5 Accuracy: 94.20% (471/500)

NOTE: Accuracy relies on the assumption that the alphabetical order of folders
in '../TestDataSet' exactly matches the order of labels in '../TestDataSet/labels_list.json'.
