## Disentangling zoom transforms
`%` of dataset that are solvable with certain group of transforms

In [1]:
import json
import os
import pickle
import random
from functools import partial
from glob import glob

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import timm
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.transforms.functional as fv
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from PIL import Image
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import matplotlib.patches as patches

from collections import defaultdict

sns.set()

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)


### Helpers

In [2]:
dataset_names = [
    "imagenet_1k",
    "imagenet_1k_real",
    "imagenet_IN_plus_real",
    "imagenet_r",
    "imagenet_a",
    "imagenet_sketch",
    "objectnet",
]

model_names = ["resnet18", "resnet50", "vit32", "vgg16", "alexnet", "clip_vit_l_14"]

model_name_lookup = {
    "resnet18": "resnet18",
    "resnet50": "resnet50",
    "vit32": "vit_b_32",
    "vgg16": "vgg16",
    "alexnet": "alexnet",
    "clip_vit_l_14": "clip_vit_l_14"
}

pretty_model_names = {
    "resnet18": "ResNet-18",
    "resnet50": "ResNet-50",
    "vit32": "ViT-B/32",
    "vgg16": "VGG-16",
    "alexnet": "AlexNet",
    "clip_vit_l_14": "CLIP ViT-L/14"
}

pretty_dataset_names = {
    "imagenet_1k": "ImageNet",
    "imagenet_1k_real": "ImageNet ReaL",
    "imagenet_IN_plus_real": "ImageNet + ReaL",
    "imagenet_r": "ImageNet-R",
    "imagenet_a": "ImageNet-A",
    "imagenet_sketch": "ImageNet-Sketch",
    "objectnet": "ObjectNet",
}


### Load results

In [3]:
with open('../data/correctness.pkl', 'rb') as f:
    correctness_dfs = pickle.load(f)

## Classification Results

1. Standard 1-crop accuracy
2. Maximum possible accuracy

In [5]:
standard_accuracy = defaultdict(dict)
max_possible_accuracy = defaultdict(dict)

for dataset in dataset_names:
    for model in model_names:
        standard_accuracy[dataset][model] = 100* correctness_dfs[dataset][model].loc['LOC:1_1_Size:256'].mean()
        max_possible_accuracy[dataset][model]  = 100*np.mean(correctness_dfs[dataset][model].max(0))

In [6]:
s_acc_df = pd.DataFrame(standard_accuracy)
s_acc_df.index = [pretty_model_names[i] for i in s_acc_df.index]
s_acc_df.columns = [pretty_dataset_names[i] for i in s_acc_df.columns]
s_acc_df.round(2)

Unnamed: 0,ImageNet,ImageNet ReaL,ImageNet + ReaL,ImageNet-R,ImageNet-A,ImageNet-Sketch,ObjectNet
ResNet-18,69.45,76.94,76.47,32.14,1.37,19.41,27.59
ResNet-50,75.75,82.63,82.97,35.39,0.21,22.91,36.18
ViT-B/32,75.75,81.89,82.59,41.29,9.64,26.83,30.89
VGG-16,71.37,78.9,78.52,26.98,2.69,16.78,28.32
AlexNet,56.16,62.67,61.76,21.1,1.75,10.05,14.23
CLIP ViT-L/14,75.04,80.69,81.95,86.83,71.28,58.23,66.32


In [7]:
m_acc_df = pd.DataFrame(max_possible_accuracy)
m_acc_df.index = [pretty_model_names[i] for i in m_acc_df.index]
m_acc_df.columns = [pretty_dataset_names[i] for i in m_acc_df.columns]
m_acc_df.round(2)

Unnamed: 0,ImageNet,ImageNet ReaL,ImageNet + ReaL,ImageNet-R,ImageNet-A,ImageNet-Sketch,ObjectNet
ResNet-18,95.15,97.76,97.55,66.89,58.87,43.68,71.44
ResNet-50,96.78,98.62,98.57,68.84,66.68,47.64,76.83
ViT-B/32,97.19,98.75,98.91,75.58,78.03,55.99,79.28
VGG-16,95.3,97.9,97.66,60.88,58.27,39.9,71.85
AlexNet,90.03,93.85,93.48,55.52,42.23,29.53,59.65
CLIP ViT-L/14,96.78,98.7,98.8,99.2,98.49,89.0,93.13


### Load Min-Cover

In [8]:
# dump min covers to file

with open('../data/min_covers.pkl', 'rb') as f:
    min_covers = pickle.load(f)

### Disentangling zoom transforms¶

In [9]:
# Initialize a dictionary to store the accuracy results
accuracy_results = {}
exclusivly_solves = {}

# Iterate through the datasets and classifiers in correctness_dfs
for dataset, classifiers in correctness_dfs.items():
    accuracy_results[dataset] = {}
    for classifier, df in classifiers.items():
        # filter by min_covers first
        df = df.loc[min_covers[dataset][classifier][0]]
        
        all_transforms = df.index

        zoom_in_transforms = [x for x in all_transforms if int(x.split(":")[-1]) > 224]
        zoom_out_transforms = [x for x in all_transforms if int(x.split(":")[-1]) < 224]
        zoom_224_transforms = [x for x in all_transforms if int(x.split(":")[-1]) == 224]
        
        # Filter the dataframes based on the zoom groups
        zoom_in_df = df.loc[zoom_in_transforms]
        zoom_out_df = df.loc[zoom_out_transforms]
        zoom_224_df = df.loc[zoom_224_transforms]
        
        # Calculate the accuracy for each zoom group
        zoom_in_accuracy = zoom_in_df.max(0).mean() * 100
        zoom_out_accuracy = zoom_out_df.max(0).mean() * 100
        zoom_224_accuracy = zoom_224_df.max(0).mean() * 100
        
        # Store the accuracy results for each classifier
        accuracy_results[dataset][classifier] = {
            'zoom_in': zoom_in_accuracy,
            'zoom_out': zoom_out_accuracy,
            'zoom_224': zoom_224_accuracy,
        }
        
        # exclusivly solves
        
        exclusivly_solves[dataset] = {}
        for classifier, df in classifiers.items():
            # Filter the dataframes based on the zoom groups
            zoom_in_df = df.loc[zoom_in_transforms]
            zoom_out_df = df.loc[zoom_out_transforms]
            zoom_224_df = df.loc[zoom_224_transforms]
            
            # Calculate exclusive solvable images for each zoom group
            solvable_by_zoom_in = (zoom_in_df.max(0) & ~zoom_out_df.max(0) & ~zoom_224_df.max(0)).sum()
            solvable_by_zoom_out = (~zoom_in_df.max(0) & zoom_out_df.max(0) & ~zoom_224_df.max(0)).sum()
            solvable_by_zoom_224 = (~zoom_in_df.max(0) & ~zoom_out_df.max(0) & zoom_224_df.max(0)).sum()

            # Calculate the total number of images
            total_images = len(df.columns)
            
            # Calculate the exclusive solvable percentages
            zoom_in_percentage = (solvable_by_zoom_in / total_images) * 100
            zoom_out_percentage = (solvable_by_zoom_out / total_images) * 100
            zoom_224_percentage = (solvable_by_zoom_224 / total_images) * 100
            
            # Store the exclusive solvable results for each classifier
            exclusivly_solves[dataset][classifier] = {
                'zoom_in': zoom_in_percentage,
                'zoom_out': zoom_out_percentage,
                'zoom_224': zoom_224_percentage,
            }

In [10]:
dfs = []
for dataset in dataset_names:
    df = pd.DataFrame(accuracy_results[dataset]).round(2)
    df.columns = [pretty_model_names[i] for i in df.columns]
    dfs.append(df.T)
    
max_acc_groups = pd.concat(dfs, keys=[pretty_dataset_names[i] for i in dataset_names])

In [11]:
dfs = []
for dataset in dataset_names:
    df = pd.DataFrame(exclusivly_solves[dataset]).round(2)
    df.columns = [pretty_model_names[i] for i in df.columns]
    dfs.append(df.T)
    
max_exclusivbe = pd.concat(dfs, keys=[pretty_dataset_names[i] for i in dataset_names])

In [12]:
# now concatenate them
df_final = pd.concat([max_acc_groups, max_exclusivbe], axis=1)
df_final

Unnamed: 0,Unnamed: 1,zoom_in,zoom_out,zoom_224,zoom_in.1,zoom_out.1,zoom_224.1
ImageNet,ResNet-18,94.57,79.49,81.16,10.59,0.43,0.08
ImageNet,ResNet-50,96.3,85.84,86.39,7.59,0.4,0.04
ImageNet,ViT-B/32,96.83,86.18,85.12,7.59,0.3,0.02
ImageNet,VGG-16,94.6,82.11,83.08,8.92,0.58,0.07
ImageNet,AlexNet,89.17,62.92,67.98,18.01,0.65,0.18
ImageNet,CLIP ViT-L/14,95.82,90.8,87.04,4.81,0.83,0.05
ImageNet ReaL,ResNet-18,97.37,86.1,87.62,7.38,0.27,0.07
ImageNet ReaL,ResNet-50,98.22,91.07,91.87,4.65,0.25,0.04
ImageNet ReaL,ViT-B/32,98.5,90.79,88.06,4.92,0.18,0.03
ImageNet ReaL,VGG-16,97.38,88.43,89.4,6.02,0.38,0.07


In [14]:
mean_df = df_final.groupby(level=1).mean().round(2)
mean_df = mean_df.reindex([pretty_model_names[i] for i in model_names])
mean_df

Unnamed: 0,zoom_in,zoom_out,zoom_224,zoom_in.1,zoom_out.1,zoom_224.1
ResNet-18,74.28,53.83,53.66,16.85,1.19,0.17
ResNet-50,77.35,59.67,58.32,15.47,1.34,0.16
ViT-B/32,82.13,60.69,60.13,17.7,0.95,0.15
VGG-16,72.54,55.06,54.04,14.86,1.63,0.19
AlexNet,64.6,40.24,42.22,19.04,1.21,0.26
CLIP ViT-L/14,95.13,88.75,84.75,5.64,0.96,0.07
