In [1]:
import json
import os
import matplotlib.pyplot as plt

In [3]:
# Root folder of the ImageNet100 data, relative to the notebook's working directory.
imagenet100_folder_path = 'data/ImageNet100'

# Read Labels.json with an explicit encoding: without it, open() decodes the
# file with the platform's locale encoding, which is not guaranteed to be UTF-8.
with open(f'{imagenet100_folder_path}/Labels.json', 'r', encoding='utf-8') as f:
    labels = json.load(f)

# Bare last expression so the notebook displays the loaded labels.
labels

In [4]:
# Names of the coarse-grained classes.
coarse_grain_classes_list = [
    "Bird",
    "Snake",
    "Spider",
    "Small Fish",
    "Turtle",
    "Lizard",
    "Crab",
    "Shark",
]

# Fine-grained classes: folder-style identifier -> human-readable label.
# NOTE(review): the entries appear to be grouped in the same order as
# coarse_grain_classes_list (see the section comments below), but the
# fine -> coarse mapping itself is not encoded in this cell -- confirm.
fine_grain_classes_dict = {
    # birds
    "n01818515": "macaw",
    "n01537544": "indigo bunting, indigo finch, indigo bird, Passerina cyanea",
    "n02007558": "flamingo",
    "n02002556": "white stork, Ciconia ciconia",
    "n01614925": "bald eagle, American eagle, Haliaeetus leucocephalus",
    "n01582220": "magpie",
    "n01806143": "peacock",
    "n01795545": "black grouse",
    "n01531178": "goldfinch, Carduelis carduelis",
    "n01622779": "great grey owl, great gray owl, Strix nebulosa",
    "n01833805": "hummingbird",
    # snakes
    "n01740131": "night snake, Hypsiglena torquata",
    "n01735189": "garter snake, grass snake",
    "n01755581": "diamondback, diamondback rattlesnake, Crotalus adamanteus",
    "n01751748": "sea snake",
    "n01729977": "green snake, grass snake",
    "n01729322": "hognose snake, puff adder, sand viper",
    "n01734418": "king snake, kingsnake",
    "n01728572": "thunder snake, worm snake, Carphophis amoenus",
    "n01739381": "vine snake",
    "n01756291": "sidewinder, horned rattlesnake, Crotalus cerastes",
    # spiders
    "n01773797": "garden spider, Aranea diademata",
    "n01775062": "wolf spider, hunting spider",
    "n01773549": "barn spider, Araneus cavaticus",
    "n01774384": "black widow, Latrodectus mactans",
    "n01774750": "tarantula",
    # small fish
    "n01440764": "tench, Tinca tinca",
    "n01443537": "goldfish, Carassius auratus",
    # turtles
    "n01667778": "terrapin",
    "n01667114": "mud turtle",
    "n01664065": "loggerhead, loggerhead turtle, Caretta caretta",
    "n01665541": "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea",
    # lizards
    "n01687978": "agama",
    "n01677366": "common iguana, iguana, Iguana iguana",
    "n01695060": "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
    "n01685808": "whiptail, whiptail lizard",
    # crabs
    "n01978287": "Dungeness crab, Cancer magister",
    "n01986214": "hermit crab",
    "n01978455": "rock crab, Cancer irroratus",
    # sharks
    "n01491361": "tiger shark, Galeocerdo cuvieri",
    "n01484850": "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
    "n01494475": "hammerhead, hammerhead shark",
}

In [5]:
# Keep only the entries directly under the dataset root that are directories.
folders_in_imagenet100 = list(filter(
    lambda entry: os.path.isdir(os.path.join(imagenet100_folder_path, entry)),
    os.listdir(imagenet100_folder_path),
))
print(folders_in_imagenet100)

In [14]:
# Print the number of entries inside every sub-folder of the `test_fine` folder.
# The directory is listed once and each iteration inspects its own sub-folder;
# indexing the listing with a constant 0 would report the first sub-folder's
# count once per entry instead of one count per sub-folder.
test_fine_path = f'{imagenet100_folder_path}/test_fine'
for class_folder in os.listdir(test_fine_path):
    print(len(os.listdir(f'{test_fine_path}/{class_folder}')))

In [6]:
def plot_distribution(test: bool):
    """Plot a bar chart of the number of images per fine-grained class.

    Args:
        test: If True, count the files under the ``test_fine`` split folder;
            otherwise count those under ``train_fine``.

    Reads the module-level ``imagenet100_folder_path`` and
    ``fine_grain_classes_dict``, shows the figure with ``plt.show()`` and
    returns nothing.
    """
    # The split folder is selected by `test`; previously the folder variable was
    # read before assignment, so the function failed on every call.
    # NOTE(review): `test_fine` is the folder name used elsewhere in this
    # notebook; `train_fine` is assumed by symmetry -- confirm against the data layout.
    split_folder = 'test_fine' if test else 'train_fine'
    split_path = os.path.join(imagenet100_folder_path, split_folder)

    def list_subfolders(input_folder_path):
        """Return the names of the directories directly inside ``input_folder_path``."""
        return {name for name in os.listdir(input_folder_path)
                if os.path.isdir(os.path.join(input_folder_path, name))}

    def count_images(input_folder_path):
        """Count every file found under ``input_folder_path``, recursively."""
        return sum(len(files) for _, _, files in os.walk(input_folder_path))

    # Number of files per class sub-folder of the selected split
    images_per_class = {
        class_key: count_images(os.path.join(split_path, class_key))
        for class_key in list_subfolders(split_path)
    }

    # Data for plotting, in the order of fine_grain_classes_dict. A class that has
    # no sub-folder in the selected split is plotted as 0 instead of raising KeyError.
    class_names = list(fine_grain_classes_dict.values())
    image_counts = [images_per_class.get(class_key, 0) for class_key in fine_grain_classes_dict]

    # Wide figure for better spacing and readability of the class labels
    plt.figure(figsize=(15, 10))

    bars = plt.bar(class_names, image_counts, color='skyblue', align='center')

    # Setting the labels and title
    plt.ylabel('Number of Images')
    plt.xlabel('Class')
    plt.title('Distribution of Images per Class')

    # Rotating x-axis labels
    plt.xticks(rotation=90)

    # Label each bar with its exact count; the text comes from the integer
    # counts themselves rather than from the bar geometry.
    for bar, count in zip(bars, image_counts):
        plt.text(bar.get_x() + bar.get_width() / 2.0, count, str(count),
                 ha='center', va='bottom', fontsize=8, rotation=45)

    plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels and text
    plt.show()

In [42]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests

model_name = 'facebook/dinov2-large'
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# `image` is not defined in any visible cell, so load one explicitly to let this
# cell run on a fresh kernel. The URL is the sample picture used in the DINOv2
# usage example -- NOTE(review): replace it with a dataset image if that was the intent.
image_url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
response = requests.get(image_url, stream=True, timeout=30)
response.raise_for_status()  # fail loudly instead of handing an error page to PIL
image = Image.open(response.raw)

# Preprocess the image into PyTorch tensors and run the model's forward pass.
inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state

In [43]:
# Display the model output object produced by the forward pass in the previous cell.
outputs

In [11]:
# NOTE(review): convert_imagenet_hierarchy is neither defined nor imported in the
# visible cells -- presumably it comes from a cell or module not shown here; confirm
# it is in scope before this cell runs on a fresh kernel.
imagenet_dict = convert_imagenet_hierarchy('Imagenet_hierachy.txt')

In [12]:
# Display the value returned by convert_imagenet_hierarchy.
imagenet_dict