# Save a JSON file with nice indentation

In [1]:
# Re-serialise llama2_cub.json into llama2_cub_pretty.json with a 4-space indent
# so the file is easy to read and diff.
import json
with open('llama2_cub.json', 'r') as raw_file:
    descriptors = json.loads(raw_file.read())
with open('llama2_cub_pretty.json', 'w') as pretty_file:
    pretty_file.write(json.dumps(descriptors, indent=4))


# Shuffle the descriptors class-wise (the complete descriptor lists are permuted among the classes)

In [5]:
import json
import random

input_path = 'descriptors/descriptors_cub_llama2_prompt_0_run_0.json'
output_path = input_path.replace('.json', '_class-wise_permuted.json')

# Fixed seed so that re-running this cell writes the same permutation again;
# change the value to draw a different permutation.
SEED = 0
# Private generator: seeding it does not disturb the global `random` state
# that other cells may rely on.
rng = random.Random(SEED)

# Load the JSON data (presumably a mapping: class label -> list of descriptors)
with open(input_path, 'r') as f:
    data = json.load(f)

# Collect the per-class descriptor collections, one entry per class.
# Each entry is kept intact: only the assignment to class labels is shuffled,
# the contents of an entry are never mixed with another entry.
all_descriptors = list(data.values())

# Shuffle which entry belongs to which class
rng.shuffle(all_descriptors)

# Assign the shuffled entries back to the class labels (label order unchanged).
# A class may, by chance, receive its own descriptors again.
data = dict(zip(data, all_descriptors))

# Save the modified JSON data
with open(output_path, 'w') as f:
    json.dump(data, f, indent=4)


# Shuffle the descriptors across classes as well (individual descriptors are pooled and redistributed)

In [4]:
import json
import random

input_path = 'descriptors/descriptors_cub_llama2_prompt_0_run_0.json'
output_path = input_path.replace('.json', '_permuted.json')

# Fixed seed so that re-running this cell writes the same permutation again;
# change the value to draw a different permutation.
SEED = 0
# Private generator: seeding it does not disturb the global `random` state
# that other cells may rely on.
rng = random.Random(SEED)

# Load the JSON data (presumably a mapping: class label -> list of descriptors)
with open(input_path, 'r') as f:
    data = json.load(f)

# Pool the descriptors of every class into one flat list
all_descriptors = []
for class_descriptors in data.values():
    all_descriptors.extend(class_descriptors)

# Shuffle the pooled descriptors
rng.shuffle(all_descriptors)

# Deal the shuffled pool back out so that every class keeps its original
# number of descriptors. A running offset is used instead of re-slicing the
# remaining pool on every iteration, which copied the whole tail each time.
offset = 0
for class_label in data:
    num_descriptors = len(data[class_label])
    data[class_label] = all_descriptors[offset:offset + num_descriptors]
    offset += num_descriptors

# Every pooled descriptor must have been handed out exactly once.
assert offset == len(all_descriptors)

# Save the modified JSON data
with open(output_path, 'w') as f:
    json.dump(data, f, indent=4)


# Randomize class names

In [4]:
import json
import random
import string

# NOTE(review): no cell visible in this notebook writes a file ending in
# `_randomized.json` (the random-descriptor cell writes
# `_randomized_descriptors.json`) -- confirm that this input file exists.
input_path = 'descriptors/descriptors_cub_llama2_prompt_0_run_0_randomized.json'
output_path = input_path.replace('.json', '_randomized_class_names.json')

# Read and parse the descriptor file
with open(input_path, 'r') as in_file:
    data = json.loads(in_file.read())

def generate_random_string():
    """Return a random string of ASCII letters and digits.

    The length is drawn uniformly from 15 to 20 (both inclusive); every
    character is then drawn independently, with replacement.
    """
    n_chars = random.randint(15, 20)
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=n_chars)
    return ''.join(picked)


# Replace every class label (key) with a freshly generated random string,
# keeping the values and their order. The original labels are discarded.
# A name is redrawn in the (very unlikely) event that it was already used:
# otherwise two classes would silently collapse into a single dictionary entry.
new_data = {}
for class_descriptors in data.values():
    random_name = generate_random_string()
    while random_name in new_data:
        random_name = generate_random_string()
    new_data[random_name] = class_descriptors

# No class may have been lost by the renaming.
assert len(new_data) == len(data)

# Save the modified JSON data
with open(output_path, 'w') as f:
    json.dump(new_data, f, indent=4)

# Generate random descriptors

In [None]:
import json
import random
import string

input_path = 'descriptors/descriptors_cub_llama2_prompt_0_run_0.json'
output_path = input_path.replace('.json', '_randomized_descriptors.json')

# Read and parse the descriptor file; only its keys (the class labels) are
# reused further down in this cell, the values get overwritten.
with open(input_path, 'r') as in_file:
    data = json.loads(in_file.read())

def generate_random_string():
    """Return a random string of ASCII letters and digits.

    The length is drawn uniformly from 15 to 20 (both inclusive); every
    character is then drawn independently, with replacement.
    """
    n_chars = random.randint(15, 20)
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=n_chars)
    return ''.join(picked)

# Overwrite the descriptors of every class with five freshly generated random
# strings; the class labels themselves stay as they are.
for class_label in data.keys():
    random_descriptors = []
    while len(random_descriptors) < 5:
        random_descriptors.append(generate_random_string())
    data[class_label] = random_descriptors

# Write the randomised mapping to disk, indented for readability
with open(output_path, 'w') as out_file:
    out_file.write(json.dumps(data, indent=4))


# Create a table with the evaluation results

In [3]:
import json
import pandas as pd
import os

path = '/export/home/ru86qer/classify_by_description_release/eval_results_08_29/'

# One entry per evaluation run to include in the table:
# (file name inside `path`, column label used in the table), in column order.
# Comment entries in or out to change which runs are compared.
result_files = [
    ('eval_cub_llama2_prompt2_xxx_run_3_19:44:18', '7-Billion Parameters'),
    ('contrastive_descriptions_1_18:14:41.json', "Erroneously: Same Descriptors for All Classes"),
    # ('eval_cub_llama2_prompt_0_run_0_randomized_randomized_class_names.json', 'Random Class names + random descriptions'),
    # ('eval_cub_llama2_prompt_0_run_0.json', 'Ordinary Descriptors'),
]

# Load every selected result file; `results` maps column label -> parsed JSON,
# `keys` keeps the labels in column order.
results = {}
keys = []
for file_name, key in result_files:
    with open(path + file_name, 'r') as f:
        results[key] = json.load(f)
    keys.append(key)

# Each result file is indexed first by backbone name and then by metric name.
backbones = ["ViT-B/32", "ViT-B/16", "ViT-L/14", "ViT-L/14@336px"]
accuracy_field = "Total Description-based Top-1 Accuracy: "

data = {
    "Backbone": backbones,
    # Hard-coded reference column, one value per backbone above.
    # NOTE(review): presumably accuracies obtained with GPT-generated
    # descriptors -- confirm the source of these numbers.
    "GPT Values": [52.57,57.75,63.46,65.257],
}
# One column per loaded run, holding that run's accuracy for every backbone.
for key in keys:
    data[key] = [results[key][backbone][accuracy_field] for backbone in backbones]

df = pd.DataFrame(data)

df = df.round(2)

# Save this as an Excel file under the first unused name results_<i>.xlsx,
# so that earlier tables are never overwritten.
for i in range(0, 10000):
    excel_path = path+"results_{}.xlsx".format(i)
    if not os.path.exists(excel_path):
        df.to_excel(excel_path, index=False)
        break
else:
    # Previously the table was silently not saved in this situation.
    raise FileExistsError(
        "results_0.xlsx ... results_9999.xlsx already exist in {}; table was not saved".format(path)
    )


# Check whether two JSON files are equal

In [3]:
import json
# Load the descriptor files written by run 0 and run 1 of the same
# configuration (prompt_0, topk1) and print every class whose entries differ.
with open('/export/home/ru86qer/classify_by_description_release/descriptors/descriptors_cub_llama2_prompt_0_topk1_run_0.json', 'r') as f:
    descriptors0 = json.load(f)
with open('/export/home/ru86qer/classify_by_description_release/descriptors/descriptors_cub_llama2_prompt_0_topk1_run_1.json', 'r') as f:
    descriptors1 = json.load(f)

# Walk over the union of the class names (run-0 order first, then classes that
# only exist in run 1). A class missing from one of the files is reported as a
# difference instead of raising KeyError (missing in run 1) or being silently
# ignored (missing in run 0).
only_in_run1 = [key for key in descriptors1 if key not in descriptors0]
for key in list(descriptors0) + only_in_run1:
    in_both = key in descriptors0 and key in descriptors1
    if not in_both or descriptors0[key] != descriptors1[key]:
        print(key, descriptors0.get(key, '<missing>'), descriptors1.get(key, '<missing>'))

Sooty_Albatross ['black plumage', 'white patches on the face', 'long, pointed wings', 'long, slender bill', 'dark eyes', 'pale legs and feet'] ['black plumage', 'white patches on the face', 'long, slender wings', 'long, pointed tail', 'webbed feet', 'black bill']
Vermilion_Flycatcher ['black mask around the eyes', 'white belly', 'grey-brown back', 'long, pointed tail', 'black legs and feet', 'distinctive black stripes on the wings'] ['black mask around the eyes', 'white throat patch', 'black or dark brown wings and tail', 'medium-sized bird with a long, pointed bill', 'perches on a branch or']
Eared_Grebe ['black, white, and brown plumage', 'distinctive ear patches', 'long, pointed bill', 'dark eyes', 'long, slender neck', 'pale yellow legs and feet'] ['black and white plumage', 'distinctive ear tufts', 'long, slender neck', 'bright red eyes', 'black legs and feet', 'dive and swim in a graceful, slow motion']
Pomarine_Jaeger ['dark grey upperparts', 'white underparts', 'white patches o

# No, they are not equal. Therefore, the llama2 output seems to be stochastic.

In [1]:
import scipy.io

# Read the MATLAB annotation file into a dict (variable name -> array) and
# list which variables it contains.
cars_annos_path = '/export/scratch/ru86qer/datasets/cars_download/cars_annos.mat'
mat = scipy.io.loadmat(cars_annos_path)
print(mat.keys())

dict_keys(['__header__', '__version__', '__globals__', 'annotations', 'class_names'])


In [3]:
# Display the 'annotations' variable of the loaded .mat file as the cell
# output (the repr is abbreviated with `...` for long arrays).
mat['annotations']

array([[(array(['car_ims/000001.jpg'], dtype='<U18'), array([[112]], dtype=uint8), array([[7]], dtype=uint8), array([[853]], dtype=uint16), array([[717]], dtype=uint16), array([[1]], dtype=uint8), array([[0]], dtype=uint8)),
        (array(['car_ims/000002.jpg'], dtype='<U18'), array([[48]], dtype=uint8), array([[24]], dtype=uint8), array([[441]], dtype=uint16), array([[202]], dtype=uint8), array([[1]], dtype=uint8), array([[0]], dtype=uint8)),
        (array(['car_ims/000003.jpg'], dtype='<U18'), array([[7]], dtype=uint8), array([[4]], dtype=uint8), array([[277]], dtype=uint16), array([[180]], dtype=uint8), array([[1]], dtype=uint8), array([[0]], dtype=uint8)),
        ...,
        (array(['car_ims/016183.jpg'], dtype='<U18'), array([[25]], dtype=uint8), array([[32]], dtype=uint8), array([[587]], dtype=uint16), array([[359]], dtype=uint16), array([[196]], dtype=uint8), array([[1]], dtype=uint8)),
        (array(['car_ims/016184.jpg'], dtype='<U18'), array([[56]], dtype=uint8), array([