In [22]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from google.cloud import storage
import json

In [23]:
# Set up GCS client
client = storage.Client()
print(f"GCS client set up: {client}")

GCS client set up: <google.cloud.storage.client.Client object at 0x3178d64a0>


In [24]:
# Define constants
BUCKET_NAME = 'creature-vision-training-set'
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
DATA_DIR = 'incorrect_predictions'

In [25]:
# Function to load JSON data from GCS
def load_json_from_gcs(bucket_name, file_path):
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(file_path)
    data = json.loads(blob.download_as_string())
    print(f"Loaded JSON from {file_path}")
    return data

In [26]:
# Function to list all files in a GCS directory
def list_gcs_files(bucket_name, prefix):
    bucket = client.get_bucket(bucket_name)
    files = [blob.name for blob in bucket.list_blobs(prefix=prefix)]
    print(f"Found {len(files)} files in {prefix}")
    return files

In [27]:
all_files = list_gcs_files(BUCKET_NAME, DATA_DIR)
json_files = [f for f in all_files if f.endswith('.json')]
print(f"Total files: {len(all_files)}, JSON files: {len(json_files)}")

Found 3718 files in incorrect_predictions
Total files: 3718, JSON files: 1851


In [28]:
# init empty dict
labels = {}
# init empty set
all_breeds = set()
# 8 mins to run with ~2000 images in folder
for json_file in json_files:
    data = load_json_from_gcs(BUCKET_NAME, json_file)
    # add corresponding image file
    image_file = json_file.replace('_labels.json', '.jpg')
    # add kv pair to labels dict, k=image file name v=api_label
    labels[image_file] = data['api_label']
    # add bread label to set, duplicates ignored because set
    all_breeds.add(data['api_label'])

TOTAL_BREEDS = len(all_breeds)

Loaded JSON from incorrect_predictions/affenpinscher_1728945305_labels.json
Loaded JSON from incorrect_predictions/affenpinscher_1729092005_labels.json
Loaded JSON from incorrect_predictions/affenpinscher_1729283705_labels.json
Loaded JSON from incorrect_predictions/affenpinscher_1729397705_labels.json
Loaded JSON from incorrect_predictions/african_1729649406_labels.json
Loaded JSON from incorrect_predictions/airedale_1727741877_labels.json
Loaded JSON from incorrect_predictions/airedale_1728786306_labels.json
Loaded JSON from incorrect_predictions/airedale_1728822006_labels.json
Loaded JSON from incorrect_predictions/airedale_1729076705_labels.json
Loaded JSON from incorrect_predictions/airedale_1729575005_labels.json
Loaded JSON from incorrect_predictions/airedale_1729597505_labels.json
Loaded JSON from incorrect_predictions/airedale_1729641306_labels.json
Loaded JSON from incorrect_predictions/airedale_1729696506_labels.json
Loaded JSON from incorrect_predictions/airedale_1729769105

In [31]:
def save_labels_and_breeds(labels, all_breeds):
    # Save the labels dictionary to a local JSON file
    with open('labels.json', 'w') as f:
        json.dump(labels, f)
    print("labels dictionary saved to labels.json")

    # Convert all_breeds set to a sorted list
    all_breeds_sorted = sorted(list(all_breeds))

    # Save the all_breeds set to a local JSON file
    with open('all_breeds.json', 'w') as f:
        json.dump(all_breeds_sorted, f)
    print("all_breeds set saved to all_breeds.json")

# Call this function after your loop
save_labels_and_breeds(labels, all_breeds)

print(f"Total number of breeds: {TOTAL_BREEDS}")

labels dictionary saved to labels.json
all_breeds set saved to all_breeds.json
Total number of breeds: 159


In [8]:
'''
Suppose all_breeds contains: {"Bulldog", "Affenpinscher", "Chihuahua"}
After sorting: ["Affenpinscher", "Bulldog", "Chihuahua"]
'for i, breed in ...' unpacks each pair yielded by enumerate()
The enumerate() returns k,v pairs, e.g.:
(0, "Affenpinscher")
(1, "Bulldog")
(2, "Chihuahua")
The dictionary comprehension 'breed: i then creates:
{
"Affenpinscher": 0,
"Bulldog": 1,
"Chihuahua": 2
}
'''
breed_to_index = {breed: i for i, breed in enumerate(sorted(all_breeds))}

In [29]:
len(breed_to_index)

0

In [12]:
for v in breed_to_index.values():
    print(v, breed_to_index[v])