# Rename classes in true labels

Some classes in the true labels are confusing.

For example, "potatoes" could be "potato_white", "potato_brown", "potato_red" etc...

Similarly, "onion" could be "onion_brown", "onion_white", "onion_red" etc...

This notebook will serve as a place to rename labels.

To start, I'll try "onion" -> "onion_brown". (The run recorded below renames "savoy_cabbage" -> "cabbage_savoy".)

## Download original labels from GCP/Weights & Biases

In [166]:
# Append the upper level directory to sys
# (the `configs` and `utils` imports below are presumably resolved from that parent directory — TODO confirm)
import sys
sys.path.append("..")

import pandas as pd
import numpy as np

from pathlib import Path

# Get config
from configs.default_config import config

# `args` is just an alias for the same config object
args = config

# Connect to GCP
# NOTE(review): the key path is hard-coded relative to this notebook; make sure the key file is never committed
from utils.gcp_utils import set_gcp_credentials, test_gcp_connection
set_gcp_credentials(path_to_key="../utils/google-storage-key.json")
test_gcp_connection()

import wandb

# Initialize a new run
# NOTE(review): `wandb_load_artifact` is imported but not used in the visible part of this notebook
from utils.wandb_utils import wandb_load_artifact, wandb_download_and_load_labels

notes = "Changing class names to be more reflective of their food type."

run = wandb.init(project=args.wandb_project, 
                 job_type=args.wandb_job_type,
                 tags=['manual_photo_upload'],
                 notes=notes)

# Download the labels artifact named in the config and unpack the annotations plus class lookups
annotations, class_names, class_dict, reverse_class_dict, labels_path = wandb_download_and_load_labels(wandb_run=run,
wandb_labels_artifact_name=args.wandb_labels_artifact)


[INFO] GCP credentials set!
[INFO] GCP connection successful! Access to GCP for saving/loading data and models available.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[INFO] Labels directory: ./artifacts/food_vision_labels:v69
[INFO] Labels path: artifacts/food_vision_labels:v69/annotations.csv
[INFO] Working with: 309 classes


In [167]:
class_names

['achacha',
 'almond_butter',
 'almonds',
 'apple_custard',
 'apple_green',
 'apple_red',
 'apricot',
 'artichoke',
 'asparagus',
 'avocado',
 'bacon',
 'bacon_and_egg_burger',
 'bagel',
 'baklava',
 'banana',
 'banana_bread',
 'barbecue_sauce',
 'basil',
 'bay_leaves',
 'bean_sprouts',
 'beans',
 'beef_curry',
 'beef_diced',
 'beef_kebab',
 'beef_mince',
 'beef_patty',
 'beef_roast',
 'beef_stir_fry',
 'beer',
 'beetroot',
 'biltong',
 'black_pepper',
 'blackberries',
 'blueberries',
 'bok_choy',
 'bread',
 'bread_naan',
 'broccoli',
 'broccolini',
 'brownie',
 'brussel_sprouts',
 'burrito',
 'butter',
 'cabbage_green',
 'cabbage_red',
 'cabbage_savoy',
 'calamari',
 'candy',
 'cape_gooseberries',
 'capsicum_green',
 'capsicum_orange',
 'capsicum_red',
 'capsicum_yellow',
 'carrot',
 'carrot_purple',
 'cashews',
 'cauliflower',
 'celeriac',
 'celery',
 'cheese',
 'cheeseburger',
 'cherries',
 'chervil',
 'chicken_breast',
 'chicken_burger',
 'chicken_stir_fry',
 'chicken_thighs',
 'ch

In [168]:
# Find the class_names with "cabbage" in them
[class_name for class_name in class_names if "cabbage" in class_name]

['cabbage_green', 'cabbage_red', 'cabbage_savoy']

In [169]:
len(class_dict)

309

In [170]:
len(reverse_class_dict)

309

In [171]:
# See the annotations
annotations.head()

Unnamed: 0,filename,image_name,class_name,label,split,clear_or_confusing,whole_food_or_dish,one_food_or_multiple,label_last_updated_at,label_source,image_source
0,test/pain_au_chocolat/4fd7cb42-bd7f-48f1-bfdc-...,4fd7cb42-bd7f-48f1-bfdc-607c2f54b788.jpg,pain_au_chocolat,193,test,,,,,,internet_download
1,test/pain_au_chocolat/2062f52a-781c-4e4f-b8a7-...,2062f52a-781c-4e4f-b8a7-0a108934f453.jpg,pain_au_chocolat,193,test,,,,,,internet_download
2,test/pain_au_chocolat/8003e0f6-37e8-460d-9c14-...,8003e0f6-37e8-460d-9c14-e7c6fe44a37f.jpg,pain_au_chocolat,193,test,,,,,,internet_download
3,test/pain_au_chocolat/839437c8-c643-408f-9f04-...,839437c8-c643-408f-9f04-d0d3bec238c3.jpg,pain_au_chocolat,193,test,,,,,,internet_download
4,test/pain_au_chocolat/ca5c13ff-a535-4b69-9144-...,ca5c13ff-a535-4b69-9144-e06275e01e35.jpg,pain_au_chocolat,193,test,,,,,,internet_download


In [172]:
# Check to see if reverse_class_dict is the same as the reverse of class_dict
reverse_class_dict == {v: k for k, v in class_dict.items()}

True

In [173]:
# Check if class_names equals the sorted keys of reverse_class_dict
class_names == sorted(list(reverse_class_dict.keys()))

True

In [174]:
# Where do class_names and sorted(list(reverse_class_dict.keys())) differ?
# Prints one line per mismatching position; no output means every compared position agrees
# (zip stops at the shorter of the two lists).
for i, (class_name, class_dict_key) in enumerate(zip(class_names, sorted(list(reverse_class_dict.keys())))):
    if class_name != class_dict_key:
        print(f"Class name {class_name} at index {i} is different from class_dict_key {class_dict_key}")

In [175]:
# See the class names
class_names

['achacha',
 'almond_butter',
 'almonds',
 'apple_custard',
 'apple_green',
 'apple_red',
 'apricot',
 'artichoke',
 'asparagus',
 'avocado',
 'bacon',
 'bacon_and_egg_burger',
 'bagel',
 'baklava',
 'banana',
 'banana_bread',
 'barbecue_sauce',
 'basil',
 'bay_leaves',
 'bean_sprouts',
 'beans',
 'beef_curry',
 'beef_diced',
 'beef_kebab',
 'beef_mince',
 'beef_patty',
 'beef_roast',
 'beef_stir_fry',
 'beer',
 'beetroot',
 'biltong',
 'black_pepper',
 'blackberries',
 'blueberries',
 'bok_choy',
 'bread',
 'bread_naan',
 'broccoli',
 'broccolini',
 'brownie',
 'brussel_sprouts',
 'burrito',
 'butter',
 'cabbage_green',
 'cabbage_red',
 'cabbage_savoy',
 'calamari',
 'candy',
 'cape_gooseberries',
 'capsicum_green',
 'capsicum_orange',
 'capsicum_red',
 'capsicum_yellow',
 'carrot',
 'carrot_purple',
 'cashews',
 'cauliflower',
 'celeriac',
 'celery',
 'cheese',
 'cheeseburger',
 'cherries',
 'chervil',
 'chicken_breast',
 'chicken_burger',
 'chicken_stir_fry',
 'chicken_thighs',
 'ch

In [176]:
len(class_names)

309

In [177]:
"onion_red" in class_names

True

In [131]:
# Make a copy of the original annotations
original_annotations = annotations.copy()

In [132]:
# Create a function to similarity match the class names (e.g. code which string is most like another string)
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import dot_score
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_list_of_classes(class_names: list, model: SentenceTransformer):
    """
    Build a lookup from each class name to its embedding.

    Args:
        class_names: Class names to embed.
        model: Model whose `encode` method is called once on the whole list.

    Returns:
        Dict pairing each class name with the embedding found at the same
        position in `model.encode(class_names)`.
    """
    # Encode everything in a single call, then pair names and embeddings by position
    embeddings = model.encode(class_names)
    return dict(zip(class_names, embeddings))

class_name_embeddings = embed_list_of_classes(class_names, model = model)

# Create a function to similarity match the class names (e.g. code which string is most like another string)
def find_most_similar_class_name(target_class_name, class_name_embedding_dict, top_k=3, embedding_model=None):
    """
    Finds the class names whose embeddings are most similar to the target class name.

    Similarity is the dot product between the target's embedding and each
    stored embedding; higher is more similar.

    Args:
        target_class_name: String to look up (it does not need to be an existing class).
        class_name_embedding_dict: Dict of class name -> embedding, e.g. the output of
            `embed_list_of_classes`. Assumes each embedding is a 1-D array-like with the
            same length as the target embedding.
        top_k: Maximum number of class names to return (defaults to 3).
        embedding_model: Object with an `encode(list_of_str)` method used to embed the
            target. When None, the notebook-level `model` is used, so existing
            two-argument calls keep working.

    Returns:
        List of up to `top_k` class names, most similar first. Ties keep the
        dictionary's insertion order.

    Raises:
        ValueError: If `top_k` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be >= 0, got {top_k}")

    # Prefer the explicitly passed model; otherwise fall back to the notebook-level `model`
    encoder = model if embedding_model is None else embedding_model

    # Get the embedding of the target_class_name
    target_class_name_embedding = np.asarray(encoder.encode([target_class_name])[0])

    # Plain float scores keep the sort key simple
    similarity_scores = {
        class_name: float(np.dot(np.asarray(embedding), target_class_name_embedding))
        for class_name, embedding in class_name_embedding_dict.items()
    }

    # Highest score first, then keep only the requested number of names
    most_similar_class_names = sorted(similarity_scores, key=similarity_scores.get, reverse=True)[:top_k]

    return most_similar_class_names
    

In [133]:
# Create a function to string match the class names (e.g. code which string is most like another string)
from difflib import SequenceMatcher

def match_string_via_sequence_matcher(target_string, string_list, top_k=3):
    """
    Finds the strings in `string_list` that are most similar to `target_string`.

    Similarity is `difflib.SequenceMatcher(...).ratio()` with the candidate as the
    first sequence and the target as the second.

    Args:
        target_string: String to compare every candidate against.
        string_list: Iterable of candidate strings.
        top_k: Maximum number of candidates to return (defaults to 3).

    Returns:
        List of up to `top_k` candidates, most similar first. Candidates with equal
        ratios keep their original relative order.

    Raises:
        ValueError: If `top_k` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be >= 0, got {top_k}")

    # SequenceMatcher caches its analysis of the *second* sequence, so pin the target
    # there once and only swap the first sequence for each candidate.
    matcher = SequenceMatcher(None, "", target_string)

    def similarity_to_target(candidate):
        matcher.set_seq1(candidate)
        return matcher.ratio()

    most_similar_strings = sorted(string_list, key=similarity_to_target, reverse=True)[:top_k]

    return most_similar_strings

In [134]:
match_string_via_sequence_matcher("mushroom", class_names)

['mushroom_flat', 'mushroom_enoki', 'mushroom_button']

In [135]:
len(class_name_embeddings)

308

In [137]:
# Find the most similar class names to each missing class
# (compares the embedding-based ranking with the plain string-matching ranking side by side)
for class_name in ["cabbage"]:
    top_3_similar_class_embeddings = find_most_similar_class_name(class_name, class_name_embeddings)
    top_3_similar_class_string_matching = match_string_via_sequence_matcher(class_name, class_names)
    print(f"Similar class names to '{class_name}' | Embedding match: {top_3_similar_class_embeddings} | String match: {top_3_similar_class_string_matching}")

Similar class names to 'cabbage' | Embedding match: ['cabbage_green', 'cabbage_red', 'savoy_cabbage'] | String match: ['cabbage_red', 'cabbage_green', 'savoy_cabbage']


## Update annotations

In [138]:
# Create map for class names to rename
class_names_to_rename = {"savoy_cabbage": "cabbage_savoy"}

In [141]:
# Apply class_names_to_rename to a copy, so original_annotations stays untouched
updated_annotations = original_annotations.copy()

print("\n[INFO] Updating rows with different names...")

# Report how many rows carry each old name, then overwrite them with the new name
for old_name, new_name in class_names_to_rename.items():
    has_old_name = updated_annotations["class_name"] == old_name
    print(f"Number of rows with class_name '{old_name}': {has_old_name.sum()}")
    updated_annotations.loc[has_old_name, "class_name"] = new_name

print("\n[INFO] Row counts after updating class names...")
print("[INFO] Previous class name counts:")
# Count the rows still carrying each old name after the rename
for old_name in class_names_to_rename:
    rows_remaining = (updated_annotations["class_name"] == old_name).sum()
    print(f"Number of rows with class_name '{old_name}': {rows_remaining}")

print("\n[INFO] New class name counts:")
# Count the rows now carrying each new name
for new_name in class_names_to_rename.values():
    rows_renamed = (updated_annotations["class_name"] == new_name).sum()
    print(f"Number of rows with class_name '{new_name}': {rows_renamed}")


[INFO] Updating rows with different names...
Number of rows with class_name 'savoy_cabbage': 1936

[INFO] Row counts after updating class names...
[INFO] Previous class name counts:
Number of rows with class_name 'savoy_cabbage': 0

[INFO] New class name counts:
Number of rows with class_name 'cabbage_savoy': 1936


In [142]:
len(updated_annotations.class_name.unique()), len(updated_annotations.label.unique())

(308, 308)

In [143]:
from typing import List, Dict, Tuple

def get_updated_class_names_class_dict_and_reverse_class_dict(df: pd.DataFrame) -> Tuple[List[str], Dict[int, str], Dict[str, int]]:
    """Derive the class lookups from the `class_name` column of a dataframe.

    Returns a tuple of:
        - the sorted list of unique class names,
        - a dict of index -> class name (index = position in the sorted list),
        - a dict of class name -> index (the inverse lookup).
    """
    # Unique names, ordered so that indices are assigned in sorted order
    updated_class_names = list(df.class_name.unique())
    updated_class_names.sort()

    # index -> name, then invert it for name -> index
    updated_class_dict = dict(enumerate(updated_class_names))
    updated_reverse_class_dict = {class_name: index for index, class_name in updated_class_dict.items()}

    assert len(updated_class_dict) == len(updated_reverse_class_dict), "Class dict and reverse class dict are not the same length"
    return updated_class_names, updated_class_dict, updated_reverse_class_dict

def map_updated_class_dict_to_updated_annotations(df: pd.DataFrame, updated_reverse_class_dict: Dict[str, int]) -> pd.DataFrame:
    """Map updated class dict to updated annotations dataframe.

    For example, go from {"apple_red": 1} -> df["label"] = 1

    Args:
        df: Annotations dataframe with a `class_name` column. It is not modified.
        updated_reverse_class_dict: Dict of class name -> integer label.

    Returns:
        A copy of `df` whose `label` column holds the label mapped from `class_name`.

    Raises:
        AssertionError: If any `class_name` has no entry in `updated_reverse_class_dict`,
            or if the number of unique labels differs from the number of dict entries.
    """
    updated_annotations = df.copy()
    mapped_labels = updated_annotations['class_name'].map(updated_reverse_class_dict)

    # Series.map yields NaN for class names missing from the dict. That NaN would count as
    # an extra "unique label", so the length check below could pass while labels are NaN.
    unmapped_rows = mapped_labels.isna()
    assert not unmapped_rows.any(), (
        "Class names missing from updated reverse class dict (showing up to 10): "
        f"{updated_annotations.loc[unmapped_rows, 'class_name'].unique().tolist()[:10]}"
    )

    updated_annotations['label'] = mapped_labels
    assert updated_annotations['label'].nunique() == len(updated_reverse_class_dict), "Number of unique labels in updated annotations does not match number of unique class names in updated reverse class dict"
    return updated_annotations

In [144]:
updated_class_names, updated_class_dict, updated_reverse_class_dict = get_updated_class_names_class_dict_and_reverse_class_dict(updated_annotations)

updated_annotations = map_updated_class_dict_to_updated_annotations(updated_annotations, updated_reverse_class_dict)

len(updated_class_names), updated_class_names[:10]

(308,
 ['achacha',
  'almond_butter',
  'almonds',
  'apple_custard',
  'apple_green',
  'apple_red',
  'apricot',
  'artichoke',
  'asparagus',
  'avocado'])

In [145]:
# Run a check to make sure the keys of the dictionary aren't present in the values of updated_class_dict
# No old (pre-rename) class name may survive among the updated class dict's values
remaining_class_names = set(updated_class_dict.values())
for old_class_name in class_names_to_rename:
    assert old_class_name not in remaining_class_names, f"Key '{old_class_name}' is present in the values of updated_class_dict"

In [146]:
updated_class_dict

{0: 'achacha',
 1: 'almond_butter',
 2: 'almonds',
 3: 'apple_custard',
 4: 'apple_green',
 5: 'apple_red',
 6: 'apricot',
 7: 'artichoke',
 8: 'asparagus',
 9: 'avocado',
 10: 'bacon',
 11: 'bacon_and_egg_burger',
 12: 'bagel',
 13: 'baklava',
 14: 'banana',
 15: 'banana_bread',
 16: 'barbecue_sauce',
 17: 'basil',
 18: 'bay_leaves',
 19: 'bean_sprouts',
 20: 'beans',
 21: 'beef_curry',
 22: 'beef_diced',
 23: 'beef_kebab',
 24: 'beef_mince',
 25: 'beef_patty',
 26: 'beef_roast',
 27: 'beef_stir_fry',
 28: 'beer',
 29: 'beetroot',
 30: 'biltong',
 31: 'black_pepper',
 32: 'blackberries',
 33: 'blueberries',
 34: 'bok_choy',
 35: 'bread',
 36: 'bread_naan',
 37: 'broccoli',
 38: 'broccolini',
 39: 'brownie',
 40: 'brussel_sprouts',
 41: 'burrito',
 42: 'butter',
 43: 'cabbage_green',
 44: 'cabbage_red',
 45: 'cabbage_savoy',
 46: 'calamari',
 47: 'candy',
 48: 'cape_gooseberries',
 49: 'capsicum_green',
 50: 'capsicum_orange',
 51: 'capsicum_red',
 52: 'capsicum_yellow',
 53: 'carrot',

In [147]:
# Next
# See how many differences there are between updated_annotations and original_annotations
# Upload the new annotations to GCP
# Merge new images if their class_name is in the existing class_names (of the new labels)
# Upload images to GCP
# Track images and labels in W&B
# Train a model and evaluate on new data
# Make a way in data_loader.py to load data from specific sources, e.g. manual_download etc

In [148]:
# updated_annotations.class_name.unique()

In [149]:
# TODO: move this into utils folder 
from utils.misc import check_for_differences_between_df

num_differences = check_for_differences_between_df(updated_annotations, original_annotations)
num_differences

Number of intersecting columns: 11
Checking for differences accross the following columns: ['filename', 'image_name', 'class_name', 'label', 'split', 'clear_or_confusing', 'whole_food_or_dish', 'one_food_or_multiple', 'label_last_updated_at', 'label_source', 'image_source']


115095

In [150]:
updated_annotations.head()

Unnamed: 0,filename,image_name,class_name,label,split,clear_or_confusing,whole_food_or_dish,one_food_or_multiple,label_last_updated_at,label_source,image_source
0,test/pain_au_chocolat/4fd7cb42-bd7f-48f1-bfdc-...,4fd7cb42-bd7f-48f1-bfdc-607c2f54b788.jpg,pain_au_chocolat,193,test,,,,,,internet_download
1,test/pain_au_chocolat/2062f52a-781c-4e4f-b8a7-...,2062f52a-781c-4e4f-b8a7-0a108934f453.jpg,pain_au_chocolat,193,test,,,,,,internet_download
2,test/pain_au_chocolat/8003e0f6-37e8-460d-9c14-...,8003e0f6-37e8-460d-9c14-e7c6fe44a37f.jpg,pain_au_chocolat,193,test,,,,,,internet_download
3,test/pain_au_chocolat/839437c8-c643-408f-9f04-...,839437c8-c643-408f-9f04-d0d3bec238c3.jpg,pain_au_chocolat,193,test,,,,,,internet_download
4,test/pain_au_chocolat/ca5c13ff-a535-4b69-9144-...,ca5c13ff-a535-4b69-9144-e06275e01e35.jpg,pain_au_chocolat,193,test,,,,,,internet_download


In [151]:
config.annotations_columns_to_export

['filename',
 'image_name',
 'class_name',
 'label',
 'split',
 'clear_or_confusing',
 'whole_food_or_dish',
 'one_food_or_multiple',
 'label_last_updated_at',
 'label_source',
 'image_source']

In [152]:
# Show the value counts of the class_name column
updated_annotations['class_name'].value_counts()

choy_sum            2085
finger_lime         2083
radicchio           2045
chervil             2040
quince              2013
                    ... 
ice                    1
squash_spaghetti       1
jalapeno               1
beef_diced             1
thyme                  1
Name: class_name, Length: 308, dtype: int64

In [153]:
# Upload the updated annotations to Google Storage and track the changes
import os
from utils.gcp_utils import upload_to_gs, rename_blob, delete_blob
from utils.wandb_utils import wandb_add_artifact_with_reference
from utils.misc import get_now_time

UPDATED_ANNOTATIONS_TARGET_FILENAME = "updated_annotations.csv"
ORIGINAL_ANNOTATIONS_TARGET_FILENAME = "annotations.csv"
GS_BUCKET_NAME = config.gs_bucket_name

# Export the updated annotations to a CSV
columns_to_export = config.annotations_columns_to_export
print(f"[INFO] Exporting the following columns to {UPDATED_ANNOTATIONS_TARGET_FILENAME}: {columns_to_export}")

# Only save and upload when updated_annotations actually differs from original_annotations
# (num_differences comes from check_for_differences_between_df in an earlier cell)
if num_differences > 0:
    print(f"[INFO] {num_differences} changes to annotations.csv, updated label files and original annotations are different, saving the updated annotations.csv")

    # Export updated_annotations to a local csv
    updated_annotations[columns_to_export].to_csv(UPDATED_ANNOTATIONS_TARGET_FILENAME, index=False)

    # Upload the updated CSV to Google Storage
    upload_to_gs(bucket_name=GS_BUCKET_NAME, 
                 source_file_name=UPDATED_ANNOTATIONS_TARGET_FILENAME, 
                 destination_blob_name=UPDATED_ANNOTATIONS_TARGET_FILENAME)

    # Rename the old CSV on Google Storage
    # NOTE: despite its name, this is a prefix ("folder") inside GS_BUCKET_NAME, not a separate bucket.
    bucket_to_move_old_annotations_to = "old_annotations"
    # Join with "/" explicitly: this is an object name, not a local path, so os.path.join
    # (which uses "\\" on Windows) is the wrong tool here.
    name_to_rename_old_annotations = f"{bucket_to_move_old_annotations_to}/{get_now_time()}_old_annotations.csv"

    # NOTE(review): the two renames below are separate calls; if the second one fails the bucket
    # is left without an "annotations.csv" until it is re-run or fixed by hand.
    rename_blob(bucket_name=GS_BUCKET_NAME,
                blob_name=ORIGINAL_ANNOTATIONS_TARGET_FILENAME,
                new_name=name_to_rename_old_annotations)

    # Rename the "updated_annotations.csv" on Google Storage to "annotations.csv" 
    rename_blob(bucket_name=GS_BUCKET_NAME,
                blob_name=UPDATED_ANNOTATIONS_TARGET_FILENAME,
                new_name=ORIGINAL_ANNOTATIONS_TARGET_FILENAME)

    # Track the changes in the annotations with Weights & Biases
    annotations_path_on_gcs = f"gs://{GS_BUCKET_NAME}/{ORIGINAL_ANNOTATIONS_TARGET_FILENAME}"
    wandb_add_artifact_with_reference(wandb_run=run,
                                      artifact_name="food_vision_labels",
                                      artifact_type="labels",
                                      description="Labels for FoodVision project",
                                      reference_path=annotations_path_on_gcs)
else:
    print("[INFO] No changes to annotations.csv, updated label files and original annotations are the same, try fixing/updating the label files and try again")

[INFO] Exporting the following columns to updated_annotations.csv: ['filename', 'image_name', 'class_name', 'label', 'split', 'clear_or_confusing', 'whole_food_or_dish', 'one_food_or_multiple', 'label_last_updated_at', 'label_source', 'image_source']
[INFO] 115095 changes to annotations.csv, updated label files and original annotations are different, saving the updated annotations.csv
[INFO] Uploading updated_annotations.csv to updated_annotations.csv...
[INFO] Connected to Google Storage bucket: food_vision_bucket_with_object_versioning
[INFO] File updated_annotations.csv uploaded to food_vision_bucket_with_object_versioning/updated_annotations.csv.
[INFO] File size: 31016185 bytes
[INFO] Blob annotations.csv has been renamed to old_annotations/2023-03-21_10-02-52_old_annotations.csv
[INFO] Blob updated_annotations.csv has been renamed to annotations.csv
[INFO] Logging 'food_vision_labels' from 'gs://food_vision_bucket_with_object_versioning/annotations.csv' to Weights & Biases...


In [154]:
# Pair every class_name with its label (a later row overwrites an earlier one for the same class_name)
class_name_to_label_dict = {
    class_name: label
    for class_name, label in zip(updated_annotations.class_name, updated_annotations.label)
}

# Flip the mapping so it reads label -> class_name
class_dict_updated = {label: class_name for class_name, label in class_name_to_label_dict.items()}

# Order the entries by label (the integer key)
class_dict_updated = {label: class_dict_updated[label] for label in sorted(class_dict_updated)}

class_dict_updated

{0: 'achacha',
 1: 'almond_butter',
 2: 'almonds',
 3: 'apple_custard',
 4: 'apple_green',
 5: 'apple_red',
 6: 'apricot',
 7: 'artichoke',
 8: 'asparagus',
 9: 'avocado',
 10: 'bacon',
 11: 'bacon_and_egg_burger',
 12: 'bagel',
 13: 'baklava',
 14: 'banana',
 15: 'banana_bread',
 16: 'barbecue_sauce',
 17: 'basil',
 18: 'bay_leaves',
 19: 'bean_sprouts',
 20: 'beans',
 21: 'beef_curry',
 22: 'beef_diced',
 23: 'beef_kebab',
 24: 'beef_mince',
 25: 'beef_patty',
 26: 'beef_roast',
 27: 'beef_stir_fry',
 28: 'beer',
 29: 'beetroot',
 30: 'biltong',
 31: 'black_pepper',
 32: 'blackberries',
 33: 'blueberries',
 34: 'bok_choy',
 35: 'bread',
 36: 'bread_naan',
 37: 'broccoli',
 38: 'broccolini',
 39: 'brownie',
 40: 'brussel_sprouts',
 41: 'burrito',
 42: 'butter',
 43: 'cabbage_green',
 44: 'cabbage_red',
 45: 'cabbage_savoy',
 46: 'calamari',
 47: 'candy',
 48: 'cape_gooseberries',
 49: 'capsicum_green',
 50: 'capsicum_orange',
 51: 'capsicum_red',
 52: 'capsicum_yellow',
 53: 'carrot',

In [155]:
# Export class_dict to JSON
# Note: JSON object keys are always strings, so the integer labels are written as "0", "1", ...
# and need converting back to int when the file is loaded.
import json
with open("class_dict.json", "w") as f:
    json.dump(class_dict_updated, f)

In [156]:
class_dict_updated

{0: 'achacha',
 1: 'almond_butter',
 2: 'almonds',
 3: 'apple_custard',
 4: 'apple_green',
 5: 'apple_red',
 6: 'apricot',
 7: 'artichoke',
 8: 'asparagus',
 9: 'avocado',
 10: 'bacon',
 11: 'bacon_and_egg_burger',
 12: 'bagel',
 13: 'baklava',
 14: 'banana',
 15: 'banana_bread',
 16: 'barbecue_sauce',
 17: 'basil',
 18: 'bay_leaves',
 19: 'bean_sprouts',
 20: 'beans',
 21: 'beef_curry',
 22: 'beef_diced',
 23: 'beef_kebab',
 24: 'beef_mince',
 25: 'beef_patty',
 26: 'beef_roast',
 27: 'beef_stir_fry',
 28: 'beer',
 29: 'beetroot',
 30: 'biltong',
 31: 'black_pepper',
 32: 'blackberries',
 33: 'blueberries',
 34: 'bok_choy',
 35: 'bread',
 36: 'bread_naan',
 37: 'broccoli',
 38: 'broccolini',
 39: 'brownie',
 40: 'brussel_sprouts',
 41: 'burrito',
 42: 'butter',
 43: 'cabbage_green',
 44: 'cabbage_red',
 45: 'cabbage_savoy',
 46: 'calamari',
 47: 'candy',
 48: 'cape_gooseberries',
 49: 'capsicum_green',
 50: 'capsicum_orange',
 51: 'capsicum_red',
 52: 'capsicum_yellow',
 53: 'carrot',

In [157]:
# Build a two-column (label, class_name) frame from the label -> class_name pairs
label_class_name_pairs = list(class_dict_updated.items())
class_dict_df = pd.DataFrame.from_records(label_class_name_pairs, columns=["label", "class_name"])

# Write it next to the notebook, without the index column
class_dict_df.to_csv("class_dict.csv", index=False)

In [158]:
len(class_dict_df)

308

In [159]:
class_dict_df

Unnamed: 0,label,class_name
0,0,achacha
1,1,almond_butter
2,2,almonds
3,3,apple_custard
4,4,apple_green
...,...,...
303,303,wine_red
304,304,wine_white
305,305,wombok
306,306,yoghurt
