## Manually label and upload photos to the database

* **Input:** folder in the classic image classification format:

```
images/
    class_1/
        image_1.jpg
        image_2.jpg
        ...
    class_2/
        image_3.jpeg
        image_4.jpeg
        ...
    ...
```

* **Output:** Labelled images stored in GCP and tracked with Weights & Biases Artifacts. 


In [43]:
# Append the upper level directory to sys
import sys
sys.path.append("..")

import pandas as pd
import numpy as np

from pathlib import Path

In [44]:
# !rm -rf food_photos/*
# !rm -rf _MACOSX/*

In [45]:
# !unzip -q 2023-02-08-food_photos.zip

In [47]:
# Collect every .jpeg file exactly one folder below food_photos/ (food_photos/<class>/<file>.jpeg)
path = Path('food_photos')
all_paths = list(path.glob('*/*.jpeg'))
all_paths.sort()  # sorted order makes the listing stable between runs
all_paths[:10]

[PosixPath('food_photos/apple_green/02ff91bd-e0f6-4c45-8ff7-647c28e61892.jpeg'),
 PosixPath('food_photos/apple_green/146c487e-66c1-4eec-82c0-f55d63e6cd3e.jpeg'),
 PosixPath('food_photos/apple_green/15f9c8ae-0445-4bdf-8aa1-750466fb2f81.jpeg'),
 PosixPath('food_photos/apple_green/16b97ddc-2283-4869-81ab-87304ef686c0.jpeg'),
 PosixPath('food_photos/apple_green/1a1c3398-add7-467a-acf1-4422e76e32a2.jpeg'),
 PosixPath('food_photos/apple_green/258e04e8-a8ca-48c0-a81e-7174e85af4ae.jpeg'),
 PosixPath('food_photos/apple_green/27261ac1-45c9-4654-85b0-c8638c7d2887.jpeg'),
 PosixPath('food_photos/apple_green/296f8b26-0fb0-43d3-85c4-6165fa847415.jpeg'),
 PosixPath('food_photos/apple_green/29c2d5e4-9002-4119-961e-762faadd083c.jpeg'),
 PosixPath('food_photos/apple_green/29d088ed-9990-41b9-b2f4-a4b07a399e8d.jpeg')]

In [48]:
# Total number of .jpeg files found under food_photos/
len(all_paths)

1257

In [49]:
import uuid

def is_valid_uuid(string):
    """Return True if `string` parses as a UUID, False otherwise.

    Parsing is delegated to `uuid.UUID`, so every textual form it accepts
    (hyphenated or bare hex, optional braces or `urn:uuid:` prefix) is valid.

    Args:
        string: Candidate value, normally a filename stem.

    Returns:
        bool: True when `uuid.UUID(string)` succeeds; False for malformed
        strings and for non-string input such as None or numbers.
    """
    # uuid.UUID raises TypeError/AttributeError (not ValueError) for non-string
    # input, so reject those up front instead of letting them crash the caller.
    if not isinstance(string, str):
        return False
    try:
        uuid.UUID(string)
    except ValueError:
        return False
    return True

# Quick sanity check of is_valid_uuid on a name that is clearly not a UUID
filename = "some-file-name"
message = f"{filename} is a valid UUID" if is_valid_uuid(filename) else f"{filename} is not a valid UUID"
print(message)

some-file-name is not a valid UUID


In [50]:
# Give every file whose stem is not already a UUID a fresh uuid4-based name (same folder, .jpeg suffix)
for p in all_paths:
    if is_valid_uuid(p.stem):
        continue  # already UUID-named, leave it untouched
    new_name = uuid.uuid4()
    new_path_name = p.parent / f'{new_name}.jpeg'
    p.rename(new_path_name)

# Re-scan the directory so all_paths reflects the renamed files
path = Path('food_photos')
all_paths = list(path.glob('*/*.jpeg'))
all_paths.sort()
all_paths[:10]

[PosixPath('food_photos/apple_green/02ff91bd-e0f6-4c45-8ff7-647c28e61892.jpeg'),
 PosixPath('food_photos/apple_green/146c487e-66c1-4eec-82c0-f55d63e6cd3e.jpeg'),
 PosixPath('food_photos/apple_green/15f9c8ae-0445-4bdf-8aa1-750466fb2f81.jpeg'),
 PosixPath('food_photos/apple_green/16b97ddc-2283-4869-81ab-87304ef686c0.jpeg'),
 PosixPath('food_photos/apple_green/1a1c3398-add7-467a-acf1-4422e76e32a2.jpeg'),
 PosixPath('food_photos/apple_green/258e04e8-a8ca-48c0-a81e-7174e85af4ae.jpeg'),
 PosixPath('food_photos/apple_green/27261ac1-45c9-4654-85b0-c8638c7d2887.jpeg'),
 PosixPath('food_photos/apple_green/296f8b26-0fb0-43d3-85c4-6165fa847415.jpeg'),
 PosixPath('food_photos/apple_green/29c2d5e4-9002-4119-961e-762faadd083c.jpeg'),
 PosixPath('food_photos/apple_green/29d088ed-9990-41b9-b2f4-a4b07a399e8d.jpeg')]

In [51]:
# Unique class names = the parent folder name of each image, in alphabetical order
food_types = sorted({p.parent.name for p in all_paths})
food_types

['apple_green',
 'apple_red',
 'avocado',
 'bacon',
 'banana',
 'banana_bread',
 'beef_stir_fry',
 'biltong',
 'blueberries',
 'bread',
 'bread_naan',
 'broccoli',
 'broccolini',
 'butter',
 'capsicum',
 'carrot',
 'cheese',
 'cheeseburger',
 'cherries',
 'chicken_thighs',
 'coffee',
 'coleslaw',
 'corn',
 'cucumber',
 'curry_chicken',
 'dates',
 'eggs',
 'fries',
 'garlic',
 'grapes',
 'green_beans',
 'honey',
 'ice_coffee',
 'kiwi_fruit',
 'lemon',
 'lime',
 'lychee',
 'mango',
 'milk',
 'mushrooms',
 'nectarines',
 'omelette',
 'onion_brown',
 'onion_red',
 'onion_white',
 'orange_juice',
 'passionfruit',
 'peach',
 'plum',
 'pomegranate',
 'porridge',
 'potato_bake',
 'potato_brown',
 'potato_white',
 'pumpkin',
 'rice',
 'roast_pork',
 'roast_potatoes',
 'steak',
 'tea',
 'tomato',
 'watermelon',
 'yoghurt',
 'zucchini']

## Download original labels from Weights & Biases

In [52]:
# Get config
# (configs/ is importable because ".." was appended to sys.path in the first cell)
from configs.default_config import config

# Alias so the cells below can read settings as `args.<name>`; `config` itself is also used directly later on
args = config
print(args)

namespace(annotations_columns_to_export=['filename', 'image_name', 'class_name', 'label', 'split', 'clear_or_confusing', 'whole_food_or_dish', 'one_food_or_multiple', 'label_last_updated_at', 'label_source', 'image_source'], auto_augment=True, batch_size=128, epochs=10, gs_bucket_name='food_vision_bucket_with_object_versioning', gs_image_storage_path='https://storage.cloud.google.com/food_vision_bucket_with_object_versioning/all_images/', input_size=224, label_smoothing=0.1, learning_rate=0.001, model='coatnext_nano_rw_224', num_to_try_and_autocorrect=1000, path_to_gcp_credentials='utils/google-storage-key.json', path_to_label_studio_api_key='utils/label_studio_api_key.json', pretrained=True, seed=42, use_mixed_precision=True, wandb_dataset_artifact='food_vision_199_classes_images:latest', wandb_job_type='', wandb_labels_artifact='food_vision_labels:latest', wandb_model_artifact='trained_model:latest', wandb_project='test_wandb_artifacts_by_reference', wandb_run_notes='', wandb_run_tag

In [53]:
# Connect to GCP
from utils.gcp_utils import set_gcp_credentials, test_gcp_connection
# NOTE(review): the key path is written relative to this notebook ("../utils/...") while
# config.path_to_gcp_credentials holds "utils/..." — presumably the project-root form; confirm they point to the same file
set_gcp_credentials(path_to_key="../utils/google-storage-key.json")
test_gcp_connection()

[INFO] GCP credentials set!
[INFO] GCP connection successful! Access to GCP for saving/loading data and models available.


In [54]:
import wandb

# Project helpers for loading artifacts/labels tracked in Weights & Biases
from utils.wandb_utils import wandb_load_artifact, wandb_download_and_load_labels

# Free-text note stored on the run, recording how many photos this upload covers
notes = f"add {len(all_paths)} manually taken photos to the training dataset"

# Start a new W&B run tagged as a manual photo upload
run = wandb.init(
    project=args.wandb_project,
    job_type=args.wandb_job_type,
    tags=['manual_photo_upload'],
    notes=notes,
)

# Download the labels artifact named in the config and unpack everything the helper returns
(
    annotations,
    class_names,
    class_dict,
    reverse_class_dict,
    labels_path,
) = wandb_download_and_load_labels(
    wandb_run=run,
    wandb_labels_artifact_name=args.wandb_labels_artifact,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmrdbourke[0m. Use [1m`wandb login --relogin`[0m to force relogin


[INFO] Labels directory: ./artifacts/food_vision_labels:v18
[INFO] Labels path: artifacts/food_vision_labels:v18/annotations.csv
[INFO] Working with: 199 classes


In [55]:
# Make a copy of the annotations
# (the copy is the baseline that the new rows are appended to and compared against below)
original_annotations = annotations.copy()

In [57]:
# Column names of the existing annotations, as a plain Python list
columns_to_create = [column for column in original_annotations.columns]
columns_to_create

['filename',
 'image_name',
 'class_name',
 'label',
 'split',
 'clear_or_confusing',
 'whole_food_or_dish',
 'one_food_or_multiple',
 'label_last_updated_at',
 'label_source',
 'image_source']

In [58]:
# Gather every .jpeg one folder below food_photos/ (kept in glob order, i.e. not sorted)
image_paths = [*Path('food_photos').glob('*/*.jpeg')]
len(image_paths)

1257

In [60]:
# Split food_types into those that already exist as a class in class_names and those that do not
food_types_in_class_names = []
food_types_not_in_class_names = []
for food_type in food_types:
    if food_type in class_names:
        food_types_in_class_names.append(food_type)
    else:
        food_types_not_in_class_names.append(food_type)
len(food_types_in_class_names)

55

In [61]:
# Folder names that have no matching entry in class_names (their images get no annotation row below)
food_types_not_in_class_names

['apple_green',
 'apple_red',
 'bread_naan',
 'curry_chicken',
 'lychee',
 'onion_red',
 'onion_white',
 'potato_brown',
 'potato_white']

In [63]:
from utils.misc import get_now_time
import pandas as pd

# Column order of the annotations table. Passed to the DataFrame constructor below so the
# resulting frame always has exactly these columns, even if no image matches a known class.
columns = ['filename',
 'image_name',
 'class_name',
 'label',
 'split',
 'clear_or_confusing',
 'whole_food_or_dish',
 'one_food_or_multiple',
 'label_last_updated_at',
 'label_source',
 'image_source']

# One timestamp for the whole batch so every row of this upload carries the same value
label_timestamp = get_now_time()

# One dict per labelled image; turned into a DataFrame in a single step after the loop
food_image_dict_list = []

for image_path in image_paths:
    food_class = image_path.parent.name

    # Images in folders that are not an existing class have no label id, so they are skipped
    if food_class not in class_names:
        continue

    food_image_dict = {
        # Stored as a plain string (not a Path object) so the column holds a single type
        'filename': image_path.as_posix(),
        'image_name': image_path.name,
        'class_name': food_class,
        'label': reverse_class_dict[food_class],
        # Every manually uploaded photo currently goes to the training split.
        # # Label 20% off the images as test
        # if np.random.random() < 0.2:
        #     food_image_dict['split'] = 'test'
        # else:
        #     food_image_dict['split'] = 'train'
        'split': 'train',
        # NOTE(review): the three values below are applied unconditionally to every image,
        # whatever its class — confirm that is intended for all folders being uploaded.
        'clear_or_confusing': 'clear',
        'whole_food_or_dish': 'whole_food',
        'one_food_or_multiple': 'one_food',
        'label_last_updated_at': label_timestamp,
        'label_source': 'manual_upload',
        'image_source': 'manual_upload',
    }
    food_image_dict_list.append(food_image_dict)

# Create a dataframe from the list of dictionaries (explicit columns keep the schema when the list is empty)
new_annotations = pd.DataFrame(food_image_dict_list, columns=columns)
new_annotations.head()

Unnamed: 0,filename,image_name,class_name,label,split,clear_or_confusing,whole_food_or_dish,one_food_or_multiple,label_last_updated_at,label_source,image_source
0,food_photos/watermelon/913bb78d-c704-4dfe-9f18...,913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
1,food_photos/watermelon/25660bf7-b26f-4976-9905...,25660bf7-b26f-4976-9905-fb73249506cb.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
2,food_photos/watermelon/3a220ede-65a6-4fcd-aed4...,3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
3,food_photos/watermelon/7d86f328-ffcd-46b0-8bbe...,7d86f328-ffcd-46b0-8bbe-8dd3bc11fc03.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
4,food_photos/watermelon/3205f1ea-6c4f-41cb-ae2c...,3205f1ea-6c4f-41cb-ae2c-5c1c8dfaec53.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload


In [64]:
# Print how many images are train/test in new_annotations
# (the cell that builds new_annotations assigns 'train' to every row, so only one count is expected)
new_annotations['split'].value_counts()

train    878
Name: split, dtype: int64

In [65]:
# Display the table of new annotation rows for a visual check
new_annotations

Unnamed: 0,filename,image_name,class_name,label,split,clear_or_confusing,whole_food_or_dish,one_food_or_multiple,label_last_updated_at,label_source,image_source
0,food_photos/watermelon/913bb78d-c704-4dfe-9f18...,913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
1,food_photos/watermelon/25660bf7-b26f-4976-9905...,25660bf7-b26f-4976-9905-fb73249506cb.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
2,food_photos/watermelon/3a220ede-65a6-4fcd-aed4...,3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
3,food_photos/watermelon/7d86f328-ffcd-46b0-8bbe...,7d86f328-ffcd-46b0-8bbe-8dd3bc11fc03.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
4,food_photos/watermelon/3205f1ea-6c4f-41cb-ae2c...,3205f1ea-6c4f-41cb-ae2c-5c1c8dfaec53.jpeg,watermelon,194,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
...,...,...,...,...,...,...,...,...,...,...,...
873,food_photos/onion_brown/e6e3c735-bcb6-4fce-a0e...,e6e3c735-bcb6-4fce-a0e4-42a0b89449ff.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
874,food_photos/onion_brown/3bff4352-6f0d-4832-92b...,3bff4352-6f0d-4832-92b2-12d2ba684abb.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
875,food_photos/onion_brown/1b68f896-8471-4a79-9d4...,1b68f896-8471-4a79-9d45-9a1fbfabcee2.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
876,food_photos/onion_brown/8d391321-0b96-4696-840...,8d391321-0b96-4696-840b-53745ca4c848.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload


In [66]:
# Row counts as a tuple: (existing annotations, newly created annotations)
len(original_annotations), len(new_annotations)

(24650, 878)

In [67]:
# Append the new_annotations to the original_annotations
# ignore_index=True renumbers the combined rows from 0 instead of keeping each frame's own index.
# NOTE(review): nothing here removes rows that are already present, so re-running against labels
# that already contain these images would append them a second time.
updated_annotations = pd.concat([original_annotations, new_annotations], ignore_index=True)
# Earlier attempt kept for reference (the method name is missing — presumably the older DataFrame.append form):
# updated_annotations = original_annotations.(new_annotations, ignore_index=True)
updated_annotations

Unnamed: 0,filename,image_name,class_name,label,split,clear_or_confusing,whole_food_or_dish,one_food_or_multiple,label_last_updated_at,label_source,image_source
0,test/pain_au_chocolat/4fd7cb42-bd7f-48f1-bfdc-...,4fd7cb42-bd7f-48f1-bfdc-607c2f54b788.jpg,pain_au_chocolat,121,test,,,,,,internet_download
1,test/pain_au_chocolat/2062f52a-781c-4e4f-b8a7-...,2062f52a-781c-4e4f-b8a7-0a108934f453.jpg,pain_au_chocolat,121,test,,,,,,internet_download
2,test/pain_au_chocolat/8003e0f6-37e8-460d-9c14-...,8003e0f6-37e8-460d-9c14-e7c6fe44a37f.jpg,pain_au_chocolat,121,test,,,,,,internet_download
3,test/pain_au_chocolat/839437c8-c643-408f-9f04-...,839437c8-c643-408f-9f04-d0d3bec238c3.jpg,pain_au_chocolat,121,test,,,,,,internet_download
4,test/pain_au_chocolat/ca5c13ff-a535-4b69-9144-...,ca5c13ff-a535-4b69-9144-e06275e01e35.jpg,pain_au_chocolat,121,test,,,,,,internet_download
...,...,...,...,...,...,...,...,...,...,...,...
25523,food_photos/onion_brown/e6e3c735-bcb6-4fce-a0e...,e6e3c735-bcb6-4fce-a0e4-42a0b89449ff.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
25524,food_photos/onion_brown/3bff4352-6f0d-4832-92b...,3bff4352-6f0d-4832-92b2-12d2ba684abb.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
25525,food_photos/onion_brown/1b68f896-8471-4a79-9d4...,1b68f896-8471-4a79-9d45-9a1fbfabcee2.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload
25526,food_photos/onion_brown/8d391321-0b96-4696-840...,8d391321-0b96-4696-840b-53745ca4c848.jpeg,onion_brown,117,train,clear,whole_food,one_food,2023-02-08_16-58-09,manual_upload,manual_upload


In [68]:
# Check the difference in lengths between the original_annotations and updated_annotations
# (a row-count delta only: it equals the number of appended rows and does not detect edits to existing rows)
num_differences = len(updated_annotations) - len(original_annotations)
num_differences

878

In [69]:
# Upload the updated annotations to Google Storage and track the changes
from utils.gcp_utils import upload_to_gs, rename_blob, delete_blob
from utils.wandb_utils import wandb_add_artifact_with_reference
from utils.misc import get_now_time
import os

GS_BUCKET_NAME = config.gs_bucket_name

# The new CSV is uploaded under a temporary name and then swapped in for the live annotations.csv
UPDATED_ANNOTATIONS_TARGET_FILENAME = "updated_annotations.csv"
ORIGINAL_ANNOTATIONS_TARGET_FILENAME = "annotations.csv"

# Export the updated annotations to a CSV
columns_to_export = config.annotations_columns_to_export
print(f"[INFO] Exporting the following columns to {UPDATED_ANNOTATIONS_TARGET_FILENAME}: {columns_to_export}")

# TODO: Check if the updated_annotations_reset_index and the original_annotations actually differ, if so save them and upload them, else exit
if num_differences > 0:
    print(f"[INFO] {num_differences} changes to annotations.csv, updated label files and original annotations are different, saving the updated annotations.csv")

    # Write only the configured columns, without the DataFrame index
    updated_annotations[columns_to_export].to_csv(UPDATED_ANNOTATIONS_TARGET_FILENAME, index=False)

    # Upload the updated CSV to Google Storage
    upload_to_gs(bucket_name=GS_BUCKET_NAME,
                 source_file_name=UPDATED_ANNOTATIONS_TARGET_FILENAME,
                 destination_blob_name=UPDATED_ANNOTATIONS_TARGET_FILENAME)

    # Archive name for the old CSV: "old_annotations/<timestamp>_old_annotations.csv".
    # Object names in Google Storage always use "/" as the separator, so the name is built
    # with an explicit "/" rather than os.path.join (which would use "\" on Windows).
    bucket_to_move_old_annotations_to = "old_annotations"
    name_to_rename_old_annotations = f"{bucket_to_move_old_annotations_to}/{get_now_time()}_old_annotations.csv"

    # NOTE(review): between the next two renames the bucket has no "annotations.csv";
    # if the second rename fails, the live file has to be restored manually.
    rename_blob(bucket_name=GS_BUCKET_NAME,
                blob_name=ORIGINAL_ANNOTATIONS_TARGET_FILENAME,
                new_name=name_to_rename_old_annotations)

    # Rename the "updated_annotations.csv" on Google Storage to "annotations.csv"
    rename_blob(bucket_name=GS_BUCKET_NAME,
                blob_name=UPDATED_ANNOTATIONS_TARGET_FILENAME,
                new_name=ORIGINAL_ANNOTATIONS_TARGET_FILENAME)

    # Track the changes in the annotations with Weights & Biases
    annotations_path_on_gcs = f"gs://{GS_BUCKET_NAME}/{ORIGINAL_ANNOTATIONS_TARGET_FILENAME}"
    wandb_add_artifact_with_reference(wandb_run=run,
                                      artifact_name="food_vision_labels",
                                      artifact_type="labels",
                                      description="Labels for FoodVision project",
                                      reference_path=annotations_path_on_gcs)
else:
    print("[INFO] No changes to annotations.csv, updated label files and original annotations are the same, try fixing/updating the label files and try again")

[INFO] Exporting the following columns to updated_annotations.csv: ['filename', 'image_name', 'class_name', 'label', 'split', 'clear_or_confusing', 'whole_food_or_dish', 'one_food_or_multiple', 'label_last_updated_at', 'label_source', 'image_source']
[INFO] 878 changes to annotations.csv, updated label files and original annotations are different, saving the updated annotations.csv
[INFO] Uploading updated_annotations.csv to updated_annotations.csv...
[INFO] Connected to Google Storage bucket: food_vision_bucket_with_object_versioning
[INFO] File updated_annotations.csv uploaded to food_vision_bucket_with_object_versioning/updated_annotations.csv.
[INFO] File size: 3689276 bytes
[INFO] Blob annotations.csv has been renamed to old_annotations/2023-02-08_16-58-47_old_annotations.csv
[INFO] Blob updated_annotations.csv has been renamed to annotations.csv
[INFO] Logging 'food_vision_labels' from 'gs://food_vision_bucket_with_object_versioning/annotations.csv' to Weights & Biases...


## Upload all photos in food_photos to Google Storage

In [70]:
from utils.gcp_utils import upload_to_gs, get_list_of_blobs

# List what is currently stored under the "all_images" prefix of the bucket
gs_image_paths = get_list_of_blobs(
    bucket_name=GS_BUCKET_NAME,
    prefix="all_images",
)
print(f"[INFO] There are {len(gs_image_paths)} images in the GCP bucket")

[INFO] There are 24650 images in the GCP bucket


In [71]:
# Keep only the last "/"-separated component of each blob name (the bare image filename)
gs_image_paths_name = []
for blob in gs_image_paths:
    gs_image_paths_name.append(str(blob.name).rsplit("/", 1)[-1])
gs_image_paths_name[:10]

['00045a69-b09f-4293-8c2e-a7ba27964fb6.jpg',
 '0009bea1-1577-4db5-bc60-b27eaed2d276.jpg',
 '000bd61c-f25c-415c-8cf1-3adc2d788320.jpg',
 '000cb663-06ac-4f96-8f9b-795cd05b5bcf.jpg',
 '000ec632-ee17-4988-9b32-8e4cb1421636.jpg',
 '0010ae4e-0879-4447-9610-e9d422caadd7.jpg',
 '0012b344-1bf9-421b-977e-d4ad8a6dcb7b.jpg',
 '00141b2a-0ed8-45b0-8823-8c0dbef3ac93.jpg',
 '0018f7dd-217b-478b-9858-289922cf9715.jpg',
 '00193056-355d-48dd-a132-746988022ea2.jpg']

In [72]:
# Smoke test: push a single image before attempting the full batch
one_image = image_paths[0]

# The blob is stored under the "all_images/" prefix, keyed by the image's filename
upload_to_gs(
    bucket_name=GS_BUCKET_NAME,
    source_file_name=one_image,
    destination_blob_name=f"all_images/{one_image.name}",
)

[INFO] Uploading food_photos/watermelon/913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg to all_images/913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg...
[INFO] Connected to Google Storage bucket: food_vision_bucket_with_object_versioning
[INFO] File food_photos/watermelon/913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg uploaded to food_vision_bucket_with_object_versioning/all_images/913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg.
[INFO] File size: 177891 bytes


'all_images/913bb78d-c704-4dfe-9f18-e8223f4fe888.jpeg'

In [73]:
# Refresh the listing of the "all_images" prefix now that the test image has been uploaded
gs_image_paths = get_list_of_blobs(
    bucket_name=GS_BUCKET_NAME,
    prefix="all_images",
)
print(f"[INFO] There are {len(gs_image_paths)} images in the GCP bucket")

[INFO] There are 24651 images in the GCP bucket


In [74]:
# Rebuild the bare-filename list from the refreshed blob listing
gs_image_paths_name = []
for blob in gs_image_paths:
    gs_image_paths_name.append(str(blob.name).rsplit("/", 1)[-1])
gs_image_paths_name[:10]

['00045a69-b09f-4293-8c2e-a7ba27964fb6.jpg',
 '0009bea1-1577-4db5-bc60-b27eaed2d276.jpg',
 '000bd61c-f25c-415c-8cf1-3adc2d788320.jpg',
 '000cb663-06ac-4f96-8f9b-795cd05b5bcf.jpg',
 '000ec632-ee17-4988-9b32-8e4cb1421636.jpg',
 '0010ae4e-0879-4447-9610-e9d422caadd7.jpg',
 '0012b344-1bf9-421b-977e-d4ad8a6dcb7b.jpg',
 '00141b2a-0ed8-45b0-8823-8c0dbef3ac93.jpg',
 '0018f7dd-217b-478b-9858-289922cf9715.jpg',
 '00193056-355d-48dd-a132-746988022ea2.jpg']

In [75]:
# Loop through image_paths and upload the image as long as its name isn't in gs_image_paths_name
from tqdm.auto import tqdm

# Set of filenames already in the bucket: constant-time lookups instead of scanning
# the whole gs_image_paths_name list once per local image
existing_image_names = set(gs_image_paths_name)

num_images_uploaded = 0
for image_path in tqdm(image_paths):
    # Already present under all_images/ - nothing to upload
    if image_path.name in existing_image_names:
        continue

    # Upload the image to Google Storage
    upload_to_gs(bucket_name=GS_BUCKET_NAME,
                 source_file_name=image_path,
                 destination_blob_name=f"all_images/{image_path.name}")

    num_images_uploaded += 1

print(f"[INFO] Uploaded {num_images_uploaded} images to GCP bucket")

  0%|          | 0/1257 [00:00<?, ?it/s]

[INFO] Uploading food_photos/watermelon/25660bf7-b26f-4976-9905-fb73249506cb.jpeg to all_images/25660bf7-b26f-4976-9905-fb73249506cb.jpeg...
[INFO] Connected to Google Storage bucket: food_vision_bucket_with_object_versioning
[INFO] File food_photos/watermelon/25660bf7-b26f-4976-9905-fb73249506cb.jpeg uploaded to food_vision_bucket_with_object_versioning/all_images/25660bf7-b26f-4976-9905-fb73249506cb.jpeg.
[INFO] File size: 313765 bytes
[INFO] Uploading food_photos/watermelon/3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg to all_images/3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg...
[INFO] Connected to Google Storage bucket: food_vision_bucket_with_object_versioning
[INFO] File food_photos/watermelon/3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg uploaded to food_vision_bucket_with_object_versioning/all_images/3a220ede-65a6-4fcd-aed4-f956c7c3bce9.jpeg.
[INFO] File size: 194947 bytes
[INFO] Uploading food_photos/watermelon/7d86f328-ffcd-46b0-8bbe-8dd3bc11fc03.jpeg to all_images/7d86f328-ffcd-46b0-8

In [76]:
# Track the changes to GCP bucket with Weights & Biases
from utils.wandb_utils import wandb_add_artifact_with_reference

In [77]:
def wandb_add_artifact_with_reference(wandb_run, artifact_name, artifact_type, description, reference_path, max_objects=1_000_000_000):
    """Log a W&B artifact that references `reference_path` instead of copying its files.

    NOTE(review): this definition shadows the function of the same name imported
    from utils.wandb_utils in the previous cell.

    Args:
        wandb_run: Active run object; its `log_artifact` method is called.
        artifact_name: Name given to the artifact.
        artifact_type: Type given to the artifact (e.g. "dataset", "labels").
        description: Free-text description stored on the artifact.
        reference_path: URI passed to `Artifact.add_reference`
            (e.g. a "gs://bucket/prefix" path).
        max_objects: Upper bound on the number of objects the reference may
            cover. Defaults to 1 billion, the value previously hard-coded here.

    Returns:
        None
    """
    print(f"[INFO] Logging '{artifact_name}' from '{reference_path}' to Weights & Biases...")
    artifact = wandb.Artifact(name=artifact_name,
                              type=artifact_type,
                              description=description,
                              )
    # Coerced to int so float spellings such as 1e9 are passed on as whole numbers
    artifact.add_reference(reference_path, max_objects=int(max_objects))
    wandb_run.log_artifact(artifact)

In [78]:
# Track updated images_dir in Weights & Biases
# The path is built from GS_BUCKET_NAME (the bucket the images were uploaded to above) rather than a
# hard-coded bucket name, so the tracked reference cannot drift from the upload target.
food_images_path_on_gs = f"gs://{GS_BUCKET_NAME}/all_images"

wandb_add_artifact_with_reference(wandb_run=run,
                                  artifact_name="food_vision_199_classes_images",
                                  artifact_type="dataset",
                                  description="Images for FoodVision project",
                                  reference_path=food_images_path_on_gs)

[INFO] Logging 'food_vision_199_classes_images' from 'gs://food_vision_bucket_with_object_versioning/all_images' to Weights & Biases...


[34m[1mwandb[0m: Generating checksum for up to 1000000000 objects with prefix "all_images"... Done. 7.2s


In [79]:
# Blob count from the most recent listing, which was taken after the single test upload
# but before the bulk upload loop ran
original_num_images = len(gs_image_paths)
original_num_images

24651

In [80]:
# List the "all_images" prefix once more to confirm the bulk upload landed
gs_image_paths = get_list_of_blobs(
    bucket_name=GS_BUCKET_NAME,
    prefix="all_images",
)
print(f"[INFO] There are {len(gs_image_paths)} images in the GCP bucket")

[INFO] There are 25907 images in the GCP bucket


In [None]:
# Next:
# Create labels CSV for all photos in food_photos ✅
# Add labels CSV to original annotations ✅
# Upload all photos in food_photos to GCP ✅
# Track updates in W&B

# Train a model and track how it performs (by only upgrading training data)