In [18]:
# 1. Setup Environment
from google.colab import drive
import os
import pandas as pd
import shutil
from collections import defaultdict
from tqdm.notebook import tqdm
import re

In [19]:
print("Mounting Google Drive...")
drive.mount('/content/drive')

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
DRIVE_BASE_PATH = "/content/drive/My Drive"
PROJECT_DIR = "Colab Notebooks/lera"

# List all metadata CSV files relative to the PROJECT_DIR
# These will be combined into one large file.
METADATA_FILES = [
    f"plastic_foodware/metadata.csv",
]

# Subdirectory containing the original source images
SOURCE_DIR_SUBPATH = f"plastic_foodware/plastic_foodware_input"

# Subdirectory where the renamed photos and final metadata will be saved
DEST_DIR_SUBPATH = f"plastic_foodware/plastic_foodware_output"

In [21]:
# Construct full paths
BASE_PROJECT_PATH = os.path.join(DRIVE_BASE_PATH, PROJECT_DIR)
SOURCE_BASE_FOLDER = os.path.join(BASE_PROJECT_PATH, SOURCE_DIR_SUBPATH)
DESTINATION_FOLDER = os.path.join(BASE_PROJECT_PATH, DEST_DIR_SUBPATH)

print(f"Project Path: {BASE_PROJECT_PATH}")
print(f"Source Folder: {SOURCE_BASE_FOLDER}")
print(f"Destination Folder: {DESTINATION_FOLDER}")

Project Path: /content/drive/My Drive/Colab Notebooks/lera
Source Folder: /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/plastic_foodware_input
Destination Folder: /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/plastic_foodware_output


In [22]:
# --- 3. Define Processing Functions ---

def load_and_concat_metadata(base_path, file_list):
    """Loads all specified CSVs into a single DataFrame."""
    df_list = []
    print(f"Loading {len(file_list)} metadata file(s)...")
    for file_path_rel in file_list:
        file_path = os.path.join(base_path, file_path_rel)
        try:
            df = pd.read_csv(file_path)
            df_list.append(df)
            print(f"  Loaded {file_path} ({len(df)} rows)")
        except FileNotFoundError:
            print(f"  Warning: File not found, skipping: {file_path}")
        except Exception as e:
            print(f"  Error loading {file_path}: {e}")

    if not df_list:
        print("No metadata files were loaded. Exiting.")
        return None

    full_df = pd.concat(df_list, ignore_index=True)
    print(f"Total rows loaded: {len(full_df)}")
    return full_df

def generate_new_filenames(df):
    """Generates new filenames based on 'Group' and an incrementing counter."""

    group_counts = {}

    def create_new_name(row, group_counts_dict):
        group = str(row['Group'])
        original_name = str(row['Original Name'])

        # Get the full extension (e.g., .NIGHT.jpg, .jpg)
        # This finds the *first* dot in the filename, not the path
        basename = os.path.basename(original_name)
        parts = basename.split('.', 1)

        # Handle cases like '.jpg' or '.NIGHT.jpg'
        file_extension = parts[1] if len(parts) > 1 else 'jpg'

        # Increment group count
        count = group_counts_dict.get(group, 0) + 1
        group_counts_dict[group] = count

        return f"{group}_{count}.{file_extension}"

    print("Generating new filenames...")
    df['New Name'] = df.apply(lambda row: create_new_name(row, group_counts), axis=1)
    print("New filenames generated.")
    return df

def copy_and_rename_files(df, source_base, dest_folder, has_subpaths):
    """Copies files from source to destination with new names from the DataFrame."""

    os.makedirs(dest_folder, exist_ok=True)
    print(f"Created/verified destination folder: {dest_folder}")

    stats = defaultdict(int)

    print(f"Copying and renaming files... (Using subpaths: {has_subpaths})")
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        original_name_from_meta = str(row['Original Name'])
        new_name = str(row['New Name'])

        if has_subpaths:
            # Use the full relative path from the metadata
            # e.g., source_base + 'input_a/image.jpg'
            source_path = os.path.join(source_base, original_name_from_meta)
        else:
            # Use only the filename from the metadata
            # e.g., source_base + 'image.jpg'
            original_filename = os.path.basename(original_name_from_meta)
            source_path = os.path.join(source_base, original_filename)

        destination_path = os.path.join(dest_folder, new_name)

        if os.path.exists(source_path):
            try:
                shutil.copyfile(source_path, destination_path)
                ext_match = re.search(r'(\..*)$', new_name)
                ext = ext_match.group(1).lower() if ext_match else ".unknown"
                stats[f"copied_{ext}"] += 1
                stats['success'] += 1
            except Exception as e:
                print(f"Error copying '{original_name_from_meta}': {e}")
                stats['errors'] += 1
        else:
            print(f"Source file not found: '{source_path}'")
            stats['not_found'] += 1

    print("\n--- File Copy Summary ---")
    print(f"Successful copies: {stats['success']}")
    print(f"Files not found: {stats['not_found']}")
    print(f"Errors: {stats['errors']}")
    print("Copied file types:")
    for key, count in stats.items():
        if key.startswith('copied_'):
            print(f"  {key.replace('copied_', '')}: {count}")
    print("--------------------------")

def save_metadata(df, dest_folder, filename="metadata.csv"):
    """Saves the DataFrame as metadata.csv in the destination folder."""
    metadata_path = os.path.join(dest_folder, filename)
    try:
        df.to_csv(metadata_path, index=False)
        print(f"Successfully saved metadata to '{metadata_path}'")
    except Exception as e:
        print(f"Error saving metadata: {e}")

def check_for_subdirs(folder_path):
    """Checks if a directory contains any subdirectories."""
    try:
        entries = os.listdir(folder_path)
        for entry in entries:
            if os.path.isdir(os.path.join(folder_path, entry)):
                return True
    except FileNotFoundError:
        print(f"\nWarning: Source directory not found for subpath check: {folder_path}")
        return False
    except Exception as e:
        print(f"\nWarning: Error checking for subpaths: {e}")
        return False
    return False

In [23]:
# --- 4. Run Processing ---
if __name__ == "__main__":
    # 1. Load data
    metadata_df = load_and_concat_metadata(BASE_PROJECT_PATH, METADATA_FILES)

    if metadata_df is not None:
        print(f"\nTotal rows in combined DataFrame: {metadata_df.shape[0]}")
        print("\n--- Original Metadata Head ---")
        from IPython.display import display
        display(metadata_df.head())

        # 2. Generate new names
        metadata_df = generate_new_filenames(metadata_df)
        print("\n--- Metadata with New Names (Head) ---")
        display(metadata_df[['Original Name', 'Group', 'New Name']].head())

        # 3. Auto-detect if source has subfolders
        has_subpaths = check_for_subdirs(SOURCE_BASE_FOLDER)

        # 4. Copy files, passing the detected subpath setting
        copy_and_rename_files(
            metadata_df,
            SOURCE_BASE_FOLDER,
            DESTINATION_FOLDER,
            has_subpaths  # Pass the detected value
        )

        # 5. Save final metadata to the destination
        save_metadata(metadata_df, DESTINATION_FOLDER)

        print("\nScript finished successfully.")
    else:
        print("\nScript terminated due to loading errors.")

Loading 1 metadata file(s)...
  Loaded /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/metadata.csv (203 rows)
Total rows loaded: 203

Total rows in combined DataFrame: 203

--- Original Metadata Head ---


Unnamed: 0,Original Name,New Name,Group,Code,Other Text,Object Description,File Size (bytes),Timestamp,Status,Gemini Confidence,Grouping Confidence,Colors,Created At,Updated At
0,plastic_foodware_input/PXL_20251008_150823796.jpg,BFA.1.0_7.jpg,BFA.1.0,BFA.1.0,,Round plastic lid,3820257,2025-10-08T10:08:23.000Z,extracted,0.95,1.0,royal blue:#4169E1; alice blue:#F0F8FF; sienna...,2025-10-25T18:09:17.166Z,2025-10-25T18:19:56.975Z
1,plastic_foodware_input/PXL_20251008_150829754.jpg,BFA.1.0_2.jpg,BFA.1.0,,,Plastic bowl set,3508037,2025-10-08T10:08:29.000Z,user_grouped,0.85,1.0,royal blue:#464799; copper:#c4714f; off-white:...,2025-10-25T18:09:02.386Z,2025-10-25T18:19:56.948Z
2,plastic_foodware_input/PXL_20251008_150836524.jpg,BFA.1.0_3.jpg,BFA.1.0,BFA.1.0,TECHNIPLAST,Plastic petri dish,3284263,2025-10-08T10:08:36.000Z,extracted,0.95,1.0,royal blue:#4169E1; sienna:#A0522D; chocolate:...,2025-10-25T18:09:02.570Z,2025-10-25T18:19:56.953Z
3,plastic_foodware_input/PXL_20251008_150847314.jpg,BFA.1.0.jpg,BFA.1.0,,,Plastic bowl set,2902694,2025-10-08T10:08:47.000Z,user_grouped,0.85,1.0,dark blue:#483D8B; white:#F5F5F5; sienna:#A0522D,2025-10-25T18:09:01.292Z,2025-10-25T18:19:56.942Z
4,plastic_foodware_input/PXL_20251016_200439127.jpg,KEN.1.5.jpg,KEN.1.5,BUS-00000-15-F,KEN.1.5,Pink plastic plate,3174189,2025-10-16T15:04:39.000Z,user_grouped,0.95,1.0,lightcoral:#F08080; darkSalmon:#E9967A; salmon...,2025-10-25T18:09:01.681Z,2025-10-25T18:13:53.779Z


Generating new filenames...
New filenames generated.

--- Metadata with New Names (Head) ---


Unnamed: 0,Original Name,Group,New Name
0,plastic_foodware_input/PXL_20251008_150823796.jpg,BFA.1.0,BFA.1.0_1.jpg
1,plastic_foodware_input/PXL_20251008_150829754.jpg,BFA.1.0,BFA.1.0_2.jpg
2,plastic_foodware_input/PXL_20251008_150836524.jpg,BFA.1.0,BFA.1.0_3.jpg
3,plastic_foodware_input/PXL_20251008_150847314.jpg,BFA.1.0,BFA.1.0_4.jpg
4,plastic_foodware_input/PXL_20251016_200439127.jpg,KEN.1.5,KEN.1.5_1.jpg


Created/verified destination folder: /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/plastic_foodware_output
Copying and renaming files... (Using subpaths: False)


  0%|          | 0/203 [00:00<?, ?it/s]


--- File Copy Summary ---
Successful copies: 203
Files not found: 0
Errors: 0
Copied file types:
  .1.0_1.jpg: 6
  .1.0_2.jpg: 6
  .1.0_3.jpg: 5
  .1.0_4.jpg: 1
  .1.5_1.jpg: 5
  .1.5_2.jpg: 5
  .1.7_1.jpg: 5
  .1.7_2.jpg: 5
  .1.7_3.jpg: 4
  .2.1_1.jpg: 3
  .2.1_2.jpg: 3
  .2.1_3.jpg: 2
  .1.9_1.jpg: 3
  .1.9_2.jpg: 3
  .1.9_3.jpg: 3
  .1.3_1.jpg: 5
  .1.3_2.jpg: 5
  .1.3_3.jpg: 3
  .1.6_1.jpg: 5
  .1.6_2.jpg: 5
  .1.6_3.jpg: 3
  .1.8_1.jpg: 5
  .1.8_2.jpg: 5
  .1.8_3.jpg: 5
  .2.0_1.jpg: 2
  .2.0_2.jpg: 2
  .2.0_3.jpg: 2
  .1.5_3.jpg: 2
  .1.5_4.jpg: 1
  .1.5_5.jpg: 1
  .1.5_6.jpg: 1
  .1.8_4.jpg: 3
  .1.2_1.jpg: 6
  .1.2_2.jpg: 6
  .1.4_1.jpg: 5
  .1.4_2.jpg: 5
  .1.1_1.jpg: 7
  .1.1_2.jpg: 7
  .1.4_3.jpg: 3
  .1.2_3.jpg: 3
  .1.1_3.jpg: 3
  .2.2_1.jpg: 1
  .2.2_2.jpg: 1
  .2.2_3.jpg: 1
  .1.0_5.jpg: 1
  .1.0_6.jpg: 1
  .1.0_7.jpg: 1
  .3.1_1.jpg: 1
  .3.1_2.jpg: 1
  .3.1_3.jpg: 1
  .2.6_1.jpg: 1
  .2.6_2.jpg: 1
  .2.6_3.jpg: 1
  .2.6_4.jpg: 1
  .2.3_1.jpg: 1
  .2.3_2.jpg: 1
  .2.3