In [8]:
print("Installing required libraries...")

!pip install pyheif Pillow

Installing required libraries...


In [3]:
# 1. Setup Environment
from google.colab import drive
import os
import shutil
from collections import defaultdict
import pyheif
from PIL import Image
from tqdm.notebook import tqdm

# Mount Google Drive into the Colab filesystem at /content/drive.
# Uses google.colab.drive, so this cell only works inside Colab.
print("Mounting Google Drive...")
drive.mount('/content/drive')

Installing required libraries...
Mounting Google Drive...
Mounted at /content/drive


In [4]:
# Root of the user's Google Drive as seen from the notebook filesystem
DRIVE_BASE_PATH = "/content/drive/My Drive"
# Project folder, relative to DRIVE_BASE_PATH
PROJECT_DIR = "Colab Notebooks/lera"

# Subfolder (relative to PROJECT_DIR) containing the raw photos to process
INPUT_SUBDIR = "plastic_foodware/All photos"

# Subfolder (relative to PROJECT_DIR) where converted/copied photos will be saved
OUTPUT_SUBDIR = "plastic_foodware/plastic_foodware_input"

In [5]:
# Resolve the absolute Drive locations used by the rest of the notebook:
# the project root first, then the input and output folders beneath it.
BASE_PROJECT_PATH = os.path.join(DRIVE_BASE_PATH, PROJECT_DIR)
INPUT_FOLDER_PATH, OUTPUT_FOLDER_PATH = (
    os.path.join(BASE_PROJECT_PATH, subdir)
    for subdir in (INPUT_SUBDIR, OUTPUT_SUBDIR)
)

# Echo both paths so a wrong folder is noticed before any file is touched
print(
    f"Input folder: {INPUT_FOLDER_PATH}",
    f"Output folder: {OUTPUT_FOLDER_PATH}",
    sep="\n",
)

Input folder: /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/All photos
Output folder: /content/drive/My Drive/Colab Notebooks/lera/plastic_foodware/plastic_foodware_input


In [6]:
def process_image_files(input_dir, output_dir):
    """
    Convert every .heic file in input_dir to .jpeg in output_dir and copy
    all other files across unchanged.

    Only the top level of input_dir is scanned; entries that are not regular
    files are counted as skipped. A failure on one file is printed and
    counted, and processing continues with the next file.

    Args:
        input_dir: Folder containing the source photos.
        output_dir: Folder that receives the converted/copied files. It is
            created (including parents) only after input_dir has been listed
            successfully, so a bad input path leaves no stray folder behind.

    Returns:
        A defaultdict(int) of counters, or None when input_dir cannot be
        listed. Keys: 'converted', 'copied', 'errors', 'skipped_dirs', plus
        'input_type_<ext>' and 'output_type_<ext>' per file extension
        ('.no_extension' when a file has none). Input-type counters include
        files that later failed; output-type counters only include files
        that were actually written.
    """
    print("Starting file processing...")

    # Validate the input first; os.listdir raises NotADirectoryError when the
    # path exists but is a file, which is reported the same way as a missing path.
    try:
        files_in_folder = os.listdir(input_dir)
    except (FileNotFoundError, NotADirectoryError):
        print(f"Error: Input directory not found: {input_dir}")
        return None

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    stats = defaultdict(int)

    for filename in tqdm(files_in_folder):
        input_file_path = os.path.join(input_dir, filename)

        if not os.path.isfile(input_file_path):
            stats['skipped_dirs'] += 1
            continue

        # Count the input type up front so files that fail below still show
        # up in the input breakdown.
        input_ext = os.path.splitext(filename)[1].lower() or ".no_extension"
        stats[f"input_type_{input_ext}"] += 1

        try:
            if filename.lower().endswith('.heic'):
                # Convert HEIC to JPEG
                heif_file = pyheif.read(input_file_path)
                image = Image.frombytes(
                    heif_file.mode,
                    heif_file.size,
                    heif_file.data,
                    "raw",
                    heif_file.mode,
                    heif_file.stride,
                )

                # JPEG has no alpha channel; Pillow refuses to write RGBA as
                # JPEG, so flatten anything that is not already RGB/greyscale.
                if image.mode not in ("RGB", "L"):
                    image = image.convert("RGB")

                output_filename = os.path.splitext(filename)[0] + '.jpeg'
                output_file_path = os.path.join(output_dir, output_filename)
                image.save(output_file_path, "jpeg")
                stats['converted'] += 1
                stats["output_type_.jpeg"] += 1
            else:
                # Copy other files directly (copy2 also preserves metadata
                # such as modification times where the platform allows)
                output_file_path = os.path.join(output_dir, filename)
                shutil.copy2(input_file_path, output_file_path)
                stats['copied'] += 1
                stats[f"output_type_{input_ext}"] += 1

        except Exception as e:
            # Best-effort batch: report the failure and keep going
            print(f"Error processing {filename}: {e}")
            stats['errors'] += 1

    # Skipped non-file entries were never attempted, so exclude them from the
    # "processed" total.
    files_attempted = len(files_in_folder) - stats['skipped_dirs']

    print("\n--- Processing Summary ---")
    print(f"Total files processed: {files_attempted}")
    print(f"Files converted (HEIC): {stats['converted']}")
    print(f"Files copied (non-HEIC): {stats['copied']}")
    print(f"Errors: {stats['errors']}")
    print(f"Skipped (directories): {stats['skipped_dirs']}")

    print("\nInput file types:")
    for key, count in stats.items():
        if key.startswith('input_type_'):
            print(f"  {key.replace('input_type_', '')}: {count}")

    print("\nOutput file types:")
    for key, count in stats.items():
        if key.startswith('output_type_'):
            print(f"  {key.replace('output_type_', '')}: {count}")
    print("--------------------------")
    return stats

In [7]:
# Driver cell: run the conversion/copy over the configured folders, then
# verify the result against what is actually on disk.
if __name__ == "__main__":
    print("Starting script...")
    processing_stats = process_image_files(INPUT_FOLDER_PATH, OUTPUT_FOLDER_PATH)

    if processing_stats:
        # Number of files the run reports having written
        files_written = sum(
            count
            for key, count in processing_stats.items()
            if key.startswith('output_type_')
        )
        # Number of regular files really present in the output folder. This
        # can exceed files_written when the folder holds files from an earlier
        # run, and fall below it when two inputs mapped to the same output name.
        files_on_disk = sum(
            1
            for entry in os.listdir(OUTPUT_FOLDER_PATH)
            if os.path.isfile(os.path.join(OUTPUT_FOLDER_PATH, entry))
        )

        print("\n--- Verification ---")
        print(f"Files written this run: {files_written}")
        print(f"Total files in output directory: {files_on_disk}")
        if files_on_disk < files_written:
            print(
                "Warning: fewer files on disk than were written; "
                "some outputs may have overwritten each other."
            )
    else:
        print("Script finished with errors or no files processed.")

Starting script...
Starting file processing...


  0%|          | 0/203 [00:00<?, ?it/s]


--- Processing Summary ---
Total files processed: 203
Files converted (HEIC): 0
Files copied (non-HEIC): 203
Errors: 0
Skipped (directories): 0

Input file types:
  .jpg: 203

Output file types:
  .jpg: 203
--------------------------

--- Verification ---
Total files in output directory: 203
