In [None]:
!wget http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
!tar -xf images.tar

In [None]:
from PIL import Image
import os
import numpy as np
from typing import List, Tuple, Dict
from multiprocessing import Pool
import pickle
from google.colab import drive

In [None]:
def load_image(filename: str, folder_path: str) -> np.ndarray:
    """
    Load an image from a file, convert it to RGB and resize it to 224x224.

    Args:
        filename: The name of the file to load.
        folder_path: The path to the folder containing the file.

    Returns:
        A numpy array with shape (224, 224, 3) containing the loaded image.

    Raises:
        Whatever PIL raises when the file is missing or cannot be decoded as
        an image; the error is not caught here.
    """
    image_path = os.path.join(folder_path, filename)
    # Image.open is lazy and holds the file handle; the context manager
    # releases it deterministically instead of relying on garbage collection.
    with Image.open(image_path) as source:
        # convert() and resize() each return a new in-memory image, so the
        # result stays valid after the source file has been closed.
        resized = source.convert('RGB').resize((224, 224))
    return np.array(resized)

def load_images_from_folder(folder_path: str, max_images: int = 30) -> List[np.ndarray]:
    """
    Load up to `max_images` images from a directory, resized to 224x224.

    Directory entries are sorted by name before the limit is applied, so the
    same subset is selected on every run regardless of the order in which the
    filesystem lists them.

    Args:
        folder_path: Path to the directory containing images.
        max_images: Maximum number of directory entries to load (default 30).
            Must be non-negative.

    Returns:
        A list of numpy arrays, one per loaded image, each with shape
        (224, 224, 3) (for the RGB color channels). The list is empty when
        the directory is empty or `max_images` is 0.

    Raises:
        ValueError: If `max_images` is negative.
    """
    if max_images < 0:
        raise ValueError(f"max_images must be non-negative, got {max_images}")

    # os.listdir returns entries in arbitrary order; sort so the slice below
    # picks a reproducible subset.
    filenames = sorted(os.listdir(folder_path))[:max_images]
    if not filenames:
        # Nothing to load: avoid starting a worker pool for no work.
        return []

    # Decode and resize the images in parallel across worker processes.
    with Pool() as p:
        image_list = p.starmap(load_image, [(filename, folder_path) for filename in filenames])
    return image_list

def load_images_from_local_folder(root_folder: str = 'Images') -> Tuple[np.ndarray, np.ndarray, Dict[int, str]]:
    """
    Load images from every class sub-folder of `root_folder`, resized to 224x224.

    Each sub-directory of `root_folder` is treated as one class. Class indices
    are assigned in sorted order of the sub-directory names and are contiguous
    (0, 1, 2, ...); entries that are not directories are ignored and do not
    consume an index.

    Args:
        root_folder: Directory whose sub-directories each hold the images of
            one class. Defaults to "Images".

    Returns:
        A tuple (images, labels, class_dict):
        - images: an array of shape (num_images, 224, 224, 3) containing all the images.
        - labels: an array of shape (num_images,) containing the corresponding labels.
        - class_dict: a dictionary mapping class indices to class names (dog breeds).
        When no images are found both arrays are empty with shape (0,) and
        class_dict is empty.
    """
    # Filter to directories BEFORE enumerating so that stray files in the root
    # folder cannot leave gaps in the label indices.
    class_names = sorted(
        entry for entry in os.listdir(root_folder)
        if os.path.isdir(os.path.join(root_folder, entry))
    )

    images = []
    labels = []
    class_dict = {}
    for i, class_name in enumerate(class_names):
        class_folder = os.path.join(root_folder, class_name)
        print(f"Loading images for class {i}: {class_name}")
        class_images = load_images_from_folder(class_folder)
        images.extend(class_images)
        # One label per image actually loaded for this class.
        labels.extend([i] * len(class_images))
        class_dict[i] = class_name
    return np.array(images), np.array(labels), class_dict

def save_to_drive(images: np.ndarray, labels: np.ndarray, class_dict: Dict[int, str], folder_name: str):
    """
    Persist the dataset to a folder inside the user's Google Drive.

    Drive is mounted at /content/drive, the target folder is created under
    "My Drive" if it does not exist yet, and three files are written into it:
    images.npy, labels.npy and class_mapping.pkl.

    Args:
        images: a numpy array of shape (num_images, 224, 224, 3) containing the images.
        labels: a numpy array of shape (num_images,) containing the corresponding labels.
        class_dict: a dictionary mapping class indices to class names (dog breeds).
        folder_name: the name of the folder to create in the user's Google Drive.

    Returns:
        None.
    """
    drive.mount('/content/drive')

    target_dir = os.path.join('/content/drive/My Drive', folder_name)
    os.makedirs(target_dir, exist_ok=True)

    # The two arrays are stored in NumPy's .npy format, images first.
    for array_file, array in (('images.npy', images), ('labels.npy', labels)):
        np.save(os.path.join(target_dir, array_file), array)

    # The index -> class-name mapping is a plain dict, so it is pickled.
    mapping_path = os.path.join(target_dir, 'class_mapping.pkl')
    with open(mapping_path, 'wb') as handle:
        pickle.dump(class_dict, handle)

    print(f"Saved {len(images)} images and labels to {folder_name} in Google Drive")

In [None]:
# Read the class folders under "Images" into memory: `images` holds the pixel
# arrays, `labels` the matching class indices, and `class_dict` maps each
# class index to its folder name.
images, labels, class_dict = load_images_from_local_folder()

In [None]:
# Mount Google Drive and write images.npy, labels.npy and class_mapping.pkl
# into the named folder under "My Drive".
save_to_drive(images, labels, class_dict, 'michael_tanmay_ibm_ai_workshop')