# Utilities

This file creates a CSV of the image URLs. That CSV needs to be uploaded to Label Studio.
The URLs point to the images of the desired dataset and they are served with the co-located `./serve_local_files.py`.
Note that `SERVER_DIRECTORY` must be the same here and in `./serve_local_files.py`; in both cases it points to the root folder where the URL paths are taken from.

In [2]:
import csv
import os
from urllib.parse import quote

In [19]:
def list_image_files(directory, server_directory, base_url="http://localhost:8000/"):
    """
    Recursively lists all image URLs from a local server for the images in the given directory and its subdirectories.

    Each image path is made relative to ``server_directory``, converted to
    forward slashes, percent-encoded and appended to ``base_url``.

    :param directory: Path to the directory that is walked (subdirectories included).
    :param server_directory: Path to the directory from which the server is started.
    :param base_url: URL prefix of the server. A trailing slash is appended if a
        non-empty value does not already end with one.
    :return: List of URLs to image files served from a local server. Directories
        and files are visited in sorted name order, so the result is reproducible.
    """
    # Common image extensions; a tuple so str.endswith can test them in one call.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.webp')

    # Make sure prefix and path are always separated by exactly one slash.
    if base_url and not base_url.endswith('/'):
        base_url += '/'

    image_urls = []
    for dirpath, dirnames, filenames in os.walk(directory):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order instead of the OS listing order.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.lower().endswith(image_extensions):
                continue
            # Convert the file path to a URL path relative to the server root.
            relative_path = os.path.relpath(os.path.join(dirpath, filename), server_directory)
            web_path = relative_path.replace(os.sep, '/')
            # Percent-encode spaces and other characters that are not valid in
            # a URL path; '/' is left untouched by quote()'s default `safe`.
            image_urls.append(base_url + quote(web_path))

    return image_urls

In [20]:
def save_to_csv(image_paths, output_file):
    """
    Save list of image paths to a single-column CSV file with an ``image_path`` header.

    The file is overwritten if it already exists and is always written as UTF-8.

    :param image_paths: Iterable of image paths (or URLs), one per output row.
    :param output_file: Path to the output CSV file.
    """
    # newline='' lets the csv module control line endings itself; the explicit
    # encoding keeps the output independent of the platform's default locale.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(["image_path"])  # Writing the header
        csv_writer.writerows([path] for path in image_paths)

In [21]:
# Root folder served by ./serve_local_files.py; URL paths are relative to it.
SERVER_DIRECTORY = 'C:/Users/Msagardi/git_repositories/tool_guides/labelstudio/data'
# Folder (inside the server root) whose images should be listed.
DIRECTORY_PATH = 'C:/Users/Msagardi/git_repositories/tool_guides/labelstudio/data/flowers/test/'

image_paths = list_image_files(
    directory=DIRECTORY_PATH,
    server_directory=SERVER_DIRECTORY,
)
# Preview the first five URLs, e.g. 'http://localhost:8000/flowers/test/Image_1.jpg'
print(image_paths[0:5])

['http://localhost:8000/flowers/test/Image_1.jpg', 'http://localhost:8000/flowers/test/Image_10.jpg', 'http://localhost:8000/flowers/test/Image_100.jpg', 'http://localhost:8000/flowers/test/Image_101.jpg', 'http://localhost:8000/flowers/test/Image_102.jpg']


In [22]:
# Write the collected URLs to the CSV that is later uploaded to Label Studio.
output_csv_path = 'image_paths.csv'
save_to_csv(image_paths=image_paths, output_file=output_csv_path)

In [23]:
# Now, in ./serve_local_files.py, we need to set
#   SERVER_DIRECTORY
# with the same path as here.
# Then, we execute it:
#   python serve_local_files.py
# Once it is running, the images are served at the URLs written to the CSV.