In [None]:
import os
import openai
import random
import urllib.request
import ipywidgets as widgets
import matplotlib.pyplot as plt
import imgaug as ia
import imageio
import shutil
import yaml
import fnmatch
import numpy as np
from imgaug import augmenters as iaa
from PIL import Image, ImageDraw

# Read the OpenAI API key from the environment instead of hard-coding it;
# the key stays None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Constants

In [None]:
# Directory layout used by the notebook; all paths are relative to the working directory.

# Background photos, stored as background_<index>.png (generated ones are saved here too).
# Note the trailing slash: paths built as f"{BACKGROUND_PATH}/..." contain a double slash.
BACKGROUND_PATH = "backgrounds/"
# Output directory for the generated training images.
IMAGE_PATH = "dataset/train/images"
# Output directory for the label .txt files that belong to the training images.
LABEL_PATH = "dataset/train/labels"
# Source directory of the traffic sign cut-outs; the file name (without extension)
# is looked up as the class name when labels are written.
TRAFFIC_SIGN_PATH = "traffic_signs_only"
# Scratch directory that the generation loop deletes and recreates on every iteration.
TEMP_PATH = "temp"

# Generate Images

In [None]:
# Slider that controls how many images the generation loop produces (0-1000, default 10).
slider = widgets.IntSlider(
    value=10,
    min=0,
    max=1000,
    step=1,
    description='Number of images to generate:',
)
display(slider)

In [None]:
# Checked (default): reuse the background images already on disk.
# Unchecked: the generation loop requests a new background from the OpenAI image API.
checkbox = widgets.Checkbox(
    description='Use already generated images to create dataset?',
    value=True,
)
display(checkbox)

In [None]:
# Build the synthetic training set: for every requested image, paste 1-4 randomly
# rotated and resized traffic signs onto a background photo, optionally add a weather
# effect, save the result to IMAGE_PATH and write a matching YOLO label file to LABEL_PATH.

# create dataset directories if they don't exist
os.makedirs(IMAGE_PATH, exist_ok=True)
os.makedirs(LABEL_PATH, exist_ok=True)
# Open the YAML file and read the data
with open("data.yaml", "r") as file:
    data = yaml.safe_load(file)
# class names; a label's class id is the index of the sign's name in this list
class_names = data['names']

# --- loop-invariant configuration (hoisted out of the per-image loop) ---
# a list of different types of roads in different environments
road_types = ["highway", "highway with traffic", "highway with traffic and pedestrians", "highway with traffic and pedestrians and construction", "highway with traffic and pedestrians and construction and road works", "autobahn", "street", "street with traffic", "street with pedestrians"]
# a list of different day and night times
daytime = ["during daytime", "during nighttime", "during sunrise", "during sunset", "during dusk", "during dawn"]
# weather effects that can be applied to the finished image
weather = ["rain", "snow", "fog", "none"]
# augmentation applied to every traffic sign before it is pasted
augmentation_pipeline = iaa.Sequential([
    iaa.Affine(rotate=(-10, 10)),
])
# minimum distance (in pixels) between a pasted sign and the background border
edge_margin = 50

for i in range(slider.value):
    # positions and sizes of the pasted signs, keyed by sign file name (used for the preview boxes)
    traffic_sign_positions = {}
    # one YOLO label line per pasted sign
    label_lines = []
    # start every iteration with a clean temporary directory
    if os.path.exists(TEMP_PATH):
        shutil.rmtree(TEMP_PATH)

    if not checkbox.value:
        os.makedirs(BACKGROUND_PATH, exist_ok=True)
        # use DALL-E to generate a background image environment that usually contains traffic signs
        random_road_type = f"A photograph of a {random.choice(road_types)} {random.choice(daytime)} from drivers perspective."
        response = openai.Image.create(
            prompt=random_road_type,
            n=1,
            size="512x512"
        )
        image_url = response["data"][0]["url"]
        # the next free index is the number of files already in the background directory
        amount_background_images = len(os.listdir(BACKGROUND_PATH))
        background_file = os.path.join(BACKGROUND_PATH, f"background_{amount_background_images}.png")
        # download the image and save it to the background directory
        urllib.request.urlretrieve(image_url, background_file)
    else:  # use already generated images to create dataset
        amount_background_images = len(os.listdir(BACKGROUND_PATH))
        if amount_background_images == 0:
            raise FileNotFoundError(f"No background images found in '{BACKGROUND_PATH}'.")
        # backgrounds are indexed 0 .. amount - 1, so index == amount is already out of range;
        # once all backgrounds have been used, fall back to a random existing one
        if i >= amount_background_images:
            background_index = random.randrange(amount_background_images)
        else:
            background_index = i
        background_file = os.path.join(BACKGROUND_PATH, f"background_{background_index}.png")

    # load the background into memory and release the file handle right away
    with Image.open(background_file) as background:
        pil_image = background.copy()
    background_width, background_height = pil_image.size

    # get 1-4 random images from the traffic signs only directory
    # (never ask for more signs than the directory contains)
    available_signs = os.listdir(TRAFFIC_SIGN_PATH)
    random_images = random.sample(available_signs, random.randint(1, min(4, len(available_signs))))

    # copy the chosen signs to the temporary directory
    os.makedirs(TEMP_PATH, exist_ok=True)
    for image in random_images:
        shutil.copy(os.path.join(TRAFFIC_SIGN_PATH, image), TEMP_PATH)

    # load every sign as a numpy array (the format imgaug works on) and augment it
    images_aug = {}
    for image in os.listdir(TEMP_PATH):
        with Image.open(os.path.join(TEMP_PATH, image)) as sign_file:
            images_aug[image] = augmentation_pipeline.augment_image(np.array(sign_file))

    # paste the traffic sign images on the background image
    for name, image in images_aug.items():
        # resolve the class first so an unknown sign name fails before anything is written
        class_id = class_names.index(name.split(".")[0])
        # define a random size for the traffic sign image
        size = random.randint(15, 60)
        # pick the top-left corner so the whole sign stays edge_margin pixels inside the
        # background (assumes the background is larger than 2 * edge_margin + size)
        max_x = max(edge_margin, background_width - edge_margin - size)
        max_y = max(edge_margin, background_height - edge_margin - size)
        random_position = (random.randint(edge_margin, max_x), random.randint(edge_margin, max_y))
        # convert to an RGBA PIL image so its alpha channel can serve as the paste mask
        image = Image.fromarray(image).convert("RGBA").resize((size, size))
        pil_image.paste(image, random_position, mask=image)
        # remember where the sign was placed
        traffic_sign_positions[name] = {"size": (size, size), "position": random_position}
        # YOLO label: class id followed by box centre and box size, all relative to the image size
        x_center = (random_position[0] + size / 2) / background_width
        y_center = (random_position[1] + size / 2) / background_height
        label_lines.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} "
            f"{size / background_width:.6f} {size / background_height:.6f}\n"
        )

    # build the file name from the sign names (without extension) plus the first free index,
    # so neither an existing image nor an existing label file is ever reused
    random_images = [image.split(".")[0] for image in random_images]
    base_save_name = "_".join(random_images)
    save_index = 0
    while (os.path.exists(os.path.join(IMAGE_PATH, f"{base_save_name}_{save_index}.png"))
           or os.path.exists(os.path.join(LABEL_PATH, f"{base_save_name}_{save_index}.txt"))):
        save_index += 1
    image_save_name = f"{base_save_name}_{save_index}"

    # choose random weather to add to the image
    random_weather = random.choice(weather)
    # transform image to numpy array
    image = np.array(pil_image)
    # add weather to image
    if random_weather == "rain":
        print("Added rain to image.")
        aug = iaa.Rain(speed=(0.1, 0.3), drop_size=(0.1, 0.2))
        image = aug(image=image)
    elif random_weather == "snow":
        print("Added snow to image.")
        aug = iaa.Snowflakes(flake_size=(0.1, 0.3), speed=(0.01, 0.05))
        image = aug(image=image)
    elif random_weather == "fog":
        print("Added fog to image.")
        aug = iaa.Fog()
        image = aug(image=image)

    # convert image back to PIL image
    pil_image = Image.fromarray(image)

    # save the image and its labels to the dataset directories; the label file is written
    # in one go with mode "w" so it always holds exactly this image's boxes
    pil_image.save(os.path.join(IMAGE_PATH, f"{image_save_name}.png"))
    with open(os.path.join(LABEL_PATH, f"{image_save_name}.txt"), "w") as f:
        f.writelines(label_lines)
    print("Generated image with traffic signs:")
    #display(pil_image)

    # draw the boxes and class names on the in-memory copy only (the saved file stays clean)
    draw = ImageDraw.Draw(pil_image)
    for image_name, pos_size in traffic_sign_positions.items():
        width, height = pos_size["size"]
        x_pos, y_pos = pos_size["position"]
        # draw a rectangle around the traffic sign image
        draw.rectangle([x_pos, y_pos, x_pos + width, y_pos + height], outline="blue", width=2)
        # draw the class name above the traffic sign image
        draw.text((x_pos, y_pos - 10), image_name.split(".")[0], fill="red")

    # show the image with the boxes
    print("Generated image with bounding boxes:")
    #display(pil_image)
    # remove temporary directory
    if os.path.exists(TEMP_PATH):
        shutil.rmtree(TEMP_PATH)

In [15]:
# Slider for the share of training images (0-100 %, default 10) to move into the validation set.
val_slider = widgets.IntSlider(
    value=10,
    min=0,
    max=100,
    step=1,
    description='Percentage of images to use for validation dataset:',
)
display(val_slider)

IntSlider(value=10, description='Percentage of images to use for validation dataset:')

In [19]:
# Split the dataset: move a random share of the image/label pairs from the training
# directories into the validation directories. A previous split is undone first, so
# re-running this cell always starts from the complete training set.

# Set the percentage of files to move
percentage = val_slider.value

# Set the source and destination directories
src_dir_images = IMAGE_PATH
dst_dir_images = 'dataset/val/images'
src_dir_labels = LABEL_PATH
dst_dir_labels = 'dataset/val/labels'

# Create the validation directories, or - if they already hold files from an earlier
# split - move those files back to the training directories
for src_dir, dst_dir in ((src_dir_images, dst_dir_images), (src_dir_labels, dst_dir_labels)):
    os.makedirs(dst_dir, exist_ok=True)
    for file in os.listdir(dst_dir):
        val_path = os.path.join(dst_dir, file)
        if not os.path.exists(os.path.join(src_dir, file)):
            # file is missing from the training directory, move it back
            shutil.move(val_path, src_dir)
        else:
            # the training directory already has a file with this name, drop the validation copy
            os.remove(val_path)

# Get the list of image files in the source directory
image_files = os.listdir(src_dir_images)

# Randomly shuffle the list of image files
random.shuffle(image_files)

# Calculate the number of image files to move
num_image_files = int(len(image_files) * percentage / 100)

# Move the selected image files together with their label files
for image_file in image_files[:num_image_files]:
    # The label file shares the image's base name
    label_filename = os.path.splitext(image_file)[0] + '.txt'

    # Construct the full file paths
    src_path_image = os.path.join(src_dir_images, image_file)
    dst_path_image = os.path.join(dst_dir_images, image_file)
    src_path_label = os.path.join(src_dir_labels, label_filename)
    dst_path_label = os.path.join(dst_dir_labels, label_filename)

    # Check for the label before touching anything, so an entry without one is
    # skipped instead of leaving a half-moved pair behind
    if not os.path.isfile(src_path_label):
        print(f"Skipping '{image_file}': label file '{label_filename}' not found.")
        continue

    shutil.move(src_path_image, dst_path_image)
    shutil.move(src_path_label, dst_path_label)
    