In [None]:
# Detect whether this notebook is executing on a local machine or inside Kaggle

import os

# An unset or empty KAGGLE_KERNEL_RUN_TYPE is treated as "not running in a Kaggle kernel"
isLocalhost = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '') == ''

print("We are running code on Localhost" if isLocalhost else "We are running in Kaggle")

In [None]:
# Root folder of the input datasets for the current environment
INPUT_PATH = './input' if isLocalhost else '/home/kaggle/input'

if not isLocalhost:
    # On Kaggle the upload credentials are read from the notebook's secrets store
    from kaggle_secrets import UserSecretsClient

    user_secrets = UserSecretsClient()
    USER_ID = user_secrets.get_secret("user-id")
    API_TOKEN = user_secrets.get_secret("api-token")


In [None]:
# Folders the compositing step reads its background and foreground images from
INPUT_BACKGROUND_FILES = INPUT_PATH + '/stanford-background-dataset/images'
INPUT_FOREGROUND_FILES = './examples/output/augmented-cats-and-dogs-multiprocessing'

# Title of the Kaggle dataset that receives the results
OUTPUT_DATASET_NAME = 'Random Backgrounds for Cats and Dogs'

# Identifier of the existing Kaggle dataset that the upload cell updates.
# NOTE(review): slug assumed from the dataset title - confirm it matches the real dataset before uploading
OUTPUT_DATASET_ID = 'random-backgrounds-for-cats-and-dogs'

# Local folder the composite images and metadata.csv are written to
OUTPUT_PATH = './output/final-randomized-cats-and-dogs'

# Lower/upper bound of the random scale factor applied to each foreground image.
# NOTE(review): the original comment here was garbled; it appears to refer to the
# "Augment Cats and Dogs" notebook - confirm whether these values must match it
MIN_SCALE = 0.25
MAX_SCALE = 0.75

# Number of iterations of the compositing loop (use a small value such as 100 for a quick trial)
#run_limit = 100
run_limit = 999999

In [None]:
# unzip data

import os
from os import path
import zipfile

if isLocalhost:

    if path.exists(INPUT_BACKGROUND_FILES + "/0000047.jpg") == False:

        print('Downloading files')

        ! kaggle datasets download -d balraj98/stanford-background-dataset

        os.makedirs("./input", exist_ok=True)    

        ! mv stanford-background-dataset.zip input

        print('Extracting files')

        os.makedirs("./input/stanford-background-dataset", exist_ok=True)    

        with zipfile.ZipFile("./input/stanford-background-dataset.zip","r") as z:
            z.extractall("./input/stanford-background-dataset")

        print('Cleaning up files')

        ! rm ./input/stanford-background-dataset.zip
        ! rm -R ./input/stanford-background-dataset/labels_colored
        ! rm -R ./input/stanford-background-dataset/labels_raw
        ! rm ./input/stanford-background-dataset/labels_class_dict.csv
        ! rm ./input/stanford-background-dataset/metadata.csv

    else:
        print("Files already exist locally")

print("Done extracting files")

In [None]:
import os

# Create the output folder (and any missing parents); does nothing if the folder already exists.
# exist_ok=True avoids the check-then-create race and matches how the input folders are created.
os.makedirs(OUTPUT_PATH, exist_ok=True)

In [None]:
import cv2
import numpy as np

def scale_image(image, scale_factor):
    """Return `image` resized by `scale_factor` on both axes (aspect ratio kept).

    Args:
        image: a PIL image, or anything `np.array` can turn into a pixel array.
        scale_factor: multiplier applied to both the width and the height.

    Returns:
        A new PIL image created from the resized pixel array.

    Raises:
        Any exception raised by `cv2.resize` or `Image.fromarray` is propagated
        unchanged; the compositing loop treats such a failure as a bad file.
    """
    # Imported locally so the function does not depend on a later cell having imported PIL
    from PIL import Image

    # dsize=(0, 0) tells cv2.resize to derive the output size from fx / fy
    pixels = cv2.resize(np.array(image), (0, 0), fx=scale_factor, fy=scale_factor)

    return Image.fromarray(pixels)

In [None]:
import csv
import random
from PIL import Image, ImageDraw, ImageOps
import matplotlib.pyplot as plt

# List the files at the top level of each input folder (sub-folders are not descended into)
background_path, background_dir, background_files = next(os.walk(INPUT_BACKGROUND_FILES))
foreground_path, foreground_dir, foreground_files = next(os.walk(INPUT_FOREGROUND_FILES))

# metadata.csv gets one row per composite written: filename plus the pasted box (x, y, w, h).
# The `with` block closes the file even when the loop is interrupted or fails part-way.
with open(OUTPUT_PATH + '/metadata.csv', mode='w', newline='') as metadata_file:
    metadata_writer = csv.writer(metadata_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    metadata_writer.writerow(['filename','x','y','w','h'])

    for i in range(run_limit):

        # pick one random background and one random foreground (sampling with replacement)
        random_background_image_num = random.randrange(len(background_files))
        random_foreground_image_num = random.randrange(len(foreground_files))

        background_image_filename = background_path + '/' + background_files[random_background_image_num]
        foreground_image_filename = foreground_path + '/' + foreground_files[random_foreground_image_num]

        background_image = Image.open(background_image_filename)
        foreground_image = Image.open(foreground_image_filename)

        # give the background the same colour treatment the foreground's filename indicates
        if 'neg' in foreground_image_filename:
            background_image = ImageOps.grayscale(background_image)
            background_image = ImageOps.invert(background_image)

        elif 'bw' in foreground_image_filename:
            background_image = ImageOps.grayscale(background_image)

        # composite randomly scaled foreground image at random position on random background image
        scale_factor = random.uniform(MIN_SCALE, MAX_SCALE)

        try:
            foreground_image = scale_image(foreground_image, scale_factor)
        except Exception:
            # Best-effort: skip images that cannot be scaled. Catching Exception (not a bare
            # except) lets a keyboard/kernel interrupt still stop this long-running loop.
            print('bad file: ', foreground_image_filename, foreground_image.size)
            continue

        # paste foreground image onto background image at random position
        foreground_image_width, foreground_image_height = foreground_image.size
        background_image_width, background_image_height = background_image.size

        # largest top-left offset that keeps the foreground inside the background on each axis
        # (the x range must use the foreground *width*; it previously used the height)
        # NOTE(review): when the scaled foreground is larger than the background these values
        # are negative, so the recorded box can start outside the image - decide how to handle
        x_max_pos = background_image_width - foreground_image_width
        y_max_pos = background_image_height - foreground_image_height

        x_pos_factor = random.uniform(0, 1)
        y_pos_factor = random.uniform(0, 1)

        x_new_pos = int(x_max_pos * x_pos_factor)
        y_new_pos = int(y_max_pos * y_pos_factor)

        background_image = background_image.convert('RGBA') # make images have same number of color channels
        # the foreground is also passed as the paste mask (3rd parameter);
        # presumably it carries an alpha channel - verify against the foreground files
        background_image.paste(foreground_image, (x_new_pos, y_new_pos), foreground_image)

        # draw bounding box (debug aid)
        # draw = ImageDraw.Draw(background_image)
        # draw.rectangle(((x_new_pos, y_new_pos), (x_new_pos + foreground_image_width, y_new_pos + foreground_image_height)), outline=(255, 0, 0), width = 3)

        # save composite image
        # NOTE(review): the output name depends only on the foreground file, so picking the same
        # foreground twice overwrites the earlier image while metadata.csv keeps both rows
        filename = os.path.basename(foreground_image_filename)
        fname, ext = os.path.splitext(filename)
        out_filename = fname  + '-final.jpg'

        background_image = background_image.convert('RGB') # JPEG cannot store an alpha channel
        background_image.save(OUTPUT_PATH + '/' + out_filename, 'jpeg')

        metadata_writer.writerow([out_filename, x_new_pos, y_new_pos, foreground_image_width, foreground_image_height])

        # plt.imshow(background_image)
        # plt.show()

        # plt.imshow(foreground_image)
        # plt.show()

print('Done')

In [None]:
# Save Output Dataset: when running on Kaggle, publish the contents of OUTPUT_PATH
# as a new version of an existing Kaggle dataset. Local runs only print "Done".
# Google Cloud Platform for Kaggle(Beta) does not support /usr/lib modules at this time

if isLocalhost == False:

    # Install the uploader helper from TestPyPI at run time (no version pin; --no-deps skips its dependencies)
    ! python -m pip install --index-url https://test.pypi.org/simple/ --no-deps kaggle_uploader-screamatthewind

    import time
    import os

    from kaggle_uploader import kaggle_uploader 

    print("Saving Images to Kaggle")
    start_time = time.time()

    # kaggle_secrets are not supported by Google Cloud Platform for Kaggle(Beta) at this time
    # from kaggle_secrets import UserSecretsClient
    # user_secrets = UserSecretsClient()
    # api_secret = user_secrets.get_secret("Crop Cats and Cogs YOLOv3")

    # Configure the uploader through its module-level attributes.
    # USER_ID and API_TOKEN are set in the secrets cell (Kaggle branch only).
    # OUTPUT_DATASET_ID must be defined in the configuration cell before this cell runs.
    kaggle_uploader.resources = []
    kaggle_uploader.init_on_kaggle(USER_ID, API_TOKEN)
    kaggle_uploader.base_path = OUTPUT_PATH
    kaggle_uploader.title = OUTPUT_DATASET_NAME
    kaggle_uploader.dataset_id = OUTPUT_DATASET_ID
    kaggle_uploader.user_id = USER_ID

    # Register every entry in the output folder (images and metadata.csv) under its own file name
    for filename in os.listdir(kaggle_uploader.base_path):
        print(filename)
        kaggle_uploader.add_resource(filename, filename)

    # "new version" is the message passed along with the update
    kaggle_uploader.update("new version")

    run_time = time.time()-start_time
    print('Done Saving Images - Total Time: {:.1f}'.format(run_time) + ' Secs')

    # If you get an error during update, it is typically because of an invalid API key, a bad username,
    # or because the dataset does not exist. This code does not create datasets; it only updates existing ones.

else:
    print("Done")