## Imports

In [None]:
#Imports
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np
import albumentations as A
import time
from tqdm import tqdm
from google.colab import drive
import re
from PIL import Image, ImageOps, ImageDraw
from keras.preprocessing.image import ImageDataGenerator
import random
import requests
from io import BytesIO
from bs4 import BeautifulSoup
import urllib
import string
import random

## Mounting Google Drive

In [None]:
#Upload my Files from Google Drive
# Mounts Google Drive under /content/gdrive (uses the google.colab `drive` helper).
drive.mount('/content/gdrive')

#extract and unzip folder
# Shell escape: unpacks the archive into the current working directory, which
# creates the images/ tree (images/bg, images/real_pics) used by later cells.
# The relative "gdrive/..." path presumably relies on the cwd being /content — TODO confirm.
!unzip "gdrive/MyDrive/Synthetic-Data/images.zip"

Mounted at /content/gdrive
Archive:  gdrive/MyDrive/Synthetic-Data/images.zip
   creating: images/
  inflating: images/.DS_Store        
  inflating: __MACOSX/images/._.DS_Store  
   creating: images/bg/
   creating: images/real_pics/
  inflating: images/bg/pawel-czerwinski-vI5XwPbGvmY-unsplash.jpeg  
  inflating: __MACOSX/images/bg/._pawel-czerwinski-vI5XwPbGvmY-unsplash.jpeg  
  inflating: images/bg/.DS_Store     
  inflating: __MACOSX/images/bg/._.DS_Store  
  inflating: images/bg/photo-1553095066-5014bc7b7f2d.jpeg  
  inflating: __MACOSX/images/bg/._photo-1553095066-5014bc7b7f2d.jpeg  
  inflating: images/bg/fi1.jpeg      
  inflating: __MACOSX/images/bg/._fi1.jpeg  
  inflating: images/bg/tlC9Xd.jpeg   
  inflating: __MACOSX/images/bg/._tlC9Xd.jpeg  
  inflating: images/bg/abstract-natural-backgrounds-your-design-51859129.jpeg  
  inflating: __MACOSX/images/bg/._abstract-natural-backgrounds-your-design-51859129.jpeg  
  inflating: images/bg/pexels-eberhard-grossgasteiger-1287142.j

## Functions

In [None]:
def masking(img):
  """Pair an image with its alpha band.

  Args:
    img: object exposing ``getchannel`` (e.g. a PIL image with an 'A' band).

  Returns:
    A 2-tuple ``(img, alpha)`` where ``alpha`` is the result of
    ``img.getchannel('A')``; ``img`` itself is passed through untouched.
  """
  alpha = img.getchannel('A')
  return img, alpha


def Data_Augmentation(path, img, i):
    """Create one randomly augmented copy of an image and save it as a PNG.

    The image is pushed through a Keras ``ImageDataGenerator`` (random
    rotation, brightness and zoom), centre-cropped to a square, and written to
    ``augmented_data/<stem>_<i>.png`` where ``<stem>`` is ``img`` without its
    extension.

    Args:
        path: directory prefix; it is concatenated directly with ``img``, so it
            must already end with a path separator.
        img: file name of the source image inside ``path``.
        i: index appended to the output file name, so repeated calls for the
            same source image do not overwrite each other.

    Returns:
        None. The result is written to disk.
    """
    # Keep the file name: the output name is derived from it further down.
    # (The previous version read the notebook globals `pic` and `count` here,
    # so the function only worked when called from one particular loop.)
    source_name = img

    # Load the pixels and release the file handle straight away.
    with Image.open(path + source_name) as source:
        data = np.asarray(source)

    # The generator works on batches, so add a leading batch axis of size 1.
    samples = np.expand_dims(data, 0)

    datagen = ImageDataGenerator(
        rotation_range=180,           # random rotation augmentation
        brightness_range=[0.1, 2.5],  # random brightness augmentation
        zoom_range=[0.4, 2.1]         # random zoom augmentation
    )
    it = datagen.flow(samples, batch_size=1)

    # Builtin next() drives the iterator protocol; it replaces the
    # explicit it.next() method call used previously.
    augmented_data = next(it)[0].astype('uint8')

    # Centre-crop the longer side so the result is square.
    h, w, _ = augmented_data.shape
    if h > w:
        pad = (h - w) // 2
        augmented_data = augmented_data[pad:pad + w, :]
    elif w > h:
        pad = (w - h) // 2
        augmented_data = augmented_data[:, pad:pad + h]

    augmented_img = Image.fromarray(augmented_data)

    folder_path = "augmented_data"
    pic_name = os.path.splitext(source_name)[0]
    os.makedirs(folder_path, exist_ok=True)
    with open(f"{folder_path}/{pic_name}_{i}.png", "wb") as f:
        augmented_img.save(f, format='PNG')

def append_to_bg(bg, img, obj_names):
    """Composite an augmented object onto a background and write a YOLO label.

    The object image is resized to half the background's width and height,
    pasted at a random position using its alpha channel as the mask, and the
    composite is saved under ``final_data/images``. A matching one-line label
    ``<class> <cx> <cy> <w> <h>`` (all box values normalised by the background
    size) is written under ``final_data/yolo`` with the same random stem.

    The label box is the bounding box of the visible (alpha > 0) region
    reported by ``BBox``, shifted by the paste offset. Previously that box was
    computed and then ignored, so the label always covered the whole pasted
    rectangle, transparent margins included.

    Args:
        bg: file name of the background inside ``images/bg/``.
        img: file name of the object image inside ``augmented_data/``; the text
            before the first ``_`` of its stem is looked up in ``obj_names``.
        obj_names: mapping from object name to integer class id.

    Raises:
        ValueError: if the object image has no alpha band (raised by
            ``getchannel``) or no visible region is found in it.
        KeyError: if the name derived from ``img`` is not in ``obj_names``.
    """
    img_str = img

    # Load both images fully and release the file handles immediately.
    with Image.open("images/bg/" + bg) as bg_file:
        background = bg_file.copy()
    bg_width, bg_height = background.size

    with Image.open("augmented_data/" + img_str) as obj_file:
        # Resize the object to half the background size in each dimension.
        obj = obj_file.resize((bg_width // 2, bg_height // 2))

    # The alpha channel is both the paste mask and the source of the label box.
    alpha = obj.getchannel('A')

    # Locate the visible object inside the pasted rectangle (object-local
    # coordinates, xmax/ymax exclusive as returned by BBox).
    bbox = BBox(np.array(alpha))
    if bbox is None:
        # Nothing visible would be pasted, so there is nothing to label.
        raise ValueError(f"no visible object found in {img_str}")
    xmin, ymin, xmax, ymax = bbox

    # Resolve the class id before anything is written to disk so an unknown
    # name cannot leave an image without its label.
    name, _ext = os.path.splitext(img_str)
    image_name = name.split("_")[0]
    class_id = obj_names[image_name]

    # Random paste position that keeps the object fully inside the background.
    offset_x = random.randint(0, bg_width - obj.width)
    offset_y = random.randint(0, bg_height - obj.height)
    background.paste(obj, (offset_x, offset_y), alpha)

    # Convert the box to YOLO format: centre and size, normalised to [0, 1].
    center_x = (offset_x + (xmin + xmax) / 2) / bg_width
    center_y = (offset_y + (ymin + ymax) / 2) / bg_height
    width = (xmax - xmin) / bg_width
    height = (ymax - ymin) / bg_height
    yolo_bbox = [class_id, center_x, center_y, width, height]

    # Make sure the output folders exist so the function works on its own.
    os.makedirs("final_data/images", exist_ok=True)
    os.makedirs("final_data/yolo", exist_ok=True)

    # Image and label share one random stem so they can be matched up later.
    rand_name = ''.join(random.choices(string.ascii_letters + string.digits, k=6))

    with open(f"final_data/images/{rand_name}.png", "wb") as f:
        background.save(f)

    with open(f"final_data/yolo/{rand_name}.txt", "wb") as f:
        f.write(" ".join(str(num) for num in yolo_bbox).encode("utf-8"))





def webscrape(url, folder_path, timeout=10):
  """Download every image referenced by <img> tags on a page.

  Each image is saved in ``folder_path`` under a random six-character name
  with a ``.png`` suffix (the suffix is fixed; the bytes are written as
  received). Failures on individual images are printed and skipped.

  Args:
    url: address of the page to scan.
    folder_path: directory to save into; created if missing.
    timeout: seconds to wait for each HTTP request before giving up.

  Returns:
    None.
  """
  # Local import: the notebook only does `import urllib`, which does not
  # guarantee that the `urllib.parse` submodule is loaded.
  from urllib.parse import urljoin

  os.makedirs(folder_path, exist_ok=True)

  response = requests.get(url, timeout=timeout)
  soup = BeautifulSoup(response.content, "html.parser")

  # Collect absolute image URLs. Tags without a src and inline data: URIs
  # cannot be fetched over HTTP, so they are skipped; relative paths are
  # resolved against the page address.
  image_urls = []
  for tag in soup.find_all("img"):
    src = tag.get("src")
    if not src or src.startswith("data:"):
      continue
    image_urls.append(urljoin(url, src))

  for i, image_url in enumerate(image_urls):
    try:
      rand_name = ''.join(random.choices(string.ascii_letters + string.digits, k=6))

      image_response = requests.get(image_url, timeout=timeout)
      # Do not store HTTP error pages as if they were pictures.
      image_response.raise_for_status()

      # Reject responses that declare a non-image payload; these would
      # otherwise end up as unreadable ".png" files in the folder.
      content_type = image_response.headers.get("Content-Type", "")
      if content_type and not content_type.startswith("image/"):
        raise ValueError(f"unexpected content type {content_type!r}")

      with open(f"{folder_path}/{rand_name}.png", "wb") as f:
        f.write(image_response.content)

    except Exception as e:
      # Best effort: report the failure and carry on with the next image.
      print(f"Error downloading image {i}: {e}")


def BBox(image_arr):
  """Bounding box of the largest external contour in an image array.

  The array is binarised with a threshold of 0 (cv2.THRESH_BINARY, max value
  255) and external contours are extracted from the result.

  Args:
    image_arr: single-channel image array accepted by ``cv2.threshold``.

  Returns:
    ``(x_min, y_min, x_max, y_max)`` for the contour with the largest area,
    or ``None`` when no contour is found.
  """
  _, binary = cv2.threshold(image_arr, 0, 255, cv2.THRESH_BINARY)
  outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

  # Nothing above the threshold: no box to report.
  if len(outlines) <= 0:
      return None

  biggest = max(outlines, key=cv2.contourArea)
  left, top, box_w, box_h = cv2.boundingRect(biggest)
  return left, top, left + box_w, top + box_h


## Path To Files

In [None]:
# Folder locations (relative to the working directory where the archive was unpacked)
PATH_real_pics = 'images/real_pics'
PATH_bg = 'images/bg'

# Empty containers for the image names
real_pics = []
bg = []

# Same folders with a trailing slash, ready for plain string concatenation
real_path = f"{PATH_real_pics}/"
bg_path = f"{PATH_bg}/"

## Mapping of object names to values

In [None]:
# Object labels listed in class-id order (position in the tuple == class id)
_CLASS_LABELS = (
    "Centriole",
    "Golgi",
    "Lysosome",
    "Mitochondria",
    "Nucleus",
    "RoughER",
    "SmoothER",
    "Vessicle",
)
# Lookup from object name to its integer class id
obj_names = {label: class_id for class_id, label in enumerate(_CLASS_LABELS)}

## Converting BG sizes / Web Scraping

In [None]:
# Web scraping: listing pages to harvest random background images from
bg_urls = [
    "https://unsplash.com/s/photos/random",
    "https://unsplash.com/t/textures-patterns",
    "https://unsplash.com/t/travel",
    "https://unsplash.com/t/animals",
    "https://unsplash.com/t/film",
]

# Every page is scraped into the same background folder
for url in bg_urls:
  webscrape(url, PATH_bg)

In [None]:
# Resize every background in place to 1920x1080
for pic in os.listdir(PATH_bg):
    # Hidden files such as .DS_Store are not images; skip them instead of
    # reporting them as failures.
    if pic.startswith("."):
        continue

    # Build the path from the configured folder rather than a second,
    # hard-coded copy of it.
    target = os.path.join(PATH_bg, pic)
    try:
        # Read and resize first, and close the source before the same path is
        # reopened for writing.
        with Image.open(target) as src:
            img = src.resize((1920, 1080))

        # Save the resized image back over the original file.
        with open(target, "wb") as f:
            img.save(f)

    except Exception as e:
        # Print an error message for any image that couldn't be processed.
        print(f"Error opening image file {pic}: {e}")



Error opening image file kcrAWL.png: cannot identify image file 'images/bg/kcrAWL.png'
Error opening image file JfAAq7.png: cannot identify image file 'images/bg/JfAAq7.png'
Error opening image file 7hpMpk.png: cannot identify image file 'images/bg/7hpMpk.png'
Error opening image file yaD5O1.png: cannot identify image file 'images/bg/yaD5O1.png'
Error opening image file iRxWQO.png: cannot identify image file 'images/bg/iRxWQO.png'
Error opening image file yMnzCY.png: cannot identify image file 'images/bg/yMnzCY.png'
Error opening image file So2Jgb.png: cannot identify image file 'images/bg/So2Jgb.png'
Error opening image file AZb9Uk.png: cannot identify image file 'images/bg/AZb9Uk.png'
Error opening image file .DS_Store: cannot identify image file 'images/bg/.DS_Store'
Error opening image file jqwjDB.png: cannot identify image file 'images/bg/jqwjDB.png'
Error opening image file mylhYZ.png: cannot identify image file 'images/bg/mylhYZ.png'
Error opening image file cegrdx.png: cannot i

In [None]:
# Sanity check: how many entries the background folder now holds
file_list = os.listdir(PATH_bg)
num_files = len(file_list)
print(f"Number of files in folder: {num_files}")

Number of files in folder: 545


## Gathering Data

In [None]:
# for pic in os.listdir(PATH_real_pics):
#   real_pics.append(pic)


# real_pics.sort()
# real_pics = real_pics[1:]

In [None]:
# print(real_pics)

## Data Augmentation & Masking

In [None]:
# Randomly augment every source picture
#real_dataset = []

# Number of augmented variants per picture: (8 x N) = total
N = 68 #images per item (N items per object to detect)

for pic in os.listdir(PATH_real_pics):
  count = 0
  # macOS metadata file, not a picture
  if pic == ".DS_Store":
    continue
  # Produce N variants, numbered 0 .. N-1
  while count < N:
    Data_Augmentation(real_path, pic, count)
    count += 1

  

In [None]:
# Sanity check: how many augmented images were written
file_list = os.listdir("augmented_data/")
num_files = len(file_list)
print(f"Number of files in folder: {num_files}")

Number of files in folder: 544


## Add new images to background

In [None]:
# Create the output tree. exist_ok=True makes the cell safe to re-run:
# without it os.makedirs raises FileExistsError once the folders exist.
# makedirs also creates the parent "final_data" folder as needed.
for out_dir in ("final_data/images", "final_data/yolo"):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Pair each augmented object with one background and write image + label.
# Hidden files (e.g. .DS_Store) are dropped from both listings so they do not
# consume a pairing slot.
aug_files = [name for name in os.listdir("augmented_data/") if not name.startswith(".")]
bg_files = [name for name in os.listdir(PATH_bg) if not name.startswith(".")]

for aug, bg in zip(aug_files, bg_files):
  try:
    append_to_bg(bg, aug, obj_names)
  except Exception as e:
    # Report the pair that actually failed and why. The previous message
    # printed the stale loop variable `pic` from an earlier cell, so every
    # failure showed the same unrelated file name and no cause.
    print(f"error {aug} on {bg}: {e}")

errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png
errorSmoothER.png


## Download Data

In [None]:
# Shell escape: recursively archive the generated final_data/ folder
# (images and YOLO label files) into final_data.zip.
!zip -r final_data.zip final_data/
# Colab-only helper module, imported here at point of use.
from google.colab import files
# Presumably hands the archive to the browser as a download — confirm against Colab docs.
files.download('final_data.zip')

  adding: final_data/ (stored 0%)
  adding: final_data/yolo/ (stored 0%)
  adding: final_data/yolo/LcozN2.txt (deflated 42%)
  adding: final_data/yolo/WR19c4.txt (deflated 40%)
  adding: final_data/yolo/MYF0rm.txt (deflated 29%)
  adding: final_data/yolo/yLTQsz.txt (deflated 43%)
  adding: final_data/yolo/zDBgLP.txt (deflated 43%)
  adding: final_data/yolo/8FScev.txt (deflated 44%)
  adding: final_data/yolo/hPwsmN.txt (deflated 44%)
  adding: final_data/yolo/wbMFQu.txt (deflated 52%)
  adding: final_data/yolo/ow140l.txt (deflated 45%)
  adding: final_data/yolo/yG2pzM.txt (deflated 44%)
  adding: final_data/yolo/ps0pen.txt (deflated 48%)
  adding: final_data/yolo/QNqLAq.txt (deflated 36%)
  adding: final_data/yolo/TVUz2Y.txt (deflated 38%)
  adding: final_data/yolo/gD5Z9E.txt (deflated 34%)
  adding: final_data/yolo/EEyPaI.txt (deflated 45%)
  adding: final_data/yolo/CJfNuj.txt (deflated 38%)
  adding: final_data/yolo/2ttFs5.txt (deflated 49%)
  adding: final_data/yolo/xWHzRo.txt (defla

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>