In [10]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used by
# the later cells live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


### Preprocesando imágenes de moléculas organometálicas de la Universidad del Néguev (articles_molecules)

In [2]:
import os
import glob
from sklearn.model_selection import train_test_split
from PIL import Image, ImageOps

In [3]:
def resize_with_padding(img, required_size):
  """Fit `img` inside a white `required_size` x `required_size` square.

  The image is first shrunk with `thumbnail` (which keeps the aspect ratio and
  never enlarges) and then padded with white on every side so that the result
  is exactly square with the original content centred.

  Args:
    img: PIL image. NOTE: `thumbnail` resizes this object in place.
    required_size: side length, in pixels, of the returned square image.

  Returns:
    The padded image produced by `ImageOps.expand`, of size
    (required_size, required_size).
  """
  img.thumbnail((required_size, required_size))
  shrunk_w, shrunk_h = img.size
  delta_w = required_size - shrunk_w
  delta_h = required_size - shrunk_h
  # Split the missing pixels between both sides; when the difference is odd
  # the extra pixel goes to the right/bottom edge.
  pad_w = delta_w // 2
  pad_h = delta_h // 2
  padding = (pad_w, pad_h, delta_w - pad_w, delta_h - pad_h)
  # A colour name is resolved by Pillow for the image's own mode, so padding
  # also works for grayscale and 1-bit inputs; a hard-coded RGB tuple is
  # rejected for single-band modes.
  return ImageOps.expand(img, padding, fill="white")

In [None]:
# Source folder with the raw PNG molecule images; the padded JPEG copies are
# written to its "preprocess/" sub-folder.
images_path = "/content/drive/MyDrive/4_ING_INFORMATICA/tfg/datasets/negev/articles_molecules/"
filenames = glob.glob(images_path+"*.png")

os.makedirs(images_path+"preprocess/", exist_ok=True)

required_size = 192
for filename in filenames:
  # Swap only the extension: str.replace("png", "jpg") would also rewrite a
  # "png" occurring anywhere else in the file name.
  stem = os.path.splitext(os.path.basename(filename))[0]
  # The context manager closes the source file once the copy has been saved.
  with Image.open(filename) as img:
    padded = resize_with_padding(img, required_size)
    # NOTE(review): convert("RGB") drops any alpha channel; PNGs with a
    # transparent background may not end up white — confirm on the dataset.
    padded.convert("RGB").save(images_path+"preprocess/"+stem+".jpg")

In [None]:
# Directory holding the padded JPEGs written by the preprocessing cell.
preprop_images_path = images_path+"preprocess/"
# glob returns paths in arbitrary, filesystem-dependent order; sort them so
# that random_state=42 always yields the same train/test partition.
filenames = sorted(glob.glob(preprop_images_path+"*.jpg"))
# Hold out 25% of the images for testing.
train, test = train_test_split(filenames, test_size=0.25, random_state=42)

In [None]:
# Write one image path per line to xx_train.txt / xx_test.txt, with no
# trailing newline after the last path.
for list_name, paths in (("xx_train.txt", train), ("xx_test.txt", test)):
  with open(preprop_images_path+list_name, "w") as f:
    f.write("\n".join(paths))

### Aumentando dataset

In [3]:
import imgaug as ia
from imgaug import augmenters as iaa

# Seed imgaug's global random state so the augmentations are repeatable.
ia.seed(1)

# Light Gaussian blur (sigma sampled from [0, 0.5]) applied with probability 0.5.
blur = iaa.GaussianBlur(sigma=(0, 0.5))
maybe_blur = iaa.Sometimes(0.5, blur)

# Per-axis scaling to 80-100%, rotation in [-25, 25] and shear in [-5, 5];
# pixels uncovered by the transform are filled with the constant 255 (cval).
affine = iaa.Affine(
    scale={"x": (0.8, 1.0), "y": (0.8, 1.0)},
    rotate=(-25, 25),
    shear=(-5, 5),
    cval=255,
)

# random_order=True: the two augmenters above are applied in random order.
augmentation_seq = iaa.Sequential([maybe_blur, affine], random_order=True)

In [4]:
import os
import glob
from sklearn.model_selection import train_test_split
import cv2

images_path = "/content/drive/MyDrive/4_ING_INFORMATICA/tfg/datasets/negev/articles_molecules/"
preprop_images_path = images_path+"preprocess/"
# glob order is arbitrary; sorting keeps the image order (and therefore the
# seeded augmentations and the output numbering) the same on every run.
filenames = sorted(glob.glob(preprop_images_path+"*.jpg"))

images = []
for filename in filenames:
  img = cv2.imread(filename)
  # cv2.imread reports an unreadable file by returning None, not by raising.
  if img is None:
    raise IOError("Could not read image: "+filename)
  images.append(img)

# Output set: every preprocessed image plus two augmented variants of each
# (the list is doubled before being passed to the augmenter).
augmented_images = images + augmentation_seq(images=images*2)

# cv2.imwrite does not create missing directories; it only returns False, so
# make sure the target folder exists and check every write.
preprop_aug_images_path = preprop_images_path+"aug/"
os.makedirs(preprop_aug_images_path, exist_ok=True)
for count, img in enumerate(augmented_images):
  out_path = preprop_aug_images_path+str(count)+".jpg"
  if not cv2.imwrite(out_path, img):
    raise IOError("Could not write image: "+out_path)

In [11]:
preprop_aug_images_path = preprop_images_path+"aug/"
# glob order is arbitrary; sorting makes the seeded split reproducible.
filenames = sorted(glob.glob(preprop_aug_images_path+"*.jpg"))
# NOTE(review): this folder holds each original image together with its
# augmented variants and the split is done per file, so variants of the same
# source image can land in both train and test — confirm this is acceptable.
train, test = train_test_split(filenames, test_size=0.25, random_state=42)

In [12]:
# Write one image path per line to xx_train.txt / xx_test.txt, with no
# trailing newline after the last path.
for list_name, paths in (("xx_train.txt", train), ("xx_test.txt", test)):
  with open(preprop_aug_images_path+list_name, "w") as f:
    f.write("\n".join(paths))