In [5]:
%reset -f
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
from numpy import random
# Notebook-wide seed; seeds NumPy's global RNG so np.random-based shuffles are repeatable.
seed = 42
np.random.seed(seed)
from natsort import natsorted
import pandas as pd
import git, glob, os, cv2 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Edge length (in pixels) used for the square resize of every crop.
img_dim = 80

# Walk up from the current directory until the enclosing git work tree is found,
# so the data folders below do not depend on where the notebook was launched.
repo = git.Repo(path='.', search_parent_directories=True)

# Data folders, all anchored at the repository root.
root_path = f'{repo.working_tree_dir}/insectrec/created_data/'
original_datapath = f'{root_path}impy_crops_export'
aug_datapath = f'{root_path}images_augmented'

In [6]:
# Label encoder that maps class-folder names to integer ids.
le = LabelEncoder()

# Index every original crop. Columns:
#   x          - path of the image file
#   textlabels - name of the folder the image lives in (the class name)
#   y          - numerical label produced by `le`
# NOTE(review): glob returns files in filesystem order, so the split below is only
# reproducible as long as that order is stable - confirm before sorting here, since
# other cells may rely on the same ordering.
image_files = glob.glob(f"{original_datapath}/*/*.jpg")
if not image_files:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise FileNotFoundError(f"no .jpg files found under {original_datapath}/*/")

df_orig = pd.DataFrame({'x': image_files})
# Take the parent folder via os.path so the label is correct whichever path
# separator glob returned (a plain split('/') breaks on backslash paths).
df_orig['textlabels'] = df_orig['x'].apply(lambda p: os.path.basename(os.path.dirname(p)))
df_orig['y'] = le.fit_transform(df_orig.textlabels)

# Splitting into train/val/test: 20% held out for test, then 20% of the remainder
# for validation (64% / 16% / 20% overall).
X_train, X_test, y_train, y_test = train_test_split(df_orig.x, df_orig.y, test_size=0.2, random_state=seed, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed, shuffle=True)

print(" loading images...")
data = []
labels = []

# Natural-sort first so the shuffle starts from a fixed order, then shuffle with
# the notebook-wide seed. `data` and `labels` follow THIS order, not the order of
# X_train / y_train.
imagePaths = natsorted(X_train.tolist())
np.random.seed(seed)
np.random.shuffle(imagePaths)
for imagePath in imagePaths:
    # cv2.imread returns None (it does not raise) when a file is missing or
    # unreadable; stop with the offending path instead of an opaque cvtColor error.
    image = cv2.imread(imagePath)
    if image is None:
        raise IOError(f"could not read image: {imagePath}")

    # OpenCV loads BGR; convert to RGB, resize to the square input size, and
    # store the pixel array in the data list.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (img_dim, img_dim))
    image = img_to_array(image)
    data.append(image)

    # The class label is the name of the folder containing the image.
    label = os.path.basename(os.path.dirname(imagePath))
    labels.append(label)

# Stack into one array and scale pixel values from [0, 255] to [0, 1].
data = np.array(data, dtype="float") / 255.0
print(data.shape)

# Random geometric augmentations: rotations, small shifts and flips; pixels
# exposed by a transform are filled from the nearest edge.
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode="nearest")

# Numerical label -> class name, used to pick the output folder of each image.
name_map = dict(zip(le.transform(le.classes_), le.classes_))
print(name_map)

# Labels must follow the order of `data`, which was built from the sorted and
# shuffled `imagePaths`. y_train is still in split order, so using it here would
# file augmented images under the wrong class; encode `labels` instead.
y = np.array(le.transform(labels), dtype="float")
assert len(y) == len(data), "labels and images are out of sync"

# Write below the repo-anchored augmented-data folder rather than a path relative
# to the current working directory. The trailing slash is kept for compatibility
# with code that concatenates onto this variable.
aug_imgs_path = f'{aug_datapath}/'

# One sub-folder per class; makedirs also creates missing parents and is a no-op
# when the folder already exists.
for class_name in le.classes_:
    os.makedirs(os.path.join(aug_imgs_path, class_name), exist_ok=True)

# aug.fit(data) is intentionally not called: it only computes statistics for
# featurewise centering/normalisation and ZCA whitening, none of which is enabled.
nb_batches = 0
for X_batch, y_batch in aug.flow(data, y, batch_size=512, seed=seed):
    for i, mat in enumerate(X_batch):
        class_name = name_map[int(y_batch[i])]
        # Batch and position counters give every file a unique, reproducible name
        # (random suffixes could collide and silently overwrite earlier images).
        out_file = os.path.join(aug_imgs_path, class_name,
                                f'{class_name}_{nb_batches:04d}_{i:04d}.jpg')
        # Back to 8-bit: scale [0, 1] -> [0, 255], round and clip before the cast.
        pixels = np.clip(np.rint(mat * 255.0), 0, 255).astype(np.uint8)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(out_file, cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)):
            raise IOError(f"could not write augmented image: {out_file}")
    nb_batches += 1

    # flow() yields batches indefinitely; stop once 101 batches have been written.
    if nb_batches > 100:
        break

 loading images...
(11282, 80, 80, 3)
{0: 'bl', 1: 'c', 2: 'm', 3: 'v', 4: 'v(cy)', 5: 'wmv'}


{0: 'bl', 1: 'c', 2: 'm', 3: 'v', 4: 'v(cy)', 5: 'wmv'}
