In [1]:
import numpy as np 
import os
import pandas as pd
from scipy import ndarray
import skimage as sk
from skimage import io
import random
from skimage import transform
from skimage import img_as_ubyte
import shutil

In [2]:
input_dir = "/home/mimus/apiais/data/images/ANPR/training/original_char_seg/"
target_dir = "/home/mimus/apiais/data/images/ANPR/training/masks_char_seg/"


def _list_jpg_paths(directory):
    """Return the sorted full paths of the visible ``.jpg`` files in ``directory``.

    Entries whose name starts with a dot are skipped, so hidden files cannot
    end up in the listing.
    """
    return sorted(
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if fname.endswith(".jpg") and not fname.startswith(".")
    )


# Images and masks are listed with the *same* filter: they are paired purely
# by position in the sorted lists, so a hidden file accepted on one side only
# would shift every following pair.
input_img_paths = _list_jpg_paths(input_dir)
target_img_paths = _list_jpg_paths(target_dir)

print("Number of samples:", len(input_img_paths))

# zip() silently stops at the shorter list, so make a mismatch visible.
if len(input_img_paths) != len(target_img_paths):
    print(
        "WARNING: image/mask count mismatch:",
        len(input_img_paths), "images vs", len(target_img_paths), "masks",
    )

# Show the first few pairs as a sanity check of the pairing.
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)


Number of samples: 11122
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_0_3.jpg | /home/mimus/apiais/data/images/ANPR/training/masks_char_seg/masks_0_0_3.jpg
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_0_3_augmented_image_8614.jpg | /home/mimus/apiais/data/images/ANPR/training/masks_char_seg/masks_0_0_3_augmented_image_8614.jpg
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_10.jpg | /home/mimus/apiais/data/images/ANPR/training/masks_char_seg/masks_0_10.jpg
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_10_augmented_image_8859.jpg | /home/mimus/apiais/data/images/ANPR/training/masks_char_seg/masks_0_10_augmented_image_8859.jpg
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_11.jpg | /home/mimus/apiais/data/images/ANPR/training/masks_char_seg/masks_0_11.jpg
/home/mimus/apiais/data/images/ANPR/training/original_char_seg/original_0_11_augmented_image_1074

In [3]:
def split_train_test(data, test_ratio, random_state=None):
    """Randomly split ``data`` into a training part and a test part.

    Parameters
    ----------
    data : pandas.DataFrame (or any object supporting ``len`` and ``.iloc``)
        The rows to split.
    test_ratio : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test
        part. The test size is ``int(len(data) * test_ratio)``.
    random_state : int or None, optional
        Seed for the shuffle. With ``None`` (the default) NumPy's global
        random state is used, exactly as before; pass an integer to get the
        same split on every run.

    Returns
    -------
    tuple
        ``(train, test)``, both selected from ``data`` with ``.iloc``.

    Raises
    ------
    ValueError
        If ``test_ratio`` is outside ``[0, 1]``.
    """
    # A negative ratio would turn the slices below into "all but the last k"
    # and silently swap the roles of the two parts, so reject it up front.
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1, got %r" % (test_ratio,))

    if random_state is None:
        shuffled_indices = np.random.permutation(len(data))
    else:
        shuffled_indices = np.random.RandomState(random_state).permutation(len(data))

    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [4]:
# One-column frame holding the image paths. A test ratio of 0 keeps every
# sample in the training part and leaves the test part empty.
df = pd.DataFrame(data=input_img_paths)
train_set, test_set = split_train_test(data=df, test_ratio=0)
print(len(train_set), "train +", len(test_set), "test")

11122 train + 0 test


In [5]:
# Move (not copy) the test images and their masks out of the '/training/'
# tree into the parallel '/test/' tree for later use.

test_files = test_set.values.tolist()
# Each row of the one-column frame is a one-element list holding the path.
test_file_names = [row[0] for row in test_files]
# The mask path mirrors the image path: '/original_' occurs both in the
# directory name and in the file-name prefix, and both become '/masks_'.
test_file_names2 = [name.replace('/original_', '/masks_') for name in test_file_names]

# Masks first, then images, as before.
for name in test_file_names2 + test_file_names:
    destination = name.replace('/training/', '/test/')
    # shutil.move does not create missing parent directories, so make sure
    # the destination folder exists before moving.
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    shutil.move(name, destination)

In [6]:
# Flatten the one-column training frame into a plain list of image paths,
# then derive the matching mask path for each one by swapping every
# '/original_' for '/masks_'.
train_files = train_set.values.tolist()
train_file_names = [row[0] for row in train_files]
train_file_names2 = [path.replace('/original_', '/masks_') for path in train_file_names]

In [7]:
def random_rotation(image_array: ndarray, image_array2: ndarray):
    """Rotate two arrays by the same randomly drawn angle.

    Parameters
    ----------
    image_array : ndarray
        The image to rotate.
    image_array2 : ndarray
        A second array rotated by the identical angle so it stays aligned
        with ``image_array``; the augmentation loop passes the mask here.

    Returns
    -------
    tuple
        ``(rotated_image, rotated_second)`` as returned by
        ``skimage.transform.rotate``.
    """
    # pick a random rotation angle between -25 and 25 degrees
    random_degree = random.uniform(-25, 25)
    rotated_image = sk.transform.rotate(image_array, random_degree)
    # order=0 (nearest neighbour) keeps the second array's existing pixel
    # values instead of blending neighbouring values along edges, which is
    # what a label mask needs.
    rotated_second = sk.transform.rotate(image_array2, random_degree, order=0)
    return rotated_image, rotated_second

def random_noise(image_array: ndarray, image_array2: ndarray):
    """Return a noisy version of ``image_array`` together with ``image_array2`` as-is.

    Only the first array goes through ``skimage.util.random_noise`` (default
    settings); the second one is handed back unchanged.
    """
    noisy_image = sk.util.random_noise(image_array)
    return noisy_image, image_array2

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

original_img = train_file_names
mask_img = train_file_names2
num_files_desired = 20000

final_val = len(original_img)

# The running total starts at the number of files we already have; every
# augmented image/mask pair written below bumps it by one.
num_generated_files = len(original_img)
counter = 0
transformation_keys = list(available_transformations)

# Stop once the total reaches exactly num_files_desired (a '<=' here would
# write one file too many). An empty source list cannot produce anything, so
# the loop is skipped in that case.
while original_img and num_generated_files < num_files_desired:

    # Walk the source images in order, wrapping around when more augmented
    # files are needed than there are source images.
    source_index = counter % len(original_img)
    image_path = original_img[source_index]
    image_path2 = mask_img[source_index]

    # read the image and its mask as arrays of pixels
    transformed_image = sk.io.imread(image_path)
    transformed_image2 = sk.io.imread(image_path2)

    # random number of transformations to apply
    num_transformations_to_apply = random.randint(1, len(available_transformations))
    for _ in range(num_transformations_to_apply):
        # random transformation to apply; feed the previous result back in so
        # the transformations accumulate instead of overwriting one another.
        # The mask goes through the same call so it stays aligned with the image.
        key = random.choice(transformation_keys)
        transformed_image, transformed_image2 = available_transformations[key](
            transformed_image, transformed_image2
        )

    # The running total in the file name keeps every output name unique,
    # even when a source image is revisited.
    new_file_path = '%s_augmented_image_%s.jpg' % (os.path.splitext(image_path)[0], num_generated_files)
    new_file_path2 = '%s_augmented_image_%s.jpg' % (os.path.splitext(image_path2)[0], num_generated_files)

    # write image and mask to the disk
    io.imsave(new_file_path, img_as_ubyte(transformed_image))
    io.imsave(new_file_path2, img_as_ubyte(transformed_image2))
    num_generated_files += 1
    counter += 1
    print(counter, num_generated_files)


1 11123
2 11124
3 11125
4 11126
5 11127
6 11128
7 11129
8 11130
9 11131
10 11132
11 11133
12 11134
13 11135
14 11136
15 11137
16 11138
17 11139
18 11140
19 11141
20 11142
21 11143
22 11144
23 11145
24 11146
25 11147
26 11148
27 11149
28 11150
29 11151
30 11152
31 11153
32 11154
33 11155
34 11156
35 11157
36 11158
37 11159
38 11160
39 11161
40 11162
41 11163
42 11164
43 11165
44 11166
45 11167
46 11168
47 11169
48 11170
49 11171
50 11172
51 11173
52 11174
53 11175
54 11176
55 11177
56 11178
57 11179
58 11180
59 11181
60 11182
61 11183
62 11184
63 11185
64 11186
65 11187
66 11188
67 11189
68 11190
69 11191
70 11192
71 11193
72 11194
73 11195
74 11196
75 11197
76 11198
77 11199
78 11200
79 11201
80 11202
81 11203
82 11204
83 11205
84 11206
85 11207
86 11208
87 11209
88 11210
89 11211
90 11212
91 11213
92 11214
93 11215
94 11216
95 11217
96 11218
97 11219
98 11220
99 11221
100 11222
101 11223
102 11224
103 11225
104 11226
105 11227
106 11228
107 11229
108 11230
109 11231
110 11232
111 1123