In [1]:
import os
import pandas as pd
import shutil
import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def make_dataframe(sdir):
    """Index a folder of class subdirectories into a dataframe of file paths and labels.

    sdir is the directory where the class subdirectories are stored. The name of
    each immediate subdirectory is used as the label of the files it contains.
    Entries of sdir that are not directories are ignored. Entries of a class
    directory that are not regular files (for example nested folders) are
    ignored as well, so every row refers to an actual file.

    Returns a dataframe with columns 'filepaths' and 'labels', ordered by class
    name and then by file name. The frame is empty (but still has both columns)
    when no class directory contains a file.
    """
    filepaths=[]
    labels=[]
    for klass in sorted(os.listdir(sdir)):
        classpath=os.path.join(sdir, klass)
        if not os.path.isdir(classpath):
            continue  # stray files next to the class directories are not classes
        # keep regular files only: a nested directory is not an image and would
        # otherwise be counted as a sample of this class
        flist=[f for f in sorted(os.listdir(classpath))
               if os.path.isfile(os.path.join(classpath, f))]
        desc=f'{klass:25s}'
        for f in tqdm(flist, ncols=130, desc=desc, unit='files', colour='blue'):
            filepaths.append(os.path.join(classpath, f))
            labels.append(klass)
    # return a dataframe with columns filepaths, labels
    return pd.DataFrame({'filepaths': filepaths, 'labels': labels})

def make_and_store_images(df, augdir, n,  img_size,  color_mode='rgb', save_prefix='aug-',save_format='jpg', seed=None):
    """Top up under-represented classes with augmented images written to disk.

    Any existing augdir is deleted and recreated empty, with one subdirectory per
    distinct value in df['labels']. For every class that has fewer than n rows in
    df, n minus that row count augmented images are generated from the files of
    that class and saved into its subdirectory. Classes that already have n or
    more rows only get an empty subdirectory.

    Parameters:
      df          dataframe with columns 'filepaths' and 'labels'; it is not modified
      augdir      full path where augmented images will be stored; missing parent
                  directories are created
      n           number of samples each class should reach; classes with fewer
                  than n image samples get augmented images
      img_size    tuple (height, width) that specifies the size of the augmented images
      color_mode  'rgb' by default
      save_prefix prefix augmented images are identified with, 'aug-' by default
      save_format format augmented images are saved in, 'jpg' by default
      seed        optional value forwarded as the seed argument of
                  flow_from_dataframe; None (the default) leaves it unseeded

    Returns None. Progress and the total number of created images are printed.

    See documentation of ImageDataGenerator at
    https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
    for details.
    """
    if os.path.isdir(augdir):  # start with an empty directory
        shutil.rmtree(augdir)
    os.makedirs(augdir)  # unlike os.mkdir this also works when parent directories are missing
    # in ImageDataGenerator select the types of augmentation you desire, below are some examples
    gen=ImageDataGenerator(rotation_range=15,
                           width_shift_range=0.1,
                           height_shift_range=0.1,
                           shear_range=0.01,
                           zoom_range=[0.9, 1.25],
                           horizontal_flip=True,
                           vertical_flip=False,
                           fill_mode='reflect',
                           data_format='channels_last',
                           brightness_range=[0.5, 1.5])
    groups=df.groupby('labels')  # group by class
    total=0
    for label in df['labels'].unique():  # for every class
        classdir=os.path.join(augdir, label)
        os.mkdir(classdir)  # every class gets a directory, even when nothing is written to it
        group=groups.get_group(label)  # a dataframe holding only rows with the specified label
        delta=n - len(group)  # number of augmented images to create
        if delta <= 0:
            continue  # the class already has at least the target number of samples
        msg='{0:40s} for class {1:^30s} creating {2:^5s} augmented images'.format(' ', label, str(delta))
        print(msg, '\r', end='')  # prints over on the same line
        aug_gen=gen.flow_from_dataframe(group, x_col='filepaths', y_col=None, target_size=img_size,
                                        class_mode=None, batch_size=1, shuffle=False,
                                        save_to_dir=classdir, save_prefix=save_prefix, color_mode=color_mode,
                                        save_format=save_format, seed=seed)
        if aug_gen.n == 0:
            # without a single usable source image the fill loop below could
            # never reach delta, so skip the class instead of getting stuck
            print(f'no valid images found for class {label}, skipping augmentation')
            continue
        aug_img_count=0
        while aug_img_count < delta:
            # pulling a batch is what makes the generator write files to classdir
            aug_img_count += len(next(aug_gen))
        total += aug_img_count
    print('Total Augmented images created= ', total)

2023-03-27 13:36:25.939496: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Index the source images: one row per file, labelled with its class folder name
sdir = r'/home/kolapally/code/kolapally/computer_vision/data/faces'
df = make_dataframe(sdir)
print(df.head())
print('length of dataframe is ', len(df))

# Augmentation settings
# augdir: directory the augmented images are stored in; it is created if it does not exist
augdir = r'/home/kolapally/code/kolapally/computer_vision/data/Aug'
# n: if a class has N image samples in sdir and N < n, then n-N augmented images are created in augdir
n = 100
# img_size: (height, width) of the augmented images
img_size = (128, 128)
make_and_store_images(
    df,
    augdir,
    n,
    img_size,
    color_mode='rgb',
    save_prefix='aug-',
    save_format='jpg',
)

Angela                   : 100%|[34m████████████████████████████████████████████████████████████[0m| 27/27 [00:00<00:00, 89170.24files/s][0m
Dwight                   : 100%|[34m████████████████████████████████████████████████████████████[0m| 27/27 [00:00<00:00, 66890.85files/s][0m
Jim                      : 100%|[34m████████████████████████████████████████████████████████████[0m| 30/30 [00:00<00:00, 65982.76files/s][0m
Kevin                    : 100%|[34m████████████████████████████████████████████████████████████[0m| 29/29 [00:00<00:00, 35524.19files/s][0m
Michael                  : 100%|[34m████████████████████████████████████████████████████████████[0m| 28/28 [00:00<00:00, 58690.91files/s][0m
Pam                      : 100%|[34m████████████████████████████████████████████████████████████[0m| 30/30 [00:00<00:00, 96717.23files/s][0m

                                           filepaths  labels
0  /home/kolapally/code/kolapally/computer_vision...  Angela
1  /home/kolapally/code/kolapally/computer_vision...  Angela
2  /home/kolapally/code/kolapally/computer_vision...  Angela
3  /home/kolapally/code/kolapally/computer_vision...  Angela
4  /home/kolapally/code/kolapally/computer_vision...  Angela
length of dataframe is  171
Found 27 validated image filenames.      for class             Angela             creating  73   augmented images 





Found 27 validated image filenames.      for class             Dwight             creating  73   augmented images 
Found 30 validated image filenames.      for class              Jim               creating  70   augmented images 
Found 29 validated image filenames.      for class             Kevin              creating  71   augmented images 
Found 28 validated image filenames.      for class            Michael             creating  72   augmented images 
Found 30 validated image filenames.      for class              Pam               creating  70   augmented images 
Total Augmented images created=  429
