In [1]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import cv2
import imutils
import matplotlib.pyplot as plt
from os import listdir
import time    

%matplotlib inline

Using TensorFlow backend.


In [2]:
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60
    return f"{h}:{m}:{round(s,1)}"

In [3]:
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """
    Arguments:
        file_dir: A string representing the directory where images that we want to augment are found.
        n_generated_samples: A string representing the number of generated samples using the given image.
        save_to_dir: A string representing the directory in which the generated images will be saved.
    """
   
    
    data_gen = ImageDataGenerator(rotation_range=10, 
                                  width_shift_range=0.1, 
                                  height_shift_range=0.1, 
                                  shear_range=0.1, 
                                  brightness_range=(0.3, 1.0),
                                  horizontal_flip=True, 
                                  vertical_flip=True, 
                                  fill_mode='nearest'
                                 )

    
    for filename in listdir(file_dir):
        
        image = cv2.imread(file_dir + '\\' + filename)
        
        image = image.reshape((1,)+image.shape)
        
        save_prefix = 'aug_' + filename[:-4]
        
        i=0
        for batch in data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir, 
                                           save_prefix=save_prefix, save_format='jpg'):
            i += 1
            if i > n_generated_samples:
                break

In [4]:
import os

start_time = time.time()

augmented_data_path = r'C:\Users\MyPC\Desktop\project\brain_tumor_dataset'

yes_path=r'C:\Users\MyPC\Desktop\project\brain_tumor_dataset\yes'

no_path=r'C:\Users\MyPC\Desktop\project\brain_tumor_dataset\no'


augment_data(file_dir=yes_path, n_generated_samples=6, save_to_dir=os.path.join(augmented_data_path,'yes'))

augment_data(file_dir=no_path, n_generated_samples=9, save_to_dir=os.path.join(augmented_data_path,'no'))

end_time = time.time()
execution_time = (end_time - start_time)
print(f"Elapsed time: {hms_string(execution_time)}")

Elapsed time: 0:6:0.9


In [5]:
def data_summary(main_path):
    
    yes_path = os.path.join(main_path,'yes')
    no_path = os.path.join(main_path,'no')
        
    
    m_pos = len(listdir(yes_path))
    
    m_neg = len(listdir(no_path))
    
    m = (m_pos+m_neg)
    
    pos_prec = (m_pos* 100.0)/ m
    neg_prec = (m_neg* 100.0)/ m
    
    print(f"Number of examples: {m}")
    print(f"Percentage of positive examples: {pos_prec}%, number of pos examples: {m_pos}") 
    print(f"Percentage of negative examples: {neg_prec}%, number of neg examples: {m_neg}")

In [6]:
data_summary(augmented_data_path)

Number of examples: 2315
Percentage of positive examples: 53.52051835853132%, number of pos examples: 1239
Percentage of negative examples: 46.47948164146868%, number of neg examples: 1076
