In [1]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Mount Google Drive inside the Colab VM; the dataset paths used below live under
# /content/drive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [3]:
!cp -r /content/drive/MyDrive/QBIO\ 465\ FP/Original /content/

In [4]:
# Root folder of the image dataset on the mounted Google Drive.
# NOTE(review): the previous cell copies this folder to /content/, but this path
# still points at the Drive copy — confirm which one later cells are meant to read.
dataset_path = '/content/drive/MyDrive/QBIO 465 FP/Original'
# Class labels. 'Benign' matches a sub-folder of dataset_path used by the
# augmentation cell; presumably the others are sub-folders too — TODO confirm.
classes = ['Benign', 'Early', 'Pre', 'Pro']

In [5]:
import os
import shutil
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, save_img

# Build a Benign image set of `target_count` files in <Benign>/augmented:
# the untouched originals plus lightly augmented copies (small rotation and
# brightness jitter). The folder is rebuilt from scratch on every run.

# === Step 1: Clear and remake augmented folder
benign_dir = '/content/drive/MyDrive/QBIO 465 FP/Original/Benign'
augmented_dir = os.path.join(benign_dir, 'augmented')
shutil.rmtree(augmented_dir, ignore_errors=True)  # best effort: the folder may not exist yet
os.makedirs(augmented_dir, exist_ok=True)

# === Step 2: Copy original benign images into augmented folder
# Sorted so the set of originals that gets augmented does not depend on the
# arbitrary order returned by os.listdir.
original_images = sorted(
    f for f in os.listdir(benign_dir)
    if f.endswith('.jpg') and not f.startswith('aug_')
)
original_count = len(original_images)
if original_count == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise FileNotFoundError(f"No original .jpg images found in {benign_dir}")

for img_name in original_images:
    src_path = os.path.join(benign_dir, img_name)
    dst_path = os.path.join(augmented_dir, img_name)
    shutil.copy(src_path, dst_path)

# === Step 3: Set up augmentation
datagen = ImageDataGenerator(
    rotation_range=5,
    brightness_range=(0.95, 1.05),
    fill_mode='nearest'
)

# === Step 4: Augment until total 900 images
target_count = 900
augment_seed = 42  # fixed seed so Restart & Run All is intended to rebuild the same augmented set
current_images = os.listdir(augmented_dir)
# Clamp at zero: if the folder already holds target_count (or more) files,
# nothing must be generated.
num_to_generate = max(0, target_count - len(current_images))

# Determine how many augmentations per original image
augmentations_per_image = int(np.ceil(num_to_generate / original_count))
generated = 0

for img_idx, img_name in enumerate(original_images):
    if generated >= num_to_generate:
        break

    img_path = os.path.join(benign_dir, img_name)
    x = np.expand_dims(img_to_array(load_img(img_path)), axis=0)  # add batch axis for flow()

    # splitext keeps the full stem; split('.')[0] would cut multi-dot names at
    # the first dot and let different originals overwrite each other's output.
    stem = os.path.splitext(img_name)[0]
    quota = min(augmentations_per_image, num_to_generate - generated)

    # Offset the seed per image so every image does not share one seed.
    batches = datagen.flow(x, batch_size=1, seed=augment_seed + img_idx)
    # zip with range() stops the (otherwise endless) generator after `quota` batches.
    for i, batch in zip(range(quota), batches):
        save_path = os.path.join(augmented_dir, f"aug_{stem}_{i}.jpg")
        # scale=False: the default scale=True min-max rescales pixel values to
        # [0, 255], which stretches contrast and largely cancels the brightness
        # jitter; the generated values are already in the 0-255 range.
        save_img(save_path, batch[0], scale=False)
        generated += 1

print(f"Final number of Benign images: {len(os.listdir(augmented_dir))}")


Final number of Benign images: 900
