In [7]:
import os
import numpy as np
import random
import pickle
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

In [3]:
# Seed NumPy's global RNG before any augmentation is drawn so that the
# augmented dataset produced by the cells below is reproducible on re-run.
# (ImageDataGenerator samples its random transform parameters via np.random.)
np.random.seed(42)

# Define the data augmentation parameters with increased intensity
datagen = ImageDataGenerator(
    rotation_range=30,            # random rotation range
    zoom_range=0.2,               # random zoom range
    width_shift_range=0.2,        # random horizontal shift
    height_shift_range=0.2,       # random vertical shift
    shear_range=0.2,              # random shear intensity
    brightness_range=[0.2, 2.0],  # random brightness factor picked from this range
    fill_mode='nearest',          # how pixels exposed by a transform are filled
    # NOTE(review): `rescale` is applied by the generator's standardize()/flow()
    # path; the cells below only call random_transform(), so this factor is
    # presumably never applied to the saved arrays - confirm against Keras docs.
    rescale=1.0 / 255
)


## Augmentation for Classification

In [8]:
# Read the six pre-split arrays for the classification task from the pickle.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open('../data/external/data_vgg16.pkl','rb') as f:
    loaded_splits = pickle.load(f)
X_train, y_train, X_test, y_test, X_valid, y_valid = loaded_splits

# Print the shapes to verify
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_valid", X_valid),
    ("y_valid", y_valid),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} shape: {split_array.shape}")

X_train shape: (936, 224, 224, 3)
y_train shape: (936,)
X_valid shape: (235, 224, 224, 3)
y_valid shape: (235,)
X_test shape: (293, 224, 224, 3)
y_test shape: (293,)


In [9]:
# Apply data augmentation to the training set: every image yields seven
# randomly transformed variants, each keeping the label of its source image.
X_train_augmented, y_train_augmented = [], []
for img, label in zip(X_train, y_train):
    variants = [datagen.random_transform(img) for _ in range(7)]
    X_train_augmented.extend(variants)
    y_train_augmented.extend([label] * 7)

In [None]:
# Apply data augmentation to the test set: every image yields seven randomly
# transformed variants, each keeping the label of its source image.
# NOTE(review): evaluation splits are usually left un-augmented so metrics
# reflect real images - confirm that augmenting the test set is intended.
X_test_augmented, y_test_augmented = [], []
for img, label in zip(X_test, y_test):
    variants = [datagen.random_transform(img) for _ in range(7)]
    X_test_augmented.extend(variants)
    y_test_augmented.extend([label] * 7)

In [None]:
# Apply data augmentation to the validation set: every image yields seven
# randomly transformed variants, each keeping the label of its source image.
# NOTE(review): evaluation splits are usually left un-augmented so metrics
# reflect real images - confirm that augmenting the validation set is intended.
X_valid_augmented, y_valid_augmented = [], []
for img, label in zip(X_valid, y_valid):
    variants = [datagen.random_transform(img) for _ in range(7)]
    X_valid_augmented.extend(variants)
    y_valid_augmented.extend([label] * 7)

In [None]:
# Convert the augmented data to numpy arrays (one stacked array per list),
# handling the image/label pair of each split together.
X_train_augmented, y_train_augmented = (
    np.array(X_train_augmented),
    np.array(y_train_augmented),
)
X_test_augmented, y_test_augmented = (
    np.array(X_test_augmented),
    np.array(y_test_augmented),
)
X_valid_augmented, y_valid_augmented = (
    np.array(X_valid_augmented),
    np.array(y_valid_augmented),
)

In [None]:
# Combine the data: append the augmented samples after the originals for each
# split (np.concatenate joins along axis 0, the sample axis, by default).
X_train_combined = np.concatenate([X_train, X_train_augmented])
y_train_combined = np.concatenate([y_train, y_train_augmented])
X_test_combined = np.concatenate([X_test, X_test_augmented])
y_test_combined = np.concatenate([y_test, y_test_augmented])
X_valid_combined = np.concatenate([X_valid, X_valid_augmented])
y_valid_combined = np.concatenate([y_valid, y_valid_augmented])

# Check the shape of the combined data
for combined_name, combined_array in (
    ('X_train_combined', X_train_combined),
    ('y_train_combined', y_train_combined),
    ('X_test_combined', X_test_combined),
    ('y_test_combined', y_test_combined),
    ('X_valid_combined', X_valid_combined),
    ('y_valid_combined', y_valid_combined),
):
    print(f'{combined_name} shape: {combined_array.shape}')

X_train_combined shape: (7488, 224, 224, 3)
y_train_combined shape: (7488,)
X_test_combined shape: (2344, 224, 224, 3)
y_test_combined shape: (2344,)
X_valid_combined shape: (1880, 224, 224, 3)
y_valid_combined shape: (1880,)


In [None]:
# Save the augmented data to files: all six combined arrays go into a single
# compressed .npz archive, keyed by split name.
classification_arrays = {
    'X_train': X_train_combined,
    'y_train': y_train_combined,
    'X_test': X_test_combined,
    'y_test': y_test_combined,
    'X_valid': X_valid_combined,
    'y_valid': y_valid_combined,
}
np.savez_compressed('../data/external/coral_augmented_vgg16.npz', **classification_arrays)

## Augmentation for Segmentation

In [23]:
# Read the six pre-split arrays for the segmentation task from the pickle;
# here the y_* arrays hold per-pixel masks rather than class labels.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open('../data/external/data_mask.pkl','rb') as f:
    loaded_splits = pickle.load(f)
X_train, y_train, X_test, y_test, X_valid, y_valid = loaded_splits

# Print the shapes to verify
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_valid", X_valid),
    ("y_valid", y_valid),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} shape: {split_array.shape}")

X_train shape: (936, 224, 224, 3)
y_train shape: (936, 224, 224)
X_valid shape: (235, 224, 224, 3)
y_valid shape: (235, 224, 224)
X_test shape: (293, 224, 224, 3)
y_test shape: (293, 224, 224)


In [24]:
# Seed NumPy's global RNG before any augmentation is drawn so that the
# augmented segmentation dataset is reproducible on re-run.
# (ImageDataGenerator samples its random transform parameters via np.random.)
np.random.seed(42)

# Define the data augmentation parameters (milder than for classification so
# that image and mask stay well aligned with the original content).
# No `rescale` is configured: normalization is not done at augmentation time.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    brightness_range=[0.5, 1],
    fill_mode='nearest'
)

In [25]:
# Apply data augmentation to the training set.
#
# Image and mask must receive the SAME geometric transform, but the mask is a
# label map: it must not be brightness-shifted and must not be linearly
# interpolated (that would create label values that never existed). So the
# random parameters are drawn once per variant and applied to the image and
# the mask separately instead of stacking the mask as a 4th image channel.

# Generator used only to warp masks: nearest-neighbour resampling
# (interpolation_order=0) with the same border handling as `datagen`.
mask_datagen = ImageDataGenerator(
    fill_mode=datagen.fill_mode,
    cval=datagen.cval,
    interpolation_order=0,
)

X_train_augmented = []
y_train_augmented = []
for img, mask in zip(X_train, y_train):
    # Give the 2-D mask a channel axis, (H, W) -> (H, W, 1), as the generator expects
    mask = np.expand_dims(mask, axis=-1)
    for _ in range(7):
        # One random draw shared by image and mask keeps them pixel-aligned
        params = datagen.get_random_transform(img.shape)
        x_augmented = datagen.apply_transform(img, params)
        # Same geometry for the mask, but with the brightness change disabled
        mask_params = dict(params, brightness=None)
        y_augmented = mask_datagen.apply_transform(mask, mask_params)[:, :, 0]

        X_train_augmented.append(x_augmented)
        y_train_augmented.append(y_augmented)

In [26]:
# Apply data augmentation to the test set.
#
# Image and mask must receive the SAME geometric transform, but the mask is a
# label map: it must not be brightness-shifted and must not be linearly
# interpolated (that would create label values that never existed). So the
# random parameters are drawn once per variant and applied to the image and
# the mask separately instead of stacking the mask as a 4th image channel.
# NOTE(review): evaluation splits are usually left un-augmented - confirm
# that augmenting the test set is intended.

# Generator used only to warp masks: nearest-neighbour resampling
# (interpolation_order=0) with the same border handling as `datagen`.
mask_datagen = ImageDataGenerator(
    fill_mode=datagen.fill_mode,
    cval=datagen.cval,
    interpolation_order=0,
)

X_test_augmented = []
y_test_augmented = []
for img, mask in zip(X_test, y_test):
    # Give the 2-D mask a channel axis, (H, W) -> (H, W, 1), as the generator expects
    mask = np.expand_dims(mask, axis=-1)
    for _ in range(7):
        # One random draw shared by image and mask keeps them pixel-aligned
        params = datagen.get_random_transform(img.shape)
        x_augmented = datagen.apply_transform(img, params)
        # Same geometry for the mask, but with the brightness change disabled
        mask_params = dict(params, brightness=None)
        y_augmented = mask_datagen.apply_transform(mask, mask_params)[:, :, 0]

        X_test_augmented.append(x_augmented)
        y_test_augmented.append(y_augmented)

In [27]:
# Apply data augmentation to the validation set.
#
# Image and mask must receive the SAME geometric transform, but the mask is a
# label map: it must not be brightness-shifted and must not be linearly
# interpolated (that would create label values that never existed). So the
# random parameters are drawn once per variant and applied to the image and
# the mask separately instead of stacking the mask as a 4th image channel.
# NOTE(review): evaluation splits are usually left un-augmented - confirm
# that augmenting the validation set is intended.

# Generator used only to warp masks: nearest-neighbour resampling
# (interpolation_order=0) with the same border handling as `datagen`.
mask_datagen = ImageDataGenerator(
    fill_mode=datagen.fill_mode,
    cval=datagen.cval,
    interpolation_order=0,
)

X_valid_augmented = []
y_valid_augmented = []
for img, mask in zip(X_valid, y_valid):
    # Give the 2-D mask a channel axis, (H, W) -> (H, W, 1), as the generator expects
    mask = np.expand_dims(mask, axis=-1)
    for _ in range(7):
        # One random draw shared by image and mask keeps them pixel-aligned
        params = datagen.get_random_transform(img.shape)
        x_augmented = datagen.apply_transform(img, params)
        # Same geometry for the mask, but with the brightness change disabled
        mask_params = dict(params, brightness=None)
        y_augmented = mask_datagen.apply_transform(mask, mask_params)[:, :, 0]

        X_valid_augmented.append(x_augmented)
        y_valid_augmented.append(y_augmented)

In [28]:
# Convert the augmented data to numpy arrays (one stacked array per list),
# handling the image/mask pair of each split together.
X_train_augmented, y_train_augmented = (
    np.array(X_train_augmented),
    np.array(y_train_augmented),
)
X_test_augmented, y_test_augmented = (
    np.array(X_test_augmented),
    np.array(y_test_augmented),
)
X_valid_augmented, y_valid_augmented = (
    np.array(X_valid_augmented),
    np.array(y_valid_augmented),
)

In [29]:
# Combine the data: append the augmented image/mask samples after the
# originals for each split (np.concatenate joins along axis 0 by default).
X_train_combined = np.concatenate([X_train, X_train_augmented])
y_train_combined = np.concatenate([y_train, y_train_augmented])
X_test_combined = np.concatenate([X_test, X_test_augmented])
y_test_combined = np.concatenate([y_test, y_test_augmented])
X_valid_combined = np.concatenate([X_valid, X_valid_augmented])
y_valid_combined = np.concatenate([y_valid, y_valid_augmented])

# Check the shape of the combined data
for combined_name, combined_array in (
    ('X_train_combined', X_train_combined),
    ('y_train_combined', y_train_combined),
    ('X_test_combined', X_test_combined),
    ('y_test_combined', y_test_combined),
    ('X_valid_combined', X_valid_combined),
    ('y_valid_combined', y_valid_combined),
):
    print(f'{combined_name} shape: {combined_array.shape}')

X_train_combined shape: (7488, 224, 224, 3)
y_train_combined shape: (7488, 224, 224)
X_test_combined shape: (2344, 224, 224, 3)
y_test_combined shape: (2344, 224, 224)
X_valid_combined shape: (1880, 224, 224, 3)
y_valid_combined shape: (1880, 224, 224)


In [30]:
# Save the augmented data to files (data without annotation box): all six
# combined arrays go into a single compressed .npz archive, keyed by split name.
segmentation_arrays = {
    'X_train': X_train_combined,
    'y_train': y_train_combined,
    'X_test': X_test_combined,
    'y_test': y_test_combined,
    'X_valid': X_valid_combined,
    'y_valid': y_valid_combined,
}
np.savez_compressed('../data/external/coral_augmented_mask.npz', **segmentation_arrays)

In [None]:
# Load the augmented data from the saved file.
# np.load on an .npz returns an archive object that keeps the file open and
# reads each array only when it is indexed, so pull every array out inside a
# `with` block; the file handle is then closed instead of leaking.
with np.load('../data/external/coral_augmented_mask.npz') as data:
    # Access the arrays from the loaded data
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    X_valid = data['X_valid']
    y_valid = data['y_valid']

# Print the shapes to verify
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}")
print(f"y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (7488, 224, 224, 3)
y_train shape: (7488, 224, 224)
X_valid shape: (1880, 224, 224, 3)
y_valid shape: (1880, 224, 224)
X_test shape: (2344, 224, 224, 3)
y_test shape: (2344, 224, 224)
