In [1]:
import os
import numpy as np
import random
import pickle
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

  from IPython.utils import traitlets as _traitlets
2023-11-01 13:36:03.864818: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the pre-split dataset. The pickle unpacks into six objects in the
# order: train images/labels, test images/labels, validation images/labels.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles
# that were produced by this project.
with open('../data/external/data.pkl', 'rb') as pkl_file:
    splits = pickle.load(pkl_file)
X_train, y_train, X_test, y_test, X_valid, y_valid = splits

# Report the shape of every split as a sanity check.
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_valid shape: {X_valid.shape}",
    f"y_valid shape: {y_valid.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

X_train shape: (575, 224, 224, 3)
y_train shape: (575,)
X_valid shape: (144, 224, 224, 3)
y_valid shape: (144,)
X_test shape: (180, 224, 224, 3)
y_test shape: (180,)


In [13]:
# Mild augmentation settings, collected in one dict so they are easy to
# inspect and tweak. No `rescale` is configured here (pixel values are not
# normalized by this generator).
augmentation_params = {
    "rotation_range": 10,
    "zoom_range": 0.1,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "shear_range": 0.1,
    "brightness_range": [0.5, 1.5],
    "fill_mode": 'nearest',
}
datagen = ImageDataGenerator(**augmentation_params)

In [14]:
# Training set: produce 5 randomly transformed copies of every image,
# each paired with the label of the image it came from.
X_train_augmented, y_train_augmented = [], []
for idx, img in enumerate(X_train):
    label = y_train[idx]
    for _ in range(5):
        X_train_augmented.append(datagen.random_transform(img))
        y_train_augmented.append(label)

In [15]:
# Test set: produce 5 randomly transformed copies of every image,
# each paired with the label of the image it came from.
# NOTE(review): augmenting the evaluation split changes what the test
# metrics measure — confirm this is intended.
X_test_augmented, y_test_augmented = [], []
for idx, img in enumerate(X_test):
    label = y_test[idx]
    for _ in range(5):
        X_test_augmented.append(datagen.random_transform(img))
        y_test_augmented.append(label)

In [16]:
# Validation set: produce 5 randomly transformed copies of every image,
# each paired with the label of the image it came from.
X_valid_augmented, y_valid_augmented = [], []
for idx, img in enumerate(X_valid):
    label = y_valid[idx]
    for _ in range(5):
        X_valid_augmented.append(datagen.random_transform(img))
        y_valid_augmented.append(label)

In [17]:
# Turn each accumulated Python list into a single ndarray, one split at a time.
X_train_augmented, y_train_augmented = (
    np.array(X_train_augmented),
    np.array(y_train_augmented),
)
X_test_augmented, y_test_augmented = (
    np.array(X_test_augmented),
    np.array(y_test_augmented),
)
X_valid_augmented, y_valid_augmented = (
    np.array(X_valid_augmented),
    np.array(y_valid_augmented),
)

In [18]:
# Append the augmented samples after the originals along the sample axis
# (axis 0 is np.concatenate's default), so the originals come first.
X_train_combined = np.concatenate([X_train, X_train_augmented])
y_train_combined = np.concatenate([y_train, y_train_augmented])
X_test_combined = np.concatenate([X_test, X_test_augmented])
y_test_combined = np.concatenate([y_test, y_test_augmented])
X_valid_combined = np.concatenate([X_valid, X_valid_augmented])
y_valid_combined = np.concatenate([y_valid, y_valid_augmented])

# Report the resulting shapes.
print(
    f'X_train_combined shape: {X_train_combined.shape}',
    f'y_train_combined shape: {y_train_combined.shape}',
    f'X_test_combined shape: {X_test_combined.shape}',
    f'y_test_combined shape: {y_test_combined.shape}',
    f'X_valid_combined shape: {X_valid_combined.shape}',
    f'y_valid_combined shape: {y_valid_combined.shape}',
    sep="\n",
)

X_train_combined shape: (3450, 224, 224, 3)
y_train_combined shape: (3450,)
X_test_combined shape: (1080, 224, 224, 3)
y_test_combined shape: (1080,)
X_valid_combined shape: (864, 224, 224, 3)
y_valid_combined shape: (864,)


In [19]:
# Persist all six combined arrays in one compressed .npz archive; the dict
# keys become the array names inside the archive.
combined_arrays = {
    'X_train': X_train_combined,
    'y_train': y_train_combined,
    'X_test': X_test_combined,
    'y_test': y_test_combined,
    'X_valid': X_valid_combined,
    'y_valid': y_valid_combined,
}
np.savez_compressed(
    '../data/external/coral_augmented_combined_vgg16_imbalaned.npz',
    **combined_arrays,
)

In [10]:
# Re-open the archive written by the save cell.
# NOTE(review): this rebinds X_train/X_test/X_valid to the already-augmented
# arrays, so any augmentation cell run afterwards operates on augmented data.
data = np.load('../data/external/coral_augmented_combined_vgg16_imbalaned.npz')

# Pull each split out of the archive by name.
X_train, y_train = data['X_train'], data['y_train']
X_test, y_test = data['X_test'], data['y_test']
X_valid, y_valid = data['X_valid'], data['y_valid']

# Report the shapes to confirm the round trip.
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_valid shape: {X_valid.shape}",
    f"y_valid shape: {y_valid.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

X_train shape: (3450, 224, 224, 3)
y_train shape: (3450,)
X_valid shape: (864, 224, 224, 3)
y_valid shape: (864,)
X_test shape: (1080, 224, 224, 3)
y_test shape: (1080,)


## New augmentation parameters

In [2]:
# Stronger augmentation settings than the first configuration in this
# notebook (which used 10 / 0.1 / [0.5, 1.5] and no rescale).
# NOTE(review): in Keras, `rescale` is applied by standardize()/flow(), not by
# random_transform(); confirm whether the arrays produced by the loops that
# call datagen.random_transform() are actually scaled to [0, 1].
strong_augmentation_params = {
    "rotation_range": 30,            # degrees
    "zoom_range": 0.2,
    "width_shift_range": 0.2,        # fraction of width
    "height_shift_range": 0.2,       # fraction of height
    "shear_range": 0.2,
    "brightness_range": [0.2, 2.0],
    "fill_mode": 'nearest',
    "rescale": 1.0 / 255,
}
datagen = ImageDataGenerator(**strong_augmentation_params)


In [5]:
# Training set: produce 7 randomly transformed copies of every image,
# each paired with the label of the image it came from.
X_train_augmented, y_train_augmented = [], []
for idx, img in enumerate(X_train):
    label = y_train[idx]
    for _ in range(7):
        X_train_augmented.append(datagen.random_transform(img))
        y_train_augmented.append(label)

In [6]:
# Test set: produce 7 randomly transformed copies of every image,
# each paired with the label of the image it came from.
# NOTE(review): augmenting the evaluation split changes what the test
# metrics measure — confirm this is intended.
X_test_augmented, y_test_augmented = [], []
for idx, img in enumerate(X_test):
    label = y_test[idx]
    for _ in range(7):
        X_test_augmented.append(datagen.random_transform(img))
        y_test_augmented.append(label)

In [7]:
# Validation set: produce 7 randomly transformed copies of every image,
# each paired with the label of the image it came from.
X_valid_augmented, y_valid_augmented = [], []
for idx, img in enumerate(X_valid):
    label = y_valid[idx]
    for _ in range(7):
        X_valid_augmented.append(datagen.random_transform(img))
        y_valid_augmented.append(label)

In [8]:
# Turn each accumulated Python list into a single ndarray, one split at a time.
X_train_augmented, y_train_augmented = (
    np.array(X_train_augmented),
    np.array(y_train_augmented),
)
X_test_augmented, y_test_augmented = (
    np.array(X_test_augmented),
    np.array(y_test_augmented),
)
X_valid_augmented, y_valid_augmented = (
    np.array(X_valid_augmented),
    np.array(y_valid_augmented),
)

In [9]:
# Append the augmented samples after the originals along the sample axis
# (axis 0 is np.concatenate's default), so the originals come first.
X_train_combined = np.concatenate([X_train, X_train_augmented])
y_train_combined = np.concatenate([y_train, y_train_augmented])
X_test_combined = np.concatenate([X_test, X_test_augmented])
y_test_combined = np.concatenate([y_test, y_test_augmented])
X_valid_combined = np.concatenate([X_valid, X_valid_augmented])
y_valid_combined = np.concatenate([y_valid, y_valid_augmented])

# Report the resulting shapes.
print(
    f'X_train_combined shape: {X_train_combined.shape}',
    f'y_train_combined shape: {y_train_combined.shape}',
    f'X_test_combined shape: {X_test_combined.shape}',
    f'y_test_combined shape: {y_test_combined.shape}',
    f'X_valid_combined shape: {X_valid_combined.shape}',
    f'y_valid_combined shape: {y_valid_combined.shape}',
    sep="\n",
)

X_train_combined shape: (7592, 224, 224, 3)
y_train_combined shape: (7592,)
X_test_combined shape: (2376, 224, 224, 3)
y_test_combined shape: (2376,)
X_valid_combined shape: (1904, 224, 224, 3)
y_valid_combined shape: (1904,)


In [9]:
# Persist all six combined arrays (data without annotation box) in one
# compressed .npz archive; the dict keys become the array names inside it.
combined_arrays = {
    'X_train': X_train_combined,
    'y_train': y_train_combined,
    'X_test': X_test_combined,
    'y_test': y_test_combined,
    'X_valid': X_valid_combined,
    'y_valid': y_valid_combined,
}
np.savez_compressed(
    '../data/external/coral_augmented_combined.npz',
    **combined_arrays,
)

In [None]:
# Re-open the archive written by the save cell.
# NOTE(review): this rebinds X_train/X_test/X_valid to the already-augmented
# arrays, so any augmentation cell run afterwards operates on augmented data.
data = np.load('../data/external/coral_augmented_combined.npz')

# Pull each split out of the archive by name.
X_train, y_train = data['X_train'], data['y_train']
X_test, y_test = data['X_test'], data['y_test']
X_valid, y_valid = data['X_valid'], data['y_valid']

# Report the shapes to confirm the round trip.
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_valid shape: {X_valid.shape}",
    f"y_valid shape: {y_valid.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

### Test data augmentation

In [6]:
# Load the separate test-only dataset; the pickle unpacks into an
# (images, labels) pair.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles
# that were produced by this project.
with open('../data/external/data_test.pkl', 'rb') as pkl_file:
    test_split = pickle.load(pkl_file)
X_test, y_test = test_split

# Report the shapes as a sanity check.
print(
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

X_test shape: (51, 224, 224, 3)
y_test shape: (51,)


In [9]:
# Augment the test split that was just loaded from data_test.pkl and append
# the augmented copies after the originals.
#
# The augmentation is regenerated here on purpose: without it this cell would
# reuse whatever `X_test_augmented` / `y_test_augmented` were left in the
# kernel by an earlier section (built from a different `X_test`), and the
# saved test archive would silently mix unrelated images and labels.
# Five copies per image matches the recorded output for this section
# (51 originals -> 306 combined rows). Uses the `datagen` currently in scope.
N_TEST_AUGMENTATIONS = 5

X_test_augmented = []
y_test_augmented = []
for idx, img in enumerate(X_test):
    label = y_test[idx]
    for _ in range(N_TEST_AUGMENTATIONS):
        X_test_augmented.append(datagen.random_transform(img))
        y_test_augmented.append(label)

X_test_augmented = np.array(X_test_augmented)
y_test_augmented = np.array(y_test_augmented)

X_test_combined = np.concatenate((X_test, X_test_augmented), axis=0)
y_test_combined = np.concatenate((y_test, y_test_augmented), axis=0)

# Invariants: originals plus N copies each, and one label per image.
assert X_test_combined.shape[0] == X_test.shape[0] * (N_TEST_AUGMENTATIONS + 1)
assert y_test_combined.shape[0] == X_test_combined.shape[0]

In [10]:
# Report the shapes of the combined test arrays.
print(
    f'X_test_combined shape: {X_test_combined.shape}',
    f'y_test_combined shape: {y_test_combined.shape}',
    sep="\n",
)

X_test_combined shape: (306, 224, 224, 3)
y_test_combined shape: (306,)


In [11]:
# Save the augmented data to files(data without annotation box)
np.savez_compressed('../data/external/coral_augmented_test.npz',
                    X_test=X_test_combined,
                    y_test=y_test_combined)

## Augmenting balanced data


In [4]:
# Load the balanced (v2) dataset. The pickle unpacks into six objects in the
# order: train images/labels, test images/labels, validation images/labels.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles
# that were produced by this project.
with open('../data/external/data_bl_v2.pkl', 'rb') as pkl_file:
    splits = pickle.load(pkl_file)
X_train, y_train, X_test, y_test, X_valid, y_valid = splits

# Report the shape of every split as a sanity check.
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_valid shape: {X_valid.shape}",
    f"y_valid shape: {y_valid.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

X_train shape: (949, 224, 224, 3)
y_train shape: (949,)
X_valid shape: (238, 224, 224, 3)
y_valid shape: (238,)
X_test shape: (297, 224, 224, 3)
y_test shape: (297,)


In [10]:
# Save the augmented data to files(data without annotation box)
np.savez_compressed('../data/external/coral_augmented_combined_bl_v2.npz',
                    X_train=X_train_combined,
                    y_train=y_train_combined,
                    X_test=X_test_combined,
                    y_test=y_test_combined,
                    X_valid=X_valid_combined,
                    y_valid=y_valid_combined)