# Data Augmentation

In [None]:
# Gather the full path of every file found anywhere below the HGG dataset
# folder, then report how many were found.
hgg_root = '/kaggle/input/brats-2020-denoised-data/Mask Imposed Image Dataset/HGG'
HGG_image_paths = [
    os.path.join(parent, name)
    for parent, _, names in os.walk(hgg_root)
    for name in names
]
print(len(HGG_image_paths))

In [None]:
# Gather the full path of every file found anywhere below the LGG dataset
# folder, then report how many were found.
lgg_root = '/kaggle/input/brats-2020-denoised-data/Mask Imposed Image Dataset/LGG'
LGG_image_paths = [
    os.path.join(parent, name)
    for parent, _, names in os.walk(lgg_root)
    for name in names
]
print(len(LGG_image_paths))

## Inference

There are 46 images in the HGG category and 48 images in the LGG category.

In [None]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
import pathlib

In [None]:
# Print the version string of the imported TensorFlow package.
print(tf.__version__)

In [None]:
# Load a single .npy file from the HGG folder and show the shape of the
# resulting array (the bare expression is displayed as the cell output).
file_path_npy = '/kaggle/input/brats-2020-denoised-data/Mask Imposed Image Dataset/HGG/masked_img_denoised_image_0.npy'
data_npy = np.load(file_path_npy)
data_npy.shape

In [None]:
# Dimensions of a near-square grid with at least one cell per index along
# axis 2 of data_npy: rows = ceil(sqrt(n)), cols = ceil(n / rows).
num_rows = int(np.ceil(np.sqrt(data_npy.shape[2])))
num_cols = int(np.ceil(data_npy.shape[2] / num_rows))

In [None]:
# Create the HGG slice output folder; exist_ok=True makes re-running a no-op.
os.makedirs('/kaggle/working/HGG_slices',exist_ok=True)

In [None]:
# Create the LGG slice output folder; exist_ok=True makes re-running a no-op.
os.makedirs('/kaggle/working/LGG_slices',exist_ok=True)

In [None]:
# NOTE(review): no earlier visible cell imports matplotlib, so import it here
# to keep this cell runnable on its own; harmless if it is already imported.
import matplotlib.pyplot as plt

input_dir = '/kaggle/input/brats-2020-denoised-data/Mask Imposed Image Dataset/HGG'
output_dir = '/kaggle/working/HGG_slices'

# Export every .npy array in input_dir as a folder of grayscale PNG slices:
#   <output_dir>/<file name without extension>/slice_000.png, slice_001.png, ...
for filename in os.listdir(input_dir):
    if not filename.endswith('.npy'):
        continue

    # Load the array stored in this file
    path = os.path.join(input_dir, filename)
    data = np.load(path)

    # One sub-directory per input file, named after the file
    image_dir = os.path.join(output_dir, os.path.splitext(filename)[0])
    os.makedirs(image_dir, exist_ok=True)

    # Write one PNG per index along axis 2. plt.imsave writes the array
    # straight to disk, so no figure has to be created (and closed) per slice.
    for i in range(data.shape[2]):
        slice_path = os.path.join(image_dir, f'slice_{i:03d}.png')
        plt.imsave(slice_path, data[:, :, i], cmap='gray')

In [None]:
# NOTE(review): no earlier visible cell imports matplotlib, so import it here
# to keep this cell runnable on its own; harmless if it is already imported.
import matplotlib.pyplot as plt

input_dir = '/kaggle/input/brats-2020-denoised-data/Mask Imposed Image Dataset/LGG'
output_dir = '/kaggle/working/LGG_slices'

# Export every .npy array in input_dir as a folder of grayscale PNG slices:
#   <output_dir>/<file name without extension>/slice_000.png, slice_001.png, ...
for filename in os.listdir(input_dir):
    if not filename.endswith('.npy'):
        continue

    # Load the array stored in this file
    path = os.path.join(input_dir, filename)
    data = np.load(path)

    # One sub-directory per input file, named after the file
    image_dir = os.path.join(output_dir, os.path.splitext(filename)[0])
    os.makedirs(image_dir, exist_ok=True)

    # Write one PNG per index along axis 2. plt.imsave writes the array
    # straight to disk, so no figure has to be created (and closed) per slice.
    for i in range(data.shape[2]):
        slice_path = os.path.join(image_dir, f'slice_{i:03d}.png')
        plt.imsave(slice_path, data[:, :, i], cmap='gray')

##### Copying the slice folders so that all-black slices can be deleted from the copies while the originals are kept as a backup.

In [None]:
import os
import shutil

def copy_folder(src_folder, dst_folder):
    """Recursively copy the contents of *src_folder* into *dst_folder*.

    Args:
        src_folder: Path of the existing directory to copy from.
        dst_folder: Path of the directory to copy into. It is created
            (including missing parents) when absent; when it already exists,
            files with the same name are overwritten.

    Sub-directories are copied by recursion and files with ``shutil.copy2``.
    """
    # exist_ok=True replaces the separate existence check, so there is no
    # window between "check" and "create" in which the call can fail.
    os.makedirs(dst_folder, exist_ok=True)
    for item in os.listdir(src_folder):
        src_item = os.path.join(src_folder, item)
        dst_item = os.path.join(dst_folder, item)
        if os.path.isdir(src_item):
            copy_folder(src_item, dst_item)
        else:
            shutil.copy2(src_item, dst_item)

# Duplicate both slice folders (LGG first, then HGG) into their "_copy" twins.
for src_dir, dst_dir in (
    ("/kaggle/working/LGG_slices", "/kaggle/working/LGG_slices_copy"),
    ("/kaggle/working/HGG_slices", "/kaggle/working/HGG_slices_copy"),
):
    copy_folder(src_dir, dst_dir)

##### Working in the LGG_slices_copy and HGG_slices_copy folders to delete all-black images

In [None]:
import os
from PIL import Image

def delete_black_images(folder_path):
    """Delete every image below *folder_path* whose pixels are all black.

    Args:
        folder_path: Root directory; it is walked recursively.

    Each file is opened with PIL and converted to RGB before the check.
    A deleted file is reported with a ``Deleted ...`` line. Files that cannot
    be opened or processed are reported with an ``Error: ...`` line and left
    in place (best effort, the walk continues).
    """
    for subdir, _dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # Close the handle before deleting the file.
                with Image.open(file_path) as img:
                    # Converting to RGB makes the check independent of the
                    # stored mode (RGBA, palette, grayscale, ...).
                    # getbbox() is None when no pixel is non-zero.
                    is_black = img.convert("RGB").getbbox() is None
                if is_black:
                    os.remove(file_path)
                    print(f"Deleted {file_path}")
            except Exception as e:
                # Deliberately broad: one unreadable file must not stop the run.
                print(f"Error: {e}")

# Clean the LGG copy first, then the HGG copy.
for slices_root in (
    "/kaggle/working/LGG_slices_copy",
    "/kaggle/working/HGG_slices_copy",
):
    delete_black_images(slices_root)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

# Load the slice and convert it to 8-bit grayscale; the file handle is
# closed as soon as the converted copy exists.
image_path = "/kaggle/working/LGG_slices_copy/masked_img_denoised_image_64/slice_033.png"
with Image.open(image_path) as opened_image:
    image = opened_image.convert("L")

# image.histogram() already returns the pixel count for each value
# (index = pixel value, entry = number of pixels).
histogram = image.histogram()

# Plot the counts directly. Feeding them to plt.hist would bin the counts
# themselves, which does not match the axis labels below.
plt.bar(range(len(histogram)), histogram, width=1.0, color='black')
plt.xlim(0, 256)
plt.title("Image Histogram")
plt.xlabel("Pixel Value")
plt.ylabel("Frequency")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Read the slice and reduce it to a single grayscale channel
image_path = "/kaggle/working/LGG_slices_copy/masked_img_denoised_image_64/slice_004.png"
image = Image.open(image_path).convert("L")

# Scale the pixel values into [0, 1]
pixels = np.asarray(image) / 255.0

# Map the scaled values through the viridis colormap
heatmap = plt.cm.viridis(pixels)

# A single distinct pixel value means the heatmap is one flat colour
is_flat = np.unique(pixels).size == 1
print(
    "The heatmap has no color variations."
    if is_flat
    else "The heatmap has color variations."
)

# Draw the heatmap without axes
plt.imshow(heatmap)
plt.title("Image Heatmap")
plt.axis("off")
plt.show()


In [None]:
def delete_black_images_heatmap(folder_path):
    """Delete every image below *folder_path* that has one single gray value.

    Args:
        folder_path: Root directory; it is walked recursively.

    Each file is opened with PIL and converted to grayscale. When its darkest
    and brightest pixel are equal (no variation at all), the file is removed
    and a ``Deleted ...`` line is printed. Errors from opening or removing a
    file propagate to the caller.
    """
    for subdir, _dirs, files in os.walk(folder_path):
        for file in files:
            image_path = os.path.join(subdir, file)
            # Close the handle before the file is deleted.
            with Image.open(image_path) as image:
                # For a single-band image getextrema() is a (min, max) pair.
                darkest, brightest = image.convert("L").getextrema()
            if darkest == brightest:
                # Report only after the removal actually succeeded.
                os.remove(image_path)
                print(f"Deleted {image_path}")

# Clean the LGG copy first, then the HGG copy.
for slices_root in (
    "/kaggle/working/LGG_slices_copy",
    "/kaggle/working/HGG_slices_copy",
):
    delete_black_images_heatmap(slices_root)

In [None]:
import os
import math
import matplotlib.pyplot as plt
from PIL import Image

# List the folder once, in sorted order: os.listdir returns entries in
# arbitrary order, and sorting makes the grid follow the slice numbering.
folder_path = "/kaggle/working/LGG_slices_copy/masked_img_denoised_image_64"
image_files = sorted(os.listdir(folder_path))
num_images = len(image_files)

# Smallest square grid that holds every image (at least 1x1)
num_plots = max(1, math.ceil(math.sqrt(num_images)))
num_cols = num_plots
num_rows = num_plots

# Set figure size
fig_width = 10
fig_height = 10

# squeeze=False always yields a 2-D array of axes, so flatten() also works
# for a 1x1 grid.
fig, axs = plt.subplots(
    num_rows, num_cols, figsize=(fig_width, fig_height), squeeze=False
)
axs = axs.flatten()

# Hide every axis first so cells without an image stay blank
for ax in axs:
    ax.axis("off")

# Draw one image per cell; zip stops at whichever runs out first
for ax, file in zip(axs, image_files):
    file_path = os.path.join(folder_path, file)
    with Image.open(file_path) as img:
        ax.imshow(img)

# Save and show the figure
plt.tight_layout()
plot_path = "/kaggle/working/remove_all_black_slices.png"
plt.savefig(plot_path, dpi=300, bbox_inches="tight")
plt.show()


##### All-black images have now been deleted. Copying the folders again so that data augmentation runs on the copies while the cleaned data is kept as a backup.

In [None]:
import os
import shutil

def copy_folder(src_folder, dst_folder):
    """Recursively copy the contents of *src_folder* into *dst_folder*.

    Args:
        src_folder: Path of the existing directory to copy from.
        dst_folder: Path of the directory to copy into. It is created
            (including missing parents) when absent; when it already exists,
            files with the same name are overwritten.

    Sub-directories are copied by recursion and files with ``shutil.copy2``.
    """
    # exist_ok=True replaces the separate existence check, so there is no
    # window between "check" and "create" in which the call can fail.
    os.makedirs(dst_folder, exist_ok=True)
    for item in os.listdir(src_folder):
        src_item = os.path.join(src_folder, item)
        dst_item = os.path.join(dst_folder, item)
        if os.path.isdir(src_item):
            copy_folder(src_item, dst_item)
        else:
            shutil.copy2(src_item, dst_item)

# Duplicate both cleaned folders (LGG first, then HGG) into "_augmented" twins.
for src_dir, dst_dir in (
    ("/kaggle/working/LGG_slices_copy", "/kaggle/working/LGG_slices_augmented"),
    ("/kaggle/working/HGG_slices_copy", "/kaggle/working/HGG_slices_augmented"),
):
    copy_folder(src_dir, dst_dir)

##### Performing data augmentation now

In [None]:
import os
import cv2
from PIL import Image

# Augmentation settings: rotation angles in degrees, and cv2.flip codes
# (0 = flip around the x-axis, 1 = around the y-axis, -1 = around both).
rotation_angles = [30, 60, 90]
flip_axes = [0, 1, -1]

# For every image in every sub-folder, write three rotated and three flipped
# variants next to the source file:
#   <name>_rotated<angle>.png and <name>_flipped<code>.png
folder_path = "/kaggle/working/LGG_slices_augmented"
for subfolder in os.listdir(folder_path):
    subfolder_path = os.path.join(folder_path, subfolder)
    if not os.path.isdir(subfolder_path):
        continue

    for file in os.listdir(subfolder_path):
        # Skip the outputs of an earlier run so that re-running this cell
        # does not augment already augmented images.
        if "_rotated" in file or "_flipped" in file:
            continue

        file_path = os.path.join(subfolder_path, file)

        # cv2.imread returns None (it does not raise) for unreadable files
        img = cv2.imread(file_path)
        if img is None:
            print(f"Skipping unreadable file {file_path}")
            continue

        # Strip the extension whatever its length
        base_path = os.path.splitext(file_path)[0]

        # OpenCV stores channels as BGR while PIL expects RGB; convert once
        # so the rotated copies keep their colours.
        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        for angle in rotation_angles:
            rotated = pil_img.rotate(angle)
            rotated.save(f"{base_path}_rotated{angle}.png")

        for axis in flip_axes:
            flipped = cv2.flip(img, axis)
            cv2.imwrite(f"{base_path}_flipped{axis}.png", flipped)


In [None]:
import os
import cv2
from PIL import Image

# Augmentation settings: rotation angles in degrees, and cv2.flip codes
# (0 = flip around the x-axis, 1 = around the y-axis, -1 = around both).
rotation_angles = [30, 60, 90]
flip_axes = [0, 1, -1]

# For every image in every sub-folder, write three rotated and three flipped
# variants next to the source file:
#   <name>_rotated<angle>.png and <name>_flipped<code>.png
folder_path = "/kaggle/working/HGG_slices_augmented"
for subfolder in os.listdir(folder_path):
    subfolder_path = os.path.join(folder_path, subfolder)
    if not os.path.isdir(subfolder_path):
        continue

    for file in os.listdir(subfolder_path):
        # Skip the outputs of an earlier run so that re-running this cell
        # does not augment already augmented images.
        if "_rotated" in file or "_flipped" in file:
            continue

        file_path = os.path.join(subfolder_path, file)

        # cv2.imread returns None (it does not raise) for unreadable files
        img = cv2.imread(file_path)
        if img is None:
            print(f"Skipping unreadable file {file_path}")
            continue

        # Strip the extension whatever its length
        base_path = os.path.splitext(file_path)[0]

        # OpenCV stores channels as BGR while PIL expects RGB; convert once
        # so the rotated copies keep their colours.
        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        for angle in rotation_angles:
            rotated = pil_img.rotate(angle)
            rotated.save(f"{base_path}_rotated{angle}.png")

        for axis in flip_axes:
            flipped = cv2.flip(img, axis)
            cv2.imwrite(f"{base_path}_flipped{axis}.png", flipped)


##### Saving a plot of augmented slices now

In [None]:
# Collect the paths of all direct sub-directories of the augmented HGG folder
directory_path = "/kaggle/working/HGG_slices_augmented"
with os.scandir(directory_path) as entries:
    subdirectories = [entry.path for entry in entries if entry.is_dir()]

# Show what was found
print(subdirectories)

In [None]:
import os
import math
import matplotlib.pyplot as plt
from PIL import Image

# List the folder once, in sorted order: os.listdir returns entries in
# arbitrary order, and sorting keeps each slice next to its augmented copies.
folder_path = "/kaggle/working/HGG_slices_augmented/masked_img_denoised_image_25"
image_files = sorted(os.listdir(folder_path))
num_images = len(image_files)

# Grid layout: 10 columns, ceil(sqrt(num_images)) rows (at least one).
# NOTE(review): the grid holds num_rows * num_cols images; any images beyond
# that are not drawn - confirm this truncation is intended.
num_plots = max(1, math.ceil(math.sqrt(num_images)))
num_cols = 10
num_rows = num_plots

# Set figure size
fig_width = 10
fig_height = 10

# squeeze=False always yields a 2-D array of axes, so flatten() is safe
# for any grid shape.
fig, axs = plt.subplots(
    num_rows, num_cols, figsize=(fig_width, fig_height), squeeze=False
)
axs = axs.flatten()

# Hide every axis first so cells without an image stay blank
for ax in axs:
    ax.axis("off")

# Draw one image per cell; zip stops once the grid or the file list runs out
for ax, file in zip(axs, image_files):
    file_path = os.path.join(folder_path, file)
    with Image.open(file_path) as img:
        ax.imshow(img)

# Save and show the figure
plt.tight_layout()
plot_path = "/kaggle/working/augmented_slices_HGG.png"
plt.savefig(plot_path, dpi=300, bbox_inches="tight")
plt.show()
