In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
import matplotlib
from matplotlib import pyplot as plt
from tqdm import tqdm
import pandas as pd
# !pip install opencv-python-headless
import cv2
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import keras
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam, legacy

In [None]:
### Load the labeling-result CSV for each ICH subtype.
# Each frame is trimmed to the columns used downstream: 'Origin' and
# 'Correct Label'. The subdural frame additionally keeps 'Labeling State',
# because later cells count and filter on SDH_df['Labeling State']; trimming
# it away here makes those cells fail with a KeyError on a fresh run.
# NOTE(review): assumes the subdural export contains a 'Labeling State'
# column, exactly as the later cells already assume - confirm against the CSV.

EPH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Epidural Hemorrhage Detection_2020-11-16_21.31.26.148.csv')
EPH_df = EPH_df[['Origin', 'Correct Label']]

IVH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Brain Hemorrhage Tracing_2020-09-28_15.21.52.597.csv') # IVH
IVH_df = IVH_df[['Origin', 'Correct Label']]

IPH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Intraparenchymal Hemorrhage Detection_2020-11-16_21.39.31.268.csv')
IPH_df = IPH_df[['Origin', 'Correct Label']]

SDH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Subdural Hemorrhage Detection_2020-11-16_21.37.19.745.csv')
SDH_df = SDH_df[['Origin', 'Correct Label', 'Labeling State']]

SAH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Subarachnoid Hemorrhage Detection_2020-11-16_21.36.18.668.csv')
SAH_df = SAH_df[['Origin', 'Correct Label']]

MCH_df = pd.read_csv('Hemorrhage Segmentation Project/Results_Multiple Hemorrhage Detection_2020-11-16_21.36.24.018.csv') # MCH
MCH_df = MCH_df[['Origin', 'Correct Label']]

In [None]:
# Keep only the epidural rows that have a value in 'Correct Label'.
EPH_filter = EPH_df['Correct Label'].notna()
EPH_filtered = EPH_df[EPH_filter]
# Only the last expression of a cell is rendered, so a bare `len(...)` in the
# middle of the cell was silently discarded; print the row count instead.
print(len(EPH_filtered))
EPH_filtered

In [None]:
# Tally how many rows of the subdural frame fall under each 'Labeling State' value.
# NOTE: this needs SDH_df to still carry the 'Labeling State' column.
SDH_df['Labeling State'].value_counts()

In [None]:
# Drop the subdural rows whose 'Labeling State' is still 'In Progress'.
SDH_filter = SDH_df['Labeling State'] != 'In Progress'
# Index with the subdural mask. The previous version indexed SDH_df with
# EPH_filter (the epidural mask), which left SDH_filter unused and selected
# rows by an unrelated frame's criterion.
SDH_filtered = SDH_df[SDH_filter]
# A bare `len(...)` in the middle of a cell is never displayed; print it.
print(len(SDH_filtered))
SDH_filtered

In [None]:
# Root folder of the rendered images, the per-class sub-folders (a class's
# position in ICH_types is its integer label) and the per-window sub-folders.
file_dir = 'XN1 Data/renders/'
ICH_types = [
    'normal',
    'epidural',
    'subarachnoid',
    'intraparenchymal',
    'subdural',
    'intraventricular',
    'multi',
]
windows = [
    "brain_bone_window",
    "brain_window",
    "max_contrast_window",
    "subdural_window",
]

# File names listed in flagged.txt are skipped by the image loader; they are
# held in a set so each membership check is cheap.
with open('Hemorrhage Segmentation Project/flagged.txt', 'r') as f:
    flagged_files = {line.strip() for line in f}

# Author's note: normal file_max is 6080 (since not same number of files in all windows)

# One-hot encoder fitted on the seven integer class labels 0..6.
encoder = OneHotEncoder()
encoder.fit([[class_id] for class_id in range(7)])
target_shape = (512, 512, 3)

def load_images_from_directory(directory, label, target_shape, file_max=None):
    """Load the images of one directory as 256x256 grayscale arrays.

    Files are visited in sorted name order, at most ``file_max`` of them, and
    any file whose name is in the module-level ``flagged_files`` set is
    skipped. Each remaining image is converted to RGB, resized to the spatial
    size of ``target_shape`` when it differs, downsampled to 256x256 and
    converted to a single grayscale channel.

    Args:
        directory: Folder whose entries are all opened as images.
        label: Unused here; kept so existing callers keep working.
        target_shape: Expected ``(height, width, channels)`` of a source image.
        file_max: Maximum number of directory entries to consider. ``None``
            means every entry in ``directory``.

    Returns:
        ``(images, file_max)``. ``images`` is a list of 2-D grayscale arrays.
        ``file_max`` is the cap that was applied (the directory size when
        ``None`` was passed), NOT the number of images returned: skipped
        flagged files, or a directory shorter than the cap, make
        ``len(images)`` smaller. Use ``len(images)`` for the real count.
    """
    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # "first file_max files" subset deterministic and makes identically named
    # files land at the same position in every window directory.
    filenames = sorted(os.listdir(directory))
    if file_max is None:
        file_max = len(filenames)

    # PIL sizes are (width, height); target_shape is (height, width, channels).
    expected_size = (target_shape[1], target_shape[0])

    train_data = []
    for filename in tqdm(filenames[0:file_max], desc=f'Loading images from {directory}', unit='file'):
        if filename in flagged_files:
            continue

        img_path = os.path.join(directory, filename)
        # The context manager closes the file handle; convert() returns an
        # independent in-memory image, so it stays usable afterwards. Forcing
        # RGB keeps cv2.COLOR_RGB2GRAY from failing on grayscale or palette
        # inputs.
        with Image.open(img_path) as img:
            rgb_img = img.convert('RGB')

        if rgb_img.size != expected_size:
            # Bring off-size images to the expected size first, as before.
            rgb_img = rgb_img.resize(expected_size, Image.LANCZOS)

        # LANCZOS for high-quality downsampling to the network input size.
        rgb_img = rgb_img.resize((256, 256), Image.LANCZOS)

        grayscale_img = cv2.cvtColor(np.array(rgb_img), cv2.COLOR_RGB2GRAY)
        train_data.append(grayscale_img)

    return train_data, file_max


def main_loader(directory, windows, label, X_train_dict, y_train, target_shape, file_max = None):
    """Load every window of one class and build its one-hot label array.

    For each name in ``windows`` the images under
    ``directory/ICH_types[label]/<window>`` are loaded with
    ``load_images_from_directory`` and appended to ``X_train_dict[window]``.
    Each per-window list is then replaced, in place, by a numpy array.

    Args:
        directory: Root folder containing one sub-folder per class.
        windows: Window sub-folder names; each must be a key of ``X_train_dict``.
        label: Integer class label; also the index into ``ICH_types``.
        X_train_dict: Dict of window name -> list, mutated in place. After the
            call its values are arrays, so it cannot be passed in again.
        y_train: Ignored; kept for backward compatibility with existing calls.
        target_shape: Forwarded to ``load_images_from_directory``.
        file_max: Cap on files considered per window (``None`` = all files of
            the first window).

    Returns:
        ``(X_train_dict, labels)`` where ``labels`` has one one-hot row per
        image actually loaded for the first window.
    """
    import warnings

    loaded_counts = []
    for slide in windows:
        dir_path = os.path.join(directory, ICH_types[label], slide)  # Adjust path based on your directory structure

        print(f"Loading images for {slide}...")

        # The returned cap is passed on to the next window, so the cap
        # resolved for the first window applies to all of them.
        data, file_max = load_images_from_directory(dir_path, label, target_shape, file_max)
        X_train_dict[slide].extend(data)
        loaded_counts.append(len(data))

    ## THIS TAKES A LONG TIME FOR BIG ARRAYS
    print("Converting lists to numpy arrays:")
    for window in tqdm(windows, desc='Converting lists', unit='window'):
        X_train_dict[window] = np.array(X_train_dict[window])

    if len(set(loaded_counts)) > 1:
        warnings.warn(
            f"Windows loaded different numbers of images {loaded_counts}; "
            "they cannot be stacked sample-for-sample."
        )

    # Size the labels from the images actually loaded. file_max is only the
    # cap: flagged files and short directories load fewer images, which used
    # to leave more label rows than samples.
    n_loaded = loaded_counts[0] if loaded_counts else 0
    sample = encoder.transform([[label]]).toarray()
    y_train = np.tile(sample, (n_loaded, 1))

    return X_train_dict, y_train

def stack_slices(X_train_dict):
    """Join the four per-window arrays into one array with a window axis.

    The entries of ``X_train_dict`` are read in a fixed order
    (brain_bone_window, brain_window, subdural_window, max_contrast_window)
    and combined with ``np.stack`` along a new axis at position 3, so the
    window index becomes the last axis of a 4-D result.

    Args:
        X_train_dict: Mapping that has an entry for each of the four window
            names above.

    Returns:
        The stacked numpy array.
    """
    channel_order = (
        'brain_bone_window',
        'brain_window',
        'subdural_window',
        'max_contrast_window',
    )

    # tqdm only reports progress while the per-window entries are collected.
    per_window = [
        X_train_dict[name]
        for name in tqdm(channel_order, desc='Stacking slices', unit='window')
    ]

    return np.stack(per_window, axis=3)

# Input structure
# Shape will be (N, 256, 256, 4): every image is resized to 256x256 and the four windows are stacked on the last axis

In [None]:
# Normal (no hemorrhage) scans: class label 0, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
NRM_train_dict = {name: [] for name in windows}
NRM_label = []

NRM_train_dict, NRM_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=0,
    X_train_dict=NRM_train_dict,
    y_train=NRM_label,
    target_shape=target_shape,
    file_max=2000,
)

NRM_train = stack_slices(X_train_dict=NRM_train_dict)

In [None]:
# First normal sample, channel 0 (the first window in the stack_slices order).
plt.imshow(NRM_train[0, :, :, 0], cmap='binary')

In [None]:
# First normal sample, channel 1 (the second window in the stack_slices order).
plt.imshow(NRM_train[0, :, :, 1], cmap='binary')

In [None]:
# Epidural scans: class label 1. No file cap is passed, so file_max keeps its
# default of None.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
EDH_train_dict = {name: [] for name in windows}
EDH_label = []

EDH_train_dict, EDH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=1,
    X_train_dict=EDH_train_dict,
    y_train=EDH_label,
    target_shape=target_shape,
)
EDH_train = stack_slices(X_train_dict=EDH_train_dict)

# Rebuild the labels from the stacked array: one copy of the class-1 one-hot
# row per sample in EDH_train.
sample = encoder.transform([[1]]).toarray()
labels = np.tile(sample, (EDH_train.shape[0], 1))
EDH_label = np.array(labels)

In [None]:
# Preview the first epidural sample, channel 0 only.
# The previous call passed the whole 4-channel NRM_train[0]: imshow treats an
# (M, N, 4) array as RGBA and ignores cmap, and it showed the normal class
# again instead of the epidural data loaded just above.
plt.imshow(EDH_train[0][:, :, 0], cmap='binary')

In [None]:
# Subarachnoid scans: class label 2, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
SAH_train_dict = {name: [] for name in windows}
SAH_label = []

SAH_train_dict, SAH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=2,
    X_train_dict=SAH_train_dict,
    y_train=SAH_label,
    target_shape=target_shape,
    file_max=2000,
)

SAH_train = stack_slices(X_train_dict=SAH_train_dict)

In [None]:
# Preview the first subarachnoid sample, channel 0 only.
# Passing the whole 4-channel sample made imshow treat it as an RGBA image
# (cmap is ignored for (M, N, 4) input), blending the windows into one
# misleading picture.
plt.imshow(SAH_train[0][:, :, 0], cmap='binary')

In [None]:
# Intraparenchymal scans: class label 3, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
IPH_train_dict = {name: [] for name in windows}
IPH_label = []

IPH_train_dict, IPH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=3,
    X_train_dict=IPH_train_dict,
    y_train=IPH_label,
    target_shape=target_shape,
    file_max=2000,
)

IPH_train = stack_slices(X_train_dict=IPH_train_dict)

In [None]:
# Subdural scans: class label 4, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
SDH_train_dict = {name: [] for name in windows}
SDH_label = []

SDH_train_dict, SDH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=4,
    X_train_dict=SDH_train_dict,
    y_train=SDH_label,
    target_shape=target_shape,
    file_max=2000,
)

SDH_train = stack_slices(X_train_dict=SDH_train_dict)

In [None]:
# Intraventricular scans: class label 5, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
IVH_train_dict = {name: [] for name in windows}
IVH_label = []

IVH_train_dict, IVH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=5,
    X_train_dict=IVH_train_dict,
    y_train=IVH_label,
    target_shape=target_shape,
    file_max=2000,
)

IVH_train = stack_slices(X_train_dict=IVH_train_dict)

In [None]:
# Multiple-hemorrhage scans: class label 6, at most 2000 files considered per window.

# One empty list per window for the loader to fill. The label list is only a
# placeholder; main_loader returns its own label array.
MCH_train_dict = {name: [] for name in windows}
MCH_label = []

MCH_train_dict, MCH_label = main_loader(
    directory=file_dir,
    windows=windows,
    label=6,
    X_train_dict=MCH_train_dict,
    y_train=MCH_label,
    target_shape=target_shape,
    file_max=2000,
)

MCH_train = stack_slices(X_train_dict=MCH_train_dict)

In [None]:
# Look-up from class name to that class's stacked image array.
data_dict = dict(
    normal=NRM_train,
    epidural=EDH_train,
    subarachnoid=SAH_train,
    intraparenchymal=IPH_train,
    subdural=SDH_train,
    intraventricular=IVH_train,
    multi=MCH_train,
)

# Quick check: epidural image and label array shapes, one per line.
print(EDH_train.shape, EDH_label.shape, sep='\n')

In [None]:
# Join the seven per-class arrays along the sample axis. Images and labels are
# listed in the same class order so row i of y_train belongs to sample i of X_train.
X_train = np.concatenate(
    [NRM_train, EDH_train, SAH_train, IPH_train, SDH_train, IVH_train, MCH_train],
    axis=0,
)
print(X_train.shape)
y_train = np.concatenate(
    [NRM_label, EDH_label, SAH_label, IPH_label, SDH_label, IVH_label, MCH_label],
    axis=0,
)
print(y_train.shape)

In [None]:
# Hold out 20% of the samples for validation (seeded split).
# NOTE: this cell rebinds X_train / y_train, so running it a second time
# splits the already-split training set again.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Seed the extra shuffles as well; without random_state they reorder the data
# differently on every run, so training was not reproducible.
X_train, y_train = shuffle(X_train, y_train, random_state=42)
X_valid, y_valid = shuffle(X_valid, y_valid, random_state=42)

In [None]:
# Shapes of the training/validation images and labels, one per line.
print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape, sep='\n')

In [None]:
# 2-D CNN classifier: four Conv2D -> ReLU -> MaxPooling2D stages with 32, 64,
# 128 and 256 filters, followed by two dense layers and a 7-way softmax.
model = models.Sequential([
    # Stage 1: 32 filters, 3x3 kernel; the input is a 256x256 image with 4 channels
    layers.Conv2D(32, kernel_size=(3, 3), input_shape=(256, 256, 4), padding="same"),
    layers.Activation('relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters
    layers.Conv2D(64, kernel_size=(3, 3), padding="same"),
    layers.Activation('relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Stage 3: 128 filters
    layers.Conv2D(128, kernel_size=(3, 3), padding="same"),
    layers.Activation('relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Stage 4: 256 filters
    layers.Conv2D(256, kernel_size=(3, 3), padding="same"),
    layers.Activation('relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten, then two fully connected ReLU layers
    layers.Flatten(),
    layers.Dense(1024),
    layers.Activation('relu'),
    layers.Dense(512),
    layers.Activation('relu'),

    # Output: softmax over the 7 classes
    layers.Dense(7, activation="softmax"),
])

# Print the layer table to check the architecture
model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot labels and the softmax output;
# accuracy is tracked as the only metric.
model.compile(
    optimizer=legacy.Adam(),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
# An earlier variant (kept for reference) ran 30 epochs with
# class_weight=class_weight_dict and stored the result as `historysgd`;
# class_weight_dict is not defined in the cells visible here.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)