In [None]:
import os
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model, activations
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import LabelBinarizer
import pydicom
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, BatchNormalization, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


from sklearn.model_selection import StratifiedGroupKFold

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

import matplotlib.pyplot as plt
import seaborn as sns

import csv

In [None]:
# Constants
# Target size passed to tf.image.resize in load_and_preprocess_dicom.
# create_model() unpacks the same tuple for the CNN input shape, so a change
# here affects both the preprocessing and the network definition.
IMAGE_SIZE = (128, 128)

### Load CSV Data

In [None]:
# Load the CSV file, keep only the "Left Subarticular Stenosis" rows,
# and drop every row that still contains a null.
file_path = 'filtered_masterdata.csv'
data = (
    pd.read_csv(file_path)
    .loc[lambda frame: frame["condition"] == "Left Subarticular Stenosis"]
    .dropna()
)

### Preprocessing

In [None]:
# Function to split data more in line with balance of severities
def split_data(df, n_splits=5, random_state=42):
    """Return one (train, validation) pair stratified by severity and grouped by study.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``severity`` (stratification target) and
        ``study_id`` (group key, so one study never lands in both parts).
    n_splits : int, default 5
        Number of folds handed to StratifiedGroupKFold. Only the first fold is
        used, so the held-out part is roughly ``1 / n_splits`` of the rows
        (about 80/20 with the default).
    random_state : int, default 42
        Seed for the shuffled fold assignment.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_rows, validation_rows)`` selected positionally from ``df``.
    """
    sgkf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # split() yields one (train_idx, test_idx) pair per fold; take only the first.
    train_idx, test_idx = next(sgkf.split(df, df['severity'], groups=df['study_id']))
    return (df.iloc[train_idx], df.iloc[test_idx])


In [None]:

# Function to load and preprocess DICOM images
def load_and_preprocess_dicom(image_path):
    """Read one DICOM file and return it resized to IMAGE_SIZE and scaled to [0, 1].

    Parameters
    ----------
    image_path : str
        Path of the ``.dcm`` file to read.

    Returns
    -------
    tf.Tensor
        The resized image with a trailing channel axis, i.e. shape
        ``IMAGE_SIZE + (1,)`` when the file holds a single 2-D frame
        (assumed here, as in the original code -- TODO confirm for all series).

    Notes
    -----
    Scaling uses the image's own min/max. The previous ``/ 255.0`` only gives
    [0, 1] for 8-bit data, and DICOM pixel arrays are not guaranteed to be
    8-bit. Checkpoints trained on the old scaling expect the old input range.
    """
    dicom = pydicom.dcmread(image_path)
    # Convert up front so the arithmetic below is not done in the stored integer dtype.
    image = dicom.pixel_array.astype(np.float32)

    lowest = image.min()
    span = image.max() - lowest
    if span > 0:
        image = (image - lowest) / span
    else:
        # Constant image: avoid dividing by zero, return all zeros.
        image = np.zeros_like(image)

    image = np.expand_dims(image, axis=-1)  # Add channel dimension
    # Default (bilinear) resize interpolates between neighbours, so values stay in [0, 1].
    image = tf.image.resize(image, IMAGE_SIZE)
    return image

In [None]:
# give a dataframe (pass train and test separately)
def preprocess_input(data, severity_classes=None, verbose=False):
    """Build the image, coordinate and severity arrays for the rows of ``data``.

    Rows whose DICOM file does not exist on disk are skipped, so the returned
    arrays can have fewer entries than ``data`` has rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``study_id``, ``series_id``, ``instance_number``,
        ``x``, ``y`` and ``severity``.
    severity_classes : sequence, optional
        The complete set of severity labels. When given, the encoder is fitted
        on these labels instead of on the labels that happen to occur in
        ``data``, so separate calls (train, validation) produce targets with
        the same columns. When omitted, the encoder is fitted on ``data``'s own
        labels, as before.
    verbose : bool, default False
        Print every path that was found and the list of missing paths.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, coordinates, severities)``: the stacked preprocessed images,
        an ``(n, 2)`` array of ``[x, y]`` pairs, and the binarized severities
        (LabelBinarizer emits one column per class, or a single column when
        only two classes are known).

    Raises
    ------
    ValueError
        If no image file could be found, or if ``severity_classes`` is given
        and ``data`` contains a label outside it.
    """
    images = []
    coordinates = []
    severities = []
    missing_paths = []

    # Process each row in the CSV file
    for _, row in data.iterrows():
        image_path = os.path.join(
            "train_images",
            str(row['study_id']),
            str(row['series_id']),
            f"{row['instance_number']}.dcm",
        )
        if not os.path.exists(image_path):
            missing_paths.append(image_path)
            continue

        if verbose:
            print(f"found {image_path}")
        images.append(load_and_preprocess_dicom(image_path))
        # NOTE(review): x/y are passed through unscaled while the image is
        # resized to IMAGE_SIZE -- presumably they are pixel positions in the
        # original image; confirm whether they should be rescaled as well.
        coordinates.append([row['x'], row['y']])
        severities.append(row['severity'])

    if not images:
        raise ValueError(
            f"none of the {len(missing_paths)} referenced image files were found under 'train_images'"
        )

    # One summary line instead of one line per file plus a dump of every path.
    print(f"loaded {len(images)} images, {len(missing_paths)} missing")
    if verbose and missing_paths:
        print(missing_paths)

    # Convert lists to numpy arrays
    images = np.array(images)
    coordinates = np.array(coordinates)

    # Handle missing coordinates by filling with a placeholder value (e.g., (0, 0))
    coordinates = np.nan_to_num(coordinates, nan=0.0)

    # One-hot encode severities
    severity_binarizer = LabelBinarizer()
    if severity_classes is None:
        severities = severity_binarizer.fit_transform(severities)
    else:
        unknown = set(severities) - set(severity_classes)
        if unknown:
            raise ValueError(f"severity labels not in severity_classes: {sorted(map(str, unknown))}")
        severity_binarizer.fit(list(severity_classes))
        severities = severity_binarizer.transform(severities)

    return (images, coordinates, severities)


In [None]:
train, val = split_data(data)
train_images, train_coordinates, train_severities = preprocess_input(train)
val_images, val_coordinates, val_severities = preprocess_input(val)

# Each split is encoded by its own preprocess_input call, so check that both
# ended up with the same target width before anything is trained on them.
assert train_severities.shape[1:] == val_severities.shape[1:], (
    f"severity encodings differ: train {train_severities.shape} vs val {val_severities.shape}"
)
# The three arrays of a split must stay row-aligned.
assert len(train_images) == len(train_coordinates) == len(train_severities)
assert len(val_images) == len(val_coordinates) == len(val_severities)

# Print shapes to verify
print("Training set shapes:", train_images.shape, train_coordinates.shape, train_severities.shape)
print("Validation set shapes:", val_images.shape, val_coordinates.shape, val_severities.shape)

found train_images/4003253/2448190387/3.dcm
found train_images/4003253/2448190387/11.dcm
found train_images/4003253/2448190387/19.dcm
found train_images/4003253/2448190387/28.dcm
found train_images/4003253/2448190387/35.dcm
found train_images/4646740/3201256954/16.dcm
found train_images/4646740/3201256954/22.dcm
found train_images/4646740/3201256954/29.dcm
found train_images/4646740/3201256954/34.dcm
found train_images/4646740/3201256954/40.dcm
found train_images/11340341/1224932122/5.dcm
found train_images/11340341/1224932122/15.dcm
found train_images/11340341/1224932122/24.dcm
found train_images/11340341/1224932122/33.dcm
found train_images/11340341/1224932122/42.dcm
found train_images/11943292/403244853/8.dcm
found train_images/11943292/403244853/16.dcm
found train_images/11943292/403244853/25.dcm
found train_images/11943292/3800798510/9.dcm
found train_images/11943292/3800798510/16.dcm
found train_images/22191399/3687121182/12.dcm
found train_images/22191399/3687121182/21.dcm
found

In [None]:
def get_best_model(checkpoint_dir):
    """Load the checkpoint in ``checkpoint_dir`` whose filename records the lowest validation loss.

    The loss is read from the part of the filename after the last ``-`` and
    before the ``.h5`` extension (e.g. ``model-07-0.8293.h5`` -> 0.8293).
    ``.h5`` files whose name does not end in a parseable number are ignored.

    Parameters
    ----------
    checkpoint_dir : str
        Directory containing the ``.h5`` checkpoint files.

    Returns
    -------
    The model returned by ``models.load_model`` for the selected file.

    Raises
    ------
    FileNotFoundError
        If the directory contains no usable checkpoint file.
    """
    best_model_path = None
    best_val_loss = float('inf')

    for file in os.listdir(checkpoint_dir):
        stem, extension = os.path.splitext(file)
        if extension != '.h5':
            continue
        try:
            val_loss = float(stem.split('-')[-1])
        except ValueError:
            # Not a "<...>-<val_loss>.h5" name; leave it alone.
            continue
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_path = os.path.join(checkpoint_dir, file)

    if best_model_path is None:
        # Fail here with a clear message instead of handing None to load_model.
        raise FileNotFoundError(
            f"no '<name>-<val_loss>.h5' checkpoint found in {checkpoint_dir!r}"
        )

    print(f"Loading [{best_model_path}]")

    return models.load_model(best_model_path)

In [None]:
def callback(checkpoint_dir):
    """Create ``checkpoint_dir`` if needed and return the list of training callbacks.

    The returned list holds, in order: a ModelCheckpoint that saves the full
    model whenever ``val_loss`` improves, an EarlyStopping on ``val_loss`` that
    restores the best weights, and a ReduceLROnPlateau on ``val_loss``.
    """
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Epoch number and validation loss are embedded in each saved filename.
    checkpoint_pattern = checkpoint_dir + '/model-{epoch:02d}-{val_loss:.4f}.h5'

    return [
        ModelCheckpoint(
            filepath=checkpoint_pattern,
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_best_only=True,
            verbose=1,
        ),
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001),
    ]


In [None]:
def model_fit(model, dir, batch=16, epochs=10):
    """Train ``model`` on the notebook-level train arrays and validate on the val arrays.

    Reads the module-level ``train_images``, ``train_coordinates``,
    ``train_severities``, ``val_images``, ``val_coordinates`` and
    ``val_severities``, so the preprocessing cell must have run first.

    Parameters
    ----------
    model : compiled Keras model taking ``[images, coordinates]`` as input.
    dir : str
        Checkpoint directory handed to ``callback``.
    batch : int, default 16
        Batch size.
    epochs : int, default 10
        Number of training epochs.

    Returns
    -------
    The object returned by ``model.fit`` (the training history), so the
    per-epoch metrics can be inspected or plotted afterwards.
    """
    return model.fit(
        [train_images, train_coordinates], train_severities,
        epochs=epochs, batch_size=batch,
        validation_data=([val_images, val_coordinates], val_severities),
        callbacks=callback(dir),
    )

In [None]:

(xsize, ysize) = IMAGE_SIZE
def create_model(num_classes=3):
    """Build and compile the two-input (image + coordinates) severity classifier.

    The image branch is three Conv2D -> BatchNormalization -> MaxPooling2D
    stages followed by Flatten; the coordinate branch is a two-layer MLP. Both
    outputs are concatenated and passed through two Dense layers to a softmax.

    Parameters
    ----------
    num_classes : int, default 3
        Width of the softmax output layer. It must match the number of columns
        in the one-hot severity targets.

    Returns
    -------
    A Keras ``Model`` with inputs ``[image_input, coords_input]``, compiled
    with the Adam optimizer, categorical cross-entropy loss and accuracy metric.
    """
    (xsize,ysize) = IMAGE_SIZE

    # CNN for image processing
    image_cnn_model = Sequential([
        Conv2D(32, (3, 3), activation='sigmoid', padding='same', input_shape=(xsize, ysize, 1), name='conv2d_image_1'),
        BatchNormalization(name='batch_norm_image_1'),
        MaxPooling2D((2, 2), name='max_pool_image_1'),
        Conv2D(64, (3, 3), activation='sigmoid', padding='same', name='conv2d_image_2'),
        BatchNormalization(name='batch_norm_image_2'),
        MaxPooling2D((2, 2), name='max_pool_image_2'),
        Conv2D(128, (3, 3), activation='sigmoid', padding='same', name='conv2d_image_3'),
        BatchNormalization(name='batch_norm_image_3'),
        MaxPooling2D((2, 2), name='max_pool_image_3'),
        Flatten(name='flatten_image')
    ])

    # MLP for coordinates
    coordinates_mlp_model = Sequential([
        Dense(64, activation='sigmoid', input_shape=(2,), name='dense_coords_1'),
        Dense(32, activation='sigmoid', name='dense_coords_2')
    ])

    # Combining both models using Functional API
    image_input = Input(shape=(xsize, ysize, 1), name='image_input')
    coords_input = Input(shape=(2,), name='coords_input')

    cnn_output = image_cnn_model(image_input)
    mlp_output = coordinates_mlp_model(coords_input)

    combined = Concatenate(name='concat_image_coords')([cnn_output, mlp_output])
    x = Dense(128, activation='sigmoid', name='dense_combined_1')(combined)
    x = Dense(64, activation='sigmoid', name='dense_combined_2')(x)

    # Output layer: one softmax unit per severity class
    output = Dense(num_classes, activation='softmax', name='output_layer')(x)

    # Create and compile the model
    model = Model(inputs=[image_input, coords_input], outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [None]:

# Summary of the model: build an untrained instance just to print its layer table.
# `model` is rebound to a fresh network by each training cell further down.
model = create_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image_input (InputLayer)       [(None, 128, 128, 1  0           []                               
                                )]                                                                
                                                                                                  
 coords_input (InputLayer)      [(None, 2)]          0           []                               
                                                                                                  
 sequential (Sequential)        (None, 32768)        93568       ['image_input[0][0]']            
                                                                                                  
 sequential_1 (Sequential)      (None, 32)           2272        ['coords_input[0][0]']       

# Best Model

In [None]:

# Fresh network trained with batch size 32; checkpoints go to checkpoint_finalized_32/
model = create_model()
model_fit(model, dir='checkpoint_finalized_32', batch=32)

Epoch 1/10


2024-07-27 13:31:39.997206: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1: val_loss improved from inf to 1.00223, saving model to checkpoint_finalized_32/model-01-1.0022.h5
Epoch 2/10
Epoch 2: val_loss improved from 1.00223 to 0.88792, saving model to checkpoint_finalized_32/model-02-0.8879.h5
Epoch 3/10
Epoch 3: val_loss did not improve from 0.88792
Epoch 4/10
Epoch 4: val_loss improved from 0.88792 to 0.85781, saving model to checkpoint_finalized_32/model-04-0.8578.h5
Epoch 5/10
Epoch 5: val_loss improved from 0.85781 to 0.83574, saving model to checkpoint_finalized_32/model-05-0.8357.h5
Epoch 6/10
Epoch 6: val_loss did not improve from 0.83574
Epoch 7/10
Epoch 7: val_loss improved from 0.83574 to 0.82935, saving model to checkpoint_finalized_32/model-07-0.8293.h5
Epoch 8/10
Epoch 8: val_loss improved from 0.82935 to 0.82421, saving model to checkpoint_finalized_32/model-08-0.8242.h5
Epoch 9/10
Epoch 9: val_loss improved from 0.82421 to 0.79898, saving model to checkpoint_finalized_32/model-09-0.7990.h5
Epoch 10/10
Epoch 10: val_loss improved from 

In [None]:

# Fresh network trained with batch size 64; checkpoints go to checkpoint_finalized_64/
model = create_model()
model_fit(model, dir='checkpoint_finalized_64', batch=64)

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.95790, saving model to checkpoint_finalized_64/model-01-0.9579.h5
Epoch 2/10
Epoch 2: val_loss improved from 0.95790 to 0.95374, saving model to checkpoint_finalized_64/model-02-0.9537.h5
Epoch 3/10
Epoch 3: val_loss improved from 0.95374 to 0.94613, saving model to checkpoint_finalized_64/model-03-0.9461.h5
Epoch 4/10
Epoch 4: val_loss did not improve from 0.94613
Epoch 5/10
Epoch 5: val_loss improved from 0.94613 to 0.91575, saving model to checkpoint_finalized_64/model-05-0.9158.h5
Epoch 6/10
Epoch 6: val_loss improved from 0.91575 to 0.89980, saving model to checkpoint_finalized_64/model-06-0.8998.h5
Epoch 7/10
Epoch 7: val_loss improved from 0.89980 to 0.83644, saving model to checkpoint_finalized_64/model-07-0.8364.h5
Epoch 8/10
Epoch 8: val_loss improved from 0.83644 to 0.83557, saving model to checkpoint_finalized_64/model-08-0.8356.h5
Epoch 9/10
Epoch 9: val_loss did not improve from 0.83557
Epoch 10/10
Epoch 10: val_loss imp

In [None]:

# Fresh network trained with batch size 100; checkpoints go to checkpoint_finalized_100/
model = create_model()
model_fit(model, dir='checkpoint_finalized_100', batch=100)

Epoch 1/10
Epoch 1: val_loss improved from inf to 1.07442, saving model to checkpoint_finalized_100/model-01-1.0744.h5
Epoch 2/10
Epoch 2: val_loss improved from 1.07442 to 1.06043, saving model to checkpoint_finalized_100/model-02-1.0604.h5
Epoch 3/10
Epoch 3: val_loss improved from 1.06043 to 0.92809, saving model to checkpoint_finalized_100/model-03-0.9281.h5
Epoch 4/10
Epoch 4: val_loss improved from 0.92809 to 0.91591, saving model to checkpoint_finalized_100/model-04-0.9159.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 0.91591
Epoch 6/10
Epoch 6: val_loss improved from 0.91591 to 0.79651, saving model to checkpoint_finalized_100/model-06-0.7965.h5
Epoch 7/10
Epoch 7: val_loss did not improve from 0.79651
Epoch 8/10
Epoch 8: val_loss improved from 0.79651 to 0.75727, saving model to checkpoint_finalized_100/model-08-0.7573.h5
Epoch 9/10
Epoch 9: val_loss did not improve from 0.75727
Epoch 10/10
Epoch 10: val_loss improved from 0.75727 to 0.74740, saving model to checkpoint_

In [None]:
# Batch size 32 again, saved under checkpoint_32_smallerDenseNet/.
# NOTE(review): create_model() as defined in this notebook builds the same
# network as the other runs; the "smallerDenseNet" name presumably refers to
# an earlier edit of that cell -- confirm before comparing these results.
model = create_model()
model_fit(model, dir='checkpoint_32_smallerDenseNet', batch=32)

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.97484, saving model to checkpoint_32_smallerDenseNet/model-01-0.9748.h5
Epoch 2/10
Epoch 2: val_loss improved from 0.97484 to 0.91331, saving model to checkpoint_32_smallerDenseNet/model-02-0.9133.h5
Epoch 3/10
Epoch 3: val_loss improved from 0.91331 to 0.87341, saving model to checkpoint_32_smallerDenseNet/model-03-0.8734.h5
Epoch 4/10
Epoch 4: val_loss improved from 0.87341 to 0.87201, saving model to checkpoint_32_smallerDenseNet/model-04-0.8720.h5
Epoch 5/10
Epoch 5: val_loss improved from 0.87201 to 0.84512, saving model to checkpoint_32_smallerDenseNet/model-05-0.8451.h5
Epoch 6/10
Epoch 6: val_loss did not improve from 0.84512
Epoch 7/10
Epoch 7: val_loss improved from 0.84512 to 0.84286, saving model to checkpoint_32_smallerDenseNet/model-07-0.8429.h5
Epoch 8/10
Epoch 8: val_loss improved from 0.84286 to 0.83864, saving model to checkpoint_32_smallerDenseNet/model-08-0.8386.h5
Epoch 9/10
Epoch 9: val_loss improved from 0.83864