Steps Overview:
1. Load DICOM files and metadata files (CSV)
2. Preprocess the DICOM images
3. Merge metadata from CSVs
4. Prepare data for modeling (resize images, normalize, etc.)
5. Build a model for classification
6. Train and evaluate the model


1. Install Required Libraries

In [None]:
!pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg

In [None]:
!pip install pydicom

In [None]:
!pip install pydicom matplotlib pandas tensorflow scikit-learn

# Install necessary libraries
!pip install pydicom SimpleITK matplotlib kaggle

# Set up Kaggle authentication
from google.colab import files
files.upload()  # Upload kaggle.json file

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the RSNA dataset
!kaggle competitions download -c rsna-2024-lumbar-spine-degenerative-classification
!unzip -q rsna-2024-lumbar-spine-degenerative-classification.zip

2. Load Metadata Files

In [None]:
import pandas as pd

# Read the three competition metadata tables
train_labels = pd.read_csv('/content/train_label_coordinates.csv')
train_data = pd.read_csv('/content/train.csv')
train_series_descriptions = pd.read_csv('/content/train_series_descriptions.csv')

# Preview the head of every table to check its structure
for caption, frame in (
    ("Train Labels:\n", train_labels),
    ("Train Data:\n", train_data),
    ("Train Series Descriptions:\n", train_series_descriptions),
):
    print(caption, frame.head())


3. Load DICOM Images

In [None]:
import os
import pydicom
import matplotlib.pyplot as plt

# Set the root directory for DICOM images
data_path = "/content/train_images"

# Recursively find all DICOM files.
# Sorted so that later slices such as dicom_files[:100] pick the same files on
# every run; os.walk() itself gives no ordering guarantee.
dicom_files = sorted(
    os.path.join(dirpath, f)
    for dirpath, _, filenames in os.walk(data_path)
    for f in filenames
    if f.endswith(".dcm")
)

# Check the number of DICOM files found
print(f"Number of DICOM files found: {len(dicom_files)}")

# Stop with a clear message instead of an IndexError on dicom_files[0]
if not dicom_files:
    raise FileNotFoundError(f"No .dcm files found under {data_path}")

# Load and visualize a DICOM image
first_dicom = pydicom.dcmread(dicom_files[0])
print(first_dicom)

# Visualize the image
plt.imshow(first_dicom.pixel_array, cmap='gray')
plt.show()


4. Preprocess Images

In [9]:
import cv2
import numpy as np

# Define image size
IMG_SIZE = 224  # You can modify this size based on model input requirements

# Function to resize and normalize DICOM image
def preprocess_dicom(dicom_path):
    """Read a DICOM file and return its pixels resized and scaled by their maximum.

    Args:
        dicom_path: path of the DICOM file to read.

    Returns:
        An ``IMG_SIZE`` x ``IMG_SIZE`` array holding the resized pixel data
        divided by its maximum value. An image whose maximum is 0 is returned
        as all zeros instead of NaNs.
    """
    dicom_image = pydicom.dcmread(dicom_path)
    pixel_array = dicom_image.pixel_array

    # Resize image to a fixed size
    resized_image = cv2.resize(pixel_array, (IMG_SIZE, IMG_SIZE))

    peak = np.max(resized_image)
    if peak == 0:
        # Dividing by zero would fill the image with NaN and poison training
        return np.zeros(resized_image.shape, dtype=np.float64)

    # Normalize image (scale pixel values by the maximum)
    return resized_image / peak

# Preprocess only the first 100 DICOM files for now
sample_files = dicom_files[:100]
processed_images = list(map(preprocess_dicom, sample_files))

# Stack into one array and append a trailing channel axis for modeling
X = np.array(processed_images)
X = X.reshape(len(X), IMG_SIZE, IMG_SIZE, 1)


5. Merge Labels with DICOM Data

In [None]:
import pydicom
import os

# List to store extracted metadata
dicom_metadata = []

# Loop through DICOM files and extract relevant metadata
for dicom_file in dicom_files[:100]:  # Adjust the range to handle all files if needed
    try:
        # Only header tags are used below, so skip reading the pixel data
        dicom_data = pydicom.dcmread(dicom_file, stop_before_pixels=True)

        # Extract relevant metadata fields
        study_id = dicom_data.PatientID  # Corresponds to 'study_id'
        series_id = dicom_data.SeriesInstanceUID  # Corresponds to 'series_id'
        instance_number = dicom_data.InstanceNumber  # Corresponds to 'instance_number'

        # Append to the metadata list
        dicom_metadata.append({
            'study_id': study_id,
            'series_id': series_id,
            'instance_number': instance_number,
            'file_path': dicom_file
        })

    except Exception as e:
        # Best effort: report the unreadable file and keep going
        print(f"Error reading file {dicom_file}: {e}")

# Convert to a DataFrame for easier merging
dicom_df = pd.DataFrame(dicom_metadata)

# Display the extracted DICOM metadata
print(dicom_df.head())


In [93]:
# If column names are different, rename them for consistency
# NOTE(review): the dicom_df built from dicom_metadata already uses the key
# 'series_id', so this rename looks like a no-op unless dicom_df was created
# some other way with a 'SeriesInstanceUID' column - confirm before relying on it.
dicom_df = dicom_df.rename(columns={"SeriesInstanceUID": "series_id"})  # Adjust if necessary


In [107]:
# Re-read the three metadata tables so the merge cells start from fresh frames
train_labels, train_data, train_series_descriptions = map(
    pd.read_csv,
    (
        '/content/train_label_coordinates.csv',
        '/content/train.csv',
        '/content/train_series_descriptions.csv',
    ),
)

In [100]:
# ## naming the new dataframes of the csv files

# train1 = pd.read_csv('/content/train.csv')
# traincor2 = pd.read_csv('/content/train_label_coordinates.csv')
# trainseries3 = pd.read_csv('/content/train_series_descriptions.csv')

In [108]:
# Attach the series description to every labelled coordinate row.
merged_df = (
    train_labels.merge(train_series_descriptions, on='series_id', how='inner')
    # Both inputs carry 'study_id' (assumed identical); keep the left-hand copy
    # under the plain name, appended as the last column.
    .assign(study_id=lambda frame: frame['study_id_x'])
    # Drop the redundant suffixed copies created by the merge
    .drop(columns=['study_id_x', 'study_id_y'])
)

In [None]:
# Add the per-study label columns from train.csv to every coordinate row
final_merged_df = merged_df.merge(train_data, how='inner', on='study_id')
final_merged_df.head(10)

In [None]:
# Column names of the fully merged frame
list(final_merged_df.columns)

In [None]:
# Number of rows in the fully merged frame
final_merged_df.shape[0]

In [None]:
import os
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import cv2

# Function to map the level to the corresponding stenosis column in the DataFrame
def get_stenosis_column(level, condition):
    """Return the label-column name for a (level, condition) pair.

    Args:
        level: disc level such as ``'L1/L2'`` ... ``'L5/S1'``.
        condition: condition name such as ``'Spinal Canal Stenosis'``.

    Returns:
        The matching column name (e.g. ``'spinal_canal_stenosis_l1_l2'``), or
        ``None`` when the level or the condition is not one of the known values.
    """
    known_levels = ('L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1')
    known_conditions = (
        'Spinal Canal Stenosis',
        'Left Neural Foraminal Narrowing',
        'Right Neural Foraminal Narrowing',
        'Left Subarticular Stenosis',
        'Right Subarticular Stenosis',
    )

    # Every column name is the snake_cased condition followed by the
    # snake_cased level, so the table is generated instead of spelled out.
    columns_by_level = {
        lvl: {
            cond: f"{cond.lower().replace(' ', '_')}_{lvl.lower().replace('/', '_')}"
            for cond in known_conditions
        }
        for lvl in known_levels
    }

    columns_for_level = columns_by_level.get(level, {})
    return columns_for_level.get(condition)

# Function to apply CLAHE to the image
def apply_clahe(image):
    """Return *image* after OpenCV CLAHE (clip limit 2.0, 8x8 tile grid)."""
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(image)

# Function to normalize the image data to 8-bit format (uint8)
def normalize_to_uint8(image):
    """Min-max rescale *image* to the 0-255 range and cast it to ``uint8``."""
    rescaled = cv2.normalize(image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    as_bytes = rescaled.astype(np.uint8)
    return as_bytes

# Function to color code the severity and normalize it for matplotlib
def get_circle_color(severity):
    """Return the RGB tuple (components in 0-1) used to mark *severity*.

    Unknown severities fall back to white.
    """
    palette = {
        'Normal/Mild': (0, 1.0, 0),    # Green
        'Moderate': (1.0, 1.0, 0),     # Yellow
        'Severe': (1.0, 0, 0)          # Red
    }
    try:
        return palette[severity]
    except KeyError:
        return (1.0, 1.0, 1.0)  # White for anything unrecognised

# Function to display images grouped by description with hollow dots for severity
def display_images_grouped_by_description(images_by_description, study_id):
    """Show one figure per series description, one subplot per labelled image.

    Args:
        images_by_description: mapping of series description to a list of
            ``(image, instance_number, x, y, severity, series_id)`` tuples.
        study_id: identifier printed in every figure title.

    Each image is CLAHE-enhanced and the label point is marked with a hollow
    circle coloured by severity. Descriptions with no images are skipped.
    """
    for description, images in images_by_description.items():
        if not images:
            # Nothing to draw, and plt.subplots() rejects a zero-column grid
            continue

        num_images = len(images)
        _, axs = plt.subplots(1, num_images, figsize=(20, 6))

        if num_images == 1:
            axs = [axs]  # To ensure axs is iterable even for a single image

        for ax, (img, instance_number, x, y, severity, _) in zip(axs, images):
            enhanced_img = apply_clahe(img)
            ax.imshow(enhanced_img, cmap='gray')
            ax.scatter([x], [y], facecolors='none', edgecolors=get_circle_color(severity), s=100, linewidths=2)
            ax.set_title(f"{instance_number}.dcm")
            ax.axis('off')

        # List every series shown in this figure (first-seen order, no
        # duplicates) rather than whichever series happened to be last.
        series_ids = dict.fromkeys(entry[5] for entry in images)
        series_label = ", ".join(str(series_id) for series_id in series_ids)

        plt.suptitle(f"Study ID: {study_id} | Series ID: {series_label} | Description: {description}", fontsize=16)
        plt.tight_layout()
        plt.show()

# Main function to process images and categorize them by description
def process_study_ids_by_description(df, root_dir):
    """Load the labelled DICOM slices of every study and display them.

    Args:
        df: frame with ``study_id``, ``series_id``, ``series_description``,
            ``instance_number``, ``x``, ``y``, ``level``, ``condition`` and
            the per-level label columns.
        root_dir: directory laid out as ``<study_id>/<series_id>/<n>.dcm``.

    Missing series folders, unmapped (level, condition) pairs and missing
    files are reported with ``print`` and skipped. For each study the loaded
    images are grouped by series description and handed to
    ``display_images_grouped_by_description``.
    """
    for study_id in df['study_id'].unique():
        rows_for_study = df[df['study_id'] == study_id]
        grouped = {}

        for series_id in rows_for_study['series_id'].unique():
            rows_for_series = rows_for_study[rows_for_study['series_id'] == series_id]
            series_description = rows_for_series['series_description'].iloc[0]

            series_folder = os.path.join(root_dir, str(study_id), str(series_id))
            if not os.path.exists(series_folder):
                print(f"Series folder {series_folder} not found.")
                continue

            for _, record in rows_for_series.iterrows():
                instance_number = int(record['instance_number'])
                x, y = record['x'], record['y']
                level, condition = record['level'], record['condition']

                stenosis_column = get_stenosis_column(level, condition)
                if stenosis_column is None:
                    print(f"No corresponding stenosis column found for level {level} and condition {condition}.")
                    continue

                condition_status = record[stenosis_column]
                dcm_file = f"{instance_number}.dcm"
                dcm_path = os.path.join(series_folder, dcm_file)
                if not os.path.exists(dcm_path):
                    print(f"DICOM file {dcm_file} not found for Series ID: {series_id}.")
                    continue

                pixels = normalize_to_uint8(pydicom.dcmread(dcm_path).pixel_array)
                grouped.setdefault(series_description, []).append(
                    (pixels, instance_number, x, y, condition_status, series_id)
                )

        display_images_grouped_by_description(grouped, study_id)

# Usage Example
# Walks every study_id in final_merged_df and shows one figure per series
# description that has at least one labelled image on disk.
root_dir = '/content/train_images'
process_study_ids_by_description(final_merged_df, root_dir)


In [None]:
import os
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import pandas as pd

# Function to map the level to the corresponding stenosis column in the DataFrame
def get_stenosis_column(level, condition):
    """Return the label-column name for a (level, condition) pair.

    Args:
        level: disc level such as ``'L1/L2'`` ... ``'L5/S1'``.
        condition: condition name such as ``'Spinal Canal Stenosis'``.

    Returns:
        The matching column name (e.g. ``'spinal_canal_stenosis_l1_l2'``), or
        ``None`` when the level or the condition is not one of the known values.
    """
    known_levels = ('L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1')
    known_conditions = (
        'Spinal Canal Stenosis',
        'Left Neural Foraminal Narrowing',
        'Right Neural Foraminal Narrowing',
        'Left Subarticular Stenosis',
        'Right Subarticular Stenosis',
    )

    # Every column name is the snake_cased condition followed by the
    # snake_cased level, so the table is generated instead of spelled out.
    columns_by_level = {
        lvl: {
            cond: f"{cond.lower().replace(' ', '_')}_{lvl.lower().replace('/', '_')}"
            for cond in known_conditions
        }
        for lvl in known_levels
    }

    columns_for_level = columns_by_level.get(level, {})
    return columns_for_level.get(condition)

# Function to apply CLAHE to the image
def apply_clahe(image):
    """Return *image* after OpenCV CLAHE (clip limit 2.0, 8x8 tile grid)."""
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(image)

# Function to normalize the image data to 8-bit format (uint8)
def normalize_to_uint8(image):
    """Min-max rescale *image* to the 0-255 range and cast it to ``uint8``."""
    rescaled = cv2.normalize(image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    as_bytes = rescaled.astype(np.uint8)
    return as_bytes

# Mapping severity labels to integers (position in the tuple is the class index)
severity_map = dict(zip(('Normal/Mild', 'Moderate', 'Severe'), range(3)))

# Collect images and convert severity labels to integers using the severity map
def collect_images_and_labels(df, root_dir, image_size=(256, 256)):
    """Load every labelled DICOM slice in *df* together with its severity class.

    Args:
        df: frame with ``study_id``, ``series_id``, ``instance_number``,
            ``level``, ``condition`` and the per-level label columns.
        root_dir: directory laid out as ``<study_id>/<series_id>/<n>.dcm``.
        image_size: target size passed to ``cv2.resize``; defaults to the
            256x256 the CNN in this notebook expects.

    Returns:
        ``(images, labels)`` as NumPy arrays. Each image is an 8-bit,
        CLAHE-enhanced, resized slice with a trailing channel axis; each label
        is the ``severity_map`` integer for the row.

    Rows are skipped (with a printed message) when the series folder or file
    is missing, the (level, condition) pair has no label column, the label is
    missing or not a known severity, or the file cannot be read/processed.
    """
    images = []
    labels = []

    for study_id in df['study_id'].unique():
        study_df = df[df['study_id'] == study_id]

        for series_id in study_df['series_id'].unique():
            series_df = study_df[study_df['series_id'] == series_id]
            series_folder = os.path.join(root_dir, str(study_id), str(series_id))

            if not os.path.exists(series_folder):
                print(f"Series folder {series_folder} not found.")
                continue

            for _, row in series_df.iterrows():
                instance_number = int(row['instance_number'])
                level = row['level']
                condition = row['condition']

                stenosis_column = get_stenosis_column(level, condition)
                if stenosis_column is None:
                    print(f"No corresponding stenosis column found for level {level} and condition {condition}.")
                    continue

                condition_status = row.get(stenosis_column, None)
                if pd.isna(condition_status):
                    print(f"Skipping row with missing condition status for study {study_id}, series {series_id}.")
                    continue

                # Resolve the class before any file I/O so that an unexpected
                # severity is reported as such, not as a DICOM read failure.
                label = severity_map.get(condition_status)
                if label is None:
                    print(f"Unknown severity {condition_status!r} for study {study_id}, series {series_id}; skipping row.")
                    continue

                dcm_file = f"{instance_number}.dcm"
                dcm_path = os.path.join(series_folder, dcm_file)

                if not os.path.exists(dcm_path):
                    print(f"DICOM file {dcm_file} not found for Series ID: {series_id}.")
                    continue

                try:
                    dicom_data = pydicom.dcmread(dcm_path)
                    image = normalize_to_uint8(dicom_data.pixel_array)
                    image = apply_clahe(image)  # Apply CLAHE
                    image = cv2.resize(image, image_size)  # Resize for CNN
                except Exception as e:
                    # Best effort: one unreadable slice must not abort the run
                    print(f"Error reading or processing DICOM file {dcm_file}: {e}")
                    continue

                images.append(np.expand_dims(image, axis=-1))  # Add channel dimension
                labels.append(label)

    return np.array(images), np.array(labels)

# Build the CNN model
def build_cnn_model():
    """Build and compile the 3-class severity CNN for 256x256x1 images.

    The network takes raw 8-bit intensities (0-255), as produced by
    collect_images_and_labels(), and rescales them to [0, 1] in its first
    layer. Do not pre-scale the inputs, or they will be scaled twice.

    Returns:
        A compiled Keras ``Sequential`` model (Adam, categorical
        cross-entropy, accuracy metric) with a 3-way softmax output.
    """
    # Imported here so the function does not rely on names beyond the ones
    # the cell already imports.
    from tensorflow.keras.layers import Input, Rescaling

    model = Sequential()

    # Input + in-model scaling, so training and inference share preprocessing
    model.add(Input(shape=(256, 256, 1)))
    model.add(Rescaling(1.0 / 255))

    # First convolutional layer
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    # Second convolutional layer
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    # Third convolutional layer
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    # Flatten and fully connected layers
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))  # 3 classes for severity

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Load data
root_dir = '/content/train_images'  # Replace with your actual directory
images, labels = collect_images_and_labels(final_merged_df, root_dir)

# Fail early with a clear message rather than an obscure shape error in fit()
if len(images) == 0:
    raise ValueError(
        f"No training images could be loaded from {root_dir}; "
        "check the path and the merged label table."
    )

# Convert labels to categorical format
labels_categorical = to_categorical(labels, num_classes=3)

# Build the model
cnn_model = build_cnn_model()

# Train the CNN model
cnn_model.fit(images, labels_categorical, epochs=10, batch_size=32, validation_split=0.2)

# Save the trained model
cnn_model.save('cnn_stenosis_model.h5')


6. Build a CNN Model for Classification

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Two conv/pool stages followed by a dense head with a single sigmoid unit
# (binary classification) on IMG_SIZE x IMG_SIZE single-channel input.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()


7. Train the Model

In [None]:
!pip install pydicom


In [None]:
!pip install tensorflow-addons
!pip install tensorflow keras

In [None]:
import tensorflow as tf
print(tf.__version__)


In [None]:
import os
import pandas as pd
import numpy as np
import pydicom
import cv2
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from skimage.transform import resize
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
import warnings
#import tensorflow_addons as tfa

warnings.filterwarnings('ignore')

# Paths
train_path = '/content/train.csv'
train_label_coordinates_path = '/content/train_label_coordinates.csv'
train_series_descriptions_path = '/content/train_series_descriptions.csv'
test_series_descriptions_path = '/content/test_series_descriptions.csv'
sample_submission_path = '/content/sample_submission.csv'

# Load each table, drop rows containing any missing value, and remove a stray
# 'Unnamed: 0' index column when one is present.
df_train = pd.read_csv(train_path).dropna().drop(columns='Unnamed: 0', errors='ignore')
df_train_label_coordinates = (
    pd.read_csv(train_label_coordinates_path).dropna().drop(columns='Unnamed: 0', errors='ignore')
)
df_train_series_descriptions = (
    pd.read_csv(train_series_descriptions_path).dropna().drop(columns='Unnamed: 0', errors='ignore')
)
df_test_series_descriptions = (
    pd.read_csv(test_series_descriptions_path).dropna().drop(columns='Unnamed: 0', errors='ignore')
)

# The sample submission is kept exactly as read
df_sample_submission = pd.read_csv(sample_submission_path)
# Encode labels
# 'condition' and 'level' are overwritten in place with integer codes; the
# fitted encoders are kept in le_condition / le_level.
le_condition = LabelEncoder()
df_train_label_coordinates['condition'] = le_condition.fit_transform(df_train_label_coordinates['condition'])

le_level = LabelEncoder()
df_train_label_coordinates['level'] = le_level.fit_transform(df_train_label_coordinates['level'])

# NOTE(review): 'target' is fitted on the *already encoded* 'condition' column,
# so it repeats the condition codes and le_target.classes_ holds integers, not
# severity names. If 'target' is meant to be a severity grade it presumably has
# to come from the label columns of train.csv - TODO confirm the intended source.
le_target = LabelEncoder()
df_train_label_coordinates['target'] = le_target.fit_transform(df_train_label_coordinates['condition'])
# Function to load DICOM images
def load_dicom_image(path):
    """Read a DICOM file and return its pixel data as an 8-bit image.

    Args:
        path: path of the DICOM file.

    Returns:
        The pixel array as ``uint8``. Data that is already ``uint8`` is
        returned unchanged; anything else is min-max scaled to 0-255 first. A
        plain ``astype(np.uint8)`` would wrap values modulo 256 and scramble
        the image. A constant image becomes all zeros.
    """
    dicom = pydicom.dcmread(path)  # Replaced read_file with dcmread
    image = dicom.pixel_array
    if image.dtype != np.uint8:
        scaled = image.astype(np.float64)
        lowest = scaled.min()
        span = scaled.max() - lowest
        if span > 0:
            scaled = (scaled - lowest) * (255.0 / span)
        else:
            scaled = np.zeros_like(scaled)
        # Round before casting so the brightest pixel maps to exactly 255
        image = np.clip(np.rint(scaled), 0, 255).astype(np.uint8)
    return image

# Function to extract region
def extract_region(image, x, y, width=128, height=128):
    """Crop a window centred on (x, y) and resize it to 128x128.

    Args:
        image: 2-D (or higher) array indexed as ``image[row, col]``.
        x, y: centre of the window in column/row coordinates.
        width, height: window size before clamping to the image bounds.

    Returns:
        The crop resized to 128x128 with ``cv2.resize``.

    Raises:
        ValueError: if the clamped window contains no pixels.
    """
    half_w, half_h = width / 2, height / 2
    x0, x1 = int(x - half_w), int(x + half_w)
    y0, y1 = int(y - half_h), int(y + half_h)

    # Clamp the window to the image bounds
    left, right = max(0, x0), min(image.shape[1], x1)
    top, bottom = max(0, y0), min(image.shape[0], y1)

    crop = image[top:bottom, left:right]
    if crop.size == 0:
        raise ValueError("Extracted region is empty. Please check the coordinates and image dimensions.")

    return cv2.resize(crop, (128, 128))
# Function to draw rectangle
def draw_rectangle(image, x_coord, y_coord, size, color, label):
    """Show *image* next to a zoomed crop around a point, with a box on the point.

    Args:
        image: 2-D image array.
        x_coord, y_coord: point of interest in column/row coordinates.
        size: side length of the square box, in pixels.
        color: matplotlib colour for the box edge.
        label: text used in the title of the zoomed panel.

    The crop is a square whose side is 20% of the smaller image dimension,
    clamped to the image bounds.
    """
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))

    ax[0].imshow(image, cmap='gray')
    ax[0].set_title('Original Image')

    window_size = int(0.2 * min(image.shape))  # Adaptive window size (20%)
    half = window_size // 2
    top = max(0, int(y_coord) - half)
    bottom = min(image.shape[0], int(y_coord) + half)
    left = max(0, int(x_coord) - half)
    right = min(image.shape[1], int(x_coord) + half)
    selected_area = image[top:bottom, left:right]

    ax[1].imshow(selected_area, cmap='gray')
    # Position the box relative to the actual crop origin: when the crop is
    # clamped at an image border the point is no longer at the crop centre.
    rect_x = int(x_coord) - left - size // 2
    rect_y = int(y_coord) - top - size // 2
    rect = Rectangle((rect_x, rect_y), size, size, linewidth=2, edgecolor=color, facecolor='none')
    ax[1].add_patch(rect)
    ax[1].set_title(f'{label} Area at ({x_coord:.2f}, {y_coord:.2f})')

    plt.show()
# Function to draw severity
def draw_severe(image, x_coord, y_coord, severity):
    """Plot a CLAHE-enhanced image with a severity-coloured box and return a crop.

    Args:
        image: image array accepted by OpenCV's CLAHE.
        x_coord, y_coord: point of interest in column/row coordinates.
        severity: integer code understood by the module-level ``le_target``.

    Returns:
        The result of ``extract_region`` on the enhanced image, centred on the
        point with a 16x16 window.

    Raises:
        ValueError: if the decoded severity is not among ``le_target.classes_``.
    """
    # Three-step yellow -> orange -> red colour map
    palette = [(1, 1, 0), (1, 0.5, 0), (1, 0, 0)]  # Yellow to Red
    severity_cmap = LinearSegmentedColormap.from_list("severity_cmap", palette, N=3)

    decoded = le_target.inverse_transform([severity])[0]
    known_levels = le_target.classes_
    if decoded not in known_levels:
        raise ValueError(f"Unexpected severity level: {decoded}")

    box_color = severity_cmap(list(known_levels).index(decoded))

    enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(image)

    draw_rectangle(enhanced, x_coord, y_coord, 24, box_color, severity)

    return extract_region(enhanced, x_coord, y_coord, width=16, height=16)
# Function to load images from study
def load_images_from_study(df, folder):
    """Collect a 128x128 crop from every DICOM file of each labelled series.

    Args:
        df: frame with ``study_id``, ``series_id``, ``x``, ``y``,
            ``condition``, ``level`` and ``target`` columns.
        folder: directory laid out as ``<study_id>/<series_id>/<file>``.

    Returns:
        ``(images, conditions, levels, targets)`` as NumPy arrays with one
        entry per successfully loaded file.

    For every row, each file in the series folder is loaded, plotted through
    ``draw_severe`` and cropped around (x, y). Missing folders and files that
    cannot be loaded are reported with ``print`` and skipped.
    """
    images = []
    conditions = []
    levels = []
    targets = []

    for _, row in df.iterrows():
        study_id = row['study_id']
        series_id = row['series_id']
        x = row['x']
        y = row['y']
        condition = row['condition']
        level = row['level']
        target = row['target']

        dicom_folder = f"{folder}/{study_id}/{series_id}"
        if not os.path.isdir(dicom_folder):
            print(f"Folder not found: {dicom_folder}")
            continue

        # Sorted so the output order does not depend on the filesystem
        for dicom_file in sorted(os.listdir(dicom_folder)):
            dicom_path = os.path.join(dicom_folder, dicom_file)
            try:
                img = load_dicom_image(dicom_path)
            except Exception as e:
                # load_dicom_image raises rather than returning None, so this
                # is where a bad file has to be reported and skipped.
                print(f"Failed to load image for {dicom_path}: {e}")
                continue

            # Called for the figure it renders; its return value is not used
            draw_severe(img, x, y, int(target))

            images.append(extract_region(img, x, y))
            conditions.append(condition)
            levels.append(level)
            targets.append(target)

    return np.array(images), np.array(conditions), np.array(levels), np.array(targets)
# Prepare test data
# Attach the training label coordinates to the test series that share the same
# (study_id, series_id); rows without a match are dropped.
df_test = df_test_series_descriptions.merge(df_train_label_coordinates[['study_id', 'series_id', 'x', 'y', 'condition', 'level', 'target']], on=['study_id', 'series_id'], how='left')
df_test = df_test.dropna(subset=['x', 'y', 'condition', 'level', 'target'])

# Fall back to a single hard-coded placeholder row when nothing matched
if df_test.empty:
  df_test = pd.DataFrame({
      'study_id': [44036939],
      'series_id': [2828203845],
      'x': [240],
      'y': [120],
      'condition': [0],
      'level': [0],
      'target': [0]
  })

X_test_images, X_test_conditions, X_test_levels, _ = load_images_from_study(df_test, '/content/test_images')

if X_test_images.size == 0 or X_test_conditions.size == 0 or X_test_levels.size == 0:
  raise ValueError(
      "No test images were loaded from /content/test_images; check that the "
      "folder exists and contains the study/series listed in df_test."
  )

# Trim the three arrays to a common length
min_samples = min(X_test_images.shape[0], X_test_conditions.shape[0], X_test_levels.shape[0])

X_test_images = X_test_images[:min_samples]
X_test_conditions = X_test_conditions[:min_samples]
X_test_levels = X_test_levels[:min_samples]

# Downsample to the 64x64x1 input the CNN below is built for
X_test_images_resized = np.array([resize(image, (64, 64)) for image in X_test_images])
X_test_images_resized = X_test_images_resized.reshape(-1, 64, 64, 1)

X_test_conditions_encoded = to_categorical(X_test_conditions)
X_test_levels_encoded = to_categorical(X_test_levels)
# Create CNN model
def create_cnn_model(input_shape):
    """Build and compile a three-stage CNN with a 3-way softmax output.

    Args:
        input_shape: shape of one input sample, passed to the first Conv2D.

    Returns:
        A compiled Keras ``Sequential`` model (Adam, categorical
        cross-entropy, accuracy metric).
    """
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(3, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network


input_shape = (64, 64, 1)
model = create_cnn_model(input_shape)

# # Create CNN model
# def create_cnn_model(input_shape):
#     model = Sequential()
#     model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
#     model.add(MaxPooling2D((2, 2)))
#     model.add(Conv2D(64, (3, 3), activation='relu'))
#     model.add(MaxPooling2D((2, 2)))
#     model.add(Conv2D(128, (3, 3), activation='relu'))
#     model.add(MaxPooling2D((2, 2)))
#     model.add(Flatten())
#     model.add(Dense(128, activation='relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(3, activation='softmax'))

#     # Compile model with additional metrics
#     model.compile(optimizer='adam',
#                   loss='categorical_crossentropy',
#                   metrics=['accuracy',
#                            tfa.metrics.F1Score(num_classes=3, average='macro'),
#                            tfa.metrics.Precision(),
#                            tfa.metrics.Recall()])
#     return model

# input_shape = (64, 64, 1)
# model = create_cnn_model(input_shape)


# Dummy training data
X_train_images = np.random.rand(10, 64, 64, 1)
X_train_conditions = np.random.randint(0, 3, 10)
X_train_levels = np.random.randint(0, 3, 10)
# num_classes pins the one-hot width to the model's 3 outputs even when a
# class happens to be absent from the random draw
y_train = to_categorical(np.random.randint(0, 3, 10), num_classes=3)

# Train model
# `model` is a single-input Sequential network, so only the image tensor is
# fed; passing the condition/level arrays as extra inputs makes fit() fail.
model.fit(X_train_images, y_train, epochs=3)

# Load data (fresh, unfiltered copies of the CSV tables)
df_train_data = pd.read_csv(train_path)
df_train_labels = pd.read_csv(train_label_coordinates_path)
df_train_series_desc = pd.read_csv(train_series_descriptions_path)
df_test_series_desc = pd.read_csv(test_series_descriptions_path)
df_submission_template = pd.read_csv(sample_submission_path)

# Merge data: coordinates + per-study labels, then + series descriptions
df_merged_train_labels = (
    df_train_labels.merge(df_train_data, how='left', on='study_id').reset_index(drop=True)
)
df_complete_train_data = (
    df_merged_train_labels
    .merge(df_train_series_desc, how='left', on=['study_id', 'series_id'])
    .reset_index(drop=True)
)

# Convert the identifier columns to category dtype
for id_column in ('study_id', 'series_id'):
    df_complete_train_data[id_column] = df_complete_train_data[id_column].astype('category')

# Calculate frequencies: per label column, the share of each severity class
label_columns = df_train_data.columns.drop('study_id').tolist()
frequency_rows = []
for label in label_columns:
    relative_counts = df_train_data[label].value_counts(normalize=True)
    frequency_rows.append({
        'label': label,
        'p1': relative_counts.get('Normal/Mild', 0),
        'p2': relative_counts.get('Moderate', 0),
        'p3': relative_counts.get('Severe', 0),
    })

# Probability columns are kept as floats even when every entry is 0
df_label_frequencies = pd.DataFrame(
    frequency_rows, columns=['label', 'p1', 'p2', 'p3']
).astype({'p1': 'float64', 'p2': 'float64', 'p3': 'float64'})

# Frequency adjustment: same per-label class shares, computed on df_train
# (the copy with incomplete rows removed)
labels = df_train.columns.drop('study_id').tolist()
freq_rows = []
for l in labels:
    rel_counts = df_train[l].value_counts(normalize=True)
    freq_rows.append({
        'label': l,
        'p1': rel_counts.get('Normal/Mild', 0),
        'p2': rel_counts.get('Moderate', 0),
        'p3': rel_counts.get('Severe', 0),
    })

# Probability columns are kept as floats even when every entry is 0
freqs = pd.DataFrame(
    freq_rows, columns=['label', 'p1', 'p2', 'p3']
).astype({'p1': 'float64', 'p2': 'float64', 'p3': 'float64'})

# Save train data
df_complete_train_data.to_csv('complete_train_data.csv', index=False)

# Explore specific study and series
study_id = 100206310
df_study_example = df_complete_train_data[df_complete_train_data.study_id == study_id]
series_id = 1012284084
df_series_example = df_study_example[df_study_example.series_id == series_id]
# Explore DICOM files
dicom_path = f'/content/train_images/{study_id}/{series_id}/'
for dirname, _, filenames in os.walk(dicom_path):
  for filename in filenames:
      print(os.path.join(dirname, filename))

# Read instances 1..10, skipping numbers that are not on disk so a short or
# gapped series does not abort the cell with FileNotFoundError
for i in range(1, 10 + 1):
  dicom_file = dicom_path + str(i) + '.dcm'
  if not os.path.exists(dicom_file):
      print(f"{dicom_file} not found, skipping")
      continue
  print(dicom_file)
  ds = pydicom.dcmread(dicom_file)

# Apply frequency adjustment to submission: each row gets the class shares of
# the label named after the first underscore of its row_id
num_rows = df_submission_template.shape[0]
frequency_column_for = {'normal_mild': 'p1', 'moderate': 'p2', 'severe': 'p3'}
for i in range(num_rows):
    current_label = df_submission_template.loc[i, 'row_id'].split('_', 1)[1]
    matching = df_label_frequencies[df_label_frequencies.label == current_label]
    for output_column, frequency_column in frequency_column_for.items():
        df_submission_template.loc[i, output_column] = matching[frequency_column].min()

# Save frequency-adjusted submission
df_submission_template.to_csv('submission_with_frequencies.csv', index=False)

# Apply uniform distribution to submission (overwrites the frequency values)
uniform_probabilities = {'normal_mild': 0.34, 'moderate': 0.33, 'severe': 0.33}
for i in range(num_rows):
    for column, probability in uniform_probabilities.items():
        df_submission_template.loc[i, column] = probability

# Save uniform distribution submission
df_submission_template.to_csv('submission.csv', index=False)


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# The one-hot encoded conditions stand in for the true labels here (placeholder)
y_test = X_test_conditions_encoded

# Predict on the test crops and reduce both sides to class indices
y_pred = model.predict(X_test_images_resized)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Macro-averaged precision, recall, and F1-score
precision, recall, f1 = (
    metric(y_true, y_pred_classes, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
