In [None]:
#imports needed packages

import pandas as pd
import numpy as np
import os
import torch
import pickle
import pydicom
from pathlib import Path
from os.path import join
from PIL import Image
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models import convnext_small
import torch.nn as nn
import timm
from timm import create_model

import warnings
warnings.filterwarnings("ignore")


In [None]:
# Central configuration shared by the cells below.
class CFG:
    """Static settings for the inference pipeline, read as class attributes."""

    # General run settings
    verbose = 1
    seed = 21
    batch_size = 12
    image_size = [512, 512]  # read as image_size[0], image_size[1] when building the Resize transform

    # Every label is scored over this many classes
    num_classes = 3

    # Sagittal model: stacked input slices and number of labels
    sag_channels = 24
    sag_labels = 15

    # Axial model: stacked input slices and number of labels
    axial_channels = 15
    axial_labels = 10


# Seed PyTorch's RNG so repeated runs start from the same state.
torch.manual_seed(CFG.seed)

# Use the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Locations of the pre-trained artifacts (pickled scaler / regressor, model weights)
scaler_path = "/kaggle/input/submission-models/y_coordinate_scaler.pkl"
y_model_path = "/kaggle/input/submission-models/y_coordinate_model.pkl"
sag_model_path = "/kaggle/input/submission-models/sag_model.pth"
axial_model_path = "/kaggle/input/submission-models/axial_model.pth"

# Competition data folder and the table listing every test series
df_path = "/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/"
test_df = pd.read_csv(f"{df_path}test_series_descriptions.csv")

In [None]:
# Conditions scored by each model type.
# A comma was missing after 'spinal_canal_stenosis', so Python concatenated it with
# the next literal and the list silently had two entries instead of three.
sag_conditions = [
    'spinal_canal_stenosis',
    'left_neural_foraminal_narrowing',
    'right_neural_foraminal_narrowing',
]
axial_conditions = ['left_subarticular_stenosis', 'right_subarticular_stenosis']

# Spinal levels, ordered from l1_l2 down to l5_s1
levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']

# Model Prep

In [None]:
class convnext_small(nn.Module):
    """Wrapper module that holds a timm ``convnext_small`` network in ``self.model``.

    NOTE(review): this class name shadows ``convnext_small`` imported from
    ``torchvision.models`` at the top of the notebook; after this cell the name
    refers to this wrapper.

    Args:
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.
        pretrained: Forwarded to ``timm.create_model``; defaults to ``False``.
    """

    def __init__(self, num_classes, pretrained=False):
        super().__init__()
        backbone = timm.create_model(
            'convnext_small',
            pretrained=pretrained,
            num_classes=num_classes,
        )
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        output = self.model(x)
        return output

def load_model(in_channels, num_labels, file_path, device):
    """Build the ConvNeXt wrapper, replace its stem convolution, and load saved weights.

    Args:
        in_channels: Input channel count of the replacement stem convolution.
        num_labels: Passed to ``convnext_small`` as ``num_classes`` (size of the output layer).
        file_path: Path of the saved state dict, read with ``torch.load``.
        device: Used as ``map_location`` and as the device the model is moved to.

    Returns:
        The model, moved to ``device`` and switched to eval mode.

    Raises:
        RuntimeError: If none of the checkpoint keys match the model, which would
            otherwise leave every weight at its random initial value.
    """
    model = convnext_small(pretrained=False, num_classes=num_labels)

    # Modify the first convolutional layer (stem layer) so it accepts `in_channels` inputs
    model.model.stem[0] = nn.Conv2d(in_channels, 96, kernel_size=4, stride=4, padding=1)

    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(file_path, map_location=device)

    # Collect the model's keys once instead of rebuilding model.state_dict()
    # for every checkpoint entry.
    expected_keys = set(model.state_dict().keys())
    new_state_dict = {k: v for k, v in state_dict.items() if k in expected_keys}
    if not new_state_dict:
        raise RuntimeError(
            f"No parameter names in {file_path} match the model; refusing to run with random weights"
        )

    # strict=False keeps the lenient loading, but report what stayed uninitialised
    load_result = model.load_state_dict(new_state_dict, strict=False)
    if load_result.missing_keys:
        print(
            f"load_model: {len(load_result.missing_keys)} parameter(s) not found in "
            f"{file_path} keep their initial values"
        )

    model.to(device)
    model.eval()

    return model

In [None]:
# Build both classifiers; each output layer has (labels * classes) units.
sag_model = load_model(
    in_channels=CFG.sag_channels,
    num_labels=CFG.sag_labels * CFG.num_classes,
    file_path=sag_model_path,
    device=device,
)
axial_model = load_model(
    in_channels=CFG.axial_channels,
    num_labels=CFG.axial_labels * CFG.num_classes,
    file_path=axial_model_path,
    device=device,
)

In [None]:
# Load the two pickled artifacts used by the y-coordinate step further down.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so these
# paths must only ever point at files you trust.
# Loads the scaler (later applied through scaler.transform)
with open(scaler_path, 'rb') as f:
    scaler = pickle.load(f)
# Loads the y-coordinate model (later used through y_coor_model.predict)
with open(y_model_path, 'rb') as f:
    y_coor_model = pickle.load(f)

# Sagittal Data Prep

In [None]:
# Lists the .dcm files stored in one series folder
def get_file_list(study_id, series_id,
                  data_dir='/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'):
    """Return the paths of every ``.dcm`` file in ``data_dir/study_id/series_id``.

    Args:
        study_id: Study identifier; converted with ``str`` to form the folder name.
        series_id: Series identifier; converted with ``str`` to form the folder name.
        data_dir: Root folder of the images. Defaults to the competition test set.

    Returns:
        A list of file paths in the order ``os.listdir`` yields them (that order
        is not sorted here).
    """
    series_dir = os.path.join(data_dir, str(study_id), str(series_id))
    return [
        os.path.join(series_dir, file_name)
        for file_name in os.listdir(series_dir)
        if file_name.endswith('.dcm')
    ]

In [None]:
def expand_dataframe(df, max_files=12):
    """Add ``file_path_1 .. file_path_{max_files}`` columns to every series row.

    For each row the series folder is listed with ``get_file_list``. Longer lists
    are cut to ``max_files``; shorter lists are repeated cyclically until they
    reach ``max_files``. A series without any ``.dcm`` file gets ``None`` in every
    path column instead of raising ``ZeroDivisionError``.

    Args:
        df: Frame with ``study_id`` and ``series_id`` columns.
        max_files: Number of path columns to create per row.

    Returns:
        A new frame: ``df`` with a fresh 0..n-1 index plus the path columns.
    """
    column_names = [f'file_path_{i+1}' for i in range(max_files)]
    padded_rows = []

    for _, row in df.iterrows():
        file_list = get_file_list(row['study_id'], row['series_id'])

        if not file_list:
            # Nothing to repeat; leave the row empty so later steps can fill or skip it
            padded = [None] * max_files
        else:
            # Ceil-divide so the repeated list is at least max_files long, then trim
            repeats = -(-max_files // len(file_list))
            padded = (file_list * repeats)[:max_files]

        padded_rows.append(padded)

    new_columns_df = pd.DataFrame(padded_rows, columns=column_names)
    expanded_df = pd.concat([df.reset_index(drop=True), new_columns_df], axis=1)

    return expanded_df

In [None]:
def combine_file_paths(df, max_files=12):
    """Merge the path columns of all rows of a study into one row per study.

    Non-null paths are appended in row order until ``max_files * 2`` slots are
    used; extra paths are ignored and unused slots stay ``None``.

    Args:
        df: Frame with ``study_id`` and ``file_path_1 .. file_path_{max_files}``.
        max_files: Number of path columns per input row.

    Returns:
        Frame with ``study_id`` and ``file_path_1 .. file_path_{max_files * 2}``.
    """
    capacity = max_files * 2
    source_columns = [f'file_path_{i+1}' for i in range(max_files)]
    collected = {}

    for _, record in df.iterrows():
        present = [record[column] for column in source_columns if pd.notna(record[column])]

        # One growing list per study; only as many paths as there are free slots are kept
        bucket = collected.setdefault(record['study_id'], [])
        free_slots = capacity - len(bucket)
        bucket.extend(present[:free_slots])

    # Pad every study to the full width with None
    padded = {
        study_id: paths + [None] * (capacity - len(paths))
        for study_id, paths in collected.items()
    }

    combined_df = pd.DataFrame.from_dict(
        padded,
        orient='index',
        columns=[f'file_path_{i+1}' for i in range(capacity)],
    )

    # Turn the study id index into a regular column
    combined_df = combined_df.reset_index().rename(columns={'index': 'study_id'})

    return combined_df

In [None]:
# Reuses a row's existing file paths to fill its empty path columns
def fill_empty_paths(df):
    """Fill missing ``file_path_N`` cells by cycling through the row's own paths.

    The path columns are discovered from the frame (``file_path_1``,
    ``file_path_2``, ... while present) rather than assumed to be 24. Gaps are
    filled in column order with the first, second, ... available path, wrapping
    around when there are more gaps than paths. Rows without any path are left
    unchanged.

    Args:
        df: Frame with consecutive ``file_path_N`` columns; modified in place.

    Returns:
        The same ``df`` object.
    """
    path_columns = []
    position = 1
    while f'file_path_{position}' in df.columns:
        path_columns.append(f'file_path_{position}')
        position += 1

    for index, row in df.iterrows():
        # `row` is a snapshot, so writes to df below do not affect what is read here
        available = [row[col_name] for col_name in path_columns if pd.notna(row[col_name])]
        if not available:
            continue

        reused = 0
        for col_name in path_columns:
            if pd.isna(row[col_name]):
                df.at[index, col_name] = available[reused % len(available)]
                reused += 1

    return df

In [None]:
# Keep only the two sagittal sequence types
sag_intial_df = test_df[test_df['series_description'].isin(['Sagittal T1', 'Sagittal T2/STIR'])]
# One row per (study, series) pair
drop_df = sag_intial_df.drop_duplicates(subset=['study_id', 'series_id'])
# Per series: path columns -> per study: combined path columns -> gaps filled with reused paths
expanded_df = expand_dataframe(drop_df)
combine_df = combine_file_paths(expanded_df)
sagittal_df = fill_empty_paths(combine_df)
sagittal_df

# Get Y Coordinates

In [None]:
# Repeats every input row once per level and tags it with that level
def get_levels(df, levels):
    """Return a frame holding one copy of each row of ``df`` for every level.

    Args:
        df: Source frame.
        levels: Iterable of level names; each is written to the ``level`` column.

    Returns:
        Frame with the rows in source order, levels cycling fastest. Each copy
        keeps the index label of the row it was made from, so labels repeat.
    """
    def _tag(source_row, level_name):
        tagged = source_row.copy()
        tagged['level'] = level_name
        return tagged

    expanded = [
        _tag(source_row, level_name)
        for _, source_row in df.iterrows()
        for level_name in levels
    ]

    return pd.DataFrame(expanded)


In [None]:
# Returns the path of one .dcm file from a series folder
def get_first_file_path(study_id, series_id,
                        data_dir='/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'):
    """Return the path of the first ``.dcm`` file that ``os.listdir`` reports.

    Args:
        study_id: Study identifier; converted with ``str`` to form the folder name.
        series_id: Series identifier; converted with ``str`` to form the folder name.
        data_dir: Root folder of the images. Defaults to the competition test set.

    Returns:
        Full path of the selected file. "First" follows ``os.listdir`` order,
        which is not sorted here.

    Raises:
        FileNotFoundError: If the folder holds no ``.dcm`` file (previously a bare
            ``StopIteration`` escaped from ``next``).
    """
    series_path = os.path.join(data_dir, str(study_id), str(series_id))
    first_file = next(
        (file_name for file_name in os.listdir(series_path) if file_name.endswith('.dcm')),
        None,
    )
    if first_file is None:
        raise FileNotFoundError(f"No .dcm files found in {series_path}")

    return os.path.join(series_path, first_file)

In [None]:
# Reads the geometry tags of one DICOM file
def get_shape(image_path):
    """Return size, position and pixel-spacing header values of a DICOM file.

    Only header tags are used, so the file is read with
    ``stop_before_pixels=True`` and the pixel data is never loaded.

    Args:
        image_path: Path of the ``.dcm`` file.

    Returns:
        Tuple ``(height, width, x_image_position, y_image_position,
        z_image_position, x_pixel_spacing, y_pixel_spacing)`` taken from ``Rows``,
        ``Columns``, ``ImagePositionPatient`` and ``PixelSpacing``.
    """
    dicom = pydicom.dcmread(image_path, stop_before_pixels=True)
    height = dicom.Rows
    width = dicom.Columns
    # NOTE(review): DICOM defines PixelSpacing as (row spacing, column spacing), so the
    # value named x_pixel_spacing is the spacing between rows. The naming is left as is
    # because the downstream scaler/model consume the features in this order — confirm.
    x_pixel_spacing, y_pixel_spacing = dicom.PixelSpacing
    x_image_position, y_image_position, z_image_position = dicom.ImagePositionPatient

    return height, width, x_image_position, y_image_position, z_image_position, x_pixel_spacing, y_pixel_spacing

In [None]:
# Keep only the Sagittal T1 series; these rows feed the y-coordinate prediction below.
coor_intial_df = test_df[(test_df['series_description'] == 'Sagittal T1')]
# get_levels repeats every series row once per entry of `levels`.
sag_level_df = get_levels(coor_intial_df, levels)
coor_level_df=sag_level_df.copy()
# Adds a column with the path of one .dcm file from each row's series folder
coor_level_df['file_path_1'] = sag_level_df.apply(lambda row: get_first_file_path(row['study_id'], row['series_id']), axis=1)

In [None]:
# Reads the DICOM header values and stores them as new columns.
# The column order must match the tuple returned by get_shape.
metadata_columns = [
    'height',
    'width',
    'x_image_position',
    'y_image_position',
    'z_image_position',
    'x_pixel_spacing',
    'y_pixel_spacing',
]

# Every level row of a series points at the same file, so read each file once
# instead of once per row.
metadata_by_path = {
    path: get_shape(path) for path in coor_level_df['file_path_1'].unique()
}

# Build one float row per frame row, in frame order. The columns are assigned
# positionally, so the repeated index labels in coor_level_df cannot make the
# writes ambiguous.
metadata_values = np.array(
    [metadata_by_path[path] for path in coor_level_df['file_path_1']],
    dtype=float,
).reshape(-1, len(metadata_columns))

for column_position, column_name in enumerate(metadata_columns):
    coor_level_df[column_name] = metadata_values[:, column_position]

In [None]:
# Prepares the numeric feature frame for the y-coordinate model
feature_columns = [
    'series_description',
    'level',
    'height', 'width',
    'x_image_position',
    'y_image_position',
    'z_image_position',
    'x_pixel_spacing',
    'y_pixel_spacing',
]

level_values = {'l1_l2': 1, 'l2_l3': 2, 'l3_l4': 3, 'l4_l5': 4, 'l5_s1': 5}
series_values = {'Sagittal T2/STIR': 1, 'Sagittal T1': 2}

# .assign builds a new frame, so nothing is written into a slice of coor_level_df
# (the previous column assignments on the slice were chained assignments).
# Values missing from the mappings become NaN, the same end result the earlier
# replace + to_numeric(errors='coerce') combination produced.
image_data_df = (
    coor_level_df[feature_columns]
    .assign(
        series_description=lambda frame: frame['series_description'].map(series_values),
        level=lambda frame: frame['level'].map(level_values),
    )
    .apply(pd.to_numeric, errors='coerce')
)

In [None]:
# Show the numeric feature frame that the next cell passes to scaler.transform
image_data_df

In [None]:
# Scales the features with the loaded scaler and feeds them to the y-coordinate model
y_input_df = scaler.transform(image_data_df)
level_coordinates = y_coor_model.predict(y_input_df)
# Add the predicted coordinates to the dataframe (assigned positionally, one value per row)
coor_level_df['level_coordinate'] = level_coordinates

In [None]:
# Drops the columns that are not needed to locate the matching axial image
columns_to_drop = ['series_description',
                  'width',
                   'x_image_position',
                  'z_image_position',
                  'x_pixel_spacing',
                  ]
space_df =coor_level_df.drop(columns = columns_to_drop)
# Reference position per level: image y position plus the distance (in pixel-spacing
# units) between the image height and the predicted level coordinate.
# NOTE(review): this value is later compared against the z component of the axial
# ImagePositionPatient in match_image_paths, while it is built here from
# y_image_position and y_pixel_spacing — confirm that this mix of axes is intended.
space_df['distance_from_reference'] = space_df['y_image_position'] + ((space_df['height'] - space_df['level_coordinate'])*space_df['y_pixel_spacing']) 

In [None]:
# Show the per-level frame including the computed distance_from_reference column
space_df

# Get Axial Instance

In [None]:
# Reads the z position of every given image
def get_axial_z_positions(image_paths):
    """Map each path to the third component of its ``ImagePositionPatient`` tag.

    Only the header is needed, so every file is read with
    ``stop_before_pixels=True`` and the pixel data is never loaded.

    Args:
        image_paths: Iterable of ``.dcm`` file paths.

    Returns:
        Dict ``{path: z_position}`` in the order the paths were given.
    """
    return {
        path: pydicom.dcmread(path, stop_before_pixels=True).ImagePositionPatient[2]
        for path in image_paths
    }

In [None]:
def match_image_paths(space_df):
    """Add a ``file_path`` column with the closest image of each row's series.

    For every row the files of ``(study_id, series_id)`` are looked up and the
    one whose z position is nearest to ``distance_from_reference`` is chosen; the
    first file wins on ties. The folder listing and header reads are cached per
    series, so a series shared by several rows is only scanned once.

    Args:
        space_df: Frame with ``study_id``, ``series_id`` and
            ``distance_from_reference`` columns; modified in place.

    Returns:
        The same ``space_df`` object with the ``file_path`` column set
        (``None`` where a series has no files).
    """
    z_positions_by_series = {}
    closest_paths = []

    for _, row in space_df.iterrows():
        series_key = (row['study_id'], row['series_id'])

        # Scan each series folder only the first time it is seen
        if series_key not in z_positions_by_series:
            file_paths = get_file_list(*series_key)
            z_positions_by_series[series_key] = get_axial_z_positions(file_paths)
        z_positions = z_positions_by_series[series_key]

        ref_z = row['distance_from_reference']

        # Path with the smallest absolute z distance; None when the series is empty
        closest_paths.append(
            min(z_positions, key=lambda path: abs(z_positions[path] - ref_z), default=None)
        )

    # Positional assignment: one chosen path per row, in row order
    space_df['file_path'] = closest_paths

    return space_df

In [None]:
# Pair every per-level row with the Axial T2 series of the same study.
axial_temp_df = test_df[(test_df['series_description'] == 'Axial T2')]
# Columns present in both frames keep their name on the left and get '_new' on the axial side.
merged_df = pd.merge(space_df, axial_temp_df, on='study_id', suffixes=('', '_new')) 
# Overwrite series_id with the axial series id so the file lookups below use the axial folder.
merged_df['series_id'] = merged_df['series_id_new']
updated_space_df = merged_df.drop(columns=['series_id_new'])
# Adds the 'file_path' column: the image whose z position is closest to distance_from_reference.
updated_space_df = match_image_paths(updated_space_df)

In [None]:
# Do not truncate long cell values (the file paths) when frames are displayed.
# This is a global pandas option and stays in effect for the following cells.
pd.set_option('display.max_colwidth', None)

updated_space_df

# Axial Data Prep

In [None]:
# Maps the instance number of every .dcm file in a series folder to its path
def get_file_paths_dict(study_id, series_id,
                        data_dir='/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/test_images/'):
    """Return ``{instance_number: file_path}`` for one series folder.

    The instance number is parsed from the file name: the text before the first
    ``.`` is split on ``_`` and the last piece is converted with ``int``.

    Args:
        study_id: Study identifier; converted with ``str`` to form the folder name.
        series_id: Series identifier; converted with ``str`` to form the folder name.
        data_dir: Root folder of the images. Defaults to the competition test set.

    Returns:
        Dict from instance number to full file path.

    Raises:
        ValueError: If a ``.dcm`` file name does not end in an integer piece.
    """
    dir_path = os.path.join(data_dir, str(study_id), str(series_id))
    return {
        int(file_name.split('.')[0].split('_')[-1]): os.path.join(dir_path, file_name)
        for file_name in os.listdir(dir_path)
        if file_name.endswith('.dcm')
    }

In [None]:
# Looks up the selected instance together with its two neighbours
def get_adjacent_file_paths(row, file_paths_dict):
    """Return the paths of instance ``n - 1``, ``n`` and ``n + 1``.

    ``n`` is ``int(row['instance_number'])``. A missing neighbour is replaced by
    the path of instance ``n`` itself; a missing ``n`` raises ``KeyError``.

    Args:
        row: Mapping-like object with an ``instance_number`` entry.
        file_paths_dict: Dict from instance number to file path.

    Returns:
        ``pd.Series`` indexed ``file_path_1``, ``file_path_2``, ``file_path_3``.
    """
    center = int(row['instance_number'])
    current_path = file_paths_dict[center]
    neighbours = [
        file_paths_dict.get(center + offset, current_path)
        for offset in (-1, 0, 1)
    ]

    return pd.Series(neighbours, index=['file_path_1', 'file_path_2', 'file_path_3'])

In [None]:
# Combines the three file paths of every level into one row per study
def consolidate_file_paths(df, levels):
    """Build one row per study with 15 path columns ordered by level.

    Rows of a study are ordered by ``levels`` and reduced to the first row per
    level, so repeated level rows (for example from several series of the same
    study) cannot shift the three-paths-per-level layout. If fewer than 15 paths
    remain, the list is repeated cyclically.

    Args:
        df: Frame with ``study_id``, ``level`` and ``file_path_1 .. file_path_3``.
        levels: Level names in the order they should appear.

    Returns:
        Frame with ``study_id`` and ``file_path_1 .. file_path_15``.
    """
    path_columns = ['file_path_1', 'file_path_2', 'file_path_3']
    total_paths = 15  # same width the axial model input uses (CFG.axial_channels)
    results = []

    for name, group in df.groupby('study_id'):
        # .assign works on a copy, so the groupby slice itself is never modified.
        # The stable sort keeps the original row order inside each level, which
        # makes "first row per level" well defined.
        ordered = (
            group
            .assign(level=pd.Categorical(group['level'], categories=levels, ordered=True))
            .sort_values('level', kind='stable')
            .drop_duplicates(subset='level', keep='first')
        )

        file_paths = ordered[path_columns].values.flatten().tolist()

        # Exactly total_paths entries, wrapping around when the list is shorter
        consolidated_paths = [file_paths[i % len(file_paths)] for i in range(total_paths)]

        results.append([name] + consolidated_paths)

    columns = ['study_id'] + [f'file_path_{i+1}' for i in range(total_paths)]
    consolidated_df = pd.DataFrame(results, columns=columns)

    return consolidated_df

In [None]:
# Take the run of digits directly before '.dcm' in each chosen path as the instance number (kept as a string)
updated_space_df['instance_number'] = updated_space_df['file_path'].str.extract(r'(\d+)\.dcm$')

# Create the three target columns up front so the row-wise .loc assignment below has somewhere to write
updated_space_df[['file_path_1', 'file_path_2', 'file_path_3']] = None

# Apply the function to each row
# NOTE(review): the series folder is listed again for every row, although rows of the
# same series share the same listing.
for index, row in updated_space_df.iterrows():
    file_paths_dict = get_file_paths_dict(row['study_id'], row['series_id'])
    updated_space_df.loc[index, ['file_path_1', 'file_path_2', 'file_path_3']] = get_adjacent_file_paths(row, file_paths_dict)

In [None]:
# Keeps one row per (study, series, level) and drops the columns the model input does not need
columns_to_drop = ['series_id',
                   'instance_number',
                   'file_path',
                   'height',
                   'y_image_position',
                   'y_pixel_spacing',
                   'distance_from_reference',
                   'series_description',
                   'level_coordinate'
                  ]
# The groupby result used to be discarded, so the de-duplication never took effect.
# It is now kept in a new frame, which also leaves updated_space_df untouched so
# this cell can be re-run without failing on already-dropped columns.
# sort=False keeps the groups in their order of first appearance.
axial_input_df = (
    updated_space_df
    .groupby(['study_id', 'series_id', 'level'], sort=False)
    .first()
    .reset_index()
    .drop(columns=columns_to_drop)
)
axial_df = consolidate_file_paths(axial_input_df, levels)

In [None]:
# Show the per-study frame of axial file paths built by consolidate_file_paths
axial_df

# Predictions

In [None]:
def predict_single_image(model, image_paths, device, num_labels, num_classes, transform):
    """Run the model on one stack of DICOM images and return class probabilities.

    Every image is read, inverted when it is ``MONOCHROME1``, standardized,
    converted to an 8-bit PIL image and passed through ``transform``. The
    results are concatenated along the channel dimension into a single-sample
    batch.

    Args:
        model: ``nn.Module`` whose output has ``num_labels * num_classes`` values per sample.
        image_paths: Paths of the DICOM files, one per input channel group.
        device: Device used for the input tensor and the model.
        num_labels: Number of labels the output is reshaped into.
        num_classes: Number of classes per label; softmax is taken over this axis.
        transform: Callable applied to each PIL image; it must return a tensor for
            ``torch.cat`` to work.

    Returns:
        Nested list of probabilities (``num_labels`` lists of ``num_classes``
        values), after ``squeeze`` removed the batch axis.
    """
    images = []

    for image_path in image_paths:
        dicom_data = pydicom.dcmread(image_path)
        image = dicom_data.pixel_array.astype(float)

        if dicom_data.PhotometricInterpretation == "MONOCHROME1":
            image = np.amax(image) - image

        # Standardize the image. A constant image has zero standard deviation; map
        # it to all zeros instead of dividing by zero and casting NaN to uint8.
        centered = image - np.mean(image)
        spread = np.std(image)
        image = centered / spread if spread > 0 else np.zeros_like(centered)

        # NOTE(review): the standardized values are scaled by 255 and cast to uint8,
        # which wraps negative and large values around. Left unchanged on purpose,
        # presumably to match the preprocessing the weights were trained with — confirm.
        image = Image.fromarray((image * 255).astype(np.uint8))

        if transform:
            image = transform(image)

        images.append(image)

    # Concatenate images along the channel dimension and add the batch axis
    input_tensor = torch.cat(images, dim=0).unsqueeze(0)

    input_tensor = input_tensor.to(device)
    model = model.to(device)

    # Perform inference
    model.eval()
    with torch.no_grad():
        outputs = model(input_tensor)

    # Reshape outputs to batch_size, num_labels, num_classes
    batch_size = outputs.shape[0]
    outputs = outputs.view(batch_size, num_labels, num_classes)

    # Apply softmax over the class axis to get probabilities
    probabilities = torch.softmax(outputs, dim=2)

    # Convert tensor to list
    predictions = probabilities.squeeze().tolist()

    return predictions

In [None]:
def predictions(df, model, device, plane):
    """Predict every study in ``df`` and return one result row per condition and level.

    Args:
        df: Frame with ``study_id`` and ``file_path_N`` columns (as many as the
            plane's channel count).
        model: Model handed to ``predict_single_image``.
        device: Device handed to ``predict_single_image``.
        plane: ``'sagittal'`` selects the sagittal settings; any other value
            selects the axial settings.

    Returns:
        Frame with columns ``row_id``, ``normal_mild``, ``moderate``, ``severe``;
        ``row_id`` is ``"{study_id}_{condition}_{level}"``.
    """
    # Preprocessing applied to every image: resize, then convert to a tensor
    target_height, target_width = CFG.image_size[0], CFG.image_size[1]
    transform = transforms.Compose([
        transforms.Resize((target_height, target_width)),
        transforms.ToTensor(),
    ])

    levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']

    # Plane-specific label count, channel count and condition names
    if plane == 'sagittal':
        conditions = ['spinal_canal_stenosis', 'left_neural_foraminal_narrowing', 'right_neural_foraminal_narrowing']
        num_labels, num_images = CFG.sag_labels, CFG.sag_channels
    else:
        conditions = ['left_subarticular_stenosis', 'right_subarticular_stenosis']
        num_labels, num_images = CFG.axial_labels, CFG.axial_channels

    path_columns = [f'file_path_{i+1}' for i in range(num_images)]

    # Output slots are laid out condition by condition, with the levels inside each condition
    label_names = [(condition, level) for condition in conditions for level in levels]

    results = []
    for _, record in df.iterrows():
        study_id = record['study_id']
        image_paths = [record[column] for column in path_columns]

        preds = predict_single_image(model, image_paths, device, num_labels, CFG.num_classes, transform)

        for slot, (condition, level) in enumerate(label_names):
            normal_mild, moderate, severe = preds[slot][0], preds[slot][1], preds[slot][2]
            results.append([f"{study_id}_{condition}_{level}", normal_mild, moderate, severe])

    return pd.DataFrame(results, columns=['row_id', 'normal_mild', 'moderate', 'severe'])


In [None]:
# Run the sagittal model on the sagittal path frame ...
plane = 'sagittal'
sag_results_df = predictions(sagittal_df, sag_model, device, plane)
# ... and the axial model on the axial path frame
plane = 'axial'
axial_results_df = predictions(axial_df, axial_model, device, plane)
# Stack both result sets into one frame with a fresh 0..n-1 index
predictions_df = pd.concat([sag_results_df, axial_results_df], ignore_index=True)

In [None]:
# Divide each row by its own sum so the three probability columns add up to 1
cols_to_normalize = ['normal_mild', 'moderate', 'severe']
predictions_df[cols_to_normalize] = predictions_df[cols_to_normalize].div(predictions_df[cols_to_normalize].sum(axis=1), axis=0)
# Order the rows by row_id
predictions_df = predictions_df.sort_values(by='row_id')
predictions_df

In [None]:
# Write the submission file: no index column, floats formatted with 10 decimal places
predictions_df.to_csv('submission.csv', float_format='%.10f', index=False)