In [7]:
import os
import shutil
from collections import defaultdict

import polars as pl
import pydicom

import keras

import kaggle_evaluation.rsna_inference_server

In [None]:
# Name of the identifier column that leads every prediction row.
ID_COL = 'SeriesInstanceUID'

# Prediction columns, in the exact order used for the output frame's schema.
# Vessels that exist on both sides share a row (Left first, then Right).
# predict() relies on 'Aneurysm Present' being the final entry.
LABEL_COLS = [
    'Left Infraclinoid Internal Carotid Artery', 'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery', 'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery', 'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery', 'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery', 'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation',
    'Aneurysm Present',
]

# Every tag, apart from PixelData and SeriesInstanceUID, that a test-set
# .dcm file may carry. predict() reads each of these attributes from every
# slice, recording None when a tag is absent. Order is kept alphabetical.
DICOM_TAG_ALLOWLIST = [
    'BitsAllocated', 'BitsStored', 'Columns', 'FrameOfReferenceUID', 'HighBit',
    'ImageOrientationPatient', 'ImagePositionPatient', 'InstanceNumber',
    'Modality', 'PatientID', 'PhotometricInterpretation', 'PixelRepresentation',
    'PixelSpacing', 'PlanarConfiguration', 'RescaleIntercept', 'RescaleSlope',
    'RescaleType', 'Rows', 'SOPClassUID', 'SOPInstanceUID', 'SamplesPerPixel',
    'SliceThickness', 'SpacingBetweenSlices', 'StudyInstanceUID',
    'TransferSyntaxUID',
]

# Model cache: filled on the first predict() call and reused for every later
# series, so the saved model is only read from disk once.
loaded_model = None

# Each prediction (except the very first) must be returned within 30 minutes of the series being provided.
def predict(series_path: str) -> pl.DataFrame:
    """Predict the label probabilities for one DICOM series.

    Args:
        series_path: Directory containing the series' ``.dcm`` files. Its
            base name is used as the SeriesInstanceUID.

    Returns:
        A one-row frame with one column per entry of ``LABEL_COLS``; the
        ID column is dropped before returning.
    """
    global loaded_model

    # normpath strips a trailing separator, which would otherwise make
    # basename() return an empty series id.
    series_id = os.path.basename(os.path.normpath(series_path))

    # NOTE(review): files are ordered by path, which is not necessarily the
    # anatomical slice order - sort on a position tag if the model needs it.
    all_filepaths = sorted(
        os.path.join(root, file)
        for root, _, files in os.walk(series_path)
        for file in files
        if file.endswith('.dcm')
    )

    # Collect the allow-listed tags and the decoded image of every slice.
    # `tags` is not consumed by the model yet; it is kept for feature work.
    tags = defaultdict(list)
    tags['SeriesInstanceUID'] = series_id
    all_pixel_data = []
    for filepath in all_filepaths:
        ds = pydicom.dcmread(filepath, force=True)
        tags['filepath'].append(filepath)
        for tag in DICOM_TAG_ALLOWLIST:
            tags[tag].append(getattr(ds, tag, None))
        # Assumes predict_agg takes a list of per-slice pixel arrays - TODO
        # confirm against its definition.
        all_pixel_data.append(ds.pixel_array)

    if loaded_model is None:
        # Initialize model (first call only).
        loaded_model = keras.models.load_model('data_gen/saved_model.keras')

    # Convert to proper submission format.
    # For now, zeroes are predicted for all 13 specific brain regions,
    # since the model only predicts the 'Aneurysm Present' label.
    aneurysm_present_prediction = predict_agg(loaded_model, all_pixel_data)
    # predict_agg's return type is not visible here: accept a bare scalar,
    # a one-element sequence, or a one-element array.
    try:
        aneurysm_score = float(aneurysm_present_prediction)
    except TypeError:
        (only_value,) = aneurysm_present_prediction
        aneurysm_score = float(only_value)

    label_preds = [0.0] * (len(LABEL_COLS) - 1) + [aneurysm_score]
    predictions = pl.DataFrame(
        data=[[series_id] + label_preds],
        schema=[ID_COL, *LABEL_COLS],
        orient='row',
    )
    # ----------------------------------------------------------------------
    assert predictions.columns == [ID_COL, *LABEL_COLS]

    # ----------------------------- IMPORTANT ------------------------------
    # You MUST have the following code in your `predict` function
    # to prevent "out of disk space" errors. This is a temporary workaround
    # as we implement improvements to our evaluation system.
    shutil.rmtree('/kaggle/shared', ignore_errors=True)
    # ----------------------------------------------------------------------

    return predictions.drop(ID_COL)

In [None]:
# Hand predict() to the competition's inference server.
inference_server = kaggle_evaluation.rsna_inference_server.RSNAInferenceServer(predict)

if not os.environ.get('KAGGLE_IS_COMPETITION_RERUN'):
    # Not a competition rerun: run the local gateway, then show the
    # resulting submission file (presumably written by that gateway run).
    inference_server.run_local_gateway()
    display(pl.read_parquet('/kaggle/working/submission.parquet'))
else:
    # Competition rerun: start serving predictions.
    inference_server.serve()