## Import packages and data from REMbox

In [None]:
import pandas as pd 
import math
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from rembox_integration_tools import REMboxDataQuery
from rembox_integration_tools.rembox_analysis import StudyColumn, SeriesColumn
from pathlib import Path

# If plotly is preferred, write hvplot.extension("plotly") instead
# NOTE(review): `hvplot` is not among the imports visible in this cell, so this line
# raises NameError unless hvplot has been imported elsewhere — confirm.
hvplot.extension("bokeh")

# Names of the environment variables passed to REMboxDataQuery for the client id and
# client secret; the credential values themselves are not stored in this notebook.
CLIENT_ID_ENV_VAR = "REMBOX_INT_CLIENT_ID"
CLIENT_PWD_ENV_VAR = "REMBOX_INT_CLIENT_PWD"
TOKEN_URI = "https://autoqa.vll.se/dpqaauth/connect/token" # Where the access token is found
API_URI = "https://rembox.vll.se/api" # Where the API is located
ORIGIN_URI = "https://rembox.vll.se" # Which URL

# Query client used by the rest of the notebook.
rembox = REMboxDataQuery(
    client_id_environment_variable=CLIENT_ID_ENV_VAR,
    client_secret_environment_variable=CLIENT_PWD_ENV_VAR,
    token_uri=TOKEN_URI,
    api_uri=API_URI,
    origin_uri=ORIGIN_URI,
    # NOTE(review): certificate verification is switched off here — confirm this is intended.
    verify_ssl_cert=False
)

# Helper objects whose attributes name the study-level and series-level columns
# that can be requested from REMbox.
valid_study_columns = StudyColumn()
valid_series_columns = SeriesColumn()

In [None]:
rembox.reset_filter_options()


def get_data_from_fluoro(rembox: REMboxDataQuery) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Configure filters and columns on ``rembox`` and run the query.

    The filter restricts the query to machine type "XASTAT" and the machines
    "U601" and "U602", with a study time interval from 2024-01-01 to 2024-12-31.
    The requested columns are taken from the module-level ``valid_study_columns``
    and ``valid_series_columns`` helpers.

    Args:
        rembox: Query client that is configured in place and then executed.

    Returns:
        The result of ``rembox.run_query()``; annotated (and unpacked by the
        caller) as a pair of study-level and series-level dataframes.
    """
    filters = rembox.filter_options

    # Machine type codes listed by the original author:
    #   CT-CT, Fluoroscopic-XASTAT, Mobile C-arm-XAMOB, Conventional-DX,
    #   Mammography-MG, Intraoral-IO, Panoramic-PX, Dental Cone Beam CT-DCBCT,
    #   PET-PET, PET/CT-PETCT, SPECT-SPECT, SPECT/CT-SPECTCT, Nuclear Medicine-NM,
    #   Mobile X-ray-DXMOB, Conventional with fluoro-DXXA
    filters.set_inclusive_tags(
        machine_types=["XASTAT"],
        machines=["U601", "U602"],
    )
    # rembox.filter_options.set_exclusive_tags() can be used for filters that
    # should exclude a given criterion instead.

    filters.patient_age_interval_include_nulls = True

    # NOTE(review): the end bound is midnight at the start of 31 December; whether
    # studies performed during that day are returned depends on REMbox — confirm.
    filters.study_time_interval_start_date = "2024-01-01T00:00:00Z"
    filters.study_time_interval_end_date = "2024-12-31T00:00:00Z"

    rembox.deanonymize_performing_physician = True

    study = valid_study_columns
    series = valid_series_columns

    # Study-level columns, in the order they are requested.
    # Not requested (commented out in the original selection): CalibrationDate,
    # CalibrationFactor, CalibrationProtocol, CalibrationResponsibleParty,
    # CalibrationUncertainty, HalfValueLayer, MeanBodyThickness,
    # MaximumBodyThickness, MinimumBodyThickness, PatientModel,
    # PatientsBodyMassIndex, PatientsName,
    # PerformingPhysicianIdentificationSequence, PregnancyStatus,
    # ReferenceAuthority, ReferencedSopInstanceUid, ReferencePointDefinitionCode,
    # RequestedProcedureCode, RequestedProcedureCodeMeaning,
    # StartOfXrayIrradiation.
    study_level = [
        study.StudyDateTime,  # deliberately placed first
        study.AccessionNumber,
        study.AcquisitionDoseAreaProductTotal,
        study.AcquisitionDoseRPTotal,
        study.AcquisitionPlane,
        study.City,
        study.ConvFluoroClassifier,
        study.DoseAreaProductTotal,
        study.DoseMeasurementDevice,
        study.DoseRPTotal,
        study.FluoroDoseAreaProductTotal,
        study.FluoroDoseRPTotal,
        study.HasIntent,
        study.Hospital,
        study.Id,
        study.Machine,
        study.PatientAge,
        study.PatientAgeUnit,
        study.PatientDbId,
        study.PatientId,
        study.PatientsSex,
        study.PatientsSize,
        study.PatientsSizeDate,
        study.PatientsSizeSource,
        study.PatientsWeight,
        study.PatientsWeightDate,
        study.PatientsWeightSource,
        study.PSD,  # original author's open question: what is PSD?
        study.PerformingPhysicianName,
        study.ProcedureCode,
        study.ProcedureCodeMeaning,
        study.ProcedureReported,
        study.ProtocolCode,
        study.ProtocolCodeMeaning,
        study.ReferencePointDefinition,
        study.ReferringPhysicianIdentificationSequence,
        study.ReferringPhysiciansName,
        study.ScopeOfAccumulation,
        study.SoftwareVersions,
        study.StudyDescription,
        study.StudyId,
        study.StudyInstanceUID,
        study.TotalAcquisitionTime,
        study.TotalFluoroTime,
        study.TotalNumberOfIrradiationEvents,
        study.TotalNumberOfRadiographicFrames,
    ]

    # Series-level columns, in the order they are requested.
    # Not requested (commented out in the original selection): AcquisitionType,
    # ApplicationName (holds the protocol name for Azurion), AnatomicalStructure,
    # AnodeTargetMaterial, ColumnAngulation, CrdrMechanicalConfiguration,
    # DerivedEffectiveDiameter, DeviationIndex, DistanceSourceToTablePlane,
    # EffectiveDose, EffectiveDoseConversionFactor, EntranceExposureAtRP,
    # ExposedRange, ExposureIndex, ExposureTime, ExposureTimePerRotation,
    # FluoroFlavour (holds the pulse rate for Azurion), FrameOfReferenceUID,
    # IdentificationOfTheXraySource, ImageView, ImageViewModifier,
    # IrradiationEventLabel, LabelType, Laterality, MaximumXrayTubeCurrent,
    # MeasurementMethodDose, NominalCollimationWidth,
    # NominalTotalCollimationWidth, NumberOfXraySources,
    # PositionerPrimaryEndAngle, PositionerSecondaryEndAngle, ProcedureContext,
    # ProjectionEponymousName, ReconstructionAlgortihm, TableHeightEndPosition,
    # TableLateralEndPosition, TableLongitudinalEndPosition, TargetExposureIndex,
    # WaterEquivalentDiameter, WedMeasurementMethod,
    # XrayFilterAluminumEquivalent, XrayGrid, XrayGridAspectRatio,
    # XrayGridFocalDistance, XrayGridPitch, XrayModulationType.
    series_level = [
        series.AcquisitionPlaneSeries,
        series.AcquisitionProtocol,
        series.AverageXrayTubeCurrent,
        series.CollimatedFieldArea,
        series.CollimatedFieldHeight,
        series.CollimatedFieldWidth,
        series.DateTimeStarted,
        series.DistanceSourceToDetector,
        series.DistanceSourceToIsocenter,
        series.DistanceSourceToReferencePoint,
        series.DoseAreaProduct,
        series.DoseRP,
        series.Exposure,
        series.FluoroMode,
        series.IrradiationDuration,
        series.IrradiationEventType,
        series.IrradiationEventUID,
        series.kVp,
        series.NumberOfPulses,
        series.PatientEquivalentThickness,
        series.PatientOrientation,
        series.PatientOrientationModifier,
        series.PatientTableRelationship,
        series.PositionerPrimaryAngle,
        series.PositionerSecondaryAngle,
        series.PulseRate,
        series.PulseWidth,
        series.ReferencePointDefinitionText,
        series.SpotSize,
        series.TableCradleTiltAngle,
        series.TableHeadTiltAngle,
        series.TableHeightPosition,
        series.TableHorizontalRotationAngle,
        series.TableLateralPosition,
        series.TableLongitudinalPosition,
        series.TargetRegion,
        series.XrayFilterMaterial,
        series.XrayFilterThicknessMaximum,
        series.XrayFilterThicknessMinimum,
        series.XrayFilterType,
        series.XrayTubeCurrent,
    ]

    rembox.add_columns(columns=[*study_level, *series_level])

    query_result = rembox.run_query()
    return query_result

In [None]:
# Fetch data from REMbox; the result is unpacked into a study-level and a
# series-level object.
study_data, series_data = get_data_from_fluoro(rembox=rembox)

## Manage dataframes

In [None]:
# Work on deep copies so the frames fetched from REMbox stay untouched:
# `study` is the study-level copy, `series` the series-level copy.
study = study_data.copy(deep=True)
series = series_data.copy(deep=True)

In [None]:
# Count the number of procedures and the number of unique patients.
patient_db_ids = study["patientDbId"]
exams = patient_db_ids.count()       # non-null patientDbId entries
patients = patient_db_ids.nunique()  # distinct values; NaN is ignored by default

print(exams, "undersökningar/ingrepp fördelat på", patients, "patienter")

# Remove the duplicate rows where the 1 mm Al filter is shown instead of the Cu filter
# by keeping only rows whose filter material is one of the copper labels.
# TODO: Create separate columns for the Al and Cu filters so the rows are not doubled.
copper_materials = ['Copper', 'Copper or Copper compound']
series = series[series['xrayFilterMaterial'].isin(copper_materials)]
print(len(series), "antal irradiation events")

## Join SCAAR and REMbox-data

Gå till https://www.ucr.uu.se/swedeheart/ och logga in med SITHS-kort.\
Gå till "Rapporter".\
Välj "Export till Excel Angio-PCI".\
Rapportdatum: t.ex. 2024-01-01 – 2025-01-01\
Angio/PCI: Enbart angio\
Procedur/Segment: Procedur\
Klicka på "Beställ" och ladda ner den resulterande Excel-filen.\
Öppna Excel-filen, radera de tre första kolumnerna ("Personnr eller motsv", "Typ av personnummer" och "Födelsedatum") och spara filen i mappen input_data.


In [None]:
# Read the SCAAR export from Excel.
SCAAR_data_path = "C:/Projekt/GIT/rvbrtg/Data/input_data/SCAAR_DSN_2024_Patid.xlsx"
SCAAR_data = pd.read_excel(SCAAR_data_path)

# SCAAR column header -> short column name used in the rest of the notebook.
# The dose column is given its final name here; its values are converted below.
scaar_column_names = {
    "PatId": "patientId",
    "Kön": "Sex",
    "Ålder vid procedur": "Age",
    "Datum för procedur": "studyDateTime",
    "Längd (cm)": "Length_cm",
    "Vikt (kg)": "Weight_kg",
    "Angiograför": "Operator",
    "Punktionställe": "Accesspoint",
    "Labnamn": "Lab",
    "Stråldos (µGym2)": "KAP_Gycm2",
    "Genomlysningstid (h:mm:ss)": "Fluorotime_h_mm_ss",
}

# Copy the relevant columns into a subset and apply the short names positionally.
SCAAR_data_subset = SCAAR_data[list(scaar_column_names)].copy()
SCAAR_data_subset.columns = list(scaar_column_names.values())

# Dose values may use a decimal comma: normalise to a decimal point, convert to float,
# then scale from µGy·m² to Gy·cm² (1 µGy·m² = 0.01 Gy·cm²).
kap_ugym2 = SCAAR_data_subset["KAP_Gycm2"].replace(',', '.', regex=True).astype(float)
SCAAR_data_subset["KAP_Gycm2"] = kap_ugym2 * 0.01

# To inspect the result:
#SCAAR_data.head()
#SCAAR_data_subset.head()

In [None]:
def _as_naive_datetime(values):
    """Parse ``values`` as datetimes and drop any timezone information."""
    return pd.to_datetime(values).dt.tz_localize(None)


# Bring both timestamp columns to timezone-naive datetimes so they can be compared.
# NOTE(review): tz_localize(None) removes the timezone without shifting the clock time.
# If the REMbox timestamps are UTC while SCAAR dates are local, a study close to midnight
# could land on a different calendar date and fail to match — confirm.
study['studyDateTime'] = _as_naive_datetime(study['studyDateTime'])
SCAAR_data_subset['studyDateTime'] = _as_naive_datetime(SCAAR_data_subset['studyDateTime'])

# The merge is done on calendar date, so keep only the date part as a separate key column.
study['studyDate'] = study['studyDateTime'].dt.date
SCAAR_data_subset['studyDate'] = SCAAR_data_subset['studyDateTime'].dt.date

# Inner merge on patient id and date: only studies found in both sources are kept.
# NOTE(review): a patient with several studies or several SCAAR rows on the same date
# yields one output row per combination — confirm this cannot occur or is acceptable.
scaar_merge_columns = ['patientId', 'studyDate', 'KAP_Gycm2', 'Weight_kg', 'Length_cm', 'Age']
matching_subset = pd.merge(
    study,
    SCAAR_data_subset[scaar_merge_columns],
    how='inner',
    on=['patientId', 'studyDate'],
)

# To inspect the result:
#matching_subset.head()


## Format data, filter, order and write to file

In [None]:
# Select the columns in the order used for the SSM report and express the
# fluoroscopy time in minutes.
ssm_column_order = [
    "fluoroDoseAreaProductTotal",
    "fluoroDoseRPTotal",
    "totalFluoroTime",
    "acquisitionDoseAreaProductTotal",
    "acquisitionDoseRPTotal",
    "totalNumberOfIrradiationEvents",
    "Age",
    "patientsSex",
    "Length_cm",
    "Weight_kg",
    "machine",
    "doseAreaProductTotal",
]
matching_subset_SSM = matching_subset.loc[:, ssm_column_order].copy()

# Divide by 60 and round to two decimals (conversion to minutes).
fluoro_time_minutes = matching_subset_SSM["totalFluoroTime"].div(60).round(2)
matching_subset_SSM["totalFluoroTime"] = fluoro_time_minutes

In [None]:
# Filter data


def _rank_by_distance_to_median_dap(frame, machine):
    """Return the rows of ``frame`` for ``machine`` ordered by closeness to the median DAP.

    Args:
        frame: DataFrame with at least the columns "machine" and "doseAreaProductTotal".
        machine: Value of the "machine" column to select (e.g. "U601").

    Returns:
        A new DataFrame (the input is not modified) with an extra column "DAPdiff"
        holding |doseAreaProductTotal - median(doseAreaProductTotal)| for that
        machine, sorted ascending on "DAPdiff" with a fresh 0..n-1 index.
    """
    # Explicit copy: the selection is modified below, and assigning into a slice of
    # another frame triggers pandas' SettingWithCopyWarning.
    selected = frame[frame["machine"] == machine].copy()
    median_dap = selected["doseAreaProductTotal"].median()
    selected["DAPdiff"] = (selected["doseAreaProductTotal"] - median_dap).abs()
    return selected.sort_values("DAPdiff", ignore_index=True)


# Keep patients weighing strictly between 60 and 90 kg. Both conditions are taken from
# the frame being filtered, so the mask always shares its index.
weight_kg = matching_subset_SSM["Weight_kg"]
matching_subset_weight = matching_subset_SSM[(weight_kg > 60) & (weight_kg < 90)].copy()

# Drop the timezone from any timezone-aware datetime column. "datetimetz" selects
# tz-aware columns regardless of which timezone they carry.
# (Presumably needed because the Excel export cannot store tz-aware datetimes.)
for col in matching_subset_weight.select_dtypes(include=["datetimetz"]).columns:
    matching_subset_weight[col] = matching_subset_weight[col].dt.tz_localize(None)

# One ranked table per machine: rows closest to that machine's median total DAP first.
matching_subset_U601 = _rank_by_distance_to_median_dap(matching_subset_weight, "U601")
matching_subset_U602 = _rank_by_distance_to_median_dap(matching_subset_weight, "U602")

# To inspect the result:
#matching_subset_U601.head()

In [None]:
# Export to Excel
# Writes one workbook per machine (U601 and U602) to fixed absolute paths.
# NOTE(review): the output directory is hard-coded and is assumed to exist already — confirm.

matching_subset_U601.to_excel("C:/Projekt/GIT/rvbrtg/Data/output_data/DSN_2024_U601.xlsx")
matching_subset_U602.to_excel("C:/Projekt/GIT/rvbrtg/Data/output_data/DSN_2024_U602.xlsx")