In [None]:
# Importera bibliotek
import numpy as np # math
import pandas as pd
from datetime import datetime
import xlrd
import xlwt
import openpyxl
import xlsxwriter
from rembox_integration_tools import REMboxDataQuery
from rembox_integration_tools.rembox_analysis import StudyColumn, SeriesColumn

# Names of the environment variables that are handed to REMboxDataQuery as the
# client id / client secret variables. Only the variable names live here, not
# the credentials themselves.
CLIENT_ID_ENV_VAR = "REMBOX_INT_CLIENT_ID"
CLIENT_PWD_ENV_VAR = "REMBOX_INT_CLIENT_PWD"
# URIs handed to REMboxDataQuery as token_uri, api_uri and origin_uri.
TOKEN_URI = "https://autoqa.vll.se/dpqaauth/connect/token"
API_URI = "https://rembox.vll.se/api"
ORIGIN_URI = "https://rembox.vll.se"

In [None]:
def get_data_from_fluoro(
    rembox: REMboxDataQuery,
    start_date: str = "2022-01-01T00:00:00Z",
    end_date: str = "2022-12-10T00:00:00Z",
    machine_types: tuple[str, ...] = ("XASTAT",),
    machines: tuple[str, ...] = ("U601", "U602"),
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Configure a REMbox query for fluoroscopy studies and run it.

    The function sets the inclusive machine tags and the study time interval
    on ``rembox.filter_options``, registers the study- and series-level
    columns to fetch, and returns the result of ``rembox.run_query()``.

    Note that ``rembox`` is modified in place (filter options and columns).

    Args:
        rembox: Query object to configure and execute.
        start_date: Value assigned to the study time interval start
            (defaults to the previously hard-coded "2022-01-01T00:00:00Z").
        end_date: Value assigned to the study time interval end
            (defaults to the previously hard-coded "2022-12-10T00:00:00Z").
            NOTE(review): this default is 10 December, not the end of the
            year -- confirm that this is the intended cut-off.
        machine_types: Machine types to include; default "XASTAT" (fluoro).
        machines: Machines to include; default "U601", "U602"
            (PCI-1, PCI-2).

    Returns:
        Whatever ``rembox.run_query()`` returns; annotated (and unpacked by
        the caller) as a pair of DataFrames: study data and series data.
    """
    study = StudyColumn()
    series = SeriesColumn()

    filter_options = rembox.filter_options
    filter_options.set_inclusive_tags(
        machine_types=list(machine_types),   # default: fluoro
        machines=list(machines),             # default: PCI-1, PCI-2
    )

    # Presumably keeps studies whose patient age is missing -- TODO confirm
    # against the REMbox client documentation.
    filter_options.patient_age_interval_include_nulls = True

    filter_options.study_time_interval_start_date = start_date
    filter_options.study_time_interval_end_date = end_date

    study_columns = [
        study.StudyDateTime,
        study.StudyInstanceUID,
        study.StudyId,
        study.Machine,
        study.AccessionNumber,
        study.StudyDescription,
        study.PatientAge,
        study.DoseAreaProductTotal,
        study.FluoroDoseAreaProductTotal,
        study.AcquisitionDoseAreaProductTotal,
        study.DoseRPTotal,
        study.FluoroDoseRPTotal,
        study.AcquisitionDoseRPTotal,
        study.TotalAcquisitionTime,
        study.TotalFluoroTime,
        study.TotalNumberOfIrradiationEvents,
        study.TotalNumberOfRadiographicFrames,
        study.PerformingPhysicianName,
        study.PerformingPhysicianIdentificationSequence,
        study.PatientDbId,
        study.PatientId,
    ]
    series_columns = [
        series.AcquisitionProtocol,
        series.AcquisitionPlaneSeries,
        series.DoseRP,
        series.DateTimeStarted,
    ]

    # Study columns first, then series columns: same order as before.
    rembox.add_columns(columns=study_columns + series_columns)

    return rembox.run_query()

In [None]:
# Create the REMbox query client. Credentials are not passed directly; the
# client is given the names of the environment variables to read them from.
rembox = REMboxDataQuery(
    client_id_environment_variable=CLIENT_ID_ENV_VAR,
    client_secret_environment_variable=CLIENT_PWD_ENV_VAR,
    token_uri=TOKEN_URI,
    api_uri=API_URI,
    origin_uri=ORIGIN_URI
)

# Run the fluoroscopy query; the result is unpacked into study-level and
# series-level data.
study_data, series_data = get_data_from_fluoro(rembox=rembox)

In [None]:
# Inspect the study-level data returned by the query.
# NOTE(review): the requested columns include patient and physician
# identifiers, so the rendered output may contain them -- clear cell outputs
# before sharing the notebook.
study_data

In [None]:
# Count the number of procedures (non-null patientDbId entries) and the
# number of unique patients in the REMbox study data.
antal, patienter = (
    study_data['patientDbId'].count(),
    study_data['patientDbId'].nunique(),
)

# One value per line: procedures first, then unique patients.
print(antal, patienter, sep="\n")

In [None]:
# Load the export from SCAAR (Excel file, path relative to the notebook).
scaar_data_path = 'input_data/SCAAR_DSN_2022.xls'

scaar = pd.read_excel(scaar_data_path)

In [None]:
# Inspect the loaded SCAAR data.
scaar

In [None]:
# Show the column names of both frames before renaming.
print(study_data.columns, scaar.columns, sep="\n")

# Give the SCAAR id and procedure-date columns the same names as the
# corresponding REMbox columns so the two frames can be joined.
scaar.rename(
    columns={
        'Pseudonymiserat id': 'patientId',
        'Datum för procedur': 'studyDateTime',
    },
    inplace=True,
)

# Verify that the rename took effect.
print(scaar.columns)

In [None]:
# Build a single key column so the frames can be merged on patient id and date
# at once: patient id followed by the first 10 characters of the study
# timestamp (presumably the YYYY-MM-DD date part -- verify against the data).
study_data["MergeColumn"] = (
    study_data["patientId"] + study_data["studyDateTime"].str.slice(stop=10)
)
# Sanity check: look at the key of the row labelled 0.
study_data["MergeColumn"][0]


In [None]:
# Build the same patient-id + date key on the SCAAR side so both frames can be
# merged on patient id and date at once.
# The date column is overwritten with 'YYYY-MM-DD' strings. Parsing with
# pd.to_datetime first keeps this cell safe to re-run: on a second run the
# column already holds strings, and a bare `.dt` accessor would raise.
scaar['studyDateTime'] = pd.to_datetime(scaar['studyDateTime']).dt.strftime('%Y-%m-%d')
scaar["MergeColumn"] = scaar.patientId + scaar.studyDateTime
# Sanity check: look at the key of the row labelled 0.
scaar["MergeColumn"][0]


In [None]:
# Inner-join the REMbox study data with the SCAAR rows on the shared key
# column; only rows whose key occurs in both frames are kept.
# NOTE(review): pandas matches null keys against each other in a merge --
# confirm that MergeColumn contains no missing values in either frame.
study_data_scaar = study_data.merge(scaar, how="inner", on="MergeColumn")
# Print the resulting columns to check that the join worked.
print(study_data_scaar.columns)

In [None]:
# Count the number of procedures (non-null patientDbId entries) and the
# number of unique patients that remain after the join with SCAAR.
antal, patienter = (
    study_data_scaar['patientDbId'].count(),
    study_data_scaar['patientDbId'].nunique(),
)

# One value per line: procedures first, then unique patients.
print(antal, patienter, sep="\n")

In [None]:
import xlwt
import openpyxl
import xlsxwriter

# Write the merged REMbox/SCAAR table to an Excel workbook.
# The writer is used as a context manager so the file is saved and closed on
# exit, even if to_excel raises; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('output_data/DSN_PCI_2022_new_V2.xlsx', engine='xlsxwriter') as writer:
    study_data_scaar.to_excel(writer)