In [None]:

#import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import chardet
from datetime import datetime, timedelta
import locale
from sklearn.decomposition import PCA
from sklearn import preprocessing
import prince

#pd.set_option("future.no_silent_downcasting", True)

# Pre-op data

### Take the unique patients in the population who appear in the cleaned anesthesia data set and work from there

In [None]:
# Load the pre-filtered population table; it is reduced to one row per CPR a few cells below.
populationDF = pd.read_csv(r'FilteredData\Population.csv')
populationDF

In [None]:
# Number of distinct patient identifiers in the population table
len(set(populationDF['CPR']))

In [None]:
# Load the cleaned anesthesia events; their CPRs define which patients are kept in the cells below.
cleanAnesthesiaEventDF = pd.read_csv(r'FilteredData\CleanAnesthesiaTime.csv')
cleanAnesthesiaEventDF

In [None]:
# Pre-operation conference measurements, restricted to patients that have a cleaned anesthesia record
preOpConferenceRaw = pd.read_csv(r'FilteredData\Preoperation Conference.csv')
hasAnesthesiaRecord = preOpConferenceRaw['CPR'].isin(cleanAnesthesiaEventDF['CPR'])
preOpConferenceDF = preOpConferenceRaw[hasAnesthesiaRecord]
preOpConferenceDF

In [None]:
# Keep only population rows whose patient also has a cleaned anesthesia record, then count the distinct patients
inAnesthesiaData = populationDF['CPR'].isin(cleanAnesthesiaEventDF['CPR'])
populationDF = populationDF[inAnesthesiaData]
len(set(populationDF['CPR']))

In [None]:
# Restrict to patients with pre-operation conference data and keep the first row per CPR.
# reset_index() without drop keeps the previous row labels in an 'index' column.
hasPreOpConference = populationDF['CPR'].isin(preOpConferenceDF['CPR'])
populationDF = (
    populationDF[hasPreOpConference]
    .drop_duplicates(subset=['CPR'])
    .reset_index()
)
populationDF

In [None]:
# Parse the operation and discharge timestamps so they can be subtracted in the next cell.
# NOTE(review): no explicit format/dayfirst is given — confirm the CSV dates parse unambiguously.
for dateColumn in ('OperationDate', 'DischargeDate'):
    populationDF[dateColumn] = pd.to_datetime(populationDF[dateColumn])


In [None]:
# Whole days between the operation date and the discharge date
timeUntilDischarge = populationDF['DischargeDate'] - populationDF['OperationDate']
populationDF['DurationInDaysAfterOperation'] = timeUntilDischarge.dt.days
populationDF

#### CPR, Age, Sex

In [None]:
# Demographic base table: CPR, age and sex for every patient in the population.
preOpData = populationDF[['CPR' , 'Age' , 'Sex']].copy()
# drop_duplicates / reset_index return new frames, so the result must be assigned back;
# drop=True avoids adding a stray 'index' column.
preOpData = preOpData.drop_duplicates(subset=['CPR']).reset_index(drop=True)
preOpData

In [None]:
# How many of the base-table patients have pre-operation conference rows
inBaseTable = preOpConferenceDF['CPR'].isin(preOpData['CPR'])
count = preOpConferenceDF[inBaseTable]
len(set(count['CPR']))



In [None]:
# Last recorded height ('Højde') and weight ('Vægt') per patient, produced in the same row
# order as preOpData so the lists can be assigned to it as columns.
# Iterating preOpConferenceDF.groupby('CPR') would yield patients in sorted-CPR order and may
# include patients that are not in preOpData, which puts values on the wrong rows or fails
# on a length mismatch.
def lastMeasurementPerPatient(measurement) :
    """Return a Series indexed by CPR with the value of each patient's last `measurement` row."""
    rows = preOpConferenceDF[preOpConferenceDF['Measurement'] == measurement]
    return rows.drop_duplicates(subset = ['CPR'], keep = 'last').set_index('CPR')['Value']

# Patients without the measurement get NaN
heightList = preOpData['CPR'].map(lastMeasurementPerPatient('Højde')).tolist()
weightList = preOpData['CPR'].map(lastMeasurementPerPatient('Vægt')).tolist()

# Number of patients without a height / weight measurement
print(int(pd.isna(heightList).sum()))
print(int(pd.isna(weightList).sum()))

In [None]:
# Attach the measurements and keep only patients for which every column has a value
preOpData = (
    preOpData
    .assign(Height = heightList, Weight = weightList)
    .dropna()
    .reset_index(drop = True)
)
preOpData

### Alcohol and smoking

In [None]:
# Load the alcohol and smoking registrations ('CPR', 'AlcoholPerWeek' and 'Smoking' are used below).
alcAndSmoking = pd.read_csv(r'FilteredData\Alcohol and Smoking.csv')

In [None]:
# Translate the numeric smoking codes to readable labels (codes missing from the mapping become NaN).
smokingMapping = {0: 'No', 1: 'Yes', 2: 'Not anymore', -1: 'Not asked'}
alcAndSmoking['Smoking'] = alcAndSmoking['Smoking'].map(smokingMapping)

# Keep only patients from the pre-op table and reduce to one row per patient with the column-wise maximum.
# NOTE(review): 'Smoking' already holds strings here, so max() compares them alphabetically
# ('Yes' > 'Not asked' > 'Not anymore' > 'No') — confirm this is the intended precedence.
alcAndSmoking = alcAndSmoking[alcAndSmoking['CPR'].isin(preOpData['CPR'])]
highestAlcAndSmoking = alcAndSmoking.groupby('CPR')[['AlcoholPerWeek' , 'Smoking']].max()
highestAlcAndSmoking

In [None]:
# Distinct patients with an alcohol/smoking registration
len(set(alcAndSmoking['CPR']))

In [None]:
# Patients per smoking label (NaN labels are not counted by value_counts)
highestAlcAndSmoking['Smoking'].value_counts()

In [None]:
# Number of patients with a non-missing alcohol value
highestAlcAndSmoking['AlcoholPerWeek'].notna().sum()

In [None]:
# Bucket the reported weekly alcohol amounts and plot how many registrations fall in each bucket
bins = [-1, 0, 10, 20, 30, float('inf')]
labels = ['0', '0.5-10', '11-20', '21-30', '31+']

alcoholBuckets = pd.cut(alcAndSmoking['AlcoholPerWeek'], bins=bins, labels=labels, include_lowest=True)
bucketCounts = alcoholBuckets.value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
hist = bucketCounts.plot(kind='bar', ax=ax)

ax.set_xlabel('Alcohol consumed per week')
ax.set_ylabel('Frequency')
ax.set_title('Distribution')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Rename the smoking column ahead of the one-hot encoding and define the alcohol bins used by pd.cut in the next cell.
highestAlcAndSmoking = highestAlcAndSmoking.rename(columns = {"Smoking" : "SmokingStatus"})
# With pd.cut's default right-closed intervals: (-1, 0], (0, 10], (10, 20], (20, 30], (30, inf]
bins = [ -1, 0, 10, 20, 30, float('inf')]

labels = ['0', '0.5-10', '11-20', '21-30', '31+']

In [None]:
# Replace the weekly alcohol amount by its bucket label, then one-hot encode the bucket and the smoking status.
# NOTE(review): unlike the histogram cell, this cut does not pass include_lowest, so a value of exactly -1
# falls outside the first interval and becomes NaN — confirm which behaviour is wanted.
highestAlcAndSmoking['AlcoholPerWeek'] = pd.cut(highestAlcAndSmoking['AlcoholPerWeek'], bins=bins, labels=labels)
alcAndSmokingEncoded = pd.get_dummies(highestAlcAndSmoking, columns=['AlcoholPerWeek'], drop_first=False)
# reset_index() turns the CPR group index back into a regular 'CPR' column.
alcAndSmokingEncoded = pd.get_dummies(alcAndSmokingEncoded, columns=['SmokingStatus'], drop_first=False).reset_index()
#alcAndSmokingEncoded = alcAndSmokingEncoded.drop(columns = 'CPR')
alcAndSmokingEncoded


In [None]:
# Add the alcohol/smoking indicator columns to the pre-op table.
# The two frames carry unrelated row labels, so assigning column by column would pair rows by
# label instead of by patient (and would overwrite 'CPR' itself). An inner merge on CPR keeps
# exactly the patients present in both tables and puts every value on the right patient.
alcAndSmokingEncoded = alcAndSmokingEncoded[alcAndSmokingEncoded['CPR'].isin(preOpData['CPR'])]
encodedColumns = [column for column in alcAndSmokingEncoded.columns if column != 'CPR']
preOpData = (
    preOpData
    .drop(columns = encodedColumns, errors = 'ignore')   # keeps the cell safe to re-run
    .merge(alcAndSmokingEncoded, on = 'CPR', how = 'inner', validate = 'one_to_one')
    .reset_index(drop = True)
)
preOpData

## Diagnoses

In [None]:
# Diagnoses, restricted to the patients still present in the pre-op table
allDiagnosesDF = pd.read_csv(r'FilteredData\Diagnoses of Patients.csv')
diagnosesDF = allDiagnosesDF[allDiagnosesDF['CPR'].isin(preOpData['CPR'])]
diagnosesDF

In [None]:
# Maps the raw diagnosis names to the grouped labels used as features.
# 'Type 2 myokardieinfarkt' (a myocardial infarction) and 'Kronisk obstruktiv lungesygdom' (COPD)
# get their own labels instead of being counted as type 2 diabetes / overweight; this adds the
# one-hot columns DiagnosisName_T2MyocardialInfarction and DiagnosisName_COPD downstream.
diagnosesDic = {'Essentiel hypertension (DI109)'                                        : 'EssentialHypertension',
                'Kronisk iskæmisk hjertesygdom UNS (DI259)'                             : 'ChronicCAD', # Chronic coronary artery disease
                'Hyperkolesterolæmi (DE780)'                                            : 'HyperChol',  # Elevated blood lipids
                'Hyperlipidæmi UNS (DE785)'                                             : 'HyperChol',  # Elevated blood lipids
                'Type 1-diabetes med anden komplikation (DE106)'                        : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med fodsår (DE105B)'                                   : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med ketoacidose (DE101)'                               : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med komplikation UNS (DE108)'                          : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med komplikationer i perifere karsystem (DE105)'       : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med multiple komplikationer (DE107)'                   : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med neurologisk komplikation (DE104)'                  : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med nyrekomplikation (DE102)'                          : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med perifer angiopati (DE105A)'                        : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes med øjenkomplikation (DE103)'                          : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes uden komplikationer (DE109)'                           : 'T1Diabetes', # Type 1 diabetes
                'Type 1-diabetes UNS (DE109A)'                                          : 'T1Diabetes', # Type 1 diabetes
                'Type 2 myokardieinfarkt (DI219A)'                                      : 'T2MyocardialInfarction', # Type 2 myocardial infarction (not diabetes)
                'Type 2-diabetes (DE11)'                                                : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med anden komplikation (DE116)'                        : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med fodsår (DE115B)'                                   : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med gangræn (DE115C)'                                  : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med ketoacidose (DE111)'                               : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med komplikation UNS (DE118)'                          : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med komplikationer i perifere karsystem (DE115)'       : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med multiple komplikationer (DE117)'                   : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med neurologisk komplikation (DE114)'                  : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med nyrekomplikation (DE112)'                          : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med perifer angiopati (DE115A)'                        : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes med øjenkomplikation (DE113)'                          : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes uden komplikationer (DE119)'                           : 'T2Diabetes', # Type 2 diabetes
                'Type 2-diabetes UNS (DE119A)'                                          : 'T2Diabetes', # Type 2 diabetes
                'Overvægt UNS (DE669)'                                                  : 'Overweight', # Overweight
                'Kronisk obstruktiv lungesygdom (DJ44)'                                 : 'COPD',       # Chronic obstructive pulmonary disease (not overweight)
                'Fedme (BMI 30-34,9) (DE660B)'                                          : 'Overweight', # Overweight
                'Overvægt (BMI 25-29,9) (DE660A)'                                       : 'Overweight', # Overweight
                'Overvægt og fedme (DE66)'                                              : 'Overweight', # Overweight
                'Anden overvægt eller fedme (DE668)'                                    : 'Overweight', # Overweight
                'Akut nyreinsufficiens UNS (DN179)'                                     : 'AcuteKidneyFailure', # Acute kidney failure
                'Kronisk nyreinsufficiens UNS (DN189)'                                  : 'ChronicKidneyFailure', # Chronic kidney failure
                'Nyreinsufficiens UNS (DN199)'                                          : 'ChronicKidneyFailure', # Chronic kidney failure
                'Kronisk nyreinsufficiens, terminal stadie 5 (DN185)'                   : 'ChronicKidneyFailure', # Chronic kidney failure
                'Kronisk nyreinsufficiens, stadie 3 (DN183)'                            : 'ChronicKidneyFailure', # Chronic kidney failure
                'Kronisk nyreinsufficiens, stadie 4 (DN184)'                            : 'ChronicKidneyFailure', # Chronic kidney failure
                'Kronisk nyreinsufficiens, stadie 2 (DN182)'                            : 'ChronicKidneyFailure', # Chronic kidney failure
                'Kronisk nyreinsufficiens uden funktionsnedsættelse, stadie 1 (DN181)'  : 'ChronicKidneyFailure', # Chronic kidney failure
                'Aterosklerose UNS (DI709)'                                             : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerose i arterie i underekstremitet (DI702)'                    : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerotisk gangræn (DI702A)'                                      : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerose i anden arterie (DI708)'                                 : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerose i aorta (DI700)'                                         : 'Arteriosclerosis', # Arteriosclerosis
                'Cerebral aterosklerose (DI672)'                                        : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerose i nyrearterie (DI701)'                                   : 'Arteriosclerosis', # Arteriosclerosis
                'Mesenteriel aterosklerose (DK551A)'                                    : 'Arteriosclerosis', # Arteriosclerosis
                'Aterosklerotisk retinopati (DI708A)'                                   : 'Arteriosclerosis', # Arteriosclerosis
                'Claudicatio intermittens (DI739A)'                                     : 'Arteriosclerosis'} # Arteriosclerosis


# Collapse the raw diagnosis names into the grouped labels; names that are not in the mapping stay unchanged.
diagnosesDF = diagnosesDF.replace({"DiagnosisName" : diagnosesDic})
# From this line on, diagnosesDic holds the list of grouped labels instead of the mapping.
diagnosesDic = list(diagnosesDic.values())
# Drop every diagnosis row whose name is not one of the grouped labels.
diagnosesDF = diagnosesDF[diagnosesDF['DiagnosisName'].isin(diagnosesDic)]

diagnosesDF['DiagnosisName'].value_counts()

In [None]:
# Distinct patients with at least one of the grouped diagnoses
len(set(diagnosesDF['CPR']))

In [None]:
# Parse both timestamp columns so diagnoses can be compared with the procedure start.
cleanAnesthesiaEventDF['StartOfProcedure'] = pd.to_datetime(cleanAnesthesiaEventDF['StartOfProcedure'])
diagnosesDF['Date'] = pd.to_datetime(diagnosesDF['Date'])
# Attach the procedure start to every diagnosis row and keep diagnoses dated on or before it.
# NOTE(review): if a CPR occurs more than once in cleanAnesthesiaEventDF, each diagnosis row is
# repeated once per occurrence — confirm CPR is unique there.
mergeddiagnosesDF = pd.merge(diagnosesDF, cleanAnesthesiaEventDF[['CPR' , 'StartOfProcedure']] , on = 'CPR' , how = 'left')
mergeddiagnosesDF = mergeddiagnosesDF[mergeddiagnosesDF['Date'] <= mergeddiagnosesDF['StartOfProcedure']]
len(list(set(mergeddiagnosesDF['CPR'])))

In [None]:
# Continue with the pre-procedure diagnoses only, without the timestamp and SKS code columns.
diagnosesDF = mergeddiagnosesDF.drop(columns = ['StartOfProcedure', 'Date' , 'DiagnosisSKS'])

In [None]:
# One-hot encode the grouped diagnoses and collapse to one row per patient:
# taking the per-patient maximum sets a flag when any of the patient's rows has it.
diagnosisDummies = pd.get_dummies(diagnosesDF, columns=['DiagnosisName'], drop_first=False).reset_index(drop = True)
diagnosisFlagsPerPatient = diagnosisDummies.groupby('CPR').max().reset_index()
stillInPreOpData = diagnosisFlagsPerPatient['CPR'].isin(preOpData['CPR'])
diagnosesOneHotDF = diagnosisFlagsPerPatient[stillInPreOpData]
diagnosesOneHotDF

In [None]:
# Left-join the diagnosis flags onto the pre-op table. Every NaN in the merged frame
# (not only in the diagnosis columns) is then replaced by False.
preOpDataMerged = preOpData.merge(diagnosesOneHotDF, on = 'CPR', how = 'left').fillna(False)
preOpDataMerged.isna().sum()

In [None]:
# Continue with the merged table as the pre-op table (same object, no copy is made).
preOpData = preOpDataMerged
preOpData

In [None]:
# Distinct patients with at least one grouped diagnosis dated on or before the procedure start
len(set(diagnosesDF['CPR']))

## Blood Samples

In [None]:
# Load the blood sample results ('CPR', 'OrderName', 'ResultValue' and 'ResultTime' are used below).
bloodSamplesDF = pd.read_csv(r'FilteredData\Blood Samples.csv')

In [None]:
# Unfiltered blood samples; the restriction to the pre-op patients is applied further down,
# after the order names have been grouped.
bloodSamplesDF

In [None]:
# Short analyte label -> every order-name spelling that is folded into it.
ordersByAnalyte = {
    'albumin_p'      : ['ALBUMIN;P'],
    'bili_p'         : ['BILIRUBINER (POC);P', 'BILIRUBINER(POC);P', 'BILIRUBINER;P'],
    'ckmb'           : ['KREATINKINASE MB;P'],
    'crp'            : ['C-REAKTIVT PROTEIN', 'C-REAKTIVT PROTEIN [CRP];P'],
    'faktor_2_7_10'  : ['KOAGULATIONSFAKTOR II+VII+X;P'],
    'glukose_p'      : ['GLUKOSE(POC);P', 'GLUKOSE(POC);P(AB)', 'GLUKOSE(POC);P(KB)', 'GLUKOSE;P', 'GLUKOSE;P(AB)'],
    'hb_b'           : ['HÆMOGLOBIN (POC);B', 'HÆMOGLOBIN;B'],
    'karb'           : ['KARBAMID;P'],
    'krea_p'         : ['KREATININ (ENZ.);P', 'KREATININ;P'],
    'laktat_p'       : ['LAKTAT(POC);P(AB)', 'LAKTAT;P(AB)'],
    'laktat_vene'    : ['LAKTAT(POC);P(VB)', 'LAKTAT;P(VB)'],
    'leukocytter'    : ['LEUKOCYTTER;B'],
    'pco2_art'       : ['P(AB)-PCO2;(37 °C POC)', 'PCO2;P(AB)'],
    'ph_art'         : ['P(AB)-PH;(37 °C POC)', 'PH;P(AB)'],
    'ph_cvb'         : ['PH;P(CVB)'],
    'po2_art'        : ['P(AB)-PO2;(37 °C POC)', 'PO2;P(AB)'],
    'sat_cvb'        : ['O2 SAT. (POC);HB(CVB)'],
    'trombocytter'   : ['TROMBOCYTTER;B'],
    'troponin_i'     : ['TROPONIN I (NG/L);P', 'TROPONIN I;P'],
    'troponin_lokal' : ['TROPONIN (LOKAL);P'],
    'troponin_t'     : ['TROPONIN T;P'],
}

# Flat lookup from raw order name to analyte label
bloodDic = {orderName : analyte
            for analyte, orderNames in ordersByAnalyte.items()
            for orderName in orderNames}


# Replace the raw order names by their analyte labels; names that are not in the mapping stay unchanged.
bloodSamplesDF = bloodSamplesDF.replace({"OrderName" : bloodDic})
bloodDicList = list(bloodDic.values())
# Drop every sample whose order name is not one of the analyte labels.
bloodSamplesDF = bloodSamplesDF[bloodSamplesDF['OrderName'].isin(bloodDicList)]

bloodSamplesDF['OrderName'].value_counts()


In [None]:
# Narrow the samples down to the analytes of interest and count the remaining rows per analyte.
bloodSamplesKept = ['ckmb' , 'crp' , 'hb_b' , 'krea_p' , 'leukocytter' , 'trombocytter' , 'troponin_i' , 'troponin_lokal' , 'troponin_t']
bloodSamplesDF = bloodSamplesDF[bloodSamplesDF['OrderName'].isin(bloodSamplesKept)]
bloodSamplesDF['OrderName'].value_counts()

In [None]:
# Overview of the distinct raw result values and how often each occurs, most frequent first
distinctResultValues = list(set(bloodSamplesDF['ResultValue']))
resultValueCounts = bloodSamplesDF['ResultValue'].value_counts()

viewBloodResults = pd.DataFrame({'ResultValue' : distinctResultValues})
viewBloodResults['Count'] = viewBloodResults['ResultValue'].map(resultValueCounts)
viewBloodResults = viewBloodResults.sort_values('Count' , ascending = False)

# Distinct patients with blood samples vs. distinct patients with anesthesia records
print(len(set(bloodSamplesDF['CPR'])))
print(len(set(cleanAnesthesiaEventDF['CPR'])))

In [None]:
# Keep only samples of patients that are in the pre-op table, then count those patients
sampleOfCohortPatient = bloodSamplesDF['CPR'].isin(preOpData['CPR'])
bloodSamplesDF = bloodSamplesDF[sampleOfCohortPatient]
len(set(bloodSamplesDF['CPR']))

In [None]:
# Keep only anesthesia events of patients that are in the pre-op table, then count those patients
eventOfCohortPatient = cleanAnesthesiaEventDF['CPR'].isin(preOpData['CPR'])
cleanAnesthesiaEventDF = cleanAnesthesiaEventDF[eventOfCohortPatient]
len(set(cleanAnesthesiaEventDF['CPR']))

In [None]:
# Parse the result timestamps so they can be compared with the procedure start.
bloodSamplesDF['ResultTime'] = pd.to_datetime(bloodSamplesDF['ResultTime'])

In [None]:
# Pair every sample with the patient's procedure start and keep the samples whose result time
# lies within the week up to and including the start of the procedure.
# NOTE(review): a CPR that occurs several times in cleanAnesthesiaEventDF repeats each sample
# once per occurrence — confirm CPR is unique there.
procedureStarts = cleanAnesthesiaEventDF[['CPR' , 'StartOfProcedure']]
mergedBloodDF = bloodSamplesDF.merge(procedureStarts, on = 'CPR', how = 'left')

notAfterStart = mergedBloodDF['ResultTime'] <= mergedBloodDF['StartOfProcedure']
mergedBloodDF = mergedBloodDF[notAfterStart]

oneWeekBefore = mergedBloodDF['StartOfProcedure'] - pd.Timedelta(weeks = 1)
mergedBloodDF = mergedBloodDF[mergedBloodDF['ResultTime'] >= oneWeekBefore]
len(set(mergedBloodDF['CPR']))

In [None]:
# Remaining samples per analyte after the time-window filter
mergedBloodDF['OrderName'].value_counts()

In [None]:
# Continue with the filtered samples, without the helper column from the merge.
bloodSamplesDF = mergedBloodDF.drop(columns = 'StartOfProcedure')

In [None]:
def cleaningBloodSamples(val) :
    """Convert one raw blood-sample result to a float.

    Strings have the censoring markers '>' and '<' removed before conversion
    (so '>5.0' becomes 5.0). Values that are already numeric are converted directly.

    Returns np.nan for anything that cannot be converted: free-text results,
    None, or missing values.

    NOTE(review): a decimal comma ('3,5') is not converted and yields NaN —
    confirm whether such values occur in the data.
    """
    # Only strings can carry the '<' / '>' markers; NaN and numbers have no .replace
    if isinstance(val, str) :
        val = val.replace('>','').replace('<','')
    try:
        return float(val)
    except (TypeError, ValueError):
        return np.nan

# Convert every raw result to a number; results that cannot be converted become NaN ...
bloodSamplesDF['ResultValue'] = bloodSamplesDF['ResultValue'].apply(cleaningBloodSamples)

# ... and are removed here.
bloodSamplesDF = bloodSamplesDF.dropna(subset=['ResultValue'])
bloodSamplesDF

In [None]:
# Distinct patients that still have at least one numeric result
len(set(bloodSamplesDF['CPR']))

In [None]:
# Latest result per patient and analyte, reshaped to one row per patient and one column per analyte
samplesInTimeOrder = bloodSamplesDF.sort_values(by = ['CPR' , 'OrderName' , 'ResultTime'])
bloodSamplesLastDF = samplesInTimeOrder.groupby(['CPR' , 'OrderName']).last().reset_index()
bloodSamplesPivotDF = bloodSamplesLastDF.pivot_table(index = 'CPR' , columns = 'OrderName' , values = 'ResultValue' )

# For these three analytes only record whether a value exists, not the value itself
for presenceOnlyAnalyte in ('ckmb', 'troponin_i', 'troponin_t'):
    bloodSamplesPivotDF[presenceOnlyAnalyte] = bloodSamplesPivotDF[presenceOnlyAnalyte].notna()


In [None]:
# Patients without a value, per analyte column
bloodSamplesPivotDF.isna().sum()

In [None]:
# Distinct patients before incomplete rows are dropped
len(set(bloodSamplesLastDF['CPR']))

In [None]:
# Keep only patients with a value in every column, then turn CPR back into a regular column.
# NOTE(review): dropna() considers all pivot columns, including 'troponin_lokal' when it is
# present — confirm that a missing troponin_lokal should exclude a patient.
bloodSamplesPivotDF = bloodSamplesPivotDF.dropna().reset_index()
bloodSamplesPivotDF

In [None]:
# Number of rows (patients) left in the pivot table
len(bloodSamplesPivotDF['troponin_i'])

In [None]:
# Attach the blood-sample columns to the pre-op table, matching rows on CPR.
# The pivot table is ordered by CPR while preOpData is not necessarily, so assigning the
# columns directly would pair rows by position. An inner merge keeps the patients present
# in both tables and puts every value on the right patient.
bloodColumns = ['ckmb' , 'crp' , 'hb_b' , 'krea_p' , 'leukocytter' , 'trombocytter' , 'troponin_i' , 'troponin_t']
preOpData = (
    preOpData
    .drop(columns = bloodColumns, errors = 'ignore')   # keeps the cell safe to re-run
    .merge(bloodSamplesPivotDF[['CPR'] + bloodColumns], on = 'CPR', how = 'inner')
    .reset_index(drop = True)
)
preOpData

In [None]:
# Patients with (True) and without (False) a ckmb result
bloodSamplesPivotDF['ckmb'].value_counts()

In [None]:
# Per-patient indicator of which analytes have at least one numeric sample in the time window.
bloodSamplesOneHotDF = pd.get_dummies(bloodSamplesDF, columns=['OrderName'], drop_first=False).reset_index()
bloodSamplesOneHotDF = bloodSamplesOneHotDF.drop(columns = ['index', 'ResultTime', 'ResultValue'])
bloodSamplesOneHotDF = bloodSamplesOneHotDF.groupby('CPR').max().reset_index()

prefix = 'OrderName_'
# BE CAREFUL WITH THIS LIST: TROPONIN_LOKAL HAS BEEN REMOVED
bloodSamplesKept = ['ckmb' , 'crp' , 'hb_b' , 'krea_p' , 'leukocytter' , 'trombocytter' , 'troponin_i' ,  'troponin_t']

# Strip the get_dummies prefix from each indicator column by name. Renaming by position against
# bloodSamplesKept would mislabel columns (or run past the end of the list) as soon as the
# analytes present in the data differ from that list.
# index=str is kept so the row labels are converted to strings as before.
bloodSamplesOneHotDF = bloodSamplesOneHotDF.rename(
    index = str,
    columns = lambda name : name[len(prefix):] if name.startswith(prefix) else name,
)
bloodSamplesOneHotDF = bloodSamplesOneHotDF[bloodSamplesOneHotDF['CPR'].isin(preOpData['CPR'])]
bloodSamplesOneHotDF.sum(axis=0)

In [None]:
# Inspect the pre-op table after the blood-sample columns were added
preOpData

In [None]:
# Patients with and without an acute kidney failure diagnosis
preOpData['DiagnosisName_AcuteKidneyFailure'].value_counts()

In [None]:
# Inspect the current pre-op table
preOpData

In [None]:
# Remaining missing values per column
preOpData.isna().sum()

## Echocardiography

In [None]:
# Load the Intellispace measurements ('CPR', 'MeasurementTime' and 'MeasureCode' are used below).
intellispaceDF = pd.read_csv(r'FilteredData\Intellispace.csv')
intellispaceDF

In [None]:
# Order the measurements per patient by measurement time.
# NOTE(review): 'MeasurementTime' is only parsed with pd.to_datetime further down, so this sort
# uses the column as read from the CSV — confirm that this gives chronological order.
intellispaceDF = intellispaceDF.sort_values(['CPR', 'MeasurementTime']).reset_index(drop = True)
intellispaceDF

In [None]:
""" intellispaceDF['MeasurementTime'] = pd.to_datetime(intellispaceDF['MeasurementTime'])
mergedintellispaceDF = pd.merge(intellispaceDF, cleanAnesthesiaEventDF[['CPR' , 'StartOfProcedure']] , on = 'CPR' , how = 'left')
mergedintellispaceDF = mergedintellispaceDF[mergedintellispaceDF['MeasurementTime'] <= mergedintellispaceDF['StartOfProcedure']].reset_index(drop = True)
intellispaceDF = mergedintellispaceDF.drop(columns = ['StartOfProcedure'])
len(list(set(mergedintellispaceDF['CPR']))) """

In [None]:
""" intellispaceCount1 = intellispaceDF['MeasureCode'].value_counts().rename_axis('MeasureCode').reset_index(name='counts')
intellispaceCount2 = intellispaceDF['Note'].value_counts().rename_axis('Note').reset_index(name='counts')
intellispaceCount1.to_csv(r'FilteredData\intellispaceCodeCountPreOp.csv')
intellispaceCount2.to_csv(r'FilteredData\intellispaceNoteCountPreOp.csv') """

In [None]:
# Distinct patients with at least one Intellispace measurement
len(set(intellispaceDF['CPR']))

In [None]:
# Finding label -> the Intellispace measure codes that are folded into it.
# Labels of the form '<structure> X AND Y' are split into '<structure> X' and '<structure> Y' later on.
# NOTE(review): 'AV103dk.' carries a trailing dot, unlike 'MV103dk' and 'TV103dk' — confirm it
# matches the code as it appears in the data.
codesByFinding = {
    'LV diastolic func'                    : ['LV400dk1', 'LV400dk3'],
    'LV systolic func'                     : ['LV-0077', 'LV300dk', 'LV301dk', 'LV303dk4', 'LV304dk1', 'LV305dk2', 'LV306dk2'],
    'LV size'                              : ['LV101dk', 'LV102dk', 'LV104dk', 'LV105dk', 'LV106dk'],
    'RV size'                              : ['RV101dk', 'RV102dk1', 'RV102dk2', 'RV102dk3', 'RV102dk4'],
    'RA size'                              : ['AT301dk', 'AT305dk1', 'RA-0020'],
    'RV systolic func'                     : ['RV301dk1', 'RV300dk7', 'RV300dk6'],
    'Pulmonal hypertension'                : ['TV800dk', 'TV804dk3', 'TV804dk4', 'TV810dk'],
    'Mitral stenosis AND insufficiency'    : ['MV103dk'],
    'Mitral insufficiency'                 : ['MV600dk', 'MV602dk1', 'MV602dk3', 'MV602dk2'],
    'Aortic stenosis AND insufficiency'    : ['AV103dk.'],
    'Mitral stenosis'                      : ['MV500dk', 'MV503dk3', 'MV503dk4'],
    'Aortic stenosis'                      : ['AV716dk', 'AV500dk', 'AV504dk1', 'AV506dk1'],
    'Aortic insufficiency'                 : ['AV600dk', 'AV601dk', 'AV602dk1', 'AV602dk2', 'AV602dk3', 'AV608dk2'],
    'Tricuspid stenosis AND insufficiency' : ['TV103dk'],
    'Tricuspid stenosis'                   : ['TV500dk', 'TV502dk', 'TV503dk', 'TV504dk'],
    'Tricuspid insufficiency'              : ['TV600dk', 'TV602dk1', 'TV602dk2', 'TV602dk3'],
}

# Flat lookup from measure code to finding label
intellispaceDic = {measureCode : finding
                   for finding, measureCodes in codesByFinding.items()
                   for measureCode in measureCodes}

In [None]:
# Remove every measurement row that has a missing value in any column, then count the remaining patients.
# NOTE(review): dropna() without subset also drops rows where only an optional column is empty —
# confirm that this is intended.
intellispaceDF = intellispaceDF.dropna()
len(list(set(intellispaceDF['CPR'])))


In [None]:
# Translate the measure codes to finding labels, then separate the rows whose code is not in the
# mapping (kept as intellispaceMissingDF) from the rows that carry a known finding.
intellispaceDF = intellispaceDF.replace({'MeasureCode' : intellispaceDic})
hasKnownFinding = intellispaceDF['MeasureCode'].isin(intellispaceDic.values())
intellispaceMissingDF = intellispaceDF[~hasKnownFinding]
intellispaceDF = intellispaceDF[hasKnownFinding]
intellispaceDF

In [None]:
# Distinct patients with at least one known finding
len(set(intellispaceDF['CPR']))

In [None]:
def splitCode(row) :
    """Expand a combined measure label into one label per condition.

    The label in ``row['MeasureCode']`` is split on ``' AND '``. The first word
    of the first piece is then put in front of the remainder of every piece, so
    ``'Mitral stenosis AND insufficiency'`` becomes
    ``['Mitral stenosis', 'Mitral insufficiency']``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record whose ``'MeasureCode'`` entry is a non-empty string.

    Returns
    -------
    list of str
        One label per ``' AND '``-separated piece, in the original order.
    """
    conditions = row['MeasureCode'].split(' AND ')
    prefix = conditions[0].split()[0]
    expanded = []
    for condition in conditions :
        # Everything after the first word; a single-word piece is kept whole.
        remainder = condition.split(maxsplit = 1)[-1]
        expanded.append(prefix + ' ' + remainder)
    return expanded

# Turn every label into a list of single-condition labels, then give each
# list element its own row so that one row describes exactly one condition.
intellispaceDF = (
    intellispaceDF
    .assign(MeasureCode = intellispaceDF.apply(splitCode, axis = 1))
    .explode('MeasureCode')
    .reset_index(drop = True)
)
intellispaceDF['MeasureCode'].value_counts()


Comparing the times of the measurements with the procedure times

In [None]:
intellispaceDF['MeasurementTime'] = pd.to_datetime(intellispaceDF['MeasurementTime'])

# cleanAnesthesiaEventDF appears to hold several rows per patient (it is
# pivoted on Event and de-duplicated on CPR further down), so merging against
# it directly copies every measurement once per repeated row. Collapse the
# lookup to its distinct (CPR, StartOfProcedure) pairs before merging.
procedureStartDF = cleanAnesthesiaEventDF[['CPR' , 'StartOfProcedure']].drop_duplicates()
mergedintellispaceDF = pd.merge(intellispaceDF, procedureStartDF , on = 'CPR' , how = 'left')

measuredBefore = mergedintellispaceDF['MeasurementTime'] <= mergedintellispaceDF['StartOfProcedure']
measuredAfter = mergedintellispaceDF['MeasurementTime'] > mergedintellispaceDF['StartOfProcedure']

# Store measurements before the procedure
mergedintellispaceBEFOREDF = mergedintellispaceDF[measuredBefore].reset_index(drop = True)

# Store measurements after the procedure and then remove all the patients who are measured BEFORE the procedure
mergedintellispaceAFTERDF = mergedintellispaceDF[measuredAfter].reset_index(drop = True)
mergedintellispaceAFTERDF = mergedintellispaceAFTERDF[~mergedintellispaceAFTERDF['CPR'].isin(mergedintellispaceBEFOREDF['CPR'])].reset_index(drop = True)

# The working frame is the pre-procedure selection; reuse it instead of
# evaluating the same filter a second time.
mergedintellispaceDF = mergedintellispaceBEFOREDF.copy()
len(set(mergedintellispaceAFTERDF['CPR']))

In [None]:
# Distinct patients with at least one measurement before their procedure.
len(set(mergedintellispaceDF['CPR']))

In [None]:
# Save the patients that were only measured after their procedure, without
# exact duplicate rows.
afterProcedureExport = mergedintellispaceAFTERDF.drop_duplicates().reset_index(drop = True)
afterProcedureExport.to_csv(r'FilteredData\Intellispace Relevant AFTER procedure.csv')

Looking at the patients whose pre-procedure Intellispace records all carry measure codes that are not covered by the mapping

In [None]:
# intellispaceMissingDF is a boolean-filtered selection of intellispaceDF.
# Assigning a column into it triggers pandas' SettingWithCopyWarning, so build
# a new frame with the converted timestamps instead.
intellispaceMissingDF = intellispaceMissingDF.assign(MeasurementTime = pd.to_datetime(intellispaceMissingDF['MeasurementTime']))

# Merge against the distinct (CPR, StartOfProcedure) pairs only, so a patient
# with several rows in cleanAnesthesiaEventDF does not multiply the records.
procedureStartDF = cleanAnesthesiaEventDF[['CPR' , 'StartOfProcedure']].drop_duplicates()
mergedintellispaceMissingDF = pd.merge(intellispaceMissingDF, procedureStartDF , on = 'CPR' , how = 'left')

# Keep pre-procedure records of patients that have no usable (translated)
# pre-procedure record in mergedintellispaceDF.
mergedintellispaceMissingDF = mergedintellispaceMissingDF[mergedintellispaceMissingDF['MeasurementTime'] <= mergedintellispaceMissingDF['StartOfProcedure']].reset_index(drop = True)
mergedintellispaceMissingDF = mergedintellispaceMissingDF[~mergedintellispaceMissingDF['CPR'].isin(mergedintellispaceDF['CPR'])].reset_index(drop = True)
len(set(mergedintellispaceMissingDF['CPR']))

In [None]:
# Continue with the pre-procedure measurements only; the procedure start time
# is no longer needed once the time comparison has been made.
intellispaceDF = mergedintellispaceDF.drop('StartOfProcedure' , axis = 1).reset_index(drop = True)

In [None]:
intellispaceDF

In [None]:
# Maps the free-text echo statements found in the 'Note' column to an ordinal
# grade (1 = normal/none ... 4 = most severe level listed for that finding).
# The replacement below matches whole strings exactly, so every key must be
# spelled exactly like the note text, including typos and double spaces.
# Notes without an entry are left unchanged (they stay text).
intellispaceGradingDic = {'Venstre ventrikels diastoliske funktion er normal.' : 1,
                          # --- left ventricular diastolic function ---
                          'Grad 1 (normalt fyldningstyk) diastolisk dysfunktion.' : 2, # NOTE(review): 'fyldningstyk' (sic) presumably mirrors a typo in the raw notes; confirm before correcting
                          'Grad 2 (forhøjet fyldningstryk) diastolisk dysfunktion.' : 3,
                          'Grad 3 (forhøjet fyldningstryk) diastolisk dysfunktion.' : 4,
                          # --- left ventricular ejection fraction / systolic function ---
                          'Ejection Fraction = >55%.' : 1,
                          'Normal left ventricular function (EF 55-65%)' : 1,
                          'The left ventricular ejection fraction is normal.' : 1,
                          'Venstre ventrikel ejection fraction er normal.' : 1,
                          'Venstre ventrikel ejection fraction er stort set normal.' : 1,
                          'Venstre ventrikels systoliske funktion er let reduceret, og LVEF 55 %.' : 1,
                          'Venstre ventrikels systoliske funktion er let reduceret, og LVEF 50 %.' : 2,
                          'Venstre ventrikels systoliske funktion er moderat reduceret, og LVEF 45 %.' : 2,
                          'Ejection Fraction = 45-50%.' : 2,
                          'Ejection Fraction = 50-55%.' : 2,
                          'Mildly decreased left ventricular function (EF 45-50%)' : 2,
                          'Moderately decreased left ventricular function (EF 35-45%)' : 3,
                          'Venstre ventrikels systoliske funktion er moderat reduceret, og LVEF 40 %.' : 3,
                          'Venstre ventrikels systoliske funktion er moderat reduceret, og LVEF 35 %.' : 3,
                          'Ejection Fraction = <25%.' : 4,
                          'Ejection Fraction = 25-35%.' : 4,
                          'Severely decreased left ventricular function (EF <30%)' : 4,
                          'Venstre ventrikels systoliske funktion er betydelig reduceret og LVEF 25 %.' : 4,
                          'Venstre ventrikels systoliske funktion er betydelig reduceret og LVEF 30 %.' : 4,
                          'Venstre ventrikels systoliske funktion er svært reduceret og LVEF 15 %.' : 4,
                          'Venstre ventrikels systoliske funktion er svært reduceret og LVEF 10 %.' : 4,
                          'Venstre ventrikels systoliske funktion er svært reduceret og LVEF 20 %.' : 4,
                          'Venstre ventrikels systoliske funktion er svært reduceret og LVEF 5 %.' : 4,
                          # --- left ventricle size ---
                          'Venstre ventrikels storrelse er stort set normal.' : 1,
                          'Venstre ventrikels storrelse er normal.' : 1,
                          'Venstre ventrikel er let dilateret.' : 2,
                          'Venstre ventrikel er moderat dilateret.' : 3,
                          'Venstre ventrikel er svært dilateret.' : 4,
                          # --- right ventricle size ---
                          'Højre ventrikels storrelse er stort set normal.' : 1,
                          'Højre ventrikels storrelse er normal.' : 1,
                          'Højre ventrikels storrelse er let dilateret.' : 2,
                          'Højre ventrikels storrelse er moderat dilateret.' : 3,
                          'Højre ventrikels storrelse er svært dilateret.' : 4,
                          # --- right atrium size ---
                          'Det højre atriums storrelse er normal.' : 1,
                          'The right atrium is normal size' : 1,
                          'Det højre atrium er dilateret.' : 2,
                          'The right atrium is mildly enlarged' : 2,
                          'The right atrium is mildly dilated.' : 2,
                          'The right atrium is mild to moderately dilated.' : 3,
                          'The right atrium is moderately enlarged' : 3,
                          'The right atrium is severely dilated.' : 4,
                          'The right atrium is severely enlarged' : 4,
                          # --- right ventricular systolic function ---
                          # NOTE(review): 'funktionn' (sic) in the next key; confirm it matches the raw notes.
                          'Højre ventrikels systoliske funktionn er stort set normal.' : 1,
                          'Højre ventrikels systoliske funktion er normal.' : 1,
                          'Højre ventrikels systoliske funktion er nedsat.' : 2,
                          # --- right ventricular systolic pressure / pulmonary hypertension ---
                          # NOTE(review): two keys contain a double space and one says 'hyperetension' (sic).
                          'Det systoliske tryk i højre ventrikel er normalt.' : 1,
                          'Højre ventrikels systoliske  tryk er forhøjet til 40 - 50 mm Hg. forenelig med let pulmonal hypertension.' : 2,
                          'Højre ventrikels systoliske  tryk er forhøjet til 50 - 60 mm Hg forenelig med moderat pulmonal hypertension.' : 3,
                          'Højre ventrikels systoliske tryk er højere end 60mmhg. forenelig med svær pulmonal hyperetension.' : 4,
                          # --- mitral insufficiency ---
                          'Der er ingen mitralklap insufficiens.' : 1,
                          'Der er let mitralklap insufficiens.' : 2,
                          'Der er moderat mitralklap insufficiens.' : 3,
                          'Der er svær mitralklap insufficiens.' : 4,
                          # --- mitral stenosis ---
                          # NOTE(review): moderate/severe are graded 2/3 here and for aortic stenosis,
                          # but 3/4 for the other findings; confirm that this is intended.
                          'Mitralklappen fremtræder såvel strukturelt som funktionelt normal.' : 1,
                          'Der er ingen mitralklap stenose.' : 1,
                          'Der er moderat mitralklap stenose.' : 2,
                          'Der er svær mitralklap stenose.' : 3,
                          # --- aortic stenosis ---
                          'Peak og/eller mean gradienterne over aortaklapprotesen er normale.' : 1,
                          'Der er ingen valvulær aortastenose.' : 1,
                          'Aortaklappen er normal i såvel strukturel som funktionel henseende.' : 1,
                          'Der er en moderat valvulær aortastenose.' : 2,
                          'Der er en svær valvulær aortastenose.' : 3,
                          # --- aortic insufficiency ---
                          'Der ses ingen aortaklap insufficiens.' : 1,
                          'Der er en diskret aortaklap  insufficiens.' : 2,
                          'Der er en let aortaklap insufficiens.' : 2,
                          'Der er en moderat aortaklap insufficiens.' : 3,
                          'Der er en svær aortaklap insufficiens.' : 4,
                          'Der ses retrograd flow i aorta descendens tydende på svær aortaklap insufficiens.' : 4,
                          # --- tricuspid stenosis ---
                          'Der er ingen tricuspidalklap stenose.' : 1,
                          'Tricuspidalklappen er normal i såvel strukturel som funktionel henseende.' : 1,
                          'Der er en let tricuspidalklap stenose.' : 2,
                          'Der er moderat tricuspidalklap stenose.' : 3,
                          'Der er svær tricuspidalklap stenose.' : 4,
                          # --- tricuspid insufficiency ---
                          'Der ses ingen tricuspidalklap insufficiens.' : 1,
                          'Der er en let tricuspidalinsufficiens.' : 2,
                          'Der er moderat tricuspidalinsufficiens.' : 3,
                          'Der er svær tricuspidalinsufficiens.' : 4
                          }
# Replace each known note text by its grade; the value counts show which
# grades (and which untranslated texts) are present afterwards.
intellispaceDF = intellispaceDF.replace({'Note' : intellispaceGradingDic})
intellispaceDF['Note'].value_counts()

In [None]:
# Latest record per patient and measure: order the rows chronologically within
# each (CPR, MeasureCode) pair and take the last non-missing value of every
# remaining column.
intellispaceLastDF = (
    intellispaceDF
    .sort_values(by = ['CPR' , 'MeasureCode' , 'MeasurementTime'])
    .groupby(['CPR' , 'MeasureCode'])
    .last()
    .reset_index()
)
intellispaceLastDF

In [None]:
# One row per patient, one column per measure, holding the grade of the LATEST
# pre-procedure measurement. intellispaceLastDF already contains exactly one
# row per (CPR, MeasureCode) pair, namely the chronologically last one.
# Pivoting the unsorted intellispaceDF with 'first' instead took whichever
# record happened to come first in file order.
# NOTE(review): assumes the most recent pre-procedure grade is the intended
# value; confirm with the study protocol.
intellispacePivotDF = intellispaceLastDF.pivot_table(index = 'CPR' , columns = 'MeasureCode' , values = 'Note' , aggfunc = 'first')
# A patient without a record for some measure gets 0 in that column.
intellispacePivotDF = intellispacePivotDF.fillna(0).reset_index()
intellispacePivotDF

In [None]:
preOpData

In [None]:
# Attach the echo grades to the demographic table; patients without any
# Intellispace record keep their row and get missing values.
intellispacePivotMergeDF = preOpData.merge(intellispacePivotDF , on = 'CPR' , how = 'left')
intellispacePivotMergeDF

In [None]:
# Distinct patients that contribute echo grades.
len(set(intellispaceDF['CPR']))

In [None]:
intellispacePivotMergeDF.isna().sum()

In [None]:
# Every missing value in the merged table is replaced by 0.
preOpData = intellispacePivotMergeDF.replace(to_replace = np.nan , value = 0)
preOpData

## Pre-op vitals

In [None]:
# Load the filtered vital-sign measurements.
vitalsDF = pd.read_csv(filepath_or_buffer = r'FilteredData\Vitals.csv')
vitalsDF

In [None]:
# Keep the vitals of patients that are part of the pre-op table.
vitalsDF = vitalsDF.loc[vitalsDF['CPR'].isin(preOpData['CPR'])]
vitalsDF

In [None]:
# Distinct patients with vitals.
len(set(vitalsDF['CPR']))

In [None]:
# Keep the pre-op rows of patients that have vitals.
preOpData = preOpData.loc[preOpData['CPR'].isin(vitalsDF['CPR'])]
preOpData

In [None]:
# Per patient, the first non-missing value of every vitals column.
firstVitals = vitalsDF.groupby('CPR').first().reset_index()
# Numeric readings of zero or below are treated as missing.
numericColumns = firstVitals.select_dtypes(include = 'number')
firstVitals[numericColumns.columns] = numericColumns.mask(numericColumns.le(0))
firstVitals.isna().sum()

In [None]:
# Remove the two listed pressure columns, then keep only patients whose
# remaining vitals are all present.
firstVitals = (
    firstVitals
    .drop(columns = ['PAPMiddle', 'CentralVenousPressureMean'])
    .dropna()
)
firstVitals

In [None]:
# Keep the pre-op rows of patients with a complete set of first vitals.
preOpData = preOpData.loc[preOpData['CPR'].isin(firstVitals['CPR'])]
preOpData

In [None]:
# Add the first vitals as extra columns of the pre-op table.
firstVitalsMergeDF = preOpData.merge(firstVitals , on = 'CPR' , how = 'left')
firstVitalsMergeDF

In [None]:
# From here on preOpData is the merged pre-op + first-vitals table. This binds
# the same object to a second name; it does not create a copy.
preOpData = firstVitalsMergeDF

In [None]:
preOpData.isna().sum()

In [None]:
# Conference records of patients that are still in the pre-op table, and the
# number of distinct patients among them.
count = preOpConferenceDF.loc[preOpConferenceDF['CPR'].isin(preOpData['CPR'])]
len(set(count['CPR']))


In [None]:
# Number of distinct patients with at least one 'Vægt' (weight) record.
weightCount = preOpConferenceDF.loc[preOpConferenceDF['Measurement'] == 'Vægt' , 'CPR'].nunique()
weightCount

In [None]:
# Number of distinct patients with at least one 'Højde' (height) record.
heightCount = preOpConferenceDF.loc[preOpConferenceDF['Measurement'] == 'Højde' , 'CPR'].nunique()
heightCount

In [None]:
# Keep patients that appear in the conference data and remove the timestamp
# column that came along with the vitals.
hasConferenceData = preOpData['CPR'].isin(preOpConferenceDF['CPR'])
preOpData = (
    preOpData
    .loc[hasConferenceData]
    .drop(columns = ['TimeOfMeasurement'])
    .reset_index(drop = True)
)
preOpData

In [None]:
preOpData.isna().sum()

In [None]:
# Distinct patients in the conference data.
len(set(preOpConferenceDF['CPR']))

## EuroScoreII

In [None]:
# Restrict both frames to the patients they have in common. The explicit copy
# makes preOpData an independent frame, so adding a column below does not
# write into a filtered selection (SettingWithCopyWarning).
preOpConferenceDF = preOpConferenceDF[preOpConferenceDF['CPR'].isin(preOpData['CPR'])]
preOpData = preOpData[preOpData['CPR'].isin(preOpConferenceDF['CPR'])].copy()
euroScoreIIConferenceList = []
euroScoreIIByCPR = {}

for patientCPR, conferenceRows in preOpConferenceDF.groupby('CPR') :
    # Last recorded 'Mortalitetsrisiko i %' value of the patient, NaN if there is none.
    euroScoreII = conferenceRows[conferenceRows['Measurement'] == 'Mortalitetsrisiko i %']['Value'].iloc[-1] if 'Mortalitetsrisiko i %' in conferenceRows['Measurement'].values else np.nan

    euroScoreIIConferenceList.append(euroScoreII)
    euroScoreIIByCPR[patientCPR] = euroScoreII

# groupby walks through the patients in sorted CPR order, which need not be
# the row order of preOpData. Attach the values by CPR rather than by position,
# otherwise scores can end up on the wrong patient.
preOpData['EuroScoreII_1'] = preOpData['CPR'].map(euroScoreIIByCPR)

In [None]:
PATSdf = pd.read_csv(r'FilteredData\PATS.csv')
PATSdf = PATSdf[PATSdf['CPR'].isin(preOpData['CPR'])].reset_index(drop = True)
# Look the score up by CPR. A plain column assignment aligns on the positional
# row index, and PATSdf is not guaranteed to list the same patients in the same
# row order as preOpData. If a patient has several PATS rows, the first is used.
patsEuroScoreII = PATSdf.drop_duplicates(subset = 'CPR').set_index('CPR')['euroSCOREII']
preOpData = preOpData.assign(EuroScoreII_2 = preOpData['CPR'].map(patsEuroScoreII))
preOpData

In [None]:
# Patients for whom neither source provides a EuroScore II value.
hasAnyEuroScoreII = preOpData['EuroScoreII_1'].notna() | preOpData['EuroScoreII_2'].notna()
euroScoreIIMissing = preOpData.loc[~hasAnyEuroScoreII]
euroScoreIIMissing

In [None]:
# Patients for whom at least one source provides a EuroScore II value.
lacksBothEuroScoreII = preOpData['EuroScoreII_1'].isna() & preOpData['EuroScoreII_2'].isna()
euroScoreIIMatches = preOpData.loc[~lacksBothEuroScoreII]
euroScoreIIMatches

In [None]:
# The two EuroScore II columns were only needed for the coverage check.
preOpData = preOpData.drop(['EuroScoreII_1', 'EuroScoreII_2'] , axis = 1)
preOpData

In [None]:
# Write the pre-op table without the row index.
preOpData.to_csv(path_or_buf = r'FilteredData\DataPreOp.csv' , index = False)

# Peri-op data

In [None]:
# Restrict PATS to the pre-op patients, remove the two columns that are not
# needed for the outcomes and order the rows by patient id.
PATSdf = (
    PATSdf
    .loc[PATSdf['CPR'].isin(preOpData['CPR'])]
    .drop(columns = ['index' , 'euroSCOREII'])
    .sort_values('CPR')
    .reset_index(drop = True)
)
PATSdf

In [None]:
# Parse both date columns into datetimes.
for dateColumn in ('ProcedureDate' , 'DeathDate') :
    PATSdf[dateColumn] = pd.to_datetime(PATSdf[dateColumn])

In [None]:
# Days between procedure and death (missing when no death date is recorded),
# followed by 0/1 flags for death within 30, 90 and 365 days. A missing
# TimeAlive compares as False and therefore yields 0.
PATSdf['TimeAlive'] = (PATSdf['DeathDate'] - PATSdf['ProcedureDate']).dt.days
mortalityHorizons = {'within30Days' : 30 , 'within90Days' : 90 , 'within365Days' : 365}
for flagColumn, dayLimit in mortalityHorizons.items() :
    PATSdf[flagColumn] = PATSdf['TimeAlive'].le(dayLimit).astype(int)
PATSdf

In [None]:
# Build one indicator column per SKS procedure code for every PATS row.
# NOTE(review): 'SKSCode5' is not part of this selection; confirm that this is intentional.
SKSCodes = PATSdf[['SKSCode1','SKSCode2','SKSCode3','SKSCode4','SKSCode6']]
# stack() puts all code columns into one long Series, get_dummies() creates an
# indicator column per distinct code value, and groupby(level=0).max() folds
# the indicators back to one row per original PATSdf row.
SKSCodes = pd.get_dummies(SKSCodes.stack(), prefix='', prefix_sep='').groupby(level=0).max()
# Put the patient id in front; concat aligns the two parts on the row index.
SKSCodes = pd.concat([PATSdf['CPR'], SKSCodes] , axis = 1)
# Cast the indicator columns (everything after CPR) to int.
SKSCodes.iloc[:,1:] = SKSCodes.iloc[:,1:].astype(int)
# Drop the listed code columns (raises KeyError if one of them is absent).
SKSCodes = SKSCodes.drop(columns = ['FA' , 'FB' , 'FF' , 'FH' , 'FJ' , 'FL' , 'GE' , 'UG' , 'GD' , 'YF' ,'fe'])
# Remove rows where 'FX' is set but none of FC/FE/FG/FK/FM/FN/FP/FW is.
SKSCodes = SKSCodes[~((SKSCodes['FX'] == 1) & (SKSCodes[['FC','FE','FG','FK','FM','FN','FP','FW']].sum(axis=1) == 0))].reset_index(drop = True)
SKSCodes

In [None]:
# Distinct patients in the pre-op table.
print(len(set(preOpData['CPR'])))

In [None]:
# Load the anesthesia and hemodynamic summaries of the three phases.
phase1AnesDF, phase2AnesDF, phase3AnesDF = (
    pd.read_csv(csvPath)
    for csvPath in (r'FilteredData/phase1Anesthesia.csv',
                    r'FilteredData/phase2Anesthesia.csv',
                    r'FilteredData/phase3Anesthesia.csv')
)
phase1HemoDF, phase2HemoDF, phase3HemoDF = (
    pd.read_csv(csvPath)
    for csvPath in (r'FilteredData/phase1FinalHemo.csv',
                    r'FilteredData/phase2FinalHemo.csv',
                    r'FilteredData/phase3FinalHemo.csv')
)

In [None]:
phase2AnesDF

In [None]:
# Distinct patients per phase frame, in the order Anes 1-3, Hemo 1-3.
for phaseFrame in (phase1AnesDF, phase2AnesDF, phase3AnesDF, phase1HemoDF, phase2HemoDF, phase3HemoDF) :
    print(len(set(phaseFrame['CPR'])))

In [None]:
# Keep only anesthesia rows without any missing value and renumber the rows.
phase1AnesDF, phase2AnesDF, phase3AnesDF = (
    anesFrame.dropna().reset_index(drop = True)
    for anesFrame in (phase1AnesDF, phase2AnesDF, phase3AnesDF)
)

In [None]:
phase2AnesDF

In [None]:
# Load the secondary outcomes, remove the three listed columns, order by
# patient id and keep only rows with a known AKI_any value.
organFailureDF = (
    pd.read_csv(r'FilteredData\SecondaryOutcome.csv')
    .drop(columns = ['Unnamed: 0' , 'krea_max' , 'pre_krea'])
    .sort_values(['cpr'])
    .dropna(subset = ['AKI_any'])
    .reset_index(drop = True)
)
organFailureDF

In [None]:
# Patients that are present in every data source.
Commonpatients = set(phase1AnesDF['CPR']).intersection(phase2AnesDF['CPR'], phase3AnesDF['CPR'], phase1HemoDF['CPR'], phase2HemoDF['CPR'], phase3HemoDF['CPR'],preOpData['CPR'], SKSCodes['CPR'], organFailureDF['cpr'])
phase1AnesDF = phase1AnesDF[phase1AnesDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
phase2AnesDF = phase2AnesDF[phase2AnesDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
phase3AnesDF = phase3AnesDF[phase3AnesDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
phase1HemoDF = phase1HemoDF[phase1HemoDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
phase2HemoDF = phase2HemoDF[phase2HemoDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
phase3HemoDF = phase3HemoDF[phase3HemoDF['CPR'].isin(Commonpatients)].reset_index(drop = True)
SKSCodes = SKSCodes[SKSCodes['CPR'].isin(Commonpatients)].reset_index(drop = True)
preOpData = preOpData[preOpData['CPR'].isin(Commonpatients)].reset_index(drop = True)
# organFailureDF takes part in the intersection, so it has to be restricted as
# well; otherwise it keeps patients that every other frame has dropped and its
# rows no longer correspond to the cohort.
organFailureDF = organFailureDF[organFailureDF['cpr'].isin(Commonpatients)].reset_index(drop = True)


In [None]:
# Distinct patients per phase frame after restricting to the common patients.
for phaseFrame in (phase1AnesDF, phase2AnesDF, phase3AnesDF, phase1HemoDF, phase2HemoDF, phase3HemoDF) :
    print(len(set(phaseFrame['CPR'])))

In [None]:
phase2AnesDF

In [None]:
# Keep the pre-op rows of patients that have phase 3 anesthesia data.
preOpData = preOpData.loc[preOpData['CPR'].isin(phase3AnesDF['CPR'])].reset_index(drop = True)
print(len(set(preOpData['CPR'])))

In [None]:
# Reorder the hemodynamic columns so that the columns of one signal sit next
# to each other. The positions refer to the column order of the loaded CSV
# files. NOTE(review): position 0 is presumably the CPR key; confirm.
# Running this cell twice reorders the already reordered frames again.
pulsePositions = list(range(0, 22, 3))   # 0, 3, ..., 21   Pulse
cvpPositions = list(range(1, 20, 3))     # 1, 4, ..., 19   CVP
mapPositions = list(range(2, 21, 3))     # 2, 5, ..., 20   MAP
phase13Order = pulsePositions + cvpPositions + mapPositions + [22, 23]   # LAP PAP last

# Phase 2 has no Pulse columns: CVP on the odd, MAP on the even positions.
phase2Order = [0] + list(range(1, 14, 2)) + list(range(2, 15, 2)) + [15, 16]   # LAP PAP last

phase1HemoDF = phase1HemoDF.iloc[:, phase13Order]
phase2HemoDF = phase2HemoDF.iloc[:, phase2Order]
phase3HemoDF = phase3HemoDF.iloc[:, phase13Order]

phase1HemoDF

In [None]:
phase2HemoDF

In [None]:
# One row per patient with one timestamp column per event type.
cleanAnesthesiaEventDF['EventTime'] = pd.to_datetime(cleanAnesthesiaEventDF['EventTime'])
cleanAnesthesiaEventPivotDF = (
    cleanAnesthesiaEventDF
    .pivot(index = 'CPR' , columns = 'Event' , values = 'EventTime')
    .reset_index()
)

In [None]:
# Phase 1: combine anesthesia and hemodynamic summaries, add the natural log
# of each median (p50) column and the duration from 'Induktion' to
# 'Aorta tang på' in minutes.
phase1DF = phase1AnesDF.merge(phase1HemoDF , on = 'CPR' , how = 'left')
phase1LogSources = {'ETCO2_log' : 'ETCO2_p50',
                    'Saturation_log' : 'Saturation_p50',
                    'Perf. Index_log' : 'Perf. Index_p50',
                    'Pulse_log' : 'Pulse_p50',
                    'CVP_log' : 'CVP_p50',
                    'MAP_log' : 'MAP_p50'}
for logColumn, medianColumn in phase1LogSources.items() :
    phase1DF[logColumn] = np.log(phase1DF[medianColumn])
phase1Merged = phase1DF.merge(cleanAnesthesiaEventPivotDF , on = 'CPR')
phase1Duration = phase1Merged['Aorta tang på'] - phase1Merged['Induktion']
phase1Merged['TimeOfPhase'] = phase1Duration / pd.Timedelta(minutes = 1)
# The raw event timestamps are not part of the phase table.
phase1DF = phase1Merged.drop(columns = ['Aorta tang på', 'Aorta tang af' , 'Induktion' , 'Stop Data Indsamling'])
phase1DF

In [None]:
# Phase 2: combine anesthesia and hemodynamic summaries, add the natural log
# of each median (p50) column and the duration from 'Aorta tang på' to
# 'Aorta tang af' in minutes.
phase2DF = phase2AnesDF.merge(phase2HemoDF , on = 'CPR' , how = 'left')
phase2LogSources = {'Saturation_log' : 'Saturation_p50',
                    'Perf. Index_log' : 'Perf. Index_p50',
                    'CVP_log' : 'CVP_p50',
                    'MAP_log' : 'MAP_p50'}
for logColumn, medianColumn in phase2LogSources.items() :
    phase2DF[logColumn] = np.log(phase2DF[medianColumn])
phase2Merged = phase2DF.merge(cleanAnesthesiaEventPivotDF , on = 'CPR')
phase2Duration = phase2Merged['Aorta tang af'] - phase2Merged['Aorta tang på']
phase2Merged['TimeOfPhase'] = phase2Duration / pd.Timedelta(minutes = 1)
# The raw event timestamps are not part of the phase table.
phase2DF = phase2Merged.drop(columns = ['Aorta tang på', 'Aorta tang af' , 'Induktion' , 'Stop Data Indsamling'])
phase2DF

In [None]:
# Phase 3: combine anesthesia and hemodynamic summaries, add the natural log
# of each median (p50) column and the duration from 'Aorta tang af' to
# 'Stop Data Indsamling' in minutes.
phase3DF = phase3AnesDF.merge(phase3HemoDF , on = 'CPR' , how = 'left')
phase3LogSources = {'ETCO2_log' : 'ETCO2_p50',
                    'Saturation_log' : 'Saturation_p50',
                    'Perf. Index_log' : 'Perf. Index_p50',
                    'Pulse_log' : 'Pulse_p50',
                    'CVP_log' : 'CVP_p50',
                    'MAP_log' : 'MAP_p50'}
for logColumn, medianColumn in phase3LogSources.items() :
    phase3DF[logColumn] = np.log(phase3DF[medianColumn])
phase3Merged = phase3DF.merge(cleanAnesthesiaEventPivotDF , on = 'CPR')
phase3Duration = phase3Merged['Stop Data Indsamling'] - phase3Merged['Aorta tang af']
phase3Merged['TimeOfPhase'] = phase3Duration / pd.Timedelta(minutes = 1)
# The raw event timestamps are not part of the phase table.
phase3DF = phase3Merged.drop(columns = ['Aorta tang på', 'Aorta tang af' , 'Induktion' , 'Stop Data Indsamling'])
phase3DF

In [None]:
# Add the procedure-code indicators to the phase 3 table (inner join on CPR).
phase3DF = phase3DF.merge(SKSCodes , on = 'CPR')
phase3DF

In [None]:
# Restrict the outcome table to the patients left in the pre-op table.
PATSdf = PATSdf.loc[PATSdf['CPR'].isin(preOpData['CPR'])].reset_index(drop = True)
PATSdf

Final changes

In [None]:
organFailureDF['AKI_any'].value_counts()

In [None]:
organFailureDF['AKI2_moderate'].value_counts()

In [None]:
organFailureDF['ventilatortime_gt_6h'].value_counts()

In [None]:
organFailureDF['ventilatortime_gt_12h'].value_counts()

In [None]:
# Restrict the event table to the final patients and store one row per
# patient (the first one encountered) with the end time of the procedure.
cleanAnesthesiaEventDF = cleanAnesthesiaEventDF.loc[cleanAnesthesiaEventDF['CPR'].isin(preOpData['CPR'])]
finalPatients = cleanAnesthesiaEventDF.drop_duplicates(subset = ['CPR'] , keep = 'first').reset_index(drop = True)
finalPatients.to_csv(r'FilteredData/final patient list with end times.csv', columns = ['CPR', 'EndOfProcedure'])
len(set(cleanAnesthesiaEventDF['CPR']))

In [None]:
# Restrict the population table to the final patients.
populationDF = populationDF.loc[populationDF['CPR'].isin(preOpData['CPR'])].reset_index(drop = True)
len(set(populationDF['CPR']))

In [None]:
# Attach every outcome by patient id. A plain `PATSdf[col] = other[col]` aligns
# on the positional row index, which is only correct when both frames list
# exactly the same patients, once each, in the same order. That does not hold:
#   * PATSdf is sorted by CPR, populationDF keeps its own row order;
#   * cleanAnesthesiaEventDF can hold several rows per patient;
#   * organFailureDF is keyed by 'cpr' and may contain other patients.
# For a source with several rows per patient the first row is used.
stayByCPR = populationDF.drop_duplicates(subset = 'CPR').set_index('CPR')['DurationInDaysAfterOperation']
operationMinutesByCPR = cleanAnesthesiaEventDF.drop_duplicates(subset = 'CPR').set_index('CPR')['TimeInMinutes']
organFailureByCPR = organFailureDF.drop_duplicates(subset = 'cpr').set_index('cpr')
yesNoToInt = {'Yes': 1, 'No': 0}

PATSdf['TimeOfStayInDays'] = PATSdf['CPR'].map(stayByCPR)
PATSdf['TimeOfOperationInMinutes'] = PATSdf['CPR'].map(operationMinutesByCPR)
PATSdf['ventilatorTime6h'] = PATSdf['CPR'].map(organFailureByCPR['ventilatortime_gt_6h']).map(yesNoToInt)
PATSdf['AKI'] = PATSdf['CPR'].map(organFailureByCPR['AKI_any']).map(yesNoToInt)

In [None]:
# Replace the real CPR numbers by anonymous integer ids. All five frames have
# to share ONE code book: factorizing each frame on its own numbers the
# patients by their order of appearance in that frame, so the same patient can
# end up with a different id in every output file.
# preOpData comes first, so its ids still run 0, 1, 2, ... in row order.
allCPR = pd.concat([preOpData['CPR'], phase1DF['CPR'], phase2DF['CPR'], phase3DF['CPR'], PATSdf['CPR']], ignore_index = True)
_, cprCodeBook = pd.factorize(allCPR)

# get_indexer returns the position of every CPR in the shared code book
# (-1 for a missing CPR, the same sentinel pd.factorize uses).
phase1DF['CPR'] = cprCodeBook.get_indexer(phase1DF['CPR'])
phase2DF['CPR'] = cprCodeBook.get_indexer(phase2DF['CPR'])
phase3DF['CPR'] = cprCodeBook.get_indexer(phase3DF['CPR'])
preOpData['CPR'] = cprCodeBook.get_indexer(preOpData['CPR'])
PATSdf['CPR'] = cprCodeBook.get_indexer(PATSdf['CPR'])

In [None]:
# Encode sex as 1 ('Kvinde') / 0 ('Mand') and store the whole pre-op table as
# integers; the cast raises if any value is missing afterwards.
preOpData = preOpData.assign(Sex = preOpData['Sex'].map({'Kvinde': 1, 'Mand': 0})).astype(int)
# Remove the two listed procedure-code columns from the phase 3 table.
phase3DF = phase3DF.drop(['FD' , 'FQ'] , axis = 1)

In [None]:
# Write the final tables without the row index.
finalExports = ((phase1DF , r'FinalData\phase1.csv'),
                (phase2DF , r'FinalData\phase2.csv'),
                (phase3DF , r'FinalData\phase3.csv'),
                (preOpData , r'FinalData\PreOp.csv'))
for exportFrame, exportPath in finalExports :
    exportFrame.to_csv(exportPath , index = False)

# Only the listed outcome columns of PATSdf are exported.
outcomeColumns = ['CPR',
                  'within30Days' ,
                  'within90Days' ,
                  'within365Days' ,
                  'TimeOfStayInDays' ,
                  'TimeOfOperationInMinutes',
                  'ventilatorTime6h',
                  'AKI']
PATSdf.to_csv(r'FinalData\OutComes.csv' , index = False , columns = outcomeColumns)

In [None]:
preOpData