<a href="https://colab.research.google.com/github/philipp-lampert/mymandible/blob/main/mymandible.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Welcome to the code for the mymandible.com project
Note: The project is still under active development.

Our dataset contains two distinct categories of missing data, each absent for a different underlying reason.


*   `NaN` is data that is genuinely missing. Had it been recorded, the value would make sense.

*   `N/A` is data that is missing due to it not being applicable. For example, it is illogical to have a value for `fistula_date` if `fistula = False`.

As pandas has only a single notion of a missing value, only `NaN` is treated as truly missing on a technical level. `N/A` is stored as an ordinary string and is therefore simply another category.

This is important when analyzing patterns of missingness and performing imputation: while it would be illogical to try to impute an `N/A` value (as no answer would make sense), imputation is very much a valid approach for `NaN` values (where an answer would make sense).

In [1]:
import pandas as pd

In [34]:
# Load the RedCap export. pandas' default NA markers are switched off and only the
# literal string "NaN" is parsed as missing, so blank cells are kept as empty strings.
# Those blanks are then relabelled "N/A": RedCap leaves all non-applicable fields
# blank, so we have to give them an explicit value here.
df = pd.read_csv(
    "/content/BFlapsRevised_DATA_2023-10-21_2036.csv",
    na_values="NaN",
    keep_default_na=False,
).replace(to_replace="", value="N/A")
df.head()


Unnamed: 0,id,collector_name,other_collector_name,sex_female,indication,comorbidity___none,comorbidity___smoking,comorbidity___alcohol,comorbidity___copd,comorbidity___hypertension,comorbidity___type_1_diabetes,comorbidity___type_2_diabetes,comorbidity___atherosclerosis,comorbidity___coronary_heart_disease,comorbidity___peripheral_artery_disease,comorbidity___hyperlipoproteinemia,comorbidity___hypercholesterolemia,comorbidity___osteoporosis,comorbidity___hypothyroidism,comorbidity___hyperthyroidism,comorbidity___chronic_kidney_disease,comorbidity___factor_v_deficiency,comorbidity___cachexia,comorbidity___bleeding_disorder,comorbidity___autoimmune_disease,comorbidity___other,comorbidity___nan,info_other_comorbidity,which_autoimmune_disease,which_bleeding_disorder,prior_flap,age_surgery_years,flap_donor_site,flap_revision,days_to_flap_revision,radiotherapy___none,radiotherapy___pre_surgery,radiotherapy___post_surgery,radiotherapy___nan,chemotherapy___none,chemotherapy___pre_surgery,chemotherapy___post_surgery,chemotherapy___nan,plate_type,long_plate_thickness,urkens_classification___c,urkens_classification___r,urkens_classification___b,urkens_classification___s,urkens_classification___nan,tmj_replacement_type,flap_segment_count,surgery_duration_min,height_cm,weight_kg,bmi,skin_transplanted,venous_anastomosis_type___end_end,venous_anastomosis_type___end_side,venous_anastomosis_type___nan,venous_anastomosis_tool___coupler,venous_anastomosis_tool___suture,venous_anastomosis_tool___nan,predictors_complete,flap_loss,flap_loss_type,days_to_flap_loss,complication___none,complication___whd_recipient_site,complication___whd_donor_site,complication___abscess,complication___fistula,complication___vestibuloplasty,complication___osteoradionecrosis,complication___bone_exposure,complication___nan,complication_plate___none,complication_plate___exposure,complication_plate___removal,complication_plate___fracture,complication_plate___loosening,complication_plate___nan,plate_exposure_loc
ation___intraoral,plate_exposure_location___extraoral,plate_exposure_location___nan,implant___none,implant___received,implant___planned,implant___plate_removal,implant___iliac_crest_augmentation,implant___nan,days_to_whd_recipient_site,days_to_whd_donor_site,days_to_abscess,days_to_fistula,days_to_vestibuloplasty,days_to_osteoradionecrosis,days_to_bone_exposure,days_to_plate_exposure,days_to_plate_removal,days_to_plate_fracture,days_to_plate_loosening,days_to_implant_received,days_to_implant_planned,days_to_implant_plate_removal,days_to_iliac_crest_augmentation,days_to_follow_up,outcomes_complete,imaging,days_to_imaging,nonunion,days_to_nonunion,nonunion_location___mandible_flap,nonunion_location___flap_flap,nonunion_location___nan,complication_bony___none,complication_bony___fracture,complication_bony___dislocation,complication_bony___nan,days_to_fracture,days_to_dislocation,tmj_luxation,days_to_tmj_luxation,imaging_complete
0,1,philipp,,False,flap_loss,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,bony,29,fibula,none,,1,0,0,0,1,0,0,0,cad_long,,0,0,1,1,0,,three,441,184,92,27.173913,,0,1,0,0,1,0,2,False,,,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,272.0,,,11.0,,,,220.0,260.0,,,,566.0,,478.0,630,2,opg,630.0,True,210.0,1,0,0,0,0,1,0,,210.0,False,,1
1,2,philipp,,True,malignant_tumor,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,,,,none,67,fibula,none,,1,0,0,0,1,0,0,0,cad_long,,0,0,0,1,0,,one,430,160,51,19.921875,False,0,1,0,0,1,0,2,False,,,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,,,,,503.0,,,,,,,,258.0,308.0,308.0,2345,2,opg,2185.0,False,,0,0,0,1,0,0,0,,,False,,1
2,3,philipp,,False,osteoradionecrosis,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,,,,bony,77,fibula,none,,0,1,0,0,0,1,0,0,cad_long,2_0,1,1,1,0,0,fibula,two,478,188,77,21.785876,True,1,0,0,0,1,0,2,False,,,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,,,,,629.0,,,,,,,334.0,,,,2338,2,opg,2240.0,False,,0,0,0,1,0,0,0,,,False,,1
3,4,philipp,,True,malignant_tumor,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,none,73,fibula,none,,0,0,1,0,0,0,1,0,cad_long,2_5,0,0,1,1,0,,three,474,175,61,19.918367,True,0,1,0,0,1,0,2,False,,,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,,,,,,,,,,,,1714.0,,790.0,,2154,2,opg,2042.0,False,,0,0,0,1,0,0,0,,,False,,1
4,5,philipp,,False,malignant_tumor,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,none,60,fibula,none,,0,0,1,0,0,0,1,0,cad_long,2_5,0,0,1,1,0,,three,536,174,70,23.120624,True,0,1,0,0,1,0,2,True,total,510.0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,230.0,,440.0,,,445.0,,230.0,350.0,200.0,,,,,,1713,2,none,,,,0,0,0,0,0,0,0,,,,,1


In [35]:
# Drop the three "*_complete" columns (presumably REDCap form-status flags rather
# than clinical variables -- TODO confirm).
# errors="ignore" keeps this cell idempotent: `df` is reassigned here, so re-running
# the cell would otherwise raise KeyError because the columns are already gone.
df = df.drop(
    columns=["predictors_complete", "outcomes_complete", "imaging_complete"],
    errors="ignore",
)
df.columns.values  # display the remaining column names

array(['id', 'collector_name', 'other_collector_name', 'sex_female',
       'indication', 'comorbidity___none', 'comorbidity___smoking',
       'comorbidity___alcohol', 'comorbidity___copd',
       'comorbidity___hypertension', 'comorbidity___type_1_diabetes',
       'comorbidity___type_2_diabetes', 'comorbidity___atherosclerosis',
       'comorbidity___coronary_heart_disease',
       'comorbidity___peripheral_artery_disease',
       'comorbidity___hyperlipoproteinemia',
       'comorbidity___hypercholesterolemia', 'comorbidity___osteoporosis',
       'comorbidity___hypothyroidism', 'comorbidity___hyperthyroidism',
       'comorbidity___chronic_kidney_disease',
       'comorbidity___factor_v_deficiency', 'comorbidity___cachexia',
       'comorbidity___bleeding_disorder',
       'comorbidity___autoimmune_disease', 'comorbidity___other',
       'comorbidity___nan', 'info_other_comorbidity',
       'which_autoimmune_disease', 'which_bleeding_disorder',
       'prior_flap', 'age_surgery_ye