In [1]:
# Install necessary packages
!pip install tableone
!pip install openpyxl

Collecting tableone
  Downloading tableone-0.7.12-py3-none-any.whl (32 kB)
Collecting numpy>=1.19.1
  Downloading numpy-1.19.5-cp36-cp36m-manylinux2010_x86_64.whl (14.8 MB)
[K     |████████████████████████████████| 14.8 MB 76.3 MB/s eta 0:00:01
[?25hCollecting tableone
  Downloading tableone-0.7.11-py3-none-any.whl (32 kB)
  Downloading tableone-0.7.10-py2.py3-none-any.whl (32 kB)
Collecting tabulate>=0.8.2
  Downloading tabulate-0.8.10-py3-none-any.whl (29 kB)
Collecting statsmodels>=0.8.0
  Downloading statsmodels-0.12.2-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 117.0 MB/s eta 0:00:01
Collecting patsy>=0.5
  Downloading patsy-0.5.3-py2.py3-none-any.whl (233 kB)
[K     |████████████████████████████████| 233 kB 114.2 MB/s eta 0:00:01
[?25hInstalling collected packages: patsy, tabulate, statsmodels, tableone
Successfully installed patsy-0.5.3 statsmodels-0.12.2 tableone-0.7.10 tabulate-0.8.10


In [2]:
import os
import sys
from pathlib import Path
from IPython.display import display

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None) # Display all columns in a pandas dataframe
pd.set_option('display.max_rows', 200)     # Display max 200 rows in a pandas dataframe
import matplotlib.pyplot as plt
import random

from tableone import TableOne

cwd = Path(os.getcwd())
print(os.listdir("/"))

['bin', 'boot', 'dev', 'etc', 'home', 'lib', 'lib64', 'media', 'mnt', 'opt', 'proc', 'root', 'run', 'sbin', 'srv', 'sys', 'tmp', 'usr', 'var', 'get-docker.sh', 'dxdata-0.36.1-py2.py3-none-any.whl', 'install_r_kernel.R', 'install_r_packages.R', '.dockerenv']


In [3]:
print(os.listdir("/mnt/project/data/processed"))

['1', 'anamnesis.csv', 'densitometry.csv', 'fractures.csv', 'hesin_fractures.csv', 'icd10_codes.csv', 'medicaments.csv', 'merged_table.csv']


In [4]:
df_anamnesis = pd.read_csv("/mnt/project/data/processed/anamnesis.csv", parse_dates=["date_i2"]).rename(columns={"date_i2": "date", "age_i2": "age"})
df_densitometry = pd.read_csv("/mnt/project/data/processed/densitometry.csv", parse_dates=["date_i2"]).rename(columns={"date_i2": "date"})
df_medicaments = pd.read_csv("/mnt/project/data/processed/medicaments.csv", parse_dates=["date_i2"]).rename(columns={"date_i2": "date"})
df_fractures = pd.read_csv("/mnt/project/data/processed/hesin_fractures.csv", parse_dates=["dxa_date"]).rename(columns={"eid": "patientId", "dxa_date": "date"})

# Drop features contained in multiple dataframes
df_anamnesis.drop("postmenopausal", axis=1, inplace=True)
df_densitometry.drop(["height_i2", "weight_i2", "body_mass_index_i2", "age_i2"], axis=1, inplace=True)
df_fractures.drop("last_follow_up", axis=1, inplace=True)

display(df_anamnesis)
display(df_densitometry)
display(df_medicaments)
display(df_fractures)

Unnamed: 0,patientId,date,age,hrt,height,weight,bmi,decrease in height,osteoporotic_fracture_parents,corticosteroids,aromatase_inhibitors,antiepileptic_drugs,ankylosing_spondylitis,copd,hyperpara,type_1_diabetes,rheumatoid_arthritis,number_of_falls,early_menopause,nicotin,alcohol,gastrointestinal_disease
0,1000031,2021-10-21,79,True,153.0,73.5,31.8127,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False
1,1000344,2015-02-23,58,False,154.0,48.3,20.6331,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False
2,1000898,2018-11-07,59,True,168.0,85.9,31.1729,False,False,False,False,False,False,False,False,False,False,More than one fall,False,False,False,False
3,1000946,2018-02-12,67,True,165.0,84.9,31.1846,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False
4,1001273,2019-09-14,54,False,164.0,91.0,33.4252,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23532,6023313,2014-11-20,72,False,159.0,69.3,28.1147,False,False,False,False,False,False,False,False,False,False,More than one fall,False,False,False,False
23533,6023403,2017-09-26,75,False,162.0,75.1,28.6161,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False
23534,6023555,2019-06-20,63,False,157.0,66.2,27.5546,False,False,False,False,False,False,False,False,False,False,Only one fall,False,False,False,False
23535,6023580,2019-10-25,57,False,165.0,64.4,23.9441,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False


Unnamed: 0,patientId,date,tbs_ls,tscore_ls,tscore_neck,tscore_totalHip
0,1000344,2015-02-23,1.200,-0.719586,-0.863189,-0.928921
1,1000898,2018-11-07,1.095,-0.309509,-0.845577,-0.813459
2,1000946,2018-02-12,1.070,0.548010,-0.246668,0.219435
3,1001554,2018-03-22,1.017,-2.730180,-0.617614,-0.995070
4,1001643,2015-08-04,1.125,-0.986464,-2.195810,-2.392520
...,...,...,...,...,...,...
11698,6020460,2015-12-02,1.170,-1.600300,-1.628120,-1.953780
11699,6020702,2015-09-17,1.052,-2.588480,-1.163530,-1.465380
11700,6022316,2017-09-19,1.237,0.424648,-0.708455,-0.463218
11701,6023119,2015-10-02,1.294,1.472440,0.914185,1.294470


Unnamed: 0,patientId,date,bisphosphonates_prior,bisphosphonates_current,bisphosphonates_new,serm_prior,serm_current,serm_new,teriparatide_prior,teriparatide_current,teriparatide_new,hrt_prior,hrt_current,hrt_new
0,1001643,2015-08-04,False,False,False,False,False,False,False,False,False,True,False,False
1,1004131,2015-05-16,False,False,False,False,False,False,False,False,False,False,False,False
2,1004471,2016-06-22,False,False,False,False,False,False,False,False,False,False,False,False
3,1005492,2016-06-13,False,False,False,False,False,False,False,False,False,False,False,False
4,1007944,2018-11-17,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23532,6018670,2016-01-04,False,False,False,False,False,False,False,False,False,True,False,False
23533,6018961,2016-01-23,False,False,False,False,False,False,False,False,False,False,False,False
23534,6020935,2017-10-17,False,False,False,False,False,False,False,False,False,True,False,False
23535,6021762,2019-06-09,False,False,False,False,False,False,False,False,False,True,False,False


Unnamed: 0,patientId,date,vertebral_fx,hip_fx,wrist_fx,any_fx,vertebral_right_censored,hip_right_censored,wrist_right_censored,any_right_censored,imminent_fx,previous_fx
0,1001554,2018-03-22,6.8667,6.8667,6.8667,6.8667,True,True,True,True,0.0,0.0
1,1003176,2019-03-02,22.7684,22.7684,22.7684,22.7684,True,True,True,True,0.0,0.0
2,1004439,2019-01-27,22.3413,22.3413,22.3413,22.3413,True,True,True,True,0.0,0.0
3,1005492,2016-06-13,62.3257,62.3257,62.3257,62.3257,True,True,True,True,0.0,0.0
4,1006076,2018-09-18,34.3334,34.3334,34.3334,34.3334,True,True,True,True,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8387,6020460,2015-12-02,20.6657,20.6657,20.6657,20.6657,True,True,True,True,0.0,0.0
8388,6020935,2017-10-17,28.3538,28.3538,28.3538,28.3538,True,True,True,True,0.0,0.0
8389,6023119,2015-10-02,45.6026,45.6026,45.6026,45.6026,True,True,True,True,0.0,0.0
8390,6023313,2014-11-20,68.4367,68.4367,68.4367,68.4367,True,True,True,True,0.0,0.0


In [5]:
# join the tables
merged_table = df_fractures.set_index(["patientId", "date"]).join(df_anamnesis.set_index(["patientId", "date"]), how="left")
merged_table = merged_table.join(df_densitometry.set_index(["patientId", "date"]), how="left")
merged_table = merged_table.join(df_medicaments.set_index(["patientId", "date"]), how="left").reset_index()
merged_table

Unnamed: 0,patientId,date,vertebral_fx,hip_fx,wrist_fx,any_fx,vertebral_right_censored,hip_right_censored,wrist_right_censored,any_right_censored,imminent_fx,previous_fx,age,hrt,height,weight,bmi,decrease in height,osteoporotic_fracture_parents,corticosteroids,aromatase_inhibitors,antiepileptic_drugs,ankylosing_spondylitis,copd,hyperpara,type_1_diabetes,rheumatoid_arthritis,number_of_falls,early_menopause,nicotin,alcohol,gastrointestinal_disease,tbs_ls,tscore_ls,tscore_neck,tscore_totalHip,bisphosphonates_prior,bisphosphonates_current,bisphosphonates_new,serm_prior,serm_current,serm_new,teriparatide_prior,teriparatide_current,teriparatide_new,hrt_prior,hrt_current,hrt_new
0,1001554,2018-03-22,6.8667,6.8667,6.8667,6.8667,True,True,True,True,0.0,0.0,67,False,153.0,56.7,24.2215,False,False,False,False,False,True,False,False,False,False,No falls,False,False,False,False,1.017,-2.730180,-0.617614,-0.995070,False,False,False,False,False,False,False,False,False,False,False,False
1,1003176,2019-03-02,22.7684,22.7684,22.7684,22.7684,True,True,True,True,0.0,0.0,57,False,166.0,62.4,22.3744,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,False,False,False
2,1004439,2019-01-27,22.3413,22.3413,22.3413,22.3413,True,True,True,True,0.0,0.0,62,True,175.0,68.9,23.2896,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,False,True,False
3,1005492,2016-06-13,62.3257,62.3257,62.3257,62.3257,True,True,True,True,0.0,0.0,58,False,164.0,100.1,36.3260,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.199,2.043910,0.874155,0.755662,False,False,False,False,False,False,False,False,False,False,False,False
4,1006076,2018-09-18,34.3334,34.3334,34.3334,34.3334,True,True,True,True,0.0,0.0,69,False,161.0,57.7,22.9677,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.089,-0.480305,-0.410832,-0.063631,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8387,6020460,2015-12-02,20.6657,20.6657,20.6657,20.6657,True,True,True,True,0.0,0.0,64,True,171.0,69.8,24.7307,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.170,-1.600300,-1.628120,-1.953780,False,False,False,False,False,False,False,False,False,False,False,False
8388,6020935,2017-10-17,28.3538,28.3538,28.3538,28.3538,True,True,True,True,0.0,0.0,69,True,160.0,74.7,0.0029,False,False,False,False,False,False,True,False,False,False,Only one fall,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,True,False,False
8389,6023119,2015-10-02,45.6026,45.6026,45.6026,45.6026,True,True,True,True,0.0,0.0,57,True,161.0,62.7,23.8912,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.294,1.472440,0.914185,1.294470,False,False,False,False,False,False,False,False,False,False,True,False
8390,6023313,2014-11-20,68.4367,68.4367,68.4367,68.4367,True,True,True,True,0.0,0.0,72,False,159.0,69.3,28.1147,False,False,False,False,False,False,False,False,False,False,More than one fall,False,False,False,False,1.099,-0.681709,-1.592420,-1.844660,False,False,False,False,False,False,False,False,False,False,False,False


In [6]:
merged_table.isna().sum()

patientId                           0
date                                0
vertebral_fx                        0
hip_fx                              0
wrist_fx                            0
any_fx                              0
vertebral_right_censored            0
hip_right_censored                  0
wrist_right_censored                0
any_right_censored                  0
imminent_fx                         0
previous_fx                         0
age                                 0
hrt                                 0
height                              0
weight                              0
bmi                                 0
decrease in height                  0
osteoporotic_fracture_parents       0
corticosteroids                     0
aromatase_inhibitors                0
antiepileptic_drugs                 0
ankylosing_spondylitis              0
copd                                0
hyperpara                           0
type_1_diabetes                     0
rheumatoid_a

In [7]:
# Check how many patients have osteoporosis (T-Score <= -2.5)
merged_table["osteoporosis"] = merged_table[["tscore_ls", "tscore_neck", "tscore_totalHip"]].min(axis=1) <= -2.5
merged_table["osteoporosis"].sum()

642

In [8]:
merged_table.to_csv("merged_table.csv", index=False)

In [10]:
%%bash
dx upload merged_table.csv --path /data/final/

ID                          file-GPkb7jQJjxxGkvp2Q8kGJgVy
Class                       file
Project                     project-GP77K38Jjxx9XzFP2KzPQyfG
Folder                      /data/final
Name                        merged_table.csv
State                       closing
Visibility                  visible
Types                       -
Properties                  -
Tags                        -
Outgoing links              -
Created                     Mon Feb 20 11:05:06 2023
Created by                  ollehman
 via the job                job-GPkZzZ8Jjxx0x5JVxzQjV9qV
Last modified               Mon Feb 20 11:05:08 2023
Media type                  
archivalState               "live"
cloudAccount                "cloudaccount-dnanexus"


In [9]:
data_with_tscores = merged_table.dropna()
[print(data_with_tscores[col].value_counts(), "\n") for col in data_with_tscores.loc[:,"vertebral_right_censored":"any_right_censored"].columns]
data_with_tscores

True     5444
False      30
Name: vertebral_right_censored, dtype: int64 

True     5416
False      58
Name: hip_right_censored, dtype: int64 

True     5466
False       8
Name: wrist_right_censored, dtype: int64 

True     5184
False     290
Name: any_right_censored, dtype: int64 



Unnamed: 0,patientId,date,vertebral_fx,hip_fx,wrist_fx,any_fx,vertebral_right_censored,hip_right_censored,wrist_right_censored,any_right_censored,imminent_fx,previous_fx,age,hrt,height,weight,bmi,decrease in height,osteoporotic_fracture_parents,corticosteroids,aromatase_inhibitors,antiepileptic_drugs,ankylosing_spondylitis,copd,hyperpara,type_1_diabetes,rheumatoid_arthritis,number_of_falls,early_menopause,nicotin,alcohol,gastrointestinal_disease,tbs_ls,tscore_ls,tscore_neck,tscore_totalHip,bisphosphonates_prior,bisphosphonates_current,bisphosphonates_new,serm_prior,serm_current,serm_new,teriparatide_prior,teriparatide_current,teriparatide_new,hrt_prior,hrt_current,hrt_new,osteoporosis
0,1001554,2018-03-22,6.8667,6.8667,6.8667,6.8667,True,True,True,True,0.0,0.0,67,False,153.0,56.7,24.2215,False,False,False,False,False,True,False,False,False,False,No falls,False,False,False,False,1.017,-2.730180,-0.617614,-0.995070,False,False,False,False,False,False,False,False,False,False,False,False,True
3,1005492,2016-06-13,62.3257,62.3257,62.3257,62.3257,True,True,True,True,0.0,0.0,58,False,164.0,100.1,36.3260,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.199,2.043910,0.874155,0.755662,False,False,False,False,False,False,False,False,False,False,False,False,False
4,1006076,2018-09-18,34.3334,34.3334,34.3334,34.3334,True,True,True,True,0.0,0.0,69,False,161.0,57.7,22.9677,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.089,-0.480305,-0.410832,-0.063631,False,False,False,False,False,False,False,False,False,False,False,False,False
5,1006157,2017-05-24,44.5512,44.5512,44.5512,44.5512,True,True,True,True,0.0,0.0,69,True,165.0,77.3,28.7403,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.017,-0.431716,-0.895375,-0.643109,False,False,False,False,False,False,False,False,False,True,False,False,False
8,1008512,2018-11-15,7.8852,7.8852,7.8852,7.8852,True,True,True,True,0.0,0.0,56,False,156.0,50.2,21.1671,False,False,False,False,False,False,False,False,False,False,More than one fall,False,False,False,False,1.042,-2.311220,-0.638753,-1.115440,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8383,6016008,2017-01-26,53.4878,53.4878,53.4878,53.4878,True,True,True,True,0.0,0.0,72,True,159.0,63.2,26.3059,False,False,False,False,False,False,False,False,False,False,Only one fall,False,False,False,False,0.992,-1.574190,-1.961960,-2.128020,False,False,False,False,False,False,False,False,False,True,False,False,False
8385,6018073,2015-04-07,61.9315,61.9315,61.9315,61.9315,True,True,True,True,0.0,0.0,72,True,159.0,52.7,21.6552,False,True,False,False,False,False,False,False,False,False,No falls,True,False,False,False,1.132,-3.188170,-2.896560,-2.954540,False,False,False,False,False,False,False,False,False,True,False,False,True
8387,6020460,2015-12-02,20.6657,20.6657,20.6657,20.6657,True,True,True,True,0.0,0.0,64,True,171.0,69.8,24.7307,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.170,-1.600300,-1.628120,-1.953780,False,False,False,False,False,False,False,False,False,False,False,False,False
8389,6023119,2015-10-02,45.6026,45.6026,45.6026,45.6026,True,True,True,True,0.0,0.0,57,True,161.0,62.7,23.8912,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.294,1.472440,0.914185,1.294470,False,False,False,False,False,False,False,False,False,False,True,False,False


In [10]:
def get_x_year_fractures(visit, years=2):
    fx_types = ["vertebral", "hip", "wrist", "any"]
    for fx_type in fx_types:
        if visit[f"{fx_type}_right_censored"] == 0 and visit[f"{fx_type}_fx"] <= (years*12):
            visit[f"{fx_type}_fx"] = 1
        else:
            visit[f"{fx_type}_fx"] = 0
    return visit

In [13]:
big_table_2y = merged_table.apply(lambda x: get_x_year_fractures(x, years=2), axis=1)
big_table_2y

Unnamed: 0,patientId,date,vertebral_fx,hip_fx,wrist_fx,any_fx,vertebral_right_censored,hip_right_censored,wrist_right_censored,any_right_censored,imminent_fx,previous_fx,age,hrt,height,weight,bmi,decrease in height,osteoporotic_fracture_parents,corticosteroids,aromatase_inhibitors,antiepileptic_drugs,ankylosing_spondylitis,copd,hyperpara,type_1_diabetes,rheumatoid_arthritis,number_of_falls,early_menopause,nicotin,alcohol,gastrointestinal_disease,tbs_ls,tscore_ls,tscore_neck,tscore_totalHip,bisphosphonates_prior,bisphosphonates_current,bisphosphonates_new,serm_prior,serm_current,serm_new,teriparatide_prior,teriparatide_current,teriparatide_new,hrt_prior,hrt_current,hrt_new,osteoporosis
0,1001554,2018-03-22,0,0,0,0,True,True,True,True,0.0,0.0,67,False,153.0,56.7,24.2215,False,False,False,False,False,True,False,False,False,False,No falls,False,False,False,False,1.017,-2.730180,-0.617614,-0.995070,False,False,False,False,False,False,False,False,False,False,False,False,True
1,1003176,2019-03-02,0,0,0,0,True,True,True,True,0.0,0.0,57,False,166.0,62.4,22.3744,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,False,False,False,False
2,1004439,2019-01-27,0,0,0,0,True,True,True,True,0.0,0.0,62,True,175.0,68.9,23.2896,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,False,True,False,False
3,1005492,2016-06-13,0,0,0,0,True,True,True,True,0.0,0.0,58,False,164.0,100.1,36.3260,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.199,2.043910,0.874155,0.755662,False,False,False,False,False,False,False,False,False,False,False,False,False
4,1006076,2018-09-18,0,0,0,0,True,True,True,True,0.0,0.0,69,False,161.0,57.7,22.9677,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.089,-0.480305,-0.410832,-0.063631,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8387,6020460,2015-12-02,0,0,0,0,True,True,True,True,0.0,0.0,64,True,171.0,69.8,24.7307,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.170,-1.600300,-1.628120,-1.953780,False,False,False,False,False,False,False,False,False,False,False,False,False
8388,6020935,2017-10-17,0,0,0,0,True,True,True,True,0.0,0.0,69,True,160.0,74.7,0.0029,False,False,False,False,False,False,True,False,False,False,Only one fall,False,False,False,False,,,,,False,False,False,False,False,False,False,False,False,True,False,False,False
8389,6023119,2015-10-02,0,0,0,0,True,True,True,True,0.0,0.0,57,True,161.0,62.7,23.8912,False,False,False,False,False,False,False,False,False,False,No falls,False,False,False,False,1.294,1.472440,0.914185,1.294470,False,False,False,False,False,False,False,False,False,False,True,False,False
8390,6023313,2014-11-20,0,0,0,0,True,True,True,True,0.0,0.0,72,False,159.0,69.3,28.1147,False,False,False,False,False,False,False,False,False,False,More than one fall,False,False,False,False,1.099,-0.681709,-1.592420,-1.844660,False,False,False,False,False,False,False,False,False,False,False,False,False


In [14]:
binary_columns = [
    "imminent_fx", 
    "previous_fx",
]
for col in binary_columns:
    big_table_2y.loc[:, col] = big_table_2y.loc[:,col].apply(lambda x: int(bool(x)))
    print(col)
    print(big_table_2y[col].value_counts())

imminent_fx
0    8360
1      32
Name: imminent_fx, dtype: int64
previous_fx
0    8133
1     259
Name: previous_fx, dtype: int64


In [15]:
big_table_2y["age"] = big_table_2y["age"].apply(lambda x: x + np.exp(random.randint(-10, -5)))

In [16]:
big_table_2y["age"]

0       67.000045
1       57.000335
2       62.000912
3       58.000045
4       69.000045
          ...    
8387    64.002479
8388    69.000123
8389    57.000045
8390    72.006738
8391    75.002479
Name: age, Length: 8392, dtype: float64

In [17]:
columns = big_table_2y.columns.tolist()

# Drop non-relevant columns
columns = [col for col in columns if col not in [
    "patientId", "date", "any_fx", "vertebral_right_censored", "hip_right_censored", "wrist_right_censored", "any_right_censored",
    ]]

#big_table_2y["any_fx"] = big_table_2y["any_fx"].apply(lambda x: int(bool(x)))

table1 = TableOne(big_table_2y, columns, missing=True, normal_test=True, tukey_test=True, groupby="any_fx", pval=True)
table1

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,8392,8185,207,
"vertebral_fx, n (%)",0,0.0,8372 (99.8),8185 (100.0),187 (90.3),<0.001
"vertebral_fx, n (%)",1,,20 (0.2),,20 (9.7),
"hip_fx, n (%)",0,0.0,8368 (99.7),8185 (100.0),183 (88.4),<0.001
"hip_fx, n (%)",1,,24 (0.3),,24 (11.6),
"wrist_fx, n (%)",0,0.0,8387 (99.9),8185 (100.0),202 (97.6),<0.001
"wrist_fx, n (%)",1,,5 (0.1),,5 (2.4),
"imminent_fx, n (%)",0,0.0,8360 (99.6),8156 (99.6),204 (98.6),0.043
"imminent_fx, n (%)",1,,32 (0.4),29 (0.4),3 (1.4),
"previous_fx, n (%)",0,0.0,8133 (96.9),7945 (97.1),188 (90.8),<0.001


In [19]:
table1.to_excel("table1.xlsx")

In [20]:
%%bash
dx upload table1.xlsx

ID                          file-GPkbB98Jjxx6xbp3y1Yz5kKK
Class                       file
Project                     project-GP77K38Jjxx9XzFP2KzPQyfG
Folder                      /
Name                        table1.xlsx
State                       closing
Visibility                  visible
Types                       -
Properties                  -
Tags                        -
Outgoing links              -
Created                     Mon Feb 20 11:10:29 2023
Created by                  ollehman
 via the job                job-GPkZzZ8Jjxx0x5JVxzQjV9qV
Last modified               Mon Feb 20 11:10:30 2023
Media type                  
archivalState               "live"
cloudAccount                "cloudaccount-dnanexus"


## Create a table 1 for all fractures (not just in next 2 years)

In [21]:
big_table_inf = merged_table.apply(lambda x: get_x_year_fractures(x, years=100), axis=1)
binary_columns = [
    "imminent_fx", 
    "previous_fx",
]
for col in binary_columns:
    big_table_inf.loc[:, col] = big_table_inf.loc[:,col].apply(lambda x: int(bool(x)))
    print(col)
    print(big_table_inf[col].value_counts())
    
big_table_inf["age"] = big_table_inf["age"].apply(lambda x: x + np.exp(random.randint(-10, -5)))

columns = big_table_inf.columns.tolist()
# Drop non-relevant columns
columns = [col for col in columns if col not in [
    "patientId", "date", "any_fx", "vertebral_right_censored", "hip_right_censored", "wrist_right_censored", "any_right_censored",
    ]]

table1 = TableOne(big_table_inf, columns, missing=True, normal_test=True, tukey_test=True, groupby="any_fx", pval=True)
table1

imminent_fx
0    8360
1      32
Name: imminent_fx, dtype: int64
previous_fx
0    8133
1     259
Name: previous_fx, dtype: int64


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,8392,7957,435,
"vertebral_fx, n (%)",0,0.0,8346 (99.5),7957 (100.0),389 (89.4),<0.001
"vertebral_fx, n (%)",1,,46 (0.5),,46 (10.6),
"hip_fx, n (%)",0,0.0,8314 (99.1),7957 (100.0),357 (82.1),<0.001
"hip_fx, n (%)",1,,78 (0.9),,78 (17.9),
"wrist_fx, n (%)",0,0.0,8380 (99.9),7957 (100.0),423 (97.2),<0.001
"wrist_fx, n (%)",1,,12 (0.1),,12 (2.8),
"imminent_fx, n (%)",0,0.0,8360 (99.6),7928 (99.6),432 (99.3),0.229
"imminent_fx, n (%)",1,,32 (0.4),29 (0.4),3 (0.7),
"previous_fx, n (%)",0,0.0,8133 (96.9),7732 (97.2),401 (92.2),<0.001


In [22]:
table1.to_excel("table1_global.xlsx")

In [23]:
%%bash
dx upload table1_global.xlsx

ID                          file-GPkbBGjJjxx6vXJkJFbVVPyX
Class                       file
Project                     project-GP77K38Jjxx9XzFP2KzPQyfG
Folder                      /
Name                        table1_global.xlsx
State                       closing
Visibility                  visible
Types                       -
Properties                  -
Tags                        -
Outgoing links              -
Created                     Mon Feb 20 11:10:43 2023
Created by                  ollehman
 via the job                job-GPkZzZ8Jjxx0x5JVxzQjV9qV
Last modified               Mon Feb 20 11:10:45 2023
Media type                  
archivalState               "live"
cloudAccount                "cloudaccount-dnanexus"


In [11]:
big_table_inf = data_with_tscores.apply(lambda x: get_x_year_fractures(x, years=100), axis=1)
binary_columns = [
    "imminent_fx", 
    "previous_fx",
]
for col in binary_columns:
    big_table_inf.loc[:, col] = big_table_inf.loc[:,col].apply(lambda x: int(bool(x)))
    print(col)
    print(big_table_inf[col].value_counts())
    
big_table_inf["age"] = big_table_inf["age"].apply(lambda x: x + np.exp(random.randint(-10, -5)))

columns = big_table_inf.columns.tolist()
# Drop non-relevant columns
columns = [col for col in columns if col not in [
    "patientId", "date", "any_fx", "vertebral_right_censored", "hip_right_censored", "wrist_right_censored", "any_right_censored",
    ]]

table1 = TableOne(big_table_inf, columns, missing=True, normal_test=True, tukey_test=True, groupby="any_fx", pval=True)
table1

imminent_fx
0    5456
1      18
Name: imminent_fx, dtype: int64
previous_fx
0    5330
1     144
Name: previous_fx, dtype: int64


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx,Grouped by any_fx
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,5474,5184,290,
"vertebral_fx, n (%)",0,0.0,5444 (99.5),5184 (100.0),260 (89.7),<0.001
"vertebral_fx, n (%)",1,,30 (0.5),,30 (10.3),
"hip_fx, n (%)",0,0.0,5416 (98.9),5184 (100.0),232 (80.0),<0.001
"hip_fx, n (%)",1,,58 (1.1),,58 (20.0),
"wrist_fx, n (%)",0,0.0,5466 (99.9),5184 (100.0),282 (97.2),<0.001
"wrist_fx, n (%)",1,,8 (0.1),,8 (2.8),
"imminent_fx, n (%)",0,0.0,5456 (99.7),5167 (99.7),289 (99.7),1.000
"imminent_fx, n (%)",1,,18 (0.3),17 (0.3),1 (0.3),
"previous_fx, n (%)",0,0.0,5330 (97.4),5060 (97.6),270 (93.1),<0.001


In [14]:
table1.to_excel("table1_global_nonan.xlsx")

In [15]:
%%bash
dx upload table1_global_nonan.xlsx

ID                          file-GQ06jFjJjxx96p6pqyv2jj45
Class                       file
Project                     project-GP77K38Jjxx9XzFP2KzPQyfG
Folder                      /
Name                        table1_global_nonan.xlsx
State                       closing
Visibility                  visible
Types                       -
Properties                  -
Tags                        -
Outgoing links              -
Created                     Thu Mar  2 09:28:15 2023
Created by                  ollehman
 via the job                job-GQ067PjJjxxGf4JYj7kV0VYg
Last modified               Thu Mar  2 09:28:17 2023
Media type                  
archivalState               "live"
cloudAccount                "cloudaccount-dnanexus"
