In [1]:
%load_ext autoreload
%autoreload 2

import os
from tqdm.auto import tqdm
import pathlib
import datetime
import subprocess
import numpy as np
import pandas as pd
import lifelines

In [2]:
# Pick the storage root from the host name: hosts whose name contains "sc" use
# the /sc-projects mount, every other host uses the shared analysis mount.
# os.uname() replaces the `!hostname` shell magic so the cell is plain Python
# and also runs outside IPython (e.g. after exporting the notebook to a script).
# `node` stays a one-element list so that `node[0]` keeps working.
node = [os.uname().nodename]
if "sc" in node[0]:
    base_path = "/sc-projects/sc-proj-ukb-cvd"
else:
    base_path = "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
print(base_path)

# Project directory layout, all derived from base_path.
project_label = "22_retina_phewas"
project_path = f"{base_path}/results/projects/{project_label}"
figure_path = f"{project_path}/figures"
output_path = f"{project_path}/data"

# Results of one experiment live in a sub-directory of output_path.
experiment = '230426'
experiment_path = f"{output_path}/{experiment}"
print('experiment path:', experiment_path)

# Create the directories up front; existing ones are left untouched.
for directory in (figure_path, output_path, experiment_path):
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

# Maps a prediction set name to a model label.
# Earlier variants (disabled): "predictions_cropratio0.3", "predictions_cropratio0.5"
# and "predictions_cropratio0.66", each labelled "ConvNextSmall(Retina)+MLP_cropratio<ratio>".
name_dict = {
    "predictions": "ConvNextSmall(Retina)+MLP_cropratio0.66",
}

# Partitions with eye test centers.
# (Use list(range(22)) instead to process all partitions.)
partitions = [4, 5, 7, 9, 10, 20]

/sc-projects/sc-proj-ukb-cvd
experiment path: /sc-projects/sc-proj-ukb-cvd/results/projects/22_retina_phewas/data/230426


In [3]:
# Date tag of this run; used below to locate endpoints.csv and to name the output file.
today = '230426'

# Read all predictions

In [4]:
# Outcome table, indexed by `eid`.
data_outcomes = pd.read_feather(f"{output_path}/baseline_outcomes_220627.feather").set_index("eid")

# Endpoint names come from this run's endpoints.csv, with any "_prevalent"
# marker removed. The file is resolved under `output_path` (which follows the
# host-dependent `base_path`) instead of a hard-coded /sc-projects prefix, so
# the cell also works on hosts that use the other storage root.
endpoints_csv = f"{output_path}/{today}/endpoints.csv"
all_endpoints = sorted(
    name.replace('_prevalent', '') for name in pd.read_csv(endpoints_csv).endpoint.values
)

# Endpoints to leave out of the analysis (currently none).
endpoints_not_overlapping_with_preds = []
endpoints = [name for name in all_endpoints if name not in endpoints_not_overlapping_with_preds]

# Definitions restricted to the selected endpoints, sorted and indexed by endpoint name.
endpoint_defs = (
    pd.read_feather(f"{output_path}/phecode_defs_220306.feather")
    .query("endpoint==@endpoints")
    .sort_values("endpoint")
    .set_index("endpoint")
)


In [5]:
# Sanity check: rows = endpoints kept after filtering, columns = definition fields.
endpoint_defs.shape

(1170, 7)

In [6]:
# Endpoints to process: every endpoint present in the definitions table.
# (For a quick test run, restrict this to e.g. ['phecode_979', 'phecode_202'].)
phecodes = endpoint_defs.index.tolist()

# Model names; each one is part of the prediction file name and becomes a column label.
models = ["Age+Sex", "Age+Sex+Retina"]

In [7]:
# Collect, for every endpoint, one frame indexed by (eid, endpoint) that holds
# the Ft_10 prediction of each model in its own column.
all_preds = []
for phecode in tqdm(phecodes):
    frames_per_model = []
    for model in models:
        # Read this model's prediction file for every partition and keep only
        # the identifier columns plus Ft_10, renamed to the model name.
        partition_frames = [
            pd.read_feather(
                f'{experiment_path}/coxph/predictions'
                f'/{phecode}_{model}_ImageTraining_[]_ConvNeXt_MLPHead_predictions_{partition}.feather'
            )[['eid', 'endpoint', 'Ft_10']].rename(columns={'Ft_10': model})
            for partition in partitions
        ]
        stacked = pd.concat(partition_frames, axis=0)
        frames_per_model.append(stacked.set_index(['eid', 'endpoint']))

    # Join the models side by side on the shared (eid, endpoint) index.
    combined = frames_per_model[0]
    for other in frames_per_model[1:]:
        combined = combined.merge(other, left_index=True, right_index=True)
    all_preds.append(combined)

  0%|          | 0/1170 [00:00<?, ?it/s]

In [8]:
# Stack the per-endpoint frames into one long table.
# The isinstance guard keeps this cell re-runnable: after the first run
# `all_preds` is already a DataFrame, and handing a DataFrame (instead of a
# list of frames) to pd.concat raises a TypeError.
if isinstance(all_preds, list):
    all_preds = pd.concat(all_preds, axis=0)

In [9]:
# Preview the combined table: indexed by (eid, endpoint), one column per model.
all_preds.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age+Sex,Age+Sex+Retina
eid,endpoint,Unnamed: 2_level_1,Unnamed: 3_level_1
1005228,OMOP_4306655,0.011756,0.00417
1006968,OMOP_4306655,0.154481,0.063953
1008081,OMOP_4306655,0.023864,0.015357
1009271,OMOP_4306655,0.014768,0.00746
1009328,OMOP_4306655,0.008035,0.003523


In [10]:
# Relative risk = Age+Sex+Retina prediction divided by the Age+Sex prediction.
# A small epsilon is added to the denominator to avoid dividing by zero.
epsilon = 1e-6
baseline_risk = all_preds['Age+Sex'] + epsilon
all_preds['relative_risks'] = all_preds['Age+Sex+Retina'].div(baseline_risk)

In [11]:
# Reshape long -> wide: one row per eid, one column per endpoint.
long_table = all_preds.reset_index()
relative_risks = long_table.pivot(index='eid', columns='endpoint', values='relative_risks')

In [12]:
# Sanity check: one row per eid, one column per endpoint.
relative_risks.shape

(61256, 1170)

In [13]:
# Preview the wide eid x endpoint table of relative risks.
relative_risks.head()

endpoint,OMOP_4306655,phecode_002,phecode_002-1,phecode_003,phecode_004,phecode_005,phecode_007,phecode_007-1,phecode_008,phecode_009,...,phecode_977,phecode_977-4,phecode_977-41,phecode_977-5,phecode_977-51,phecode_977-52,phecode_977-7,phecode_977-71,phecode_977-72,phecode_979
eid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000128,0.174749,0.446356,0.446607,0.376964,0.456245,0.24947,0.284769,0.289053,0.667286,0.155796,...,0.963882,0.522644,0.544311,1.360218,1.178297,1.48717,0.272905,0.203238,0.271235,0.514556
1000206,0.52077,0.685671,0.732043,0.713312,0.866218,0.277188,0.527613,0.480149,0.913658,0.630599,...,0.741853,0.815088,0.836276,0.868703,,,0.939908,0.819729,0.910948,0.867035
1000212,1.572994,1.392671,1.374585,1.286691,1.262157,0.877082,1.121366,1.15001,0.857802,1.379848,...,1.05423,1.110949,1.115279,0.822944,0.816347,0.733019,1.173676,1.099809,1.185231,1.398514
1000270,1.017653,1.051704,1.050304,1.164627,1.041907,1.214847,0.76214,0.896931,1.176081,1.198521,...,1.090794,0.924574,0.877674,1.046469,0.857232,1.079807,1.119889,1.07659,1.113154,0.868071
1000355,0.779178,1.417964,1.41196,0.930719,1.082596,0.552446,1.416427,1.636062,0.708659,0.777062,...,,1.123843,1.136316,0.987245,,,0.986045,0.863845,0.944397,1.332354


In [14]:
# Persist the wide table; reset_index() turns eid back into a regular column first.
relative_risks_file = f'{experiment_path}/relative_risks_{today}.feather'
relative_risks.reset_index().to_feather(relative_risks_file)

In [15]:
# Echo the path of the feather file written in the previous cell.
f'{experiment_path}/relative_risks_{today}.feather'

'/sc-projects/sc-proj-ukb-cvd/results/projects/22_retina_phewas/data/230426/relative_risks_230426.feather'