## endpoint selection by frequency and cindex performance

In [1]:
import os
import math
import pathlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

In [2]:
# Choose the storage root from the machine's hostname: hosts whose name
# contains "sc" use the /sc-projects mount, every other host uses the shared
# /data path. os.uname() replaces the IPython-only `!hostname` shell capture,
# so the cell is plain Python and also runs outside a notebook kernel.
hostname = os.uname().nodename
node = [hostname]  # list-shaped like the former `!hostname` capture, so node[0] keeps working
if "sc" in hostname:
    base_path = "/sc-projects/sc-proj-ukb-cvd"
else:
    base_path = "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
print(base_path)

# Project-level locations derived from the storage root.
project_label = "22_retina_phewas_220603_fullrun"
project_path = f"{base_path}/results/projects/{project_label}"
figure_path = f"{project_path}/figures"
output_path = f"{project_path}/data"

# Create the output folders up front; existing folders are left untouched.
pathlib.Path(figure_path).mkdir(parents=True, exist_ok=True)
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

# Per-experiment data folder inside the project's data directory.
experiment = '220603_fullrun'
experiment_path = f"{output_path}/{experiment}"
pathlib.Path(experiment_path).mkdir(parents=True, exist_ok=True)

# Maps prediction-set keys (one per crop ratio) to model display names.
name_dict = {
    "predictions_cropratio0.3": "ConvNextSmall(Retina)+MLP_cropratio0.3",
    "predictions_cropratio0.5": "ConvNextSmall(Retina)+MLP_cropratio0.5",
    "predictions_cropratio0.8": "ConvNextSmall(Retina)+MLP_cropratio0.8",
}

# Partition ids 0..21.
partitions = list(range(22))
partitions

/sc-projects/sc-proj-ukb-cvd


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

In [3]:
# Phecode definition table, ordered by endpoint id.
endpoint_defs = pd.read_feather(f"{output_path}/phecode_defs_220306.feather")
endpoint_defs = endpoint_defs.sort_values(by="endpoint")
endpoint_defs

Unnamed: 0,phecode,endpoint,phecode_string,phecode_category,sex,ICD10_only,phecode_top,leaf
3662,4306655,OMOP_4306655,All-Cause Death,Death,Both,,,
2073,001,phecode_001,Salmonella,ID,Both,0.0,001,1.0
2074,002,phecode_002,Staphylococcus,ID,Both,0.0,002,0.0
2229,002.1,phecode_002-1,Staphylococcus aureus,ID,Both,0.0,002,1.0
2075,003,phecode_003,Escherichia coli,ID,Both,0.0,003,1.0
...,...,...,...,...,...,...,...,...
3558,992,phecode_992,Family history of diseases of the skin and sub...,Stat,Both,0.0,992,1.0
3559,993,phecode_993,Family history of musculoskeletal disease,Stat,Both,0.0,993,1.0
3560,994,phecode_994,Family history of congenital anomalies,Stat,Both,0.0,994,1.0
3561,995,phecode_995,Family history of genetic condition,Stat,Both,0.0,995,1.0


In [4]:
# Echo the location of the endpoint metadata file that is read next.
experiment_path + "/endpoints.csv"

'/sc-projects/sc-proj-ukb-cvd/results/projects/22_retina_phewas_220603_fullrun/data/220603_fullrun/endpoints.csv'

In [5]:
# Endpoint metadata (one row per endpoint), ordered by endpoint id, plus the
# plain sorted list of endpoint ids.
endpoints_md = pd.read_csv(f"{experiment_path}/endpoints.csv", index_col=0)
endpoints_md = endpoints_md.sort_values(by="endpoint")
endpoints = sorted(endpoints_md["endpoint"].values)

In [6]:
# Express the frequency as a percentage and list the most frequent endpoints first.
endpoints_md = (
    endpoints_md
    .assign(**{'freq_in_%': endpoints_md['freq'] * 100})
    .sort_values(by='freq_in_%', ascending=False)
)
endpoints_md

Unnamed: 0,endpoint,eligable,n,freq,phecode,phecode_string,phecode_category,sex,ICD10_only,phecode_top,leaf,freq_in_%
1036,phecode_713,41667,14859,0.356613,713.00,Symptoms related to joints,Musc/Skel,Both,0.0,713.0,0.0,35.661315
1038,phecode_713-3,42789,14672,0.342892,713.30,Pain in joint,Musc/Skel,Both,0.0,713.0,1.0,34.289186
29,phecode_089,37131,11920,0.321026,89.00,Infections,ID,Both,0.0,89.0,0.0,32.102556
604,phecode_460,39153,12430,0.317472,460.00,Acute respiratory infection,Resp,Both,0.0,460.0,0.0,31.747248
1051,phecode_718,40489,10938,0.270147,718.00,Back pain,Musc/Skel,Both,0.0,718.0,0.0,27.014745
...,...,...,...,...,...,...,...,...,...,...,...,...
198,phecode_209-1,61174,102,0.001667,209.10,Pituitary hyperfunction,Endo,Both,0.0,209.0,0.0,0.166738
151,phecode_169-11,61187,102,0.001667,169.11,Immune thrombocytopenic purpura [ITP],Blood,Both,0.0,169.0,1.0,0.166702
453,phecode_384,61191,102,0.001667,384.00,Anomalies of pupillary function,Eye,Both,0.0,384.0,0.0,0.166691
1027,phecode_710-32,61242,102,0.001666,710.32,Genu varum (acquired),Musc/Skel,Both,0.0,710.0,1.0,0.166552


In [7]:
# Benchmark columns discarded before aggregation: the 'time' column plus the
# ASCVD, QRISK3 and SCORE2 scores, each with and without the '+Retina' variant.
# 'Age+Sex', 'Age+Sex+Retina' and 'Retina' are deliberately not listed.
cols_to_drop = ['time'] + [
    f"{score}{suffix}"
    for score in ('ASCVD', 'QRISK3', 'SCORE2')
    for suffix in ('', '+Retina')
]

In [8]:
from datetime import date

# Date stamp used in the benchmark file name. It is pinned to a fixed day;
# switch to str(date.today()) to follow the current date instead.
today = date(2022, 7, 12).isoformat()

In [9]:
# Benchmark file for the pinned date stamp and the crop ratio to analyse.
crop_ratio = "0.5" # 0.3, 0.5, 0.8
name = f"benchmarks_cindex_{today}"
selected_model = f"ImageTraining_[]_ConvNeXt_MLPHead_predictions_cropratio{crop_ratio}"

# Long -> wide: one row per (endpoint, model, iteration, time) with one
# C-index column per score.
benchmarks = (
    pd.read_feather(f"{experiment_path}/{name}.feather")
    .pivot(index=['endpoint', 'model', 'iteration', 'time'], columns=['score'], values='cindex')
    .reset_index()
)

# Keep only the selected model's rows and discard the columns not analysed here.
benchmarks = benchmarks.loc[benchmarks.model == selected_model].drop(columns=cols_to_drop)
benchmarks

score,endpoint,model,iteration,Age+Sex,Age+Sex+Retina,Retina
50,OMOP_4306655,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,0,0.739966,0.751695,0.745762
51,OMOP_4306655,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,1,0.736012,0.744664,0.738850
52,OMOP_4306655,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,2,0.750023,0.761563,0.751649
53,OMOP_4306655,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,3,0.765519,0.780510,0.772937
54,OMOP_4306655,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,4,0.728511,0.741351,0.738075
...,...,...,...,...,...,...
175595,phecode_979,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,45,0.644747,0.583827,0.529790
175596,phecode_979,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,46,0.564771,0.561570,0.510550
175597,phecode_979,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,47,0.596923,0.621078,0.626647
175598,phecode_979,ImageTraining_[]_ConvNeXt_MLPHead_predictions_...,48,0.550100,0.516790,0.527735


In [31]:
# Echo the benchmark file path and the model label built from `name` and `crop_ratio`.
(
    f"{experiment_path}/{name}.feather",
    f"ImageTraining_[]_ConvNeXt_MLPHead_predictions_cropratio{crop_ratio}",
)

('/sc-projects/sc-proj-ukb-cvd/results/projects/22_retina_phewas_220603_fullrun/data/220603_fullrun/benchmarks_cindex_2022-07-12.feather',
 'ImageTraining_[]_ConvNeXt_MLPHead_predictions_cropratio0.5')

In [10]:
# Attach the endpoint metadata to every benchmark row (left join on the
# endpoint id), then compute the C-index difference between the
# Age+Sex+Retina and the Age+Sex scores.
benchmark_endpoints = benchmarks.merge(endpoints_md, on='endpoint', how='left')
benchmark_endpoints['deltaAgeSexRetina-AgeSex'] = (
    benchmark_endpoints['Age+Sex+Retina'].sub(benchmark_endpoints['Age+Sex'])
)

In [11]:
# Average all numeric columns per endpoint (across the benchmark iterations).
# numeric_only=True is passed explicitly: since pandas 2.0 a bare .mean()
# raises a TypeError on the remaining string columns instead of silently
# skipping them. The helper columns that are meaningless after averaging are
# dropped afterwards.
# NOTE: groupby discards rows whose group keys are missing, so endpoints
# without a phecode_category / phecode_string do not appear in the result.
benchmarks_frequencies = (
    benchmark_endpoints
    .groupby(['endpoint', 'phecode_category', 'phecode_string'])
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns=['iteration', 'eligable', 'freq', 'ICD10_only', 'phecode', 'phecode_top', 'leaf'])
)

In [12]:
# Keep only endpoints whose frequency is above 1 %. No sorting happens here;
# the rows stay in the order of benchmarks_frequencies.
is_frequent = benchmarks_frequencies['freq_in_%'].gt(1)
benchmarks_frequencies_filtered = benchmarks_frequencies.loc[is_frequent]
benchmarks_frequencies_filtered

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
0,OMOP_4306655,Death,All-Cause Death,0.742221,0.754934,0.747410,3548.0,5.792086,0.012712
1,phecode_002,ID,Staphylococcus,0.656445,0.686675,0.696176,658.0,1.079662,0.030230
3,phecode_003,ID,Escherichia coli,0.608134,0.602371,0.597628,1017.0,1.673881,-0.005763
15,phecode_052,ID,Herpesvirus,0.586990,0.586038,0.519922,3872.0,6.901346,-0.000952
16,phecode_052-1,ID,Herpes simplex,0.588014,0.590621,0.518773,797.0,1.331685,0.002607
...,...,...,...,...,...,...,...,...,...
1161,phecode_977,Rx,Long term (current) drug therapy,0.478864,0.589508,0.583433,5389.0,9.938404,0.110644
1164,phecode_977-5,Rx,Long term (current) use of agents affecting ho...,0.882301,0.881502,0.722898,669.0,1.147119,-0.000798
1166,phecode_977-52,Rx,Hormone replacement therapy (postmenopausal),0.705123,0.705813,0.564766,520.0,1.641984,0.000689
1167,phecode_977-7,Rx,Long term (current) use of insulin or oral hyp...,0.607217,0.608043,0.534996,2231.0,3.716351,0.000826


### Best 20 endpoints across all phecode categories, sorted by C-index delta (Age+Sex+Retina − Age+Sex), positive deltas only

In [127]:
# Rank by the C-index gain explicitly. Without this sort the "best 20" would
# simply be the first 20 positive-delta rows in whatever order the frame
# happens to be in, which depends on earlier (possibly deleted) cells.
(
    benchmarks_frequencies[benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0]
    .sort_values('deltaAgeSexRetina-AgeSex', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
5,phecode_005,ID,Mycobacteria,0.241682,0.667991,0.780068,113.0,0.185553,0.426309
943,phecode_674-1,Derm,Hypopigmentation,0.498635,0.760342,0.740235,143.0,0.234273,0.261707
200,phecode_215,Endo,Testicular dysfunction,0.525886,0.763175,0.745145,103.0,0.368872,0.237289
926,phecode_665-3,Derm,Other psoriasis,0.528151,0.725988,0.646116,133.0,0.21759,0.197837
680,phecode_504-1,GI,Gingivitis,0.351915,0.538502,0.492034,129.0,0.211361,0.186587
23,phecode_059-1,ID,COVID-19*,0.531442,0.71089,0.654933,1017.0,1.660246,0.179448
925,phecode_665-2,Derm,Psoriatic arthropathy,0.432629,0.595335,0.713244,121.0,0.198098,0.162706
403,phecode_367-5,Eye,Uveitis,0.504332,0.665631,0.678704,363.0,0.597265,0.161299
199,phecode_211,Endo,Disorders of adrenal glands,0.521008,0.678137,0.681283,175.0,0.285924,0.157129
770,phecode_542-4,GI,Portal hypertension,0.46644,0.620182,0.618866,122.0,0.199219,0.153742


### Best 20 eye- or cardio-related endpoints, sorted by C-index delta (Age+Sex+Retina − Age+Sex), positive deltas only

In [132]:
# Eye and Cardio endpoints with a positive C-index gain, ranked by that gain.
# The sort is explicit so the result does not depend on the frame's
# incoming row order.
is_eye_or_cardio = benchmarks_frequencies['phecode_category'].isin(['Eye', 'Cardio'])
has_positive_delta = benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0
(
    benchmarks_frequencies[is_eye_or_cardio & has_positive_delta]
    .sort_values('deltaAgeSexRetina-AgeSex', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
403,phecode_367-5,Eye,Uveitis,0.504332,0.665631,0.678704,363.0,0.597265,0.161299
429,phecode_374-8,Eye,Retinal edema,0.508066,0.638015,0.697966,267.0,0.436332,0.129949
410,phecode_370,Eye,Disorders of iris and ciliary body,0.474445,0.592206,0.628225,227.0,0.37086,0.117761
590,phecode_443-1,Cardio,Stricture of artery [Arterial stenosis],0.681321,0.777938,0.768569,141.0,0.230355,0.096617
444,phecode_377-5,Eye,Vitreous hemorrhage,0.588467,0.659474,0.676065,144.0,0.23546,0.071007
404,phecode_367-52,Eye,Iridocyclitis,0.493759,0.555238,0.530046,363.0,0.597177,0.061478
409,phecode_369-5,Eye,Hereditary corneal dystrophies,0.428489,0.488366,0.646263,173.0,0.282694,0.059877
560,phecode_424-3,Cardio,Diastolic heart failure,0.696669,0.752994,0.767079,154.0,0.251461,0.056325
384,phecode_361-3,Eye,Ptosis of eyelid,0.60402,0.65422,0.668732,387.0,0.634146,0.0502
407,phecode_369-1,Eye,Corneal scars and opacities,0.62327,0.671592,0.672724,103.0,0.168268,0.048322


### Best 20 eye-related endpoints, sorted by C-index delta (Age+Sex+Retina − Age+Sex), positive deltas only

In [129]:
# Eye endpoints with a positive C-index gain, ranked by that gain.
# The sort is explicit so the result does not depend on the frame's
# incoming row order.
is_eye = benchmarks_frequencies['phecode_category'] == 'Eye'
has_positive_delta = benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0
(
    benchmarks_frequencies[is_eye & has_positive_delta]
    .sort_values('deltaAgeSexRetina-AgeSex', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
403,phecode_367-5,Eye,Uveitis,0.504332,0.665631,0.678704,363.0,0.597265,0.161299
429,phecode_374-8,Eye,Retinal edema,0.508066,0.638015,0.697966,267.0,0.436332,0.129949
410,phecode_370,Eye,Disorders of iris and ciliary body,0.474445,0.592206,0.628225,227.0,0.37086,0.117761
444,phecode_377-5,Eye,Vitreous hemorrhage,0.588467,0.659474,0.676065,144.0,0.23546,0.071007
404,phecode_367-52,Eye,Iridocyclitis,0.493759,0.555238,0.530046,363.0,0.597177,0.061478
409,phecode_369-5,Eye,Hereditary corneal dystrophies,0.428489,0.488366,0.646263,173.0,0.282694,0.059877
384,phecode_361-3,Eye,Ptosis of eyelid,0.60402,0.65422,0.668732,387.0,0.634146,0.0502
407,phecode_369-1,Eye,Corneal scars and opacities,0.62327,0.671592,0.672724,103.0,0.168268,0.048322
406,phecode_369,Eye,Noninflammatory disorders of the cornea,0.605668,0.6523,0.674524,647.0,1.061143,0.046632
466,phecode_389-1,Eye,Ocular pain,0.558739,0.594407,0.485666,379.0,0.623386,0.035668


### macular-related endpoints

In [130]:
# Collect every phecode label that contains "macular" or "Macular" and print
# the matches one per line.
phecodes = benchmarks_frequencies.phecode_string.unique()
macular_endpoints = [
    label for label in phecodes
    if 'macular' in label or 'Macular' in label
]
for label in macular_endpoints:
    print(label)

Macular cyst, hole, or pseudohole
Exuadative (wet) age-related macular degeneration
Age-related macular degeneration
Macular degeneration
Nonexuadative (dry) age-related macular degeneration


In [131]:
# Benchmark rows of the five macular-related phecode labels (at most 20 rows shown).
macular_labels = [
    'Macular cyst, hole, or pseudohole',
    'Macular degeneration',
    'Age-related macular degeneration',
    'Exuadative (wet) age-related macular degeneration',
    'Nonexuadative (dry) age-related macular degeneration',
]
benchmarks_frequencies[benchmarks_frequencies['phecode_string'].isin(macular_labels)].head(20)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
427,phecode_374-52,Eye,"Macular cyst, hole, or pseudohole",0.624822,0.650025,0.664189,251.0,0.410024,0.025203
426,phecode_374-512,Eye,Exuadative (wet) age-related macular degeneration,0.850505,0.854903,0.780072,150.0,0.24491,0.004398
424,phecode_374-51,Eye,Age-related macular degeneration,0.791798,0.789475,0.711261,704.0,1.152719,-0.002323
423,phecode_374-5,Eye,Macular degeneration,0.735025,0.728118,0.682975,1816.0,2.981057,-0.006907
425,phecode_374-511,Eye,Nonexuadative (dry) age-related macular degene...,0.813594,0.778714,0.684067,246.0,0.401941,-0.03488


### Inspect Jakob's endpoints

In [25]:
# Hand-picked endpoint labels (phecode_string values), grouped by why they
# were chosen. The flat `endpoints` list keeps the groups' order.
endpoint_groups = {
    "generally very important": [
        "Diabetes mellitus",
        "Ischemic heart disease",
        "Myocardial infarction [Heart attack]",
        "Cerebral infarction [Ischemic stroke]",
        "Heart failure",
        "All-Cause Death",
    ],
    "also generally important and relevant": [
        "Pulmonary embolism",
        "Pneumonia",
        "Chronic obstructive pulmonary disease [COPD]",
        "Chronic liver disease and sequelae",
        "Chronic kidney disease",
        "Dementias and cerebral degeneration",
    ],
    "generally important and fun to check": [
        "Anemia",
        "Osteoporosis",
        "Malignant neoplasm of the skin",
        "Malignant neoplasm of the digestive organs",
        "Psoriasis",
        "Rheumatoid arthritis",
    ],
    "important for eye": [
        "Cataract",
        "Retinal vascular changes and occlusions",
        "Diabetic retinopathy",
        "Macular degeneration",
        "Glaucoma",
        "Blindness and low vision",
    ],
}
endpoints = [label for group in endpoint_groups.values() for label in group]

# Look the labels up in the >1 % frequency table (a missing label raises a
# KeyError) and show them ordered by C-index gain, largest first.
by_label = benchmarks_frequencies_filtered.set_index('phecode_string')
by_label.loc[endpoints].sort_values(by='deltaAgeSexRetina-AgeSex', ascending=False)

Unnamed: 0_level_0,endpoint,phecode_category,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
phecode_string,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Diabetes mellitus,phecode_202,Endo,0.594973,0.648915,0.64442,4259.0,7.351215,0.053942
Anemia,phecode_164,Blood,0.604773,0.645832,0.632565,4945.0,8.578219,0.041059
Chronic obstructive pulmonary disease [COPD],phecode_474,Resp,0.699402,0.731702,0.731332,2464.0,4.079335,0.032301
Chronic liver disease and sequelae,phecode_542,GI,0.554513,0.581554,0.538651,2124.0,3.499811,0.027041
Pneumonia,phecode_468,Resp,0.661388,0.685226,0.678466,3199.0,5.358728,0.023838
Ischemic heart disease,phecode_404,Cardio,0.664995,0.682022,0.658081,4055.0,6.897314,0.017027
Retinal vascular changes and occlusions,phecode_374-3,Eye,0.641291,0.657589,0.666331,736.0,1.209571,0.016298
All-Cause Death,OMOP_4306655,Death,0.742221,0.754934,0.74741,3548.0,5.792086,0.012712
Cataract,phecode_371,Eye,0.726379,0.73822,0.637656,6798.0,11.366943,0.01184
Heart failure,phecode_424,Cardio,0.720853,0.732204,0.711638,1996.0,3.280413,0.011352


In [29]:
# Print each selected endpoint as a ready-to-paste line: "<endpoint id>", # <label>
a = benchmarks_frequencies_filtered.set_index('phecode_string').loc[endpoints]
for label, endpoint_id in a['endpoint'].items():
    print(f'"{endpoint_id}", # {label}')

"phecode_202", # Diabetes mellitus
"phecode_404", # Ischemic heart disease
"phecode_404-1", # Myocardial infarction [Heart attack]
"phecode_431-11", # Cerebral infarction [Ischemic stroke]
"phecode_424", # Heart failure
"OMOP_4306655", # All-Cause Death
"phecode_440-3", # Pulmonary embolism
"phecode_468", # Pneumonia
"phecode_474", # Chronic obstructive pulmonary disease [COPD]
"phecode_542", # Chronic liver disease and sequelae
"phecode_583", # Chronic kidney disease
"phecode_328", # Dementias and cerebral degeneration
"phecode_164", # Anemia
"phecode_726-1", # Osteoporosis
"phecode_103", # Malignant neoplasm of the skin
"phecode_101", # Malignant neoplasm of the digestive organs
"phecode_665", # Psoriasis
"phecode_705-1", # Rheumatoid arthritis
"phecode_371", # Cataract
"phecode_374-3", # Retinal vascular changes and occlusions
"phecode_374-42", # Diabetic retinopathy
"phecode_374-5", # Macular degeneration
"phecode_375-1", # Glaucoma
"phecode_388", # Blindness and low vision


### top 20 most frequent with positive delta AgeSexRetina - AgeSex

In [139]:
# Most frequent endpoints with a positive C-index gain. The ranking by
# frequency is done explicitly here; without it this cell is identical to the
# "best 20 by delta" cell and only differs through leftover kernel state.
(
    benchmarks_frequencies[benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0]
    .sort_values('freq_in_%', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
1036,phecode_713,Musc/Skel,Symptoms related to joints,0.521289,0.535134,0.53032,14859.0,35.661315,0.013845
1038,phecode_713-3,Musc/Skel,Pain in joint,0.523485,0.525711,0.506258,14672.0,34.289186,0.002227
604,phecode_460,Resp,Acute respiratory infection,0.514586,0.529133,0.525222,12430.0,31.747248,0.014547
1051,phecode_718,Musc/Skel,Back pain,0.518426,0.523115,0.506748,10938.0,27.014745,0.004689
1108,phecode_801,Signs/Symptoms,Cough,0.525083,0.5415,0.541756,12618.0,26.478365,0.016417
910,phecode_660,Derm,Infection of the skin,0.514122,0.519565,0.519033,11073.0,23.927136,0.005443
499,phecode_401,Cardio,Hypertension,0.621174,0.638324,0.599087,11114.0,22.899883,0.01715
500,phecode_401-1,Cardio,Essential hypertension,0.621492,0.631292,0.562641,11111.0,22.883799,0.0098
747,phecode_529,GI,Symptoms involving digestive system,0.549573,0.552313,0.511122,11559.0,22.827633,0.00274
952,phecode_679,Derm,Skin symptoms,0.518098,0.529145,0.548445,10496.0,21.226339,0.011047


### top 20 most frequent eye-related or cardio-related with positive delta AgeSexRetina - AgeSex

In [142]:
# Most frequent Eye and Cardio endpoints with a positive C-index gain.
# The ranking by frequency is explicit so the result does not depend on the
# frame's incoming row order.
is_eye_or_cardio = benchmarks_frequencies['phecode_category'].isin(['Eye', 'Cardio'])
has_positive_delta = benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0
(
    benchmarks_frequencies[is_eye_or_cardio & has_positive_delta]
    .sort_values('freq_in_%', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
499,phecode_401,Cardio,Hypertension,0.621174,0.638324,0.599087,11114.0,22.899883,0.01715
500,phecode_401-1,Cardio,Essential hypertension,0.621492,0.631292,0.562641,11111.0,22.883799,0.0098
547,phecode_417,Cardio,Abnormalities of heart beat,0.575542,0.577268,0.556228,6912.0,12.04454,0.001725
411,phecode_371,Eye,Cataract,0.726379,0.73822,0.637656,6798.0,11.366943,0.01184
582,phecode_439,Cardio,Hemorrhoids,0.492407,0.522381,0.518958,4984.0,8.989575,0.029974
415,phecode_374,Eye,Disorders of the retina,0.610183,0.636336,0.632491,4472.0,7.577221,0.026153
397,phecode_367,Eye,Inflammation of the eye,0.534046,0.536115,0.500589,4039.0,7.375148,0.002069
505,phecode_404,Cardio,Ischemic heart disease,0.664995,0.682022,0.658081,4055.0,6.897314,0.017027
374,phecode_360,Eye,Inflammation of eyelids,0.556822,0.556928,0.522885,3710.0,6.402513,0.000106
533,phecode_416-2,Cardio,Atrial fibrillation and flutter,0.685281,0.685347,0.625883,3692.0,6.138601,6.6e-05


### top 20 most frequent eye-related with positive delta AgeSexRetina - AgeSex

In [143]:
# Most frequent Eye endpoints with a positive C-index gain.
# The ranking by frequency is explicit so the result does not depend on the
# frame's incoming row order.
is_eye = benchmarks_frequencies['phecode_category'] == 'Eye'
has_positive_delta = benchmarks_frequencies['deltaAgeSexRetina-AgeSex'] > 0
(
    benchmarks_frequencies[is_eye & has_positive_delta]
    .sort_values('freq_in_%', ascending=False)
    .head(20)
)

Unnamed: 0,endpoint,phecode_category,phecode_string,Age+Sex,Age+Sex+Retina,Retina,n,freq_in_%,deltaAgeSexRetina-AgeSex
411,phecode_371,Eye,Cataract,0.726379,0.73822,0.637656,6798.0,11.366943,0.01184
415,phecode_374,Eye,Disorders of the retina,0.610183,0.636336,0.632491,4472.0,7.577221,0.026153
397,phecode_367,Eye,Inflammation of the eye,0.534046,0.536115,0.500589,4039.0,7.375148,0.002069
374,phecode_360,Eye,Inflammation of eyelids,0.556822,0.556928,0.522885,3710.0,6.402513,0.000106
398,phecode_367-1,Eye,Conjunctivitis,0.545903,0.546467,0.516032,3398.0,6.088406,0.000565
437,phecode_376,Eye,Disorders of vitreous body,0.579276,0.581237,0.572527,2501.0,4.171323,0.001961
439,phecode_376-2,Eye,Vitreous opacities,0.579421,0.585257,0.501353,2488.0,4.147012,0.005836
455,phecode_386,Eye,Visual disturbances,0.597962,0.598768,0.583665,2201.0,3.686829,0.000806
412,phecode_371-3,Eye,Nuclear cataract,0.728499,0.75922,0.74873,2023.0,3.308475,0.030721
438,phecode_376-1,Eye,Vitreous degeneration,0.555852,0.556531,0.528478,1855.0,3.067485,0.000679
