# Benchmarks

## Initialize

In [3]:
import os
import math
import pathlib
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

In [4]:
# Pick the project root for the machine this notebook is running on.
# The hostname is read through the standard library rather than the
# IPython-only `!hostname` shell escape: no subprocess is spawned and the cell
# stays valid Python when the notebook is exported to a script.
hostname = os.uname().nodename
node = [hostname]  # kept as a one-element list so existing `node[0]` lookups still work
# NOTE(review): this is a plain substring test, so any hostname that merely
# contains "sc" selects the first root.
if "sc" in hostname:
    base_path = "/sc-projects/sc-proj-ukb-cvd"
else:
    base_path = "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
print(base_path)

# Identifiers of the project and of the experiment stored inside it.
project_label = "22_retina_phewas_220603_fullrun"
experiment = "220603_fullrun"

# Result locations, all derived from the host-dependent base_path.
project_path = f"{base_path}/results/projects/{project_label}"
figure_path = f"{project_path}/figures"
output_path = f"{project_path}/data"
experiment_path = f"{output_path}/{experiment}"

# Create every directory up front; ones that already exist are left as they are.
for result_dir in (figure_path, output_path, experiment_path):
    pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)

# One entry per crop ratio: the key `predictions_cropratio<r>` maps to the
# label `ConvNextSmall(Retina)+MLP_cropratio<r>`.
name_dict = {
    f"predictions_cropratio{crop_ratio}": f"ConvNextSmall(Retina)+MLP_cropratio{crop_ratio}"
    for crop_ratio in ("0.3", "0.5", "0.8")
}

# Partition indices 0 through 21; the trailing expression echoes the list as
# the cell output.
partitions = list(range(22))
partitions

/sc-projects/sc-proj-ukb-cvd


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

In [5]:
# Columns selected from the baseline covariate table.
covariates = [
    "age_at_recruitment_f21022_0_0",
    "sex_f31_0_0",
    "ethnic_background_f21000_0_0",
]

In [6]:
# Load the baseline covariate table, index it by `eid`, keep only the columns
# listed in `covariates`, and store the recruitment age as 32-bit integers.
# NOTE(review): this path is absolute and bypasses the host-dependent
# `base_path` selected earlier in the notebook — confirm that is intended.
baseline_covariates_file = "/sc-projects/sc-proj-ukb-cvd/data/2_datasets_pre/211110_anewbeginning/baseline_covariates_220503.feather"
data_covariates = (
    pd.read_feather(baseline_covariates_file)
    .set_index("eid")[covariates]
    .astype({"age_at_recruitment_f21022_0_0": np.int32})
)

In [7]:
# Persist the covariates next to the other outputs of this experiment.
# pandas' feather writer requires a default index, so `eid` is moved back into
# a regular column before writing.
covariates_file = f"{experiment_path}/data_covariates.feather"
data_covariates.reset_index().to_feather(covariates_file)

In [8]:
# Display the covariate frame (indexed by `eid`) as the cell output.
data_covariates

Unnamed: 0_level_0,age_at_recruitment_f21022_0_0,sex_f31_0_0,ethnic_background_f21000_0_0
eid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1000018,49,Female,British
1000020,59,Male,British
1000037,59,Female,British
1000043,63,Male,British
1000051,51,Female,British
...,...,...,...
6025150,43,Female,British
6025165,45,Female,British
6025173,57,Male,British
6025182,56,Male,British


In [9]:
# Number of missing values in each covariate column.
data_covariates.isna().sum()

age_at_recruitment_f21022_0_0       0
sex_f31_0_0                         0
ethnic_background_f21000_0_0     7334
dtype: int64

In [10]:
# Summary statistics for every column; include="all" also reports the
# non-numeric columns (count / unique / top / freq) alongside the numeric ones.
data_covariates.describe(include="all")

Unnamed: 0,age_at_recruitment_f21022_0_0,sex_f31_0_0,ethnic_background_f21000_0_0
count,502460.0,502460,495126
unique,,2,19
top,,Female,British
freq,,273353,442551
mean,56.528924,,
std,8.095128,,
min,37.0,,
25%,50.0,,
50%,58.0,,
75%,63.0,,
