# Benchmarks

## Initialize

In [1]:
import os
import math
import pathlib
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

In [2]:
import ray
#ray.shutdown()

In [3]:
node = !hostname
if "sc" in node[0]:
    base_path = "/sc-projects/sc-proj-ukb-cvd"
else: 
    base_path = "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
print(base_path)

project_label = "22_retina_phewas"
project_path = f"{base_path}/results/projects/{project_label}"
figure_path = f"{project_path}/figures"
output_path = f"{project_path}/data"

pathlib.Path(figure_path).mkdir(parents=True, exist_ok=True)
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

experiment = '230905'
experiment_path = f"{output_path}/{experiment}"
print('experiment path:', experiment_path)
pathlib.Path(experiment_path).mkdir(parents=True, exist_ok=True)

name_dict = {
#     "predictions_cropratio0.3": "ConvNextSmall(Retina)+MLP_cropratio0.3",
#     "predictions_cropratio0.5": "ConvNextSmall(Retina)+MLP_cropratio0.5",
#    "predictions_cropratio0.66": "ConvNextSmall(Retina)+MLP_cropratio0.66",
    "predictions": "ConvNextSmall(Retina)+MLP_cropratio0.66",
}

#partitions = [i for i in range(22)]
partitions = [4, 5, 7, 9, 10, 20] # Partitions with eye test centers

/sc-projects/sc-proj-ukb-cvd
experiment path: /sc-projects/sc-proj-ukb-cvd/results/projects/22_retina_phewas/data/230905


In [4]:
import ray
# Start a local Ray runtime with 16 CPUs and no dashboard.
# `ignore_reinit_error=True` keeps this cell re-runnable: without it, executing
# the cell a second time in the same kernel errors because Ray is already
# initialized.
# Dashboard options that were previously tried here (currently disabled):
#   include_dashboard=True, dashboard_host="0.0.0.0", dashboard_port=24762
ray.init(num_cpus=16, include_dashboard=False, ignore_reinit_error=True)

RayContext(dashboard_url=None, python_version='3.9.7', ray_version='1.12.1', ray_commit='4863e33856b54ccf8add5cbe75e41558850a1b75', address_info={'node_ip_address': '10.32.105.6', 'raylet_ip_address': '10.32.105.6', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2023-09-13_16-08-48_852238_2486379/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2023-09-13_16-08-48_852238_2486379/sockets/raylet', 'webui_url': None, 'session_dir': '/tmp/ray/session_2023-09-13_16-08-48_852238_2486379', 'metrics_export_port': 49576, 'gcs_address': '10.32.105.6:59154', 'address': '10.32.105.6:59154', 'node_id': '3e4deb02fd0849f79d886cda83ec65057a8a75e32e3d13de2711452e'})

In [5]:
import pandas as pd
# Load the endpoint list, strip the '_prevalent' marker from each name and sort.
# The file is located via `base_path` (resolved per host in the configuration
# cell) instead of a hard-coded /sc-projects root, so the cell is not tied to
# one machine; on the "sc" host the resulting path is unchanged.
# NOTE(review): this reads from the 22_retinal_risk project, not from
# `project_label` -- presumably intentional; confirm.
endpoints_file = f"{base_path}/results/projects/22_retinal_risk/data/230905/min100_endpoints.csv"
endpoints = sorted(
    endpoint_name.replace('_prevalent', '')
    for endpoint_name in pd.read_csv(endpoints_file).endpoint.values
)
len(endpoints)

773

In [6]:
# Input directory (a pathlib.Path, one sub-directory per model) and output
# directory (kept as a plain string) for this experiment.
in_path = pathlib.Path(f"{experiment_path}/loghs")
out_path = f"{experiment_path}/coxph/input"

# Create both locations up front; already existing directories are left as is.
for directory in (in_path, pathlib.Path(out_path)):
    directory.mkdir(parents=True, exist_ok=True)

In [7]:
# Every sub-directory of `in_path` is treated as one model, skipping Jupyter
# checkpoint folders. The filter looks at the entry name only, so a parent
# directory that happens to contain "ipynb_checkpoints" cannot hide all models.
# `iterdir()` yields entries in arbitrary order; sorting makes `models[0]`
# deterministic across runs and machines.
models = sorted(
    entry.name
    for entry in in_path.iterdir()
    if entry.is_dir() and "ipynb_checkpoints" not in entry.name
)

In [8]:
from sklearn.preprocessing import StandardScaler
import pickle
import zstandard

def read_data(fp_in, split):
    """Load one split from a feather file and index it by ``eid``.

    Args:
        fp_in: Directory that contains the split files.
        split: Split name; the file read is ``{fp_in}/{split}.feather``.

    Returns:
        The DataFrame read from the file with the ``eid`` column set as index.
    """
    feather_file = f"{fp_in}/{split}.feather"
    frame = pd.read_feather(feather_file)
    return frame.set_index("eid")
    
def save_pickle(data, data_path):
    """Pickle ``data`` and write it zstandard-compressed to ``data_path``.

    Args:
        data: Object to serialize with ``pickle`` (highest protocol).
        data_path: Destination file; opened in binary write mode, so an
            existing file is overwritten.
    """
    with open(data_path, "wb") as sink, zstandard.ZstdCompressor().stream_writer(sink) as zwriter:
        # Serialize only after the file is open, then push the whole payload
        # through the compressing writer in a single call.
        payload = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
        zwriter.write(payload)
    
def read_predictions(model, partition, split):
    """Read the predictions of one model/partition/split.

    The data is loaded with ``read_data`` from the directory
    ``{in_path}/{model}/{partition}``, where ``in_path`` is the module-level
    input directory.

    Args:
        model: Name of the model sub-directory below ``in_path``.
        partition: Partition identifier (sub-directory of the model).
        split: Split name handed on to ``read_data``.

    Returns:
        Whatever ``read_data`` returns for that directory and split.

    Raises:
        NotImplementedError: If the partition directory does not exist; the
            offending path is printed before raising.
    """
    fp_in = f"{in_path}/{model}/{partition}"

    # Guard clause: report the missing directory and bail out early.
    if not pathlib.Path(fp_in).is_dir():
        print(fp_in)
        raise NotImplementedError()

    return read_data(fp_in, split)

In [9]:
models

['ImageTraining_[]_ConvNeXt_MLPHead_predictions']

In [10]:
# Sanity check on the first model: for every partition and split, print how
# many columns contain at least one missing value.
for partition in partitions:  # test: in [0, 10, 21]
    for split in ("train", "valid", "test"):  # "test_left", 'test_right'
        temp = read_predictions(models[0], partition, split)
        n_columns_with_nan = temp.isna().any().sum()
        print(partition, split, n_columns_with_nan)

4 train 0
4 valid 0
4 test 0
5 train 0
5 valid 0
5 test 0
7 train 0
7 valid 0
7 test 0
9 train 0
9 valid 0
9 test 0
10 train 0
10 valid 0
10 test 0
20 train 0
20 valid 0
20 test 0
