In [None]:
import concurrent.futures
import functools
import pathlib
import sys

import pandas as pd
import tqdm

sys.path.insert(0, '../src/')
import parallel_utils

In [None]:
# --- User-configurable locations -------------------------------------------
# Directories that this cell only names (it does not create them).
meta_files_path = pathlib.Path('../data/meta/')
archives_path = pathlib.Path('../data/archives/')

# Directories that this cell creates when they are missing. `exist_ok=True`
# makes re-running the cell harmless; the parent directory must already exist
# because `parents=True` is not passed.
temp_extract_dir = pathlib.Path('../data/extract_dir/')
computed_scores_path = pathlib.Path('../data/scores/')

for _created_dir in (temp_extract_dir, computed_scores_path):
    _created_dir.mkdir(exist_ok=True)

In [None]:
# Read the file map and add, for every row, the path obtained by joining the
# archives directory with that row's `score_file` value.
files_map_df = pd.read_csv(meta_files_path / 'file_map.csv')
files_map_df = files_map_df.assign(
    archive_file_path=files_map_df['score_file'].apply(archives_path.joinpath),
)

# Unique drug identifiers as plain Python ints, in ascending order.
drugs = sorted(set(files_map_df['drug'].astype(int).tolist()))

# Bind everything except the drug id, so the callable takes a single argument.
# A `functools.partial` of a module-level function (unlike a lambda) can be
# pickled and therefore shipped to worker processes.
compute_scores_partial = functools.partial(
    parallel_utils.compute_propensity_scores,
    files_map_df=files_map_df,
    temporary_directory=temp_extract_dir,
    computed_scores_path=computed_scores_path,
)

# Run one task per drug in a process pool (default worker count).
# `executor.map` yields results in the same order as `drugs`; wrapping the
# iterator in tqdm shows progress as results are consumed.
with concurrent.futures.ProcessPoolExecutor() as executor:
    per_drug_results = executor.map(compute_scores_partial, drugs)
    all_aucs = list(tqdm.tqdm(per_drug_results, total=len(drugs)))

In [None]:
# Flatten the list of lists of tuples to a single list of tuples.
# The flattened result is bound to a NEW name instead of overwriting
# `all_aucs`: rebinding would make this cell non-idempotent, because running
# it a second time would flatten the tuples themselves and the DataFrame
# construction below would then fail, forcing a re-run of the expensive
# parallel computation.
all_aucs_flat = [
    auc_record
    for drug_aucs in all_aucs
    for auc_record in drug_aucs
]

# One row per flattened tuple, labelled (drug, bootstrap, auc).
all_auc_df = pd.DataFrame(all_aucs_flat, columns=['drug', 'bootstrap', 'auc'])

# Persist the table next to the other metadata files, without the index column.
all_auc_df.to_csv(meta_files_path.joinpath('all_auc.csv'), index=False)

# Preview the first two rows as the cell output.
all_auc_df.head(2)