In [None]:
import concurrent.futures
import functools
import pathlib
import sys

import numpy as np
import scipy.sparse
import tqdm

sys.path.insert(0, '../src/')
import parallel_utils

In [None]:
# User-specified directory paths
# Input: directory holding the report matrices and id vectors loaded later on.
meta_files_path = pathlib.Path('../data/meta/')

# Output: directory for the per-drug score files.
# parents=True creates any missing intermediate directories, so a nested,
# user-chosen location works on first run instead of raising
# FileNotFoundError; exist_ok=True keeps re-running this cell harmless.
computed_scores_path = pathlib.Path('../data/scores/')
computed_scores_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Read the two sparse report matrices (reports x exposures and
# reports x outcomes) from the meta directory.
reports_exposures_matrix = scipy.sparse.load_npz(
    meta_files_path / 'all_drug_exposures.npz'
)
reports_outcomes_matrix = scipy.sparse.load_npz(
    meta_files_path / 'all_outcomes_meddra.npz'
)

# Quick sanity check: show both matrix shapes on one line.
print(
    f'Exposures: {reports_exposures_matrix.shape},'
    f' Outcomes: {reports_outcomes_matrix.shape}'
)

In [None]:
# Id vectors giving the identifier stored at each index of the exposure and
# outcome axes. Drug ids are cast to strings; outcome ids are kept as loaded.
drugs_id_vector = np.load(meta_files_path / 'drugs_vector.npy').astype(str)
outcomes_id_vector = np.load(meta_files_path / 'outcomes_vector_meddra.npy')

# Drugs to process: the integer file stem of every .npz file currently in the
# scores directory, in ascending order. A stem that is not an integer makes
# int() raise ValueError.
computable_drugs = sorted(
    int(score_file.stem) for score_file in computed_scores_path.glob('*.npz')
)

In [None]:
# Bind every argument that is the same for all drugs, leaving a callable that
# takes only the per-drug item supplied by executor.map below.
run_one_drug = functools.partial(
    parallel_utils.prr_one_drug,
    all_exposures=reports_exposures_matrix,
    all_outcomes=reports_outcomes_matrix,
    # Number of rows in the outcomes matrix
    n_reports=reports_outcomes_matrix.shape[0],
    drug_id_vector=drugs_id_vector,
    outcome_id_vector=outcomes_id_vector,
    scores_path=computed_scores_path,
)

# Compute and save disproportionality files (one for each drug).
# The work is spread over a pool of worker processes; wrapping the lazy map
# in list() drains it so every task finishes (and any worker exception is
# re-raised here) before the pool is shut down. tqdm reports progress as
# results arrive in submission order.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = list(  # noqa: F841
        tqdm.tqdm(
            executor.map(run_one_drug, computable_drugs),
            total=len(computable_drugs),
        )
    )