In [None]:
# Author: Karen Feng

In [1]:
from itertools import permutations
import numpy as np
import pandas as pd

In [2]:
# Dtypes for the TWOSIDES table. The mapping names more columns than are
# loaded; `usecols` below restricts the read to the five that are used.
# NOTE(review): 'drug_1_rxnorn_id' (spelled "rxnorn") is used consistently in
# this notebook, including as a merge key -- presumably it mirrors the CSV
# header; confirm against the file before "correcting" the spelling.
twosides_dtypes = {
    'drug_1_rxnorn_id': str,
    'drug_1_concept_name': str,
    'drug_2_rxnorm_id': str,
    'drug_2_concept_name': str,
    'condition_meddra_id': str,
    'condition_concept_name': str,
    'A': int,
    'B': int,
    'C': int,
    'D': int,
    'PRR': float,
    'PRR_error': float,
    'mean_reporting_frequency': float,
}

# Load the drug-pair ids, the condition name, and the PRR / PRR_error
# columns that the later filtering step relies on.
twosides = pd.read_csv(
    '../TWOSIDES.csv',
    header=0,
    usecols=[
        'drug_1_rxnorn_id',
        'drug_2_rxnorm_id',
        'condition_concept_name',
        'PRR',
        'PRR_error',
    ],
    dtype=twosides_dtypes,
)

In [3]:
# Load the example medication table. The first CSV column becomes the index
# and 'Best RxNorm Id' is kept as a string. Missing values are then replaced
# with Python None (NaN -> None) so that a plain truthiness check can be used
# to skip them later on.
input_df = (
    pd.read_csv(
        '../data/conmed_example_data_with_best_rxcuid.csv',
        index_col=0,
        dtype={'Best RxNorm Id': str},
    )
    .fillna(np.nan)
    .replace([np.nan], [None])
)

In [4]:
# For every ID, collect the distinct 'Best RxNorm Id' values as a list.
all_drugs = (
    input_df
    .groupby('ID')['Best RxNorm Id']
    .apply(set)    # de-duplicate the ids within each group
    .apply(list)   # convert each set back to a list
)

In [5]:
# Build every ordered pair of distinct drug ids for each ID. Falsy entries
# (e.g. None or an empty string) are skipped before pairing. Both orderings
# (a, b) and (b, a) are produced.
all_pairs = all_drugs.apply(
    lambda drugs: list(permutations([drug for drug in drugs if drug], 2)))

# Reshape to one row per (ID, pair). explode() yields a NaN row for any ID
# whose pair list is empty (fewer than two usable drugs); drop those rows so
# they never reach the merge, where null keys would be matched against null
# keys on the other side.
all_pairs = (
    pd.DataFrame(all_pairs)
    .explode('Best RxNorm Id')
    .dropna(subset=['Best RxNorm Id'])
    .reset_index(drop=False)
)

In [6]:
# Split each pair stored in 'Best RxNorm Id' into two columns: element 0 and
# element 1. The column names are the keys used for the merge with `twosides`.
all_pairs['drug_1_rxnorn_id'] = all_pairs['Best RxNorm Id'].str.get(0)
all_pairs['drug_2_rxnorm_id'] = all_pairs['Best RxNorm Id'].str.get(1)

In [7]:
# Inner join: keep only the (ID, pair) rows whose two drug ids both match a
# row of `twosides`; each match contributes one row per matching twosides row.
pair_effects = pd.merge(
    all_pairs,
    twosides,
    how='inner',
    on=['drug_1_rxnorn_id', 'drug_2_rxnorm_id'],
)

In [8]:
# Keep only the rows where PRR minus PRR_error is strictly greater than 1.
# NOTE(review): presumably this requires the lower bound of the reporting
# ratio to exceed 1 -- confirm how PRR_error is defined in TWOSIDES.
prr_above_one = pair_effects['PRR'].sub(pair_effects['PRR_error']).gt(1)
pair_effects = pair_effects.loc[prr_above_one]

In [9]:
# Collapse to one row per ID: the distinct condition names joined with ';'.
# The names are sorted before joining because set iteration order for strings
# varies between interpreter runs (hash randomisation), which would make the
# exported file differ from run to run on identical input.
# IDs with no remaining rows in `pair_effects` do not appear in the result.
output_df = (
    pair_effects
    .groupby('ID')['condition_concept_name']
    .apply(set)
    .apply(lambda names: ';'.join(sorted(names)))
    .reset_index()
)

In [10]:
# Write the per-ID condition summary to disk, leaving out the DataFrame index.
output_df.to_csv(
    path_or_buf='../data/conmed_example_data_with_twosides.csv',
    index=False,
)