# Run AwareDX ad-hoc on any drug and adverse event

In [1]:
from os import path
from collections import Counter, defaultdict
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd 
import feather 
import scipy.stats
from scipy import stats
import pymysql
import pymysql.cursors
from database import Database
from utils import Utils
from drug import Drug

# Shared project helper; later cells read RANDOM_STATE and DATA_PATH from it
# and call its load_df / save_df / print_table helpers.
u = Utils()
# Project database handle used for all SQL lookups in this notebook.
# NOTE(review): the string argument presumably names a connection/config
# entry -- confirm against the Database class.
db = Database('Mimir from Munnin')
# Seed NumPy's global RNG with the project-wide seed held on Utils.
np.random.seed(u.RANDOM_STATE)

In [52]:
def compile(results, iterations=25):
    """Collapse per-iteration test results into one row per (drug, adr) pair.

    NOTE: the name shadows the builtin ``compile``; it is kept because
    other cells call this function by that name.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per test. The columns read here are ``drug``, ``adr``,
        ``itr``, ``p_value`` and ``logROR``. Rows containing any missing
        value are dropped before anything else happens.
    iterations : int, optional
        Number of rows a (drug, adr) pair must have to be summarised;
        pairs with any other count are left out. Defaults to 25, the
        value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``adr``, ``drug``, ``p_val_min``, ``p_val_med``,
        ``p_val_max``, ``logROR_avg``, ``logROR_ci95_low``,
        ``logROR_ci95_upp``, ``drug_name``, ``drug_class``, ``adr_name``,
        ``adr_class``. The p-value columns hold Bonferroni-scaled values.
    """
    results = results.dropna()
    results = results.reset_index()
    num_tests = results.shape[0]
    # Bonferroni scaling by the number of remaining tests. The product is
    # deliberately left uncapped, so values above 1 can appear.
    results.loc[:, 'bonf_p_value'] = results.get('p_value') * num_tests

    def mean_confidence_interval(data, confidence=0.95):
        """Return (mean, lower, upper) of a two-sided Student-t interval."""
        a = 1.0 * np.array(data)
        n = len(a)
        m, se = np.mean(a), scipy.stats.sem(a)
        h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        return m, m - h, m + h

    def load_names(table, normalise=None):
        """Read an (id, name) table from the database into a plain dict."""
        names = {}
        for id_, name in db.run('select * from ' + table):
            names[normalise(id_) if normalise else id_] = name
        return names

    # One pass over the groups instead of re-scanning `results` for every
    # pair. groupby yields the pairs in sorted key order, which is the same
    # order the grouped count table produced.
    complete_pairs = [
        (pair, rows)
        for pair, rows in results.groupby(by=['drug', 'adr'])
        if rows['itr'].count() == iterations
    ]

    scores = pd.DataFrame(
        columns=['drug', 'adr', 'p_val_min', 'p_val_med', 'p_val_max',
                 'logROR_avg', 'logROR_ci95_low', 'logROR_ci95_upp']
    ).set_index(['drug', 'adr'])

    for (drug, adr), data in tqdm(complete_pairs, total=len(complete_pairs)):
        bonf_p = data['bonf_p_value'].values
        scores.at[(drug, adr), 'p_val_min'] = np.min(bonf_p)
        scores.at[(drug, adr), 'p_val_med'] = np.median(bonf_p)
        scores.at[(drug, adr), 'p_val_max'] = np.max(bonf_p)

        mean, lower, upper = mean_confidence_interval(data['logROR'].values)
        scores.at[(drug, adr), 'logROR_avg'] = mean
        scores.at[(drug, adr), 'logROR_ci95_low'] = lower
        scores.at[(drug, adr), 'logROR_ci95_upp'] = upper

    scores = scores.reset_index()

    # ATC tables are keyed by the string form of the id; the MedDRA tables
    # keep whatever id type the database returns.
    name_atc4 = load_names('atc_4_name', str)
    name_atc5 = load_names('atc_5_name', str)
    name_hlgt = load_names('hlgt_name')
    name_soc = load_names('soc_name')
    name_pt = load_names('pt_name')

    scores['drug_name'] = ''
    scores['drug_class'] = 0
    scores = scores.set_index('drug')
    for id_ in np.unique(scores.index):
        # Look up with the same str() normalisation used when the tables
        # were filled; otherwise non-string drug ids would never match.
        key = str(id_)
        atc4_name = name_atc4.get(key, '')
        if atc4_name:
            scores.at[id_, 'drug_name'] = atc4_name
            scores.at[id_, 'drug_class'] = 4
        else:
            # Anything not named at ATC level 4 is labelled level 5, even
            # when the level-5 table has no name for it either.
            scores.at[id_, 'drug_name'] = name_atc5.get(key, '')
            scores.at[id_, 'drug_class'] = 5
    scores = scores.reset_index()

    scores['adr_name'] = ''
    scores['adr_class'] = ''
    scores = scores.set_index('adr')
    # Precedence when an id appears in several tables: soc, then hlgt, then pt.
    adr_tables = (('soc', name_soc), ('hlgt', name_hlgt), ('pt', name_pt))
    for id_ in np.unique(scores.index):
        for adr_class, table in adr_tables:
            adr_name = table.get(id_, '')
            if adr_name:
                scores.at[id_, 'adr_name'] = adr_name
                scores.at[id_, 'adr_class'] = adr_class
                break
    scores = scores.reset_index()

    return scores

In [141]:
# Ad-hoc driver: ask for a drug and an outcome, then either load the cached
# result table for that pair or run the matching/testing pipeline and cache it.


def _sql_quote(text):
    """Return *text* as a quoted, escaped SQL string literal."""
    # User input goes straight into the query text, so escape it; names
    # containing an apostrophe would otherwise break the statement.
    return "'" + pymysql.converters.escape_string(text) + "'"


drug_name = input(' Enter ATC drug name: ')
q_atc5 = "select atc_5_id from atc_5_name where atc_5_name=" + _sql_quote(drug_name)
q_atc4 = "select atc_4_id from atc_4_name where atc_4_name=" + _sql_quote(drug_name)
# Prefer an ATC level-5 match and fall back to level 4. Each query runs once,
# and database errors propagate instead of being reported as "not found".
drug_matches = db.get_list(q_atc5)
if not drug_matches:
    drug_matches = db.get_list(q_atc4)
if not drug_matches or not drug_matches[0]:
    raise NameError("drug not found")
drugID = drug_matches[0]

adr_name = input(' Enter MedDRA outcome name: ')
q = "select meddra_concept_id from pt_name where meddra_concept_name=" + _sql_quote(adr_name)
# The whole result list is kept (not just its first element); it is passed
# to Drug as-is and its string form is part of the cache file name.
adrID = db.get_list(q)
if not adrID:
    raise NameError("adr not found")

filename = 'Ad_Hoc/'+str(drugID)+'_'+str(adrID)
print("Checking for {}".format(filename))

if path.exists(u.DATA_PATH+filename+'.feather'):
    results = u.load_df(filename)
    print("Found!")
else:
    print("Not found, running ad-hoc")
    iterations = 25
    drug = Drug(drugID, adrID)

    for itr in tqdm(range(1, iterations+1)):
        drug.match()
        drug.count_adr()
        drug.assign_abcd(itr)
        drug.do_chi_square()
        drug.calc_logROR()
        drug.reset_for_next_itr()

    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not drug.ensure_results(iterations):
        raise RuntimeError("ad-hoc run did not produce the expected results")

    results = compile(drug.results)
    u.save_df(results, filename)

u.print_table(results)
results

 Enter ATC drug name: Fentanyl
 Enter MedDRA outcome name: Cardiac arrest
Checking for Ad_Hoc/21604201_[35204966]
Found!

M 	 0.27 (0.26, 0.28) 	 fentanyl 	 pt Cardiac arrest


Unnamed: 0,adr,drug,p_val_min,p_val_med,p_val_max,logROR_avg,logROR_ci95_low,logROR_ci95_upp,drug_name,drug_class,adr_name,adr_class
0,35204966,21604201,6.053384e-09,5e-06,0.021325,-0.270892,-0.284261,-0.257523,fentanyl,5,Cardiac arrest,pt
