# Analysis

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd 
import feather 

from utils import Utils
from database import Database

# Shared project helper; the cells below use it for load_df(), print_table()
# and the RANDOM_STATE constant.
u = Utils()
# Database handle. It is not referenced by any cell visible in this notebook
# export; NOTE(review): the meaning of the string argument is defined in
# database.py — confirm there.
db = Database('Mimir from Munnin')
# Seed NumPy's global RNG from the project-wide constant.
np.random.seed(u.RANDOM_STATE)

In [22]:
# Load the two tables this notebook works from.
compiled = u.load_df('compiled')
risks = u.load_df('sex_risks')

# Share of compiled rows that made it into the sex-risk table.
percent_sig = len(risks) / len(compiled)
print('{:.1%} \t significant hits'.format(percent_sig))

# Number of distinct drug / ADR identifiers among the risks.
print(np.unique(risks['drug'].values).size, '\t unique drugs')
print(np.unique(risks['adr'].values).size, '\t unique adrs')

# Share of risks with a positive average logROR (reported as "female risks").
female_risks = risks.query('logROR_avg>0')
percent_f = len(female_risks) / len(risks)
print('{:.1%} \t female risks'.format(percent_f))

# Mean number of (non-null) ADR entries per drug.
adrs_per_drug = risks.groupby('drug')['adr'].count()
avg = adrs_per_drug.mean()
print("{:.0f} \t mean adrs per drug".format(avg))

6.9% 	 significant hits
792 	 unique drugs
297 	 unique adrs
62.7% 	 female risks
26 	 mean adrs per drug


## Query Sex-Risks

In [None]:
# Interactive lookup: print every sex-risk row for one drug.
# The name is stripped and lower-cased before matching, so stray whitespace
# or capitalisation in the typed input does not silently yield an empty table.
drug = input('  Drug: ').strip().lower()
data = risks.query('drug_name==@drug')
u.print_table(data)
# Alternative rendering of the same rows:
#u.show(data)

In [None]:
# Interactive lookup: print the sex-risk rows for one (drug, ADR) pair.
# Both inputs are stripped so stray whitespace does not silently yield an
# empty table. The drug name is lower-cased before matching; the ADR name is
# matched exactly as typed (case-sensitive).
drug = input('  Drug: ').strip().lower()
adr = input('  Adr: ').strip()
data = risks.query('drug_name==@drug and adr_name==@adr')
u.print_table(data)
# Alternative rendering of the same rows:
#u.show(data)

## Analyse sex risks for most prescribed drugs

In [26]:
# Most-prescribed drugs, taken from:
# https://www.beckershospitalreview.com/pharmacy/10-most-prescribed-drugs-in-the-u-s-in-q1.html
prescribed = [
    'Atorvastatin',
    'Levothyroxine sodium',
    'Lisinopril',
    'Gabapentin',
    'Amlodipine',
    'Amoxicillin',
    'Omeprazole',
    'Metformin',
    'Losartan',
    'Paracetamol',
]
# drug_name is matched against lower-case names below.
prescribed = [name.lower() for name in prescribed]

# ADR groups excluded from the per-drug analysis.
# NOTE(review): presumably excluded as sex-specific or uninformative — confirm.
ignore = [
    'Uterine, pelvic and broad ligament disorders',
    'Age related factors',
    'Cytogenetic investigations',
    'Genitourinary tract disorders NEC',
    'Renal and urinary tract neoplasms malignant and unspecified',
]

# Apply the ADR exclusion once; it does not depend on the drug.
kept = risks.query('adr_name not in @ignore')

# Mean number of (non-null) ADR entries per prescribed drug.
df = kept.query('drug_name in @prescribed')
avg = df.groupby(by=['drug']).count().get(['adr']).mean().values[0]
print("{:.0f} \t mean adrs per drug".format(avg))

# For each drug, print the row with the largest and the row with the smallest
# average logROR.
for drug in prescribed:
    df = kept.query('drug_name ==@drug')
    scores = df['logROR_avg'].dropna()
    if scores.empty:
        # idxmax/idxmin raise on empty input; report the gap and carry on
        # with the remaining drugs instead of aborting the cell.
        print('no sex risks found for {}'.format(drug))
        continue
    u.print_table(df.loc[[scores.idxmax()]])
    u.print_table(df.loc[[scores.idxmin()]])

66 	 mean adrs per drug

F 	 1.33 (1.33, 1.34) 	 atorvastatin 	 hlgt Glucose metabolism disorders (incl diabetes mellitus)

M 	 0.59 (0.57, 0.61) 	 atorvastatin 	 hlgt Vascular therapeutic procedures

F 	 0.94 (0.91, 0.97) 	 levothyroxine sodium 	 hlgt Synovial and bursal disorders

M 	 2.15 (2.13, 2.17) 	 levothyroxine sodium 	 hlgt Congenital cardiac disorders

F 	 1.06 (1.04, 1.07) 	 lisinopril 	 hlgt Anterior eye structural change, deposit and degeneration

M 	 0.39 (0.38, 0.40) 	 lisinopril 	 hlgt Suicidal and self-injurious behaviours NEC

F 	 0.58 (0.56, 0.60) 	 gabapentin 	 hlgt Aural disorders NEC

M 	 0.72 (0.70, 0.74) 	 gabapentin 	 hlgt Haemolyses and related conditions

F 	 0.74 (0.72, 0.76) 	 amlodipine 	 hlgt Anterior eye structural change, deposit and degeneration

M 	 0.91 (0.88, 0.94) 	 amlodipine 	 hlgt Hepatobiliary neoplasms malignant and unspecified

F 	 0.46 (0.45, 0.47) 	 amoxicillin 	 hlgt Allergic conditions

M 	 0.67 (0.65, 0.69) 	 amoxicillin 	 hlgt Fatal ou

## Analyse sex risks for drugs with most ADRs

In [28]:
# Drugs analysed in this section (already lower-case, matching drug_name).
most_adrs = [
    'etanercept',
    'adalimumab',
    'calcium carbonate',
    'acetylsalicylic acid',
    'paracetamol',
    'metformin',
    'omeprazole',
    'methotrexate',
    'amlodipine',
    'furosemide',
]

# `ignore` is the ADR exclusion list defined in the most-prescribed-drugs
# cell; that cell must have been run first. The exclusion does not depend on
# the drug, so apply it once.
kept = risks.query('adr_name not in @ignore')

# Mean number of (non-null) ADR entries per listed drug.
df = kept.query('drug_name in @most_adrs')
avg = df.groupby(by=['drug']).count().get(['adr']).mean().values[0]
print("ADRs per drug \t {:.0f}".format(avg))

# For each drug, print the row with the largest and the row with the smallest
# average logROR.
for drug in most_adrs:
    df = kept.query('drug_name ==@drug')
    scores = df['logROR_avg'].dropna()
    if scores.empty:
        # idxmax/idxmin raise on empty input; report the gap and carry on
        # with the remaining drugs instead of aborting the cell.
        print('no sex risks found for {}'.format(drug))
        continue
    u.print_table(df.loc[[scores.idxmax()]])
    u.print_table(df.loc[[scores.idxmin()]])

ADRs per drug 	 87

F 	 0.93 (0.89, 0.96) 	 etanercept 	 hlgt Somatic symptom and related disorders

M 	 1.03 (1.01, 1.06) 	 etanercept 	 hlgt Vascular therapeutic procedures

F 	 1.04 (1.02, 1.05) 	 adalimumab 	 hlgt Connective tissue disorders (excl congenital)

M 	 1.27 (1.23, 1.32) 	 adalimumab 	 hlgt Urethral disorders (excl calculi)

F 	 1.20 (1.15, 1.25) 	 calcium carbonate 	 hlgt Anterior eye structural change, deposit and degeneration

M 	 0.58 (0.56, 0.59) 	 calcium carbonate 	 hlgt Purine and pyrimidine metabolism disorders

F 	 0.87 (0.83, 0.91) 	 acetylsalicylic acid 	 hlgt Somatic symptom and related disorders

M 	 0.59 (0.57, 0.61) 	 acetylsalicylic acid 	 hlgt Purine and pyrimidine metabolism disorders

F 	 0.73 (0.71, 0.76) 	 paracetamol 	 hlgt Connective tissue disorders (excl congenital)

M 	 1.19 (1.16, 1.23) 	 paracetamol 	 hlgt Urethral disorders (excl calculi)

F 	 0.66 (0.65, 0.68) 	 metformin 	 hlgt Anterior eye structural change, deposit and degeneration

M 	 