<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-data" data-toc-modified-id="Load-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load data</a></span></li><li><span><a href="#All-patients" data-toc-modified-id="All-patients-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>All patients</a></span></li><li><span><a href="#Conditioned-on-gender-(sex)" data-toc-modified-id="Conditioned-on-gender-(sex)-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Conditioned on gender (sex)</a></span><ul class="toc-item"><li><span><a href="#Male" data-toc-modified-id="Male-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Male</a></span></li><li><span><a href="#Female" data-toc-modified-id="Female-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Female</a></span></li></ul></li><li><span><a href="#Conditioned-on-age" data-toc-modified-id="Conditioned-on-age-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Conditioned on age</a></span><ul class="toc-item"><li><span><a href="#Young-(1-19-years)" data-toc-modified-id="Young-(1-19-years)-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Young (1-19 years)</a></span></li><li><span><a href="#Adult-(20-65-years)" data-toc-modified-id="Adult-(20-65-years)-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Adult (20-65 years)</a></span></li><li><span><a href="#Elderly-(<65-years)" data-toc-modified-id="Elderly-(<65-years)-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>Elderly (&lt;65 years)</a></span></li></ul></li><li><span><a href="#Save-all-the-populations-in-disproportionality-estimation" data-toc-modified-id="Save-all-the-populations-in-disproportionality-estimation-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Save all the populations in disproportionality estimation</a></span></li></ul></div>

# Load data


In [1]:
import itertools
from tqdm import tqdm
from collections import Counter
import scipy.stats as stats
import pandas as pd
import numpy as np
import pickle
from statsmodels.stats.multitest import multipletests
# %matplotlib notebook
pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings('ignore')

# load the dictionaries for drugs,  AE
# drug_dic = pickle.load(open('../Data/curated/drug_dic.pk', 'rb'))

# In this MedDRA_dic, key is string of PT_name, value is a list:
# [PT, PT_name, HLT,HLT_name,HLGT,HLGT_name,SOC,SOC_name,SOC_abbr]
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
# The context managers close each file handle as soon as the object is read
# (the previous `pickle.load(open(...))` form left the handles open).
with open('../Data/curated/AE_dic.pk', 'rb') as ae_dic_file:
    meddra_se_disease_dic = pickle.load(ae_dic_file)
with open('../Data/curated/AE_mapping.pk', 'rb') as ae_mapping_file:
    MedDRA_dic_all = pickle.load(ae_mapping_file)

In [2]:
def format_tex(float_number):
    r"""Format a number as a LaTeX "less than" bound, e.g. ``$< 2.5\times10^{2}$``.

    The mantissa is truncated (not rounded) to the first three characters of
    its string representation.

    Parameters
    ----------
    float_number : float
        Value to format. Zero is handled explicitly; positive values are
        split into mantissa and base-10 exponent.

    Returns
    -------
    str
        Math-mode LaTeX string of the form ``$< m\times10^{e}$``.
    """
    # Handle zero before taking the logarithm: log10(0) is -inf and the
    # mantissa would become 0/0 (NaN).
    if float_number == 0:
        return r"$< 0 \times10^{0}$"
    exponent = np.floor(np.log10(float_number))
    mantissa = float_number / 10**exponent
    mantissa_format = str(mantissa)[0:3]
    # Raw string literal: in a normal literal the "\t" of "\times" is parsed
    # as a TAB character, which corrupts the LaTeX command.
    return r"$< {0}\times10^{{{1}}}$".format(mantissa_format, str(int(exponent)))
    
def weird_division(n, d):
    """Divide ``n`` by ``d``, returning 0 instead when ``d`` is falsy (e.g. zero)."""
    if not d:
        return 0
    return n / d

def CI(ROR, A, B, C, D):
    """Return the (upper, lower) bounds of an interval around ``ROR`` on the log scale.

    The half-width is ``1.96 * sqrt(1/A + 1/B + 1/C + 1/D)``; any term whose
    count is zero contributes 0 (see ``weird_division``). The bounds are
    mapped back with ``exp``.

    Returns
    -------
    tuple
        ``(CI_up, CI_down)`` -- note the upper bound comes first.
    """
    log_ror = np.log(ROR)
    inv_a = weird_division(1, A)
    inv_b = weird_division(1, B)
    inv_c = weird_division(1, C)
    inv_d = weird_division(1, D)
    half_width = 1.96 * np.sqrt(inv_a + inv_b + inv_c + inv_d)
    upper_bound = np.exp(log_ror + half_width)
    lower_bound = np.exp(log_ror - half_width)
    return upper_bound, lower_bound

# All patients

In [3]:
# Load the table used for the "All patients" analysis. The context manager closes
# the file handle after reading (the bare `pickle.load(open(...))` form left it open).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/pandemic/SE_uncondition.pk', 'rb') as uncondition_file:
    SE_uncondition = pickle.load(uncondition_file)

In [4]:
# Remove some SEs that make little sense or are not related to medication
# """15765: device use error, 2688:device physical property issue; 2232:product contamination;4293: compulsions; 
# 10484: infusion; 6275:body height below normal; 2325:large for dates baby. 10870:product distribution issue.
# 8657:poverty of thought content. 2222:poor personal hygiene. 1039:family stress. 1215:nosocomial infection.
# 4347:syringe issue. 4374:confabulation, 647:device occlusion, 6141:product outer packaging issue
# 3716:product contamination with body fluid, 2048:fear of disease. 10249:drain placement.1848:treatment failure
# 2659:device leakage. 8613:device alarm issue. 9141:product label confusion. 8593:device connection issue
# 3820:application site discharge. 6799:post procedural discharge: 1728:poisoning deliberate
# 2576:social problem. 132:device malfunction  2591:needle issue. 4216: exomphalos. 1669:fear of falling
# 713:medical device change. 4425:intercepted medication error. 1809:exposure via partner. 3087 :liquid product physical issue
# 4005:medical device implantation. 5616:application site discomfort. 865:device failure. 4908:device ineffective
# 14262:reproductive complication associated with device. 4624: device colour issue. 1051: educational problem
# 1281:device difficult to use. 4551:pregnancy of partner. 9615:prescribed underdose. 11243:product physical consistency issue
# 1598:product odour abnormal. 437:accident at work. 1451:product packaging quantity issue,1679:incorrect drug administration rate
# 627:hospice care. 238:unevaluable event. 3067: imprisonment. 8012:stress at work  6177:medical device pain
# 277:mass, 1046:thrombosis in device, 905:product size issue. 2071:product label issue. 218:off label use
# 2744:product colour issue.  1224: laboratory test abnormal 2139:product packaging issue. 5240:product contamination physical
# 11139: expired device used. 12123:lack of injection site rotation. 639:device issue. 1095:injury associated with device
# 1747:therapeutic product ineffective, 7592: product dropper issue. 158:incorrect dose administered. 1041:economic problem
# 341:device related infection. 655: product physical issue. 4257:device related sepsis,968:treatment noncompliance
# 353:road traffic accident. 991:medication error. 335:drug ineffective, 14157:device physical property issue, 
# 14881:device power source issue, 11494:off label use, 13223: device malfunction, 15755:unintentional medical device removal
# 13242:drug dependence, 
# r *hallucination, visual* with code :4652, malaise:4515, condition aggravated:4846
# 15201:toxicity to various agents,
# 'eating disorder', 'incoherent', 'out of specification test results', 'antibody test negative', 'gene mutation identification test positive', 'gun shot wound', 
# 'bed sharing','antibody test positive', 'large for dates baby,viral load', 'small for dates baby', 'x-ray', 
# 'scan', 'blood test','female condom', 'sleep study','boredom',' toxicity to various agents',
# 'transplant failure', 'pregnancy after post coital contraception','drug intolerance', 'drug withdrawal syndrome'
# [732,7823,15255,12614,13146,1347,12821,7349,1117,4512,3818,315]
# 521:infection

# """

In [5]:
# Lift pandas' display truncation for both columns and rows (None = no limit).
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)


drop_SE_meddra = ['10072878', '10047290', '10039083', '10037784', '10049521', '10057687', '10067769','10012438', '10023830', '10060991', '10036099', '10007225', '10031161', '10046820', '10000217', '10005654', '10074248', '10049058', '10044287', '10061533', '10039449', 
                  '10061637', '10045104', '10016577', '10027990', '10039179', '10053547', '10021124',  '10014408', '10048852', '10060960', '10015670', '10022961', '10067724', '10047853', 
                  '10038457', '10020880', '10053937', '10008967', '10052894', '10026956', '10008267', '10006580', '10028093', '10012318', '10058890', '10010938', '10070933', '10071504',
                  '10005136', '10053249', '10016235', '10036485', '10003598', '10061416', '10020524',  '10037382', '10048222', '10061545', '10058682', '10019299', '10028862', '10026961',
                  '10042598', '10018404', '10029888', '10012557', '10003476', '10030527', '10068065','10017792', '10064913', '10069880', '10069865', '10053762', '10063829', '10072931', 
                  '10066901', '10013700', '10012587', '10069217', '10069218', '10016173', '10010144', '10022459', '10038001', '10064685', '10070691', '10058049', '10071406', '10071408', 
                  '10056871', '10012578', '10069841', '10003569', '10041738', '10008686', '10010007', '10013663', '10070863', '10014062', '10021630', '10071067', '10061426', '10063478', 
                  '10041092', '10048064', '10061498', '10061726', '10016387', '10053319', '10048909', '10021789', '10019075', '10025482', '10010264', '10073318', '10073513', '10052971', 
                  '10064687', '10053686', '10069327', '10063130', '10073317', '10071404', '10071415', '10036790', '10069868', '10064685', '10071134', '10064684', '10059108', '10016251',
                  '10056871', '10069249', '10012578', '10012575', '10062546', '10069803', '10069889',  '10069853', '10069326', '10069331', '10010151', '10069299', '10069330', '10069250', 
                  '10069226', '10071408', '10060769', '10071412', '10049481', '10062255', '10064538',  '10069289', '10069405', '10053683', '10069173', '10069227', '10049711', '10069229', 
                  '10069222', '10071403', '10069217', '10012587', '10027754', '10069221', '10041291','10069846', '10070691', '10069325', '10070592', '10070305', '10069224', '10072867', 
                  '10051808', '10068945', '10070692', '10071407', '10069878', '10063560', '10069223','10049812', '10065769', '10071287', '10061483', '10071406', '10069228', '10069175',
                  '10069297', '10069802', '10072754', '10061366', '10069837', '10069218', '10069293',  '10073300', '10069272', '10069873', '10041290', '10065535', '10059875', '10053487', 
                  '10072753', '10069176', '10063601', '10012579', '10069266', '10069271', '10069871', '10068515', '10061087', '10071575', '10073311', '10072608', '10069292', '10070617', 
                  '10059057', '10069864', '10071051', '10069329', '10050474', '10069267', '10070468', '10051297', '10070773', '10069232', '10071405', '10072645', '10069294', '10069298', 
                  '10070470', '10062680', '10069300', '10069231', '10062197', '10038773', '10072082', '10065066', '10069841', '10069268', '10073302', '10069273', '10070765', '10059015', 
                  '10069296', '10068444', '10057254', '10072342', '10069295', '10052371', '10069174', '10069867', '10069332', '10063599', '10073170', '10069877', '10060770', '10073306', 
                  '10071409', '10071148', '10065963', '10069220', '10073594', '10073760', '10074266',  '10073305', '10072950', '10051841', '10069861', '10069801', '10071587', '10073301', 
                  '10069838', '10030020', '10070574', '10030018', '10042458', '10042464', '10022081',  '10022086', '10022085', '10022061', '10022067', '10022111', '10022056', '10062519', 
                  '10066083', '10061549', '10022095', '10022093', '10022052', '10022112', '10053424',  '10003053', '10003041', '10003055', '10066044', '10022107', '10059203', '10057880',
                  '10070245', '10054266', '10053483', '10053183', '10051572', '10050104', '10064578',  '10003060', '10003046', '10059830', '10022078', '10048943', '10003036', '10050057',
                  '10022075', '10048744', '10059005', '10061409', '10065577', '10065059', '10059650',  '10048941', '10068317', '10017012', '10063783', '10049666', '10051116', '10017000', 
                  '10051099', '10022076', '10022062', '10063772', '10051154', '10053664', '10048634',  '10064774', '10063782', '10056520', '10022066', '10022079', '10052267', '10057688', 
                  '10067620', '10059008', '10065600', '10052264', '10022105', '10067252', '10022055',  '10059009', '10049041', '10063785', '10063587', '10052268', '10022072', '10022071', 
                  '10050103', '10022104', '10066778', '10058043', '10059048', '10065614', '10022065',  '10022082', '10058464', '10066797', '10054997', '10063860', '10025478', '10051101',
                  '10022044', '10003050', '10022088', '10066210', '10052162', '10063775', '10065615',  '10068689', '10067253', '10067255', '10049043', '10053663', '10022090', '10021542', 
                  '10072136', '10063765', '10063763', '10063862', '10058713', '10065488', '10065460',  '10066211', '10053505', '10022045', '10063779', '10063867', '10063871', '10055123', 
                  '10063850', '10049042', '10063683', '10063848', '10016777', '10067996', '10022048', '10067995', '10036769', '10063786', '10058062', '10065464', '10063857', '10054995', 
                  '10054996', '10053482', '10059241', '10051100', '10063778', '10068607', '10055122',  '10063839', '10050082', '10065489', '10063072', '10066209', '10073059', '10065485', 
                  '10007811', '10063776', '10063762', '10054812', '10063868', '10068159', '10065491',  '10063854', '10063858', '10063873', '10063774', '10065463', '10069667', '10063881', 
                  '10050101', '10052270', '10072694', '10065490', '10059079', '10064109', '10063780', '10066149', '10063874', '10049044', '10053484', '10068922', '10065456', '10049660', 
                  '10055662', '10049260', '10065455', '10065476', '10063771', '10065652', '10063865',  '10065473', '10022083', '10053995', '10065653', '10003048', '10003059', '10052271', 
                  '10066041', '10063870', '10065902', '10066214', '10050100', '10065458', '10073418',  '10056270', '10073412', '10054092', '10060124', '10059386', '10063856', '10063784', 
                  '10068954', '10073457', '10063863', '10001315', '10062102', '10055909', '10073174',  '10073759', '10065461', '10052272', '10033905', '10073779', '10074011', '10048648', 
                  '10066221', '10073989', '10073998', '10074008', '10074013', '10055117', '10073752',  '10074001', '10065487', '10073993', '10069624', '10073992', '10073994', '10074004', 
                  '10073416', '10065457', '10063880', '10074015', '10073996', '10074010', '10074012','10074005', '10074000', '10073624', '10073606', '10063864', '10065454', '10073990', 
                  '10073615', '10068881', '10065475', '10073612', '10063960', '10013654', '10073768',   '10013709', '10036556', '10045188', '10051076', '10072268', '10057362', '10073508', 
                  '10013687', '10022523', '10033295', '10061623', '10013710', '10013722', '10013745',  '10026923', '10000381', '10068071', '10014166', '10049177', '10060942', '10051118',
                  '10066053', '10045542', '10057857', '10056327', '10059866', '10052016', '10064306', '10018981', '10013718', '10029719', '10036573', '10063686', '10052804', '10066368', 
                  '10036567', '10061132', '10067082', '10064373', '10048958', '10050895', '10013756',  '10068072', '10028243', '10067667', '10049998', '10060940', '10049975', '10050845', 
                  '10063370', '10049463', '10013752', '10057856', '10062015', '10052805', '10061133', '10050192', '10061452', '10054807', '10060320', '10060321', '10063122', '10048407',
                  '10013717', '10036575', '10049055', '10060144', '10013753', '10072426', '10050846', '10052744', '10061824', '10062014', '10064374', '10067010', '10066468', '10052237', 
                  '10060145', '10036574', '10050425', '10064773', '10063222', '10052806', '10048723',  '10036578', '10059641', '10067688', '10073085', '10066266', '10053580', '10064381',
                  '10064937', '10073954', '10052970', '10073702', '10013744', '10072385', '10046735', '10051792', '10044439', '10067482', '10053716', '10052428', '10062932', '10018794', 
                  '10048038', '10029897', '10036410', '10048629', '10072170', '10060933', '10050325',  '10048031', '10061468', '10061613', '10053669', '10023439', '10053692', '10010185', 
                  '10051358', '10051373', '10050858', '10010162', '10025127', '10047920', '10020364',  '10066337', '10019314', '10059442', '10024714', '10058672', '10050778', '10065044', 
                  '10057677', '10063181', '10066900', '10019315', '10038533', '10043903', '10024715',  '10054923', '10058041', '10061730', '10050852', '10058042', '10059185', '10063581', 
                  '10051604', '10066194', '10061890', '10041899', '10065240', '10011643', '10057925',  '10058845', '10010183', '10057679', '10054108', '10065386', '10068179', '10059444',
                  '10052277', '10010186', '10056409', '10010184', '10010187', '10065242', '10049169',  '10060345', '10035148', '10059032', '10048870', '10062355', '10013754', '10061822', 
                  '10063671', '10060872', '10003051', '10003054', '10022064', '10022094', '10027091',  '10048396', '10050114', '10050464', '10050729', '10053425', '10053998', '10054846', 
                  '10054994', '10057581', '10057843', '10058142', '10058974', '10059058', '10061111',  '10061153', '10061649', '10063781', '10063866', '10064355', '10064366', '10064382',
                  '10064385', '10064505', '10064998', '10065117', '10065484', '10066967', '10068003',  '10068383', '10069216', '10069842', '10069902', '10071430', '10072720', '10073303', 
                  '10073336', '10074425', '10074495', '10074497', '10074498', '10074508', '10074555',  '10074586', '10074704', '10074758', '10074796', '10074853', '10074860', '10074868', 
                  '10074896', '10074902', '10074903', '10074904', '10074905', '10074906', '10074946',  '10075097', '10075103', '10075107', '10075333', '10075373', '10075461', '10075511',
                  '10075571', '10075573', '10075574', '10075578', '10075580', '10075585', '10075765',  '10075928', '10075933', '10075965', '10075967', '10075971', '10076053', '10076065', 
                  '10076070', '10076073', '10076087', '10076089', '10076091', '10076101', '10076128', '10076133', '10076141', '10076182', '10076232', '10076273', '10076308', '10076309', 
                  '10076368', '10076470', '10076476', '10076481', '10076503', '10076542', '10076544',  '10076573', '10076637', '10076639', '10076869', '10076874', '10076897', '10076936',
                  '10076991', '10077040', '10077107', '10077455', '10077643', '10077659', '10077672', '10077678', '10077767', '10077796', '10077800', '10077801', '10077812', '10078105',
                  '10078156', '10078325', '10078340', '10078390', '10078504', '10078525', '10078668',  '10078675', '10079007', '10079078', '10079212', '10079213', '10079221', '10079277', 
                  '10079315', '10079316', '10079317', '10079381', '10079400', '10079404', '10079466', '10079523', '10079645', '10079843', '10079846', '10079849', '10079903', '10080000', 
                  '10080001', '10080092', '10080099', '10080179', '10080231', '10080304', '10080357','10080359', '10080459', '10080648', '10080714', '10080718', '10080751', '10080753', 
                  '10080754', '10080804', '10080901', '10080903', '10080974', '10081202', '10081301', '10081359', '10081478', '10081479', '10081480', '10081540', '10081572', '10081574', 
                  '10081575', '10081576', '10081577', '10081578', '10081579', '10081580', '10081581','10081675', '10081704', '10081742', '10081743', '10081770', '10081771', '10082169', 
                  '10082200', '10082201', '10082202', '10082204', '10082205', '10082292', '10082458', '10082527', '10083420', '10083599', '10083995', '10061427', '10002653', '10077122', 
                  '10071095', '10001756', '10002730', '10008453', '10014404', '10025250', '10034998', '10037794', '10051082', '10051083', '10052909', '10053073', '10053468', '10053469', 
                  '10054976', '10054977', '10056613', '10057374', '10057480', '10058909', '10059283',  '10059828', '10059862', '10061018', '10061758', '10062035', '10062117', '10064728',
                  '10065100', '10065154', '10065357', '10066377', '10066401', '10067768', '10068048', '10068492', '10072806', '10074079', '10074300', '10074746', '10074842', '10074950', 
                  '10074982', '10078115', '10078798', '10079637', '10080422', '10083202']

meddra_drop_list = ['10003051', '10003054', '10022064', '10022094', '10027091', '10048396', '10050114', '10050464', '10050729', '10053425', '10053998', '10054846', '10054994', '10057581', 
                    '10057843', '10058142', '10058974', '10059058', '10061111', '10061153', '10061649', '10063781', '10063866', '10064355', '10064366', '10064382', '10064385', '10064505', 
                    '10064998', '10065117', '10065484', '10066967', '10068003', '10068383', '10069216', '10069842', '10069902', '10071430', '10072720', '10073303', '10073336', '10074425', 
                    '10074495', '10074497', '10074498', '10074508', '10074555', '10074586', '10074704','10074758', '10074796', '10074853', '10074860', '10074868', '10074896', '10074902', 
                    '10074903', '10074904', '10074905', '10074906', '10074946', '10075097', '10075103',   '10075107', '10075333', '10075373', '10075461', '10075511', '10075571', '10075573', 
                    '10075574', '10075578', '10075580', '10075585', '10075765', '10075928', '10075933', '10075965', '10075967', '10075971', '10076053', '10076065', '10076070', '10076073', 
                    '10076087', '10076089', '10076091', '10076101', '10076128', '10076133', '10076141', '10076182', '10076232', '10076273', '10076308', '10076309', '10076368', '10076470', 
                    '10076476', '10076481', '10076503', '10076542', '10076544', '10076573', '10076637',   '10076639', '10076869', '10076874', '10076897', '10076936', '10076991', '10077040', 
                    '10077107', '10077455', '10077643', '10077659', '10077672', '10077678', '10077767',   '10077796', '10077800', '10077801', '10077812', '10078105', '10078156', '10078325', 
                    '10078340', '10078390', '10078504', '10078525', '10078668', '10078675', '10079007',  '10079078', '10079212', '10079213', '10079221', '10079277', '10079315', '10079316', 
                    '10079317', '10079381', '10079400', '10079404', '10079466', '10079523', '10079645',  '10079843', '10079846', '10079849', '10079903', '10080000', '10080001', '10080092', 
                    '10080099', '10080179', '10080231', '10080304', '10080357', '10080359', '10080459',  '10080648', '10080714', '10080718', '10080751', '10080753', '10080754', '10080804', 
                    '10080901', '10080903', '10080974', '10081202', '10081301', '10081359', '10081478', '10081479', '10081480', '10081540', '10081572', '10081574', '10081575', '10081576', 
                    '10081577', '10081578', '10081579', '10081580', '10081581', '10081675', '10081704', '10081742', '10081743', '10081770', '10081771', '10082169', '10082200', '10082201', 
                    '10082202', '10082204', '10082205', '10082292', '10082458', '10082527', '10083420', '10083599', '10083995', '10061427' ,'10002653',  '10077122',  '10071095', '10001756', 
                    '10002730', '10008453', '10014404', '10025250', '10034998', '10037794', '10051082','10051083', '10052909', '10053073', '10053468', '10053469', '10054976', '10054977', '10056613', 
                    '10057374', '10057480', '10058909', '10059283', '10059828', '10059862', '10061018', '10061758',   '10062035', '10062117', '10064728', '10065100', '10065154', '10065357', 
                    '10068048', '10068492', '10072806', '10074079', '10074300', '10074746', '10074842', '10074950', '10074982',   '10078115', '10078798', '10079637', '10080422', '10083202',  
                    '10040560', '10060938', '10012335', '10011762','10011906','10016256', '10040642', '10000059', '10016322','10033371','10042209', '10079987', '10022116','10050953', '10046274',
                    '10066377', '10066401', '10067768', '10013971', '10013969', '10036590', '10084268', '10051905', '10084271','10084451','10070255','10084380']
# '10084268', '10051905', '10084271','10084451','10070255','10084380','10016256',
## '10084268' is COVID-19; we remove it too [along with all other explicitly COVID-related symptoms?]
## '10051905': 'coronavirus infection'
## '10084271'	'sars-cov-2 test positive'
## '10084451'	'suspected covid-19'	
## 10070255	coronavirus test positive	
## 10084380	covid-19 pneumonia	


## 10077122	device delivery system issue
## '10016256'	'fatigue'	
## '10071095'	'growth failure'	

# Append the second curated list to the first in place; `drop_list` is then
# another name for that same (extended) list object.
drop_SE_meddra += meddra_drop_list
# pickle.dump(drop_SE_name, open('../Data/pandemic/drop_SE_name.pk','wb'))
drop_list = drop_SE_meddra

In [6]:
# drop_SE_name

In [7]:
# Keep one row per side-effect code.
SE_uncondition = SE_uncondition.drop_duplicates('SE')

# Drop the curated "nonsense" side effects. `isin` hashes drop_list once rather
# than scanning the whole list for every row; `.tolist()` keeps `idd` a plain
# list of booleans as before.
idd = (~SE_uncondition['SE'].isin(drop_list)).tolist()
SE_uncondition = SE_uncondition[idd]

# NaN = 0/0, which carries no information in our case, so we drop those rows first.
SE_uncondition = SE_uncondition[SE_uncondition['2019_ROR'].notna()]

In [8]:
# """Find the ID of nonsense SE by keywords, and then copy the IDs to the above drop_list"""
# """Remove the SE with specific word"""
# # drop_word = ['device', 'issue', 'product', 'equipment', 'exposure', 'broken','falling', 'suicide','idea', 'site',
# #             'crime', 'foreign', 'quality','drug','pregnancy','dose', 'nonspecific' ,'homicid','event','wound',
# #              'idea', 'transplant', 'thoughts', 'user','infusion', 'plague', 'technique', 'medication']
# drop_word = ['therapy']

# drop_index = [any(word in se for word in drop_word) for se in SE_uncondition.name]
# drop_list_1 = SE_uncondition[drop_index]
# SE_uncondition.shape, drop_list_1.shape

# print(list(drop_list_1.SE))

# ll = ['eating disorder', 'incoherent', 'out of specification test results', 'antibody test negative', 'gene mutation identification test positive', 'gun shot wound', 
# 'bed sharing','antibody test positive', 'large for dates baby,viral load', 'small for dates baby', 'x-ray', 
# 'scan', 'blood test','female condom', 'sleep study','boredom',' toxicity to various agents',
# 'transplant failure', 'pregnancy after post coital contraception','drug intolerance', 'drug withdrawal syndrome', 'gustatory and olfactory', 'anosmia']
# for i in ll:
#     print(list(SE_uncondition[SE_uncondition.name==i].SE))


In [9]:
SE_uncondition.head(3)

Unnamed: 0,SE,name,2013_A,2013_B,2014_A,2014_B,2015_A,2015_B,2016_A,2016_B,2017_A,2017_B,2018_A,2018_B,2019_A,2019_B,2020_A,2020_B,2013_ROR,2014_ROR,2015_ROR,2016_ROR,2017_ROR,2018_ROR,2019_ROR,2013_Delta,2014_Delta,2015_Delta,2016_Delta,2017_Delta,2018_Delta,2019_Delta
1,10000029,5-alpha-reductase deficiency,0,89334,0,97804,0,179383,0,169233,0,210736,0,244005,0,220920,1,211151,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
2,10000044,abdomen crushing,0,89334,0,97804,0,179383,0,169233,0,210736,0,244005,0,220920,1,211151,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
3,10000050,abdominal adhesions,14,89320,14,97790,16,179367,7,169226,21,210715,16,243989,15,220905,12,211140,0.362603,0.396988,0.637138,1.373978,0.570278,0.866684,0.836999,-0.142857,-0.142857,-0.25,0.714286,-0.428571,-0.25,-0.2


In [10]:
# Keep only the columns needed for the 2019-vs-2020 comparison. `.copy()` makes this an
# independent frame, so the column assignments below do not write into a slice of
# SE_uncondition (which raises SettingWithCopyWarning, hidden here by the warnings filter).
SE_uncondition_2019 = SE_uncondition[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on the 2x2 table [[2019_A, 2019_B], [2020_A, 2020_B]]; element [1] is the p-value.
SE_uncondition_2019['p_value'] = SE_uncondition_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction; [0:2] is (reject flags, corrected p-values)
SE_uncondition_2019['sig'], SE_uncondition_2019['p_corrected']  = multipletests(pvals=SE_uncondition_2019['p_value'], alpha=0.05, method='bonferroni')[0:2]

### for volcano plot, keep the ROR and P-value of all SE
# The context manager flushes and closes the output file once the dump finishes.
with open('../Data/pandemic/SE_uncondition_2019_volcano.pk', 'wb') as volcano_file:
    pickle.dump(SE_uncondition_2019, volcano_file)  # update the dataframe with ROR and Delta

SE_uncondition_2019_sig = SE_uncondition_2019[SE_uncondition_2019['sig']==True].copy()
# calculate 95% confidence interval -- CI() returns (upper, lower); evaluate it once per row
uncondition_ci_bounds = [
    CI(ror, a_2019, b_2019, a_2020, b_2020)
    for ror, a_2019, b_2019, a_2020, b_2020 in zip(
        SE_uncondition_2019_sig['2019_ROR'],
        SE_uncondition_2019_sig['2019_A'], SE_uncondition_2019_sig['2019_B'],
        SE_uncondition_2019_sig['2020_A'], SE_uncondition_2019_sig['2020_B'])
]
SE_uncondition_2019_sig['CI_upper'] = [upper for upper, _ in uncondition_ci_bounds]
SE_uncondition_2019_sig['CI_lower'] = [lower for _, lower in uncondition_ci_bounds]

print('the figure for volcano figure is saved')

the figure for volcano figure is saved


In [11]:
# Split the significant side effects by the sign of `2019_Delta`.
uncondition_delta = SE_uncondition_2019_sig['2019_Delta']
SE_uncondition_2019_sig_over = SE_uncondition_2019_sig.loc[uncondition_delta > 0]
SE_uncondition_2019_sig_under = SE_uncondition_2019_sig.loc[uncondition_delta < 0]

# Show the negative-Delta side effects with the smallest corrected p-values.
SE_uncondition_2019_sig_under.sort_values(by='p_corrected').head()

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
10922,10064848,chronic kidney disease,2113,218807,640,210512,0.314822,-0.697113,2.4048830000000003e-169,True,2.389732e-165,0.344,0.288118
14808,10079622,sprue-like enteropathy,353,220567,0,211152,0.0,-1.0,2.504882e-103,True,2.489101e-99,0.0,0.0
9730,10061481,renal injury,556,220364,127,211025,0.238526,-0.771583,6.892114e-61,True,6.848694e-57,0.289263,0.196688
14358,10077512,end stage renal disease,585,220335,213,210939,0.380321,-0.635897,1.674309e-37,True,1.663761e-33,0.444959,0.325073
12565,10070608,infective pulmonary exacerbation of cystic fib...,661,220259,329,210823,0.520008,-0.502269,2.6077470000000002e-23,True,2.5913179999999995e-19,0.59361,0.455532


# Conditioned on gender (sex)
 
The gender contains:
- Male
- Female
- unknown

Therefore, the male and female counts may not sum to the unconditioned (all-patient) counts. In this analysis, we omit reports with unknown gender.

## Male

In [12]:
# Load the male table; the context manager closes the file handle after reading.
with open('../Data/pandemic/SE_male.pk', 'rb') as male_file:
    SE_male = pickle.load(male_file)
# drop the nonsense SE -- `isin` hashes drop_list once rather than scanning it for every row;
# `.tolist()` keeps `idd_male` a plain list of booleans as before.
idd_male = (~SE_male['SE'].isin(drop_list)).tolist()
SE_male = SE_male[idd_male]

SE_male = SE_male.drop_duplicates('SE')
SE_male = SE_male[SE_male['2019_ROR'].notna()]
# `.copy()` so the column assignments below act on an independent frame, not a slice of SE_male.
SE_male_2019 = SE_male[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on [[2019_A, 2019_B], [2020_A, 2020_B]]; element [1] is the p-value.
SE_male_2019['p_value'] = SE_male_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction; [0:2] is (reject flags, corrected p-values)
SE_male_2019['sig'], SE_male_2019['p_corrected']  = multipletests(pvals=SE_male_2019['p_value'], alpha=0.05, method='bonferroni')[0:2]
SE_male_2019_sig = SE_male_2019[SE_male_2019['sig']==True].copy()
# CI() returns (upper, lower); evaluate it once per row instead of once per bound.
male_ci_bounds = [
    CI(ror, a_2019, b_2019, a_2020, b_2020)
    for ror, a_2019, b_2019, a_2020, b_2020 in zip(
        SE_male_2019_sig['2019_ROR'],
        SE_male_2019_sig['2019_A'], SE_male_2019_sig['2019_B'],
        SE_male_2019_sig['2020_A'], SE_male_2019_sig['2020_B'])
]
SE_male_2019_sig['CI_upper'] = [upper for upper, _ in male_ci_bounds]
SE_male_2019_sig['CI_lower'] = [lower for _, lower in male_ci_bounds]

SE_male_2019_sig.sort_values('p_corrected', ascending=True)

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
143,10001551,alanine aminotransferase increased,185,73490,658,72075,3.626584,2.556757,1.324135e-64,True,9.4914e-61,4.270457,3.07979
2205,10019063,hallucination,318,73357,870,71863,2.792726,1.735849,6.7910150000000005e-62,True,4.8678e-58,3.176776,2.455105
4125,10038669,respiratory arrest,31,73644,250,72483,8.19369,7.064516,1.3363989999999998e-44,True,9.579309999999999e-41,11.902156,5.640705
3300,10029331,neuropathy peripheral,353,73322,803,71930,2.31881,1.274788,1.470464e-42,True,1.0540289999999999e-38,2.629103,2.045137
387,10003481,aspartate aminotransferase increased,170,73505,487,72246,2.914628,1.864706,2.090067e-37,True,1.4981600000000002e-33,3.471706,2.44694
2443,10020983,hypogammaglobulinaemia,5,73670,110,72623,22.317172,21.0,2.219081e-27,True,1.590637e-23,54.688511,9.107145
935,10007515,cardiac arrest,339,73336,631,72102,1.893213,0.861357,3.95022e-22,True,2.831518e-18,2.161195,1.658461
2049,10017955,gastrointestinal haemorrhage,403,73272,707,72026,1.784691,0.754342,5.372975e-21,True,3.851349e-17,2.017811,1.578504
11722,10077692,liver function test increased,137,73538,332,72401,2.461415,1.423358,2.197343e-20,True,1.575055e-16,3.00425,2.016663
1442,10012239,delusion,76,73599,222,72511,2.964882,1.921053,3.0229e-18,True,2.166815e-14,3.847882,2.28451


In [13]:
# Split the significant male side effects by the sign of `2019_Delta`.
male_delta = SE_male_2019_sig['2019_Delta']
SE_male_2019_sig_over = SE_male_2019_sig.loc[male_delta > 0]
SE_male_2019_sig_under = SE_male_2019_sig.loc[male_delta < 0]

# Show the negative-Delta side effects with the smallest corrected p-values.
SE_male_2019_sig_under.sort_values(by='p_corrected').head()

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
9069,10064848,chronic kidney disease,336,73339,158,72575,0.475188,-0.529762,2.275878e-15,True,1.631349e-11,0.574247,0.393218
2135,10018687,granulocytopenia,87,73588,18,72715,0.209381,-0.793103,5.350892e-12,True,3.835519e-08,0.347853,0.126031
10352,10070608,infective pulmonary exacerbation of cystic fib...,249,73426,133,72600,0.540214,-0.465863,5.534627e-09,True,3.967221e-05,0.666953,0.437558
3877,10036975,prostatic specific antigen increased,130,73545,51,72682,0.396966,-0.607692,6.008025e-09,True,4.306552e-05,0.548868,0.287103
957,10007617,cardio-respiratory arrest,202,73473,100,72633,0.500775,-0.50495,6.406438e-09,True,4.592135e-05,0.636529,0.393973


In [14]:
# SE codes that are significant in the male subgroup but not in the
# unconditioned (all-patient) result.
l_male = list(SE_male_2019_sig['SE'])
l_uncondition = list(SE_uncondition_2019_sig['SE'])
set(l_male).difference(l_uncondition)

{'10017788',
 '10018800',
 '10019211',
 '10024378',
 '10028810',
 '10051081',
 '10061114'}

## Female

In [15]:
# Load the female-only side-effect table. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/pandemic/SE_female.pk', 'rb') as pk_file:
    SE_female = pickle.load(pk_file)

# Remove the SE codes listed in drop_list, keep one row per SE code, and
# discard rows whose 2019 ROR is missing.
idd_female = [i not in drop_list for i in SE_female['SE']]
SE_female = SE_female[idd_female]
SE_female = SE_female.drop_duplicates('SE')
SE_female = SE_female[SE_female['2019_ROR'].notna()]

# .copy() gives an independent frame, so the columns added below are not
# written to a slice of SE_female (avoids SettingWithCopyWarning).
SE_female_2019 = SE_female[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on the 2x2 table [[2019_A, 2019_B], [2020_A, 2020_B]];
# element [1] of the result is the p-value.
SE_female_2019['p_value'] = SE_female_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction at alpha=0.05; the first two return
# values are the reject flags and the corrected p-values.
SE_female_2019['sig'], SE_female_2019['p_corrected']  = multipletests(pvals=SE_female_2019['p_value'], 
                                                                  alpha=0.05, method='bonferroni')[0:2]
# Keep only the rows flagged significant; copy again because the CI columns
# are added to this filtered frame.
SE_female_2019_sig = SE_female_2019[SE_female_2019['sig']==True].copy()
# CI(...) is defined elsewhere in the notebook; its element [0] is stored as
# the upper bound and element [1] as the lower bound.
SE_female_2019_sig['CI_upper'] = SE_female_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[0], axis = 1)
SE_female_2019_sig['CI_lower'] = SE_female_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[1], axis = 1)


In [16]:
# Partition the significant female side effects by the sign of '2019_Delta':
# strictly positive rows go to *_over, strictly negative rows to *_under
# (rows with a delta of exactly zero land in neither frame).
SE_female_2019_sig_over = SE_female_2019_sig.loc[lambda frame: frame['2019_Delta'].gt(0)]
SE_female_2019_sig_under = SE_female_2019_sig.loc[lambda frame: frame['2019_Delta'].lt(0)]

# Show the five *_under rows with the smallest corrected p-values.
SE_female_2019_sig_under.sort_values(by='p_corrected').head(5)

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
9704,10064848,chronic kidney disease,494,117471,194,113713,0.405691,-0.607287,5.921405e-29,True,4.7584410000000005e-25,0.479076,0.343547
8663,10061481,renal injury,185,117780,59,113848,0.329934,-0.681081,1.494678e-15,True,1.201123e-11,0.442328,0.246098
11115,10070608,infective pulmonary exacerbation of cystic fib...,378,117587,184,113723,0.503312,-0.513228,4.457624e-15,True,3.582147e-11,0.600392,0.421929
1029,10007617,cardio-respiratory arrest,249,117716,99,113808,0.411243,-0.60241,4.684562e-15,True,3.764514e-11,0.519156,0.325761
2312,10018687,granulocytopenia,61,117904,8,113899,0.135759,-0.868852,4.682178e-11,True,3.762598e-07,0.283704,0.064964


In [17]:
## Which SE codes are significant only in the female subgroup and not in the unconditioned result?
l_male = list(SE_male_2019_sig['SE'])
l_female = list(SE_female_2019_sig['SE'])
l_uncondition = list(SE_uncondition_2019_sig['SE'])
set(l_female).difference(l_uncondition)

{'10003988',
 '10016825',
 '10022000',
 '10033553',
 '10034829',
 '10047899',
 '10048439',
 '10048621',
 '10054787',
 '10062237'}

# Conditioned on age


## Young (1-19 years)

In [18]:
# Load the young-subgroup side-effect table. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/pandemic/SE_young.pk', 'rb') as pk_file:
    SE_young = pickle.load(pk_file)

# Remove the SE codes listed in drop_list.
idd_young = [i not in drop_list for i in SE_young['SE']]
SE_young = SE_young[idd_young]

# Keep one row per SE code and discard rows whose 2019 ROR is missing.
SE_young = SE_young.drop_duplicates('SE')
SE_young = SE_young[SE_young['2019_ROR'].notna()]
# .copy() gives an independent frame, so the columns added below are not
# written to a slice of SE_young (avoids SettingWithCopyWarning).
SE_young_2019 = SE_young[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on the 2x2 table [[2019_A, 2019_B], [2020_A, 2020_B]];
# element [1] of the result is the p-value.
SE_young_2019['p_value'] = SE_young_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction at alpha=0.05; the first two return
# values are the reject flags and the corrected p-values.
SE_young_2019['sig'], SE_young_2019['p_corrected']  = multipletests(pvals=SE_young_2019['p_value'], alpha=0.05, method='bonferroni')[0:2]
# Keep only the rows flagged significant; copy again because the CI columns
# are added to this filtered frame.
SE_young_2019_sig = SE_young_2019[SE_young_2019['sig']==True].copy()
# CI(...) is defined elsewhere in the notebook; its element [0] is stored as
# the upper bound and element [1] as the lower bound.
SE_young_2019_sig['CI_upper'] = SE_young_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[0], axis = 1)
SE_young_2019_sig['CI_lower'] = SE_young_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[1], axis = 1)


In [19]:
# Partition the significant young-subgroup side effects by the sign of
# '2019_Delta': strictly positive -> *_over, strictly negative -> *_under.
SE_young_2019_sig_over = SE_young_2019_sig.loc[lambda frame: frame['2019_Delta'].gt(0)]
SE_young_2019_sig_under = SE_young_2019_sig.loc[lambda frame: frame['2019_Delta'].lt(0)]
# Show the five *_over rows with the smallest corrected p-values.
SE_young_2019_sig_over.sort_values(by='p_corrected').head(5)

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
1590,10020983,hypogammaglobulinaemia,4,7598,78,6528,22.696232,18.5,1.412207e-21,True,4.462573e-18,62.026222,8.304857
2161,10029366,neutrophil count decreased,14,7588,90,6516,7.486188,5.428571,1.732967e-17,True,5.476176e-14,13.159375,4.25879
1904,10025256,lymphocyte count decreased,7,7595,69,6537,11.452501,8.857143,3.831113e-16,True,1.210632e-12,24.937961,5.259443
3849,10052015,cytokine release syndrome,20,7582,93,6513,5.41322,3.65,4.513442e-15,True,1.426248e-11,8.785297,3.335453
4842,10061188,haematotoxicity,2,7600,49,6557,28.397133,23.5,3.044908e-14,True,9.621911e-11,116.816549,6.903107


## Adult (20-65 years)

In [20]:
# Load the adult-subgroup side-effect table. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/pandemic/SE_adult.pk', 'rb') as pk_file:
    SE_adult = pickle.load(pk_file)

# Remove the SE codes listed in drop_list.
idd_adult = [i not in drop_list for i in SE_adult['SE']]
SE_adult = SE_adult[idd_adult]

# Keep one row per SE code.
SE_adult = SE_adult.drop_duplicates('SE')

# Discard rows whose 2019 ROR is missing.
SE_adult = SE_adult[SE_adult['2019_ROR'].notna()]
# .copy() gives an independent frame, so the columns added below are not
# written to a slice of SE_adult (avoids SettingWithCopyWarning).
SE_adult_2019 = SE_adult[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on the 2x2 table [[2019_A, 2019_B], [2020_A, 2020_B]];
# element [1] of the result is the p-value.
SE_adult_2019['p_value'] = SE_adult_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction at alpha=0.05; the first two return
# values are the reject flags and the corrected p-values.
SE_adult_2019['sig'], SE_adult_2019['p_corrected']  = multipletests(pvals=SE_adult_2019['p_value'], alpha=0.05, method='bonferroni')[0:2]
# Keep only the rows flagged significant; copy again because the CI columns
# are added to this filtered frame.
SE_adult_2019_sig = SE_adult_2019[SE_adult_2019['sig']==True].copy()
# CI(...) is defined elsewhere in the notebook; its element [0] is stored as
# the upper bound and element [1] as the lower bound.
SE_adult_2019_sig['CI_upper'] = SE_adult_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[0], axis = 1)
SE_adult_2019_sig['CI_lower'] = SE_adult_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[1], axis = 1)


# Partition the significant adult-subgroup side effects by the sign of
# '2019_Delta': strictly positive -> *_over, strictly negative -> *_under.
SE_adult_2019_sig_over = SE_adult_2019_sig.loc[lambda frame: frame['2019_Delta'].gt(0)]
SE_adult_2019_sig_under = SE_adult_2019_sig.loc[lambda frame: frame['2019_Delta'].lt(0)]
# Show the five *_over rows with the smallest corrected p-values.
SE_adult_2019_sig_over.sort_values(by='p_corrected').head(5)

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
4180,10038669,respiratory arrest,23,74186,412,65846,20.181895,16.913043,1.7437760000000001e-103,True,1.267377e-99,30.71828,13.259495
142,10001551,alanine aminotransferase increased,172,74037,683,65575,4.483351,2.97093,1.735457e-86,True,1.26133e-82,5.301086,3.791759
947,10007515,cardiac arrest,313,73896,751,65507,2.70663,1.399361,4.120472e-54,True,2.9947589999999995e-50,3.089439,2.371254
379,10003481,aspartate aminotransferase increased,158,74051,484,65774,3.448776,2.063291,3.1348659999999995e-48,True,2.278421e-44,4.128491,2.880969
2074,10017955,gastrointestinal haemorrhage,164,74045,411,65847,2.818109,1.506098,4.2162110000000003e-32,True,3.064342e-28,3.378388,2.350747


## Elderly (>65 years)

In [21]:
# Load the elderly-subgroup side-effect table. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/pandemic/SE_elderly.pk', 'rb') as pk_file:
    SE_elderly = pickle.load(pk_file)

# Remove the SE codes listed in drop_list.
idd_elderly = [i not in drop_list for i in SE_elderly['SE']]
SE_elderly = SE_elderly[idd_elderly]

# Keep one row per SE code and discard rows whose 2019 ROR is missing.
SE_elderly = SE_elderly.drop_duplicates('SE')
SE_elderly = SE_elderly[SE_elderly['2019_ROR'].notna()]
# .copy() gives an independent frame, so the columns added below are not
# written to a slice of SE_elderly (avoids SettingWithCopyWarning).
SE_elderly_2019 = SE_elderly[['SE','name','2019_A', '2019_B', '2020_A','2020_B','2019_ROR','2019_Delta']].copy()
# Fisher's exact test on the 2x2 table [[2019_A, 2019_B], [2020_A, 2020_B]];
# element [1] of the result is the p-value.
SE_elderly_2019['p_value'] = SE_elderly_2019.apply(lambda row: stats.fisher_exact([[row['2019_A'], row['2019_B']], [row['2020_A'], row['2020_B']]])[1], axis = 1)

# multipletests: Bonferroni correction at alpha=0.05; the first two return
# values are the reject flags and the corrected p-values.
SE_elderly_2019['sig'], SE_elderly_2019['p_corrected']  = multipletests(pvals=SE_elderly_2019['p_value'], alpha=0.05, method='bonferroni')[0:2]
# Keep only the rows flagged significant; copy again because the CI columns
# are added to this filtered frame.
SE_elderly_2019_sig = SE_elderly_2019[SE_elderly_2019['sig']==True].copy()
# CI(...) is defined elsewhere in the notebook; its element [0] is stored as
# the upper bound and element [1] as the lower bound.
SE_elderly_2019_sig['CI_upper'] = SE_elderly_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[0], axis = 1)
SE_elderly_2019_sig['CI_lower'] = SE_elderly_2019_sig.apply(lambda row: CI(row['2019_ROR'], row['2019_A'], row['2019_B'],row['2020_A'], row['2020_B'])[1], axis = 1)


# Spot check: look up the 'pyrexia' row among all tested elderly side effects
# (SE_elderly_2019 holds every tested row, not only the significant ones).
SE_elderly_2019.loc[SE_elderly_2019['name'] == 'pyrexia']

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected
3435,10037660,pyrexia,617,45300,639,45732,1.025873,0.035656,0.670009,False,1.0


In [22]:
# Partition the significant elderly-subgroup side effects by the sign of
# '2019_Delta': strictly positive -> *_over, strictly negative -> *_under.
SE_elderly_2019_sig_over = SE_elderly_2019_sig.loc[lambda frame: frame['2019_Delta'].gt(0)]
SE_elderly_2019_sig_under = SE_elderly_2019_sig.loc[lambda frame: frame['2019_Delta'].lt(0)]


# Show the five *_under rows with the smallest corrected p-values.
SE_elderly_2019_sig_under.sort_values(by='p_corrected').head(5)

Unnamed: 0,SE,name,2019_A,2019_B,2020_A,2020_B,2019_ROR,2019_Delta,p_value,sig,p_corrected,CI_upper,CI_lower
1471,10014866,enteritis,47,45870,12,46359,0.252626,-0.744681,3e-06,True,0.016724,0.476264,0.134001
2790,10028813,nausea,1850,44067,1601,44770,0.851816,-0.134595,4e-06,True,0.024577,0.911922,0.795672
3343,10036975,prostatic specific antigen increased,65,45852,23,46348,0.350059,-0.646154,5e-06,True,0.030115,0.563297,0.217544
1675,10017413,full blood count decreased,211,45706,129,46242,0.604288,-0.388626,6e-06,True,0.034219,0.752567,0.485224
2188,10022004,influenza like illness,177,45740,103,46268,0.57528,-0.418079,6e-06,True,0.03742,0.733701,0.451065


# Save all the populations in disproportionality estimation

In [23]:
# Names of the result frames to persist: over/under frames for the
# unconditioned population and for each sex and age subgroup.
condition_list = ['SE_uncondition_2019_sig_over', 'SE_uncondition_2019_sig_under', 'SE_male_2019_sig_over', 'SE_male_2019_sig_under',
                 'SE_female_2019_sig_over', 'SE_female_2019_sig_under', 
                 'SE_young_2019_sig_over', 'SE_young_2019_sig_under', 'SE_adult_2019_sig_over', 'SE_adult_2019_sig_under',
                 'SE_elderly_2019_sig_over', 'SE_elderly_2019_sig_under']

# Each frame is looked up by name in the notebook namespace and pickled to
# '<name>_step1.pk'. The context manager guarantees the file is flushed and
# closed before the "saved" message is printed; the original left the handle
# open and relied on garbage collection to flush it.
for condition in condition_list:    
    with open('../Data/pandemic/results/'+condition+'_step1.pk', 'wb') as out_file:
        pickle.dump(globals()[condition], out_file)
    print(condition,'saved')
    

SE_uncondition_2019_sig_over saved
SE_uncondition_2019_sig_under saved
SE_male_2019_sig_over saved
SE_male_2019_sig_under saved
SE_female_2019_sig_over saved
SE_female_2019_sig_under saved
SE_young_2019_sig_over saved
SE_young_2019_sig_under saved
SE_adult_2019_sig_over saved
SE_adult_2019_sig_under saved
SE_elderly_2019_sig_over saved
SE_elderly_2019_sig_under saved
