In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from pymongo import MongoClient

In [69]:
client = MongoClient("mongodb://root:openfda@localhost:27017")
client.list_database_names()

['admin', 'config', 'local', 'openfda_20_22_2', 'openfda_latest']

In [5]:
db = client['openfda_latest']
db.list_collection_names()

['drugAdverseEvent']

In [7]:
collection = db['drugAdverseEvent']
collection.estimated_document_count()

14131000

In [11]:
vaccine_names = [
    "Spikevax", "Comirnaty", "Vaxzevria", "Covishield", "Nuvaxovid", "Vidprevtyn", "Janssen"
]

vaccine_names = [i.upper() for i in vaccine_names]

print(vaccine_names)

['SPIKEVAX', 'COMIRNATY', 'VAXZEVRIA', 'COVISHIELD', 'NUVAXOVID', 'VIDPREVTYN', 'JANSSEN']


In [13]:
# zatory, problemy z krzepliowścią, układem krwionośnym, sercem

heart_blood_reactions = [
    'Thrombosis', 
    'Thrombocytopenia',
    'Myocardial infarction',
    'Pulmonary embolism',
    'Deep vein thrombosis',
    'Venous thrombosis',
    'Portal vein thrombosis',
    'Atrial thrombosis',
    'Immune thrombocytopenia',
    'Cardio-respiratory arrest',
    'Cardiac arrest',
    'Cardiac failure',
    'Circulatory collapse',
    'Myocardial infarction',
    'Acute myocardial infarction',
    'Myocarditis',
    'Peripheral artery thrombosis',
    'Disseminated intravascular coagulation'
]

arrhythmia_reactions = [
    'Arrhythmia', 'Ventricular tachycardia', 'Tachycardia', 'Atrial fibrillation', 'Tachyarrhythmia'
]

cerebral_reactions = [
    'Cerebral venous thrombosis', 'Cerebral haemorrhage', 'Cerebrovascular accident'
    'Haemorrhagic stroke', 'Hemorrhagic stroke', 'Ischaemic stroke', 'Ischemic stroke',
    'Transient ischaemic attack', 'Transient ischaemic stroke', 'Transient ischemic stroke',
    'Subarachnoid haemorrhage', 'Haemorrhagic transformation stroke'
]

anaphylaxis_death = [    
    'Anaphylactic reaction',
    'Death',
    'Sudden death'
]

all_reactions = [i.lower() for i in heart_blood_reactions + cerebral_reactions + anaphylaxis_death]

print(all_reactions)

['thrombosis', 'thrombocytopenia', 'myocardial infarction', 'pulmonary embolism', 'deep vein thrombosis', 'venous thrombosis', 'portal vein thrombosis', 'atrial thrombosis', 'immune thrombocytopenia', 'cardio-respiratory arrest', 'cardiac arrest', 'cardiac failure', 'circulatory collapse', 'myocardial infarction', 'acute myocardial infarction', 'myocarditis', 'peripheral artery thrombosis', 'disseminated intravascular coagulation', 'cerebral venous thrombosis', 'cerebral haemorrhage', 'cerebrovascular accidenthaemorrhagic stroke', 'hemorrhagic stroke', 'ischaemic stroke', 'ischemic stroke', 'transient ischaemic attack', 'transient ischaemic stroke', 'transient ischemic stroke', 'subarachnoid haemorrhage', 'haemorrhagic transformation stroke', 'anaphylactic reaction', 'death', 'sudden death']


In [59]:
stage_project = {
    "$project": {
        '_id': 0,
        'patientAge': 1,
        'patientSex': 1,
        'product': {"$toUpper": '$medicinalProduct'},
        'patientReaction': {"$toLower": '$patientReactions'}
    }
}

# product
stage_unwind_product = {"$unwind": "$medicinalProduct"}

stage_match_product = {
   "$match": {
       "product": { "$in": vaccine_names }
    }
}

# reaction
stage_unwind_reaction = {"$unwind": "$patientReactions"}

stage_match_reaction = {
   "$match": {
       "$or": [
           {
               "patientReaction": { "$in": all_reactions }
           },
           {
               "patientReaction": {
                  '$regex': 'stroke|thrombosis|embolism|infarction|cerebral|infarction|anaphylactic|anaphylaxis', 
                  '$options': 'i'
              }
           }
       ]
   }
}

pipeline = [
    stage_unwind_product,
    stage_unwind_reaction,
    stage_project,
    stage_match_product,
    stage_match_reaction,
]

In [60]:
cursor = collection.aggregate(pipeline)

In [61]:
df = pd.DataFrame(list(cursor))

In [62]:
choices = ['thrombosis', 'embolism', 'death', 'stroke', 'infarction', 'ischemic']

conditions = [df['patientReaction'].str.contains(x, case=False) for x in choices]

df['reaction'] = np.select(conditions, choices, default='other')

In [63]:
df['age+'] = df['patientAge'].round(-1)

df = df[(df['patientAge'] >= 0)&(df['patientAge'] <= 130)&(df['patientSex'] !=-1)]

In [64]:
df['sex'] = np.where(df['patientSex']==1, 'F', 'M')

In [65]:
df_agg = df.groupby(['sex', 'reaction'], as_index=False).size()

In [66]:
df_agg.sort_values(by='size', ascending=False)

Unnamed: 0,sex,reaction,size
9,M,other,75
7,M,embolism,62
3,F,other,46
11,M,thrombosis,46
5,F,thrombosis,37
4,F,stroke,30
0,F,death,26
10,M,stroke,21
1,F,embolism,20
2,F,infarction,19


In [67]:
df_agg = df_agg.sort_values(by='reaction')

fig = go.Figure()
fig.add_trace(go.Bar(
    y=df_agg[df_agg.sex=='F'].reaction,
    x=df_agg[df_agg.sex=='F']['size'],
    name='F',
    orientation='h',
    marker=dict(
        color='rgba(246, 78, 139, 0.6)',
        line=dict(color='rgba(246, 78, 139, 1.0)', width=3)
    )
))
fig.add_trace(go.Bar(
    y=df_agg[df_agg.sex=='M'].reaction,
    x=df_agg[df_agg.sex=='M']['size'],
    name='M',
    orientation='h',
    marker=dict(
        color='rgba(58, 71, 80, 0.6)',
        line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
    )
))

fig.update_layout(barmode='stack')
fig.show()

In [68]:
df.patientReaction.unique()

array(['thrombosis', 'circulatory collapse', 'pulmonary embolism',
       'sudden death', 'anaphylactic reaction', 'death',
       'cardio-respiratory arrest', 'thrombocytopenia',
       'deep vein thrombosis', 'arteriovenous fistula thrombosis',
       'transient ischaemic attack', 'cardiac arrest', 'ischaemic stroke',
       'lacunar stroke', 'venous thrombosis', 'cerebral arteriosclerosis',
       'haemorrhagic stroke', 'myocardial infarction',
       'acute myocardial infarction', 'cerebral haematoma',
       'immune thrombocytopenia', 'haemorrhagic transformation stroke',
       'cerebral haemorrhage', 'peripheral artery thrombosis',
       'atrial thrombosis', 'cardiac failure', 'portal vein thrombosis',
       'jugular vein thrombosis', 'cerebral venous thrombosis',
       'splenic infarction', 'hepatic infarction', 'aortic thrombosis',
       'venous thrombosis limb', 'anaphylactic shock',
       'cerebral artery embolism', 'myocarditis',
       'disseminated intravascular coag