# Merging the VAERS Vaccine and Symptoms CSVs

In [5]:
# import dependencies
import pandas as pd
import numpy as np

In [13]:
# relative paths to the two input CSV files read below
symptoms_csv = "../Resources/Data/grouped_vaers_symptoms.csv"  # symptoms table
vax_csv = "../Resources/Data/clean_vaers_vax.csv"  # vaccine table

In [14]:
# load the vaccine CSV into a dataframe (first row is the header,
# no column is used as the index) and preview the first five rows
vaers_vax = pd.read_csv(
    vax_csv,
    encoding='latin1',
    header=0,
    index_col=None,
    low_memory=False,
)
vaers_vax.head()

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_NAME
0,916600,COVID19,MODERNA,COVID19 (COVID19 (MODERNA))
1,916601,COVID19,MODERNA,COVID19 (COVID19 (MODERNA))
2,916602,COVID19,PFIZER\BIONTECH,COVID19 (COVID19 (PFIZER-BIONTECH))
3,916603,COVID19,MODERNA,COVID19 (COVID19 (MODERNA))
4,916604,COVID19,MODERNA,COVID19 (COVID19 (MODERNA))


In [15]:
# load the symptoms CSV into a dataframe (first row is the header,
# no column is used as the index) and preview the first five rows
vaers_symptoms = pd.read_csv(
    symptoms_csv,
    encoding='latin1',
    header=0,
    index_col=None,
    low_memory=False,
)
vaers_symptoms.head()

Unnamed: 0.1,Unnamed: 0,VAERS_ID,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,0,916600,Dysphagia,Epiglottitis,0,0,0
1,1,916601,Anxiety,Dyspnoea,0,0,0
2,2,916602,Chest discomfort,Dysphagia,Pain in extremity,Visual impairment,0
3,3,916603,Dizziness,Fatigue,Mobility decreased,0,0
4,4,916604,Injection site erythema,Injection site pruritus,Injection site swelling,Injection site warmth,0


In [16]:
# inner join: keep only the VAERS_ID values present in both dataframes
vaxsymp = vaers_vax.merge(vaers_symptoms, how='inner', on='VAERS_ID')
vaxsymp.head()

Unnamed: 0.1,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_NAME,Unnamed: 0,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,916600,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),0,Dysphagia,Epiglottitis,0,0,0
1,916601,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),1,Anxiety,Dyspnoea,0,0,0
2,916602,COVID19,PFIZER\BIONTECH,COVID19 (COVID19 (PFIZER-BIONTECH)),2,Chest discomfort,Dysphagia,Pain in extremity,Visual impairment,0
3,916603,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),3,Dizziness,Fatigue,Mobility decreased,0,0
4,916604,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),4,Injection site erythema,Injection site pruritus,Injection site swelling,Injection site warmth,0


In [17]:
# list the column names of the merged dataframe
vaxsymp.columns.tolist()

['VAERS_ID',
 'VAX_TYPE',
 'VAX_MANU',
 'VAX_NAME',
 'Unnamed: 0',
 'SYMPTOM1',
 'SYMPTOM2',
 'SYMPTOM3',
 'SYMPTOM4',
 'SYMPTOM5']

In [18]:
# Remove the 'Unnamed: 0' column that the merge carried over from the symptoms
# dataframe, so it does not end up in the filtered frame or the saved CSV.
# errors='ignore' makes this cell safe to re-run: if the column is already
# gone, drop() simply returns the frame unchanged instead of raising KeyError.
vaxsymp = vaxsymp.drop(columns=['Unnamed: 0'], errors='ignore')

In [21]:
# display the merged dataframe to check its columns and row count
vaxsymp

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_NAME,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,916600,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Dysphagia,Epiglottitis,0,0,0
1,916601,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Anxiety,Dyspnoea,0,0,0
2,916602,COVID19,PFIZER\BIONTECH,COVID19 (COVID19 (PFIZER-BIONTECH)),Chest discomfort,Dysphagia,Pain in extremity,Visual impairment,0
3,916603,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Dizziness,Fatigue,Mobility decreased,0,0
4,916604,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Injection site erythema,Injection site pruritus,Injection site swelling,Injection site warmth,0
...,...,...,...,...,...,...,...,...,...
389314,1410433,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Bell's palsy,Erythema,Urticaria,0,0
389315,1410455,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Cold sweat,Pallor,Syncope,0,0
389316,1410468,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Chills,Headache,0,0,0
389317,1410479,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),No adverse event,Off label use,Product administered to patient of inappropria...,0,0


# Dropping non-COVID vaccine records

In [20]:
# keep only the rows whose VAX_TYPE is exactly 'COVID19'
covid_df = vaxsymp.loc[vaxsymp["VAX_TYPE"].eq('COVID19')]
covid_df

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_NAME,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,916600,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Dysphagia,Epiglottitis,0,0,0
1,916601,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Anxiety,Dyspnoea,0,0,0
2,916602,COVID19,PFIZER\BIONTECH,COVID19 (COVID19 (PFIZER-BIONTECH)),Chest discomfort,Dysphagia,Pain in extremity,Visual impairment,0
3,916603,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Dizziness,Fatigue,Mobility decreased,0,0
4,916604,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Injection site erythema,Injection site pruritus,Injection site swelling,Injection site warmth,0
...,...,...,...,...,...,...,...,...,...
389314,1410433,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Bell's palsy,Erythema,Urticaria,0,0
389315,1410455,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Cold sweat,Pallor,Syncope,0,0
389316,1410468,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),Chills,Headache,0,0,0
389317,1410479,COVID19,MODERNA,COVID19 (COVID19 (MODERNA)),No adverse event,Off label use,Product administered to patient of inappropria...,0,0


In [26]:
# sanity check: list the distinct manufacturers left after the COVID19 filter
covid_list = covid_df.loc[:, 'VAX_MANU'].unique()
covid_list

array(['MODERNA', 'PFIZER\\BIONTECH', 'UNKNOWN MANUFACTURER', 'JANSSEN'],
      dtype=object)

In [28]:
# write the filtered dataframe to disk; index=True stores the row index
# as the first column of the CSV
output_csv = r'../Resources/Data/VAERSvaxsymptoms.csv'
covid_df.to_csv(output_csv, index=True)