## Filtering and Merging the 2021 VAERS Datasets & Filtering the Global Vaccination Dataset

In [1]:
# Import dependencies
import pandas as pd

from sqlalchemy import create_engine

### 2021 VAERS Datasets: Data Filtering and Cleaning

#### 2021VAERSVAX.csv: Filter the data to vaccination type = COVID19 and remove unneeded columns

In [4]:
# Relative path to the 2021 VAERS vaccine (VAX) file
VAERSvax = "dataUsed/2021VAERSVAX.csv"

# Load the CSV into a DataFrame; the encoding is given explicitly because the
# file needs it to be read properly
VAERSvax_raw = pd.read_csv(filepath_or_buffer=VAERSvax, encoding="ISO-8859-1")
VAERSvax_raw

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
0,916710,COVID19,MODERNA,,1,IM,LA,COVID19 (COVID19 (MODERNA))
1,916741,COVID19,PFIZER\BIONTECH,EH9899,1,SYR,LA,COVID19 (COVID19 (PFIZER-BIONTECH))
2,916742,COVID19,PFIZER\BIONTECH,,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))
3,916746,COVID19,MODERNA,037K20A,1,IM,LA,COVID19 (COVID19 (MODERNA))
4,916772,COVID19,PFIZER\BIONTECH,EJ1685,UNK,IM,LA,COVID19 (COVID19 (PFIZER-BIONTECH))
...,...,...,...,...,...,...,...,...
3009,983720,COVID19,MODERNA,039K20A,1,IM,LA,COVID19 (COVID19 (MODERNA))
3010,983721,COVID19,MODERNA,039K20A,1,IM,RA,COVID19 (COVID19 (MODERNA))
3011,983766,COVID19,MODERNA,013L20A,1,IM,RA,COVID19 (COVID19 (MODERNA))
3012,983919,COVID19,MODERNA,030L20A,1,IM,LA,COVID19 (COVID19 (MODERNA))


In [5]:
# Collect the column labels of the raw VAX frame as a plain Python list
columns = VAERSvax_raw.columns.tolist()
columns

['VAERS_ID',
 'VAX_TYPE',
 'VAX_MANU',
 'VAX_LOT',
 'VAX_DOSE_SERIES',
 'VAX_ROUTE',
 'VAX_SITE',
 'VAX_NAME']

In [6]:
# Keep only the columns needed downstream: report id, vaccine type,
# manufacturer, dose number in the series, and injection site
wanted_columns = VAERSvax_raw[
    ["VAERS_ID", "VAX_TYPE", "VAX_MANU", "VAX_DOSE_SERIES", "VAX_SITE"]
]
wanted_columns

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_DOSE_SERIES,VAX_SITE
0,916710,COVID19,MODERNA,1,LA
1,916741,COVID19,PFIZER\BIONTECH,1,LA
2,916742,COVID19,PFIZER\BIONTECH,1,
3,916746,COVID19,MODERNA,1,LA
4,916772,COVID19,PFIZER\BIONTECH,UNK,LA
...,...,...,...,...,...
3009,983720,COVID19,MODERNA,1,LA
3010,983721,COVID19,MODERNA,1,RA
3011,983766,COVID19,MODERNA,1,RA
3012,983919,COVID19,MODERNA,1,LA


In [8]:
# Keep only the rows whose VAX_TYPE is COVID19, then renumber the rows from 0.
# reset_index is chained (rather than called with inplace=True) so that the
# filtered slice is never mutated in place; the result is a fresh DataFrame
# with the old index discarded (drop=True).
filtered_data = (
    wanted_columns[wanted_columns["VAX_TYPE"] == "COVID19"]
    .reset_index(drop=True)
)
filtered_data

Unnamed: 0,VAERS_ID,VAX_TYPE,VAX_MANU,VAX_DOSE_SERIES,VAX_SITE
0,916710,COVID19,MODERNA,1,LA
1,916741,COVID19,PFIZER\BIONTECH,1,LA
2,916742,COVID19,PFIZER\BIONTECH,1,
3,916746,COVID19,MODERNA,1,LA
4,916772,COVID19,PFIZER\BIONTECH,UNK,LA
...,...,...,...,...,...
2839,983720,COVID19,MODERNA,1,LA
2840,983721,COVID19,MODERNA,1,RA
2841,983766,COVID19,MODERNA,1,RA
2842,983919,COVID19,MODERNA,1,LA


#### 2021VAERSData.csv

In [3]:
# Relative path to the 2021 VAERS report-level data file
VAERSData = "dataUsed/2021VAERSData.csv"

# Load the CSV into a DataFrame; the encoding is given explicitly because the
# file needs it to be read properly
vaer_data_raw = pd.read_csv(filepath_or_buffer=VAERSData, encoding="ISO-8859-1")
vaer_data_raw

Unnamed: 0,VAERS_ID,RECVDATE,STATE,AGE_YRS,CAGE_YR,CAGE_MO,SEX,RPT_DATE,SYMPTOM_TEXT,DIED,...,CUR_ILL,HISTORY,PRIOR_VAX,SPLTTYPE,FORM_VERS,TODAYS_DATE,BIRTH_DEFECT,OFC_VISIT,ER_ED_VISIT,ALLERGIES
0,916710,01/01/2021,MO,23.0,23.0,,F,,"Acute appendicitis, onset morning of 1/1/2021 ...",,...,,Hypothyroidism,,,2,01/01/2021,,,Y,NKDA
1,916741,01/01/2021,AR,68.0,68.0,,F,,"on dec 22 I felt some myalgias, chills, fatigu...",,...,had surgery R hand for advanced arthritis 11/1...,Rheumatoid arthritis - mostly affecting R wris...,,,2,01/01/2021,,,,bee stings
2,916742,01/01/2021,MN,29.0,29.0,,F,,Within 15 minutes of receiving the vaccine I b...,,...,,Sports induced asthma,,,2,01/01/2021,,Y,Y,"Amoxicillin, penicillin, oxycodone, roxicodone..."
3,916746,01/01/2021,TX,49.0,49.0,,F,,Anaphylaxis. Immediately experienced shortness...,,...,,,,,2,01/01/2021,,,Y,"Shellfish, Iodine"
4,916772,01/01/2021,GA,55.0,55.0,,M,,Vaccine on 12/22/2020 and started feeling bad ...,,...,,"HTN, Insomnia,High Cholesterol,",,,2,01/01/2021,,Y,,Codeine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2941,983428,01/28/2021,NE,68.0,68.0,,F,,Pt. was admitted to hospital on 1/6/21 with fa...,Y,...,Denied any illness at time of the vaccination....,"Erysipelas, Aphthous pharyngitis, Sixth nerve ...",,,2,01/28/2021,,,Y,NKA to medications
2942,983482,01/28/2021,CA,90.0,90.0,,M,,per daughter pt suffered a stroke 1 hour after...,Y,...,,Prostate cancer (HCC) Nervous and Auditory N...,,,2,01/28/2021,,,,"lisinopril, nsaids"
2943,983766,01/28/2021,NC,90.0,90.0,,F,,Pt started complaining of chest heaviness and ...,Y,...,"Heart failure with reduced EF, history of mode...","Heart failure with reduced EF, history of mode...",,,2,01/28/2021,,,Y,No known allergies to drugs or food
2944,983919,01/28/2021,FL,69.0,69.0,,M,,death,Y,...,,,,,2,01/28/2021,,,,


#### 2021VAERSSYMPTOMS.csv

In [9]:
# Relative path to the 2021 VAERS symptoms file
VAERSsymptoms = "dataUsed/2021VAERSSYMPTOMS.csv"

# Load the CSV into a DataFrame; the encoding is given explicitly because the
# file needs it to be read properly
vaer_symptoms_raw = pd.read_csv(filepath_or_buffer=VAERSsymptoms, encoding="ISO-8859-1")
vaer_symptoms_raw

Unnamed: 0,VAERS_ID,SYMPTOM1,SYMPTOMVERSION1,SYMPTOM2,SYMPTOMVERSION2,SYMPTOM3,SYMPTOMVERSION3,SYMPTOM4,SYMPTOMVERSION4,SYMPTOM5,SYMPTOMVERSION5
0,916710,Appendicitis,23.1,Band neutrophil percentage increased,23.1,Surgery,23.1,White blood cell count increased,23.1,,
1,916741,Chills,23.1,Complex regional pain syndrome,23.1,Fatigue,23.1,Headache,23.1,Joint range of motion decreased,23.1
2,916741,Myalgia,23.1,Pain in extremity,23.1,Peripheral swelling,23.1,X-ray abnormal,23.1,,
3,916742,Anaphylactic reaction,23.1,Blood test,23.1,Burning sensation,23.1,Central venous catheterisation,23.1,Dysphonia,23.1
4,916742,Intensive care,23.1,Pruritus,23.1,Rash,23.1,Rash macular,23.1,Throat tightness,23.1
...,...,...,...,...,...,...,...,...,...,...,...
4707,983721,Death,23.1,,,,,,,,
4708,983766,Blood pH decreased,23.1,Cardiac failure acute,23.1,Chest discomfort,23.1,Death,23.1,Dyspnoea,23.1
4709,983766,International normalised ratio increased,23.1,N-terminal prohormone brain natriuretic peptid...,23.1,SARS-CoV-2 test negative,23.1,,,,
4710,983919,Death,23.1,,,,,,,,


### Worldwide Dataset: Data Cleaning

In [10]:
# Relative path to the worldwide COVID-19 dataset
worldWideData = "dataUsed/WorldWideData.csv"

# Load the CSV into a DataFrame; the encoding is given explicitly because the
# file needs it to be read properly
world_raw = pd.read_csv(filepath_or_buffer=worldWideData, encoding="ISO-8859-1")
world_raw

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67890,ZWE,Africa,Zimbabwe,2021-02-05,34331.0,160.0,197.000,1303.0,15.0,17.857,...,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571
67891,ZWE,Africa,Zimbabwe,2021-02-06,34487.0,156.0,173.429,1316.0,13.0,17.571,...,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571
67892,ZWE,Africa,Zimbabwe,2021-02-07,34552.0,65.0,166.286,1326.0,10.0,15.571,...,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571
67893,ZWE,Africa,Zimbabwe,2021-02-08,34658.0,106.0,158.571,1339.0,13.0,15.000,...,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571
