In [1]:
dataset_id = "vaers_modeled"
!bq --location=US mk --dataset {dataset_id}

Dataset 'studied-brand-266702:vaers_modeled' successfully created.


### Split staging Events table into separate entities: Patient table and Adverse_Event table 

#### Create Patient table

In [20]:
%%bigquery

select DISTINCT ROW_NUMBER() OVER() as PATIENT_ID, VAERS_ID, STATE, AGE_YRS, SEX, ALLERGIES
from vaers_staging.Events

Unnamed: 0,PATIENT_ID,VAERS_ID,STATE,AGE_YRS,SEX,ALLERGIES
0,14,803845,WV,65.0,F,dexlansoprazole
1,45,820113,DE,67.0,U,Sulphate drugs Penicillin
2,123,794191,MS,,M,
3,143,794340,AR,,M,
4,239,794809,WV,,M,
...,...,...,...,...,...,...
44339,44141,819290,WI,89.0,F,clindamycin cephalexin
44340,44184,843628,WI,89.0,F,none
44341,44209,815089,WI,90.0,F,
44342,44233,824511,WI,90.0,F,none


#### Create Adverse_Event table

In [13]:
%%bigquery

select VAERS_ID, ONSET_DATE, RECOVD, DIED, DATEDIED, L_THREAT, OFC_VISIT, ER_VISIT, ER_ED_VISIT, HOSPITAL, HOSPDAYS, X_STAY, DISABLE, BIRTH_DEFECT, OTHER_MEDS, CUR_ILL, HISTORY, PRIOR_VAX from vaers_staging.Events

Unnamed: 0,VAERS_ID,ONSET_DATE,RECOVD,DIED,DATEDIED,L_THREAT,OFC_VISIT,ER_VISIT,ER_ED_VISIT,HOSPITAL,HOSPDAYS,X_STAY,DISABLE,BIRTH_DEFECT,OTHER_MEDS,CUR_ILL,HISTORY,PRIOR_VAX
0,794224,2018-12-29,,,,,,,,,,,,,,,,
1,794226,2018-12-28,,,,,,,,,,,,,WELLBUTRIN; omeprazole; verapamil; fish oil; m...,,High BP; Elevated cholesterol; GERD; No alcoho...,
2,794359,2000-01-14,N,,,,,,True,,,,,,"Prescription: PLAQUENIL, FLORINEF, PRILOSEC, m...",None known,Lupus,
3,794763,,U,,,,,,,,,,,,"Just vitamin C; zinc, B complex",Productive persistent cough; fatigue,None.,
4,795410,2019-01-05,,,,,,,,,,,,,Lorsartan; amlodipine; multivitamin; B13,,HTN,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44339,839583,2019-10-03,N,,,,True,,,,,,,,"Synthroid, Zoloft, ASA, Preservision AREDS 2, ...",No illness at time of vaccination. No illness...,"HTN heart disease w/o failure, unspecified psy...",
44340,798122,2019-01-18,Y,,,,True,,,,,,,,,,,
44341,843026,,Y,,,,,,,,,,,,,,,
44342,827038,2019-08-02,N,,,,,,,,,,,,,,,


### Generate Primary Key for Symptom table. No other changes made to table as Symptom is an entity by itself.

In [15]:
%%bigquery
create table vaers_modeled.Symptom as
select DISTINCT ROW_NUMBER() OVER() as SYMPTOM_ID, VAERS_ID, SYMPTOM1, SYMPTOM2, SYMPTOM3, SYMPTOM4, SYMPTOM5 
from vaers_staging.Symptoms

In [19]:
%%bigquery
select * from vaers_modeled.Symptom
limit 10

Unnamed: 0,SYMPTOM_ID,VAERS_ID,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,8649,836654,Tic,,,,
1,5418,820782,Tic,,,,
2,3608,811540,Tic,,,,
3,13391,821476,Tic,Tremor,,,
4,12214,809042,Acne,Rash,,,
5,16245,827723,Acne,Pruritus,,,
6,11414,808547,Acne,Pain,,,
7,220,795240,Acne,,,,
8,10585,846795,Coma,,,,
9,15197,800927,Coma,Seizure,,,


### Creating table joining patient information from Events table with Symptoms table. Entity is patient.

In [1]:
%%bigquery
create table vaers_modeled.Patient_Event as
select e.VAERS_ID, e.STATE, e.AGE_YRS, e.SEX, e.DIED, e.DATEDIED, e.L_THREAT, e.ER_VISIT, e.HOSPITAL, e.HOSPDAYS, e.X_STAY, e.DISABLE,
e.RECOVD, e.OTHER_MEDS, e.CUR_ILL, e.BIRTH_DEFECT, e.ALLERGIES,
s.SYMPTOM1, s.SYMPTOM2, s.SYMPTOM3, s.SYMPTOM4, s.SYMPTOM5
from vaers_staging.Events e, vaers_staging.Symptoms s
where e.VAERS_ID = s.VAERS_ID

In [2]:
%%bigquery
select * from vaers_modeled.Patient_Event
where state is not null and died is not null
limit 10
#Added where clause to show more information

Unnamed: 0,VAERS_ID,STATE,AGE_YRS,SEX,DIED,DATEDIED,L_THREAT,ER_VISIT,HOSPITAL,HOSPDAYS,...,RECOVD,OTHER_MEDS,CUR_ILL,BIRTH_DEFECT,ALLERGIES,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,834405,AR,1.0,M,True,2018-09-22,,,,,...,N,None.,None.,,None.,Decreased appetite,Pyrexia,,,
1,806371,AR,0.92,F,True,2019-03-02,,,,,...,N,Tylenol for pain.,,,Latex,Blood test,Death,Eyelid function disorder,Febrile convulsion,Pyrexia
2,806371,AR,0.92,F,True,2019-03-02,,,,,...,N,Tylenol for pain.,,,Latex,Urine analysis,,,,
3,844189,AR,75.0,M,True,2019-10-29,,,,,...,N,"Levemir, Pravastatin, Furosemide, Lisinopril, ...","Diabetes, COPD",,none,Aspiration,Cyanosis,Death,Pulse absent,Resuscitation
4,844189,AR,75.0,M,True,2019-10-29,,,,,...,N,"Levemir, Pravastatin, Furosemide, Lisinopril, ...","Diabetes, COPD",,none,Syncope,Unresponsive to stimuli,,,
5,815652,AZ,0.5,F,True,2019-05-22,,,,,...,N,,Mild congestion earlier in month. Ongoing tee...,,No known allergies,Death,Resuscitation,Unresponsive to stimuli,,
6,809231,CA,0.17,M,True,2015-01-30,,,,,...,N,,Runny nose,,,Hypersomnia,Poor feeding infant,,,
7,823271,CA,0.08,F,True,2019-07-02,,,,,...,N,,,,,Death,Decreased appetite,,,
8,826060,CA,,F,True,,,,,,...,N,,,,,Death,,,,
9,840255,CA,55.0,F,True,2019-10-05,,,,,...,N,none,none,,no,Pain,Palpitations,,,


### Showing Report table has PK

In [2]:
%%bigquery
select count(*) as count from vaers_modeled.Report

Unnamed: 0,count
0,44344


In [4]:
%%bigquery
select count(distinct VAERS_ID) as count from vaers_modeled.Report

Unnamed: 0,count
0,44344


### Showing duplicates of VAERS_ID in Patient_Event and Vaccine_Event tables
##### As there are less distinct VAERS_ID values than the total number of rows, there exists duplicates of VAERS_ID
##### Also, given that the count of distinct VAERS_ID matches the count of PK above, we know that referential integrity is preserved

In [18]:
%%bigquery
select count(*) as count from vaers_modeled.Vaccine_Event

Unnamed: 0,count
0,56272


In [19]:
%%bigquery
select count(distinct VAERS_ID) as count from vaers_modeled.Vaccine_Event

Unnamed: 0,count
0,44344


In [20]:
%%bigquery
select count(*) as count from vaers_modeled.Patient_Event

Unnamed: 0,count
0,55260


In [21]:
%%bigquery
select count(distinct VAERS_ID) as count from vaers_modeled.Patient_Event

Unnamed: 0,count
0,44343


### Queries showing the duplicate entries. Explanations in TRANSFORMS.txt

In [24]:
%%bigquery
select v1.* from vaers_modeled.Vaccine_Event v1
join (SELECT VAERS_ID, COUNT(*)
FROM vaers_modeled.Vaccine_Event
group by VAERS_ID
HAVING count(*) > 1) v2
on v1.VAERS_ID = v2.VAERS_ID
order by v1.VAERS_ID
limit 10

Unnamed: 0,VAERS_ID,VAX_DATE,ONSET_DATE,V_ADMINBY,V_FUNDBY,PRIOR_VAX,VAX_TYPE,VAX_NAME,VAX_MANU,VAX_ROUTE,VAX_SITE
0,794159,2018-12-27,2018-12-28,PVT,,,MMRV,MEASLES + MUMPS + RUBELLA + VARICELLA (PROQUAD),MERCK & CO. INC.,SC,LL
1,794159,2018-12-27,2018-12-28,PVT,,,DTAPIPV,DTAP + IPV (KINRIX),GLAXOSMITHKLINE BIOLOGICALS,IM,LL
2,794162,2018-12-19,2018-12-26,PVT,,,HEPA,HEP A (HAVRIX),GLAXOSMITHKLINE BIOLOGICALS,IM,LL
3,794162,2018-12-19,2018-12-26,PVT,,,MMRV,MEASLES + MUMPS + RUBELLA + VARICELLA (PROQUAD),MERCK & CO. INC.,IM,RL
4,794165,2018-12-28,2018-12-28,PHM,,,PNC13,PNEUMO (PREVNAR13),PFIZER\WYETH,IM,RA
5,794165,2018-12-28,2018-12-28,PHM,,,FLU3,INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE),SANOFI PASTEUR,IM,LA
6,794170,2018-12-20,2018-12-30,PVT,,,VARCEL,VARICELLA (VARIVAX),MERCK & CO. INC.,IM,LL
7,794170,2018-12-20,2018-12-30,PVT,,,HEPA,HEP A (VAQTA),MERCK & CO. INC.,IM,RL
8,794170,2018-12-20,2018-12-30,PVT,,,MMR,MEASLES + MUMPS + RUBELLA (MMR II),MERCK & CO. INC.,IM,LL
9,794178,2018-11-14,2018-11-29,MIL,,,SMALL,SMALLPOX (ACAM2000),SANOFI PASTEUR,,LA


In [26]:
%%bigquery
select p1.* from vaers_modeled.Patient_Event p1
join (SELECT VAERS_ID, COUNT(*)
FROM vaers_modeled.Patient_Event
group by VAERS_ID
HAVING count(*) > 1) p2
on p1.VAERS_ID = p2.VAERS_ID
order by p1.VAERS_ID
limit 10

Unnamed: 0,VAERS_ID,STATE,AGE_YRS,SEX,DIED,DATEDIED,L_THREAT,ER_VISIT,HOSPITAL,HOSPDAYS,...,DISABLE,OTHER_MEDS,CUR_ILL,BIRTH_DEFECT,ALLERGIES,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,794157,IL,68.0,F,,,,,,,...,,"Levothyroxine, sertraline, vitamin B complex, ...",,,,Apathy,Arthralgia,Asthenia,Injection site erythema,Injection site pain
1,794157,IL,68.0,F,,,,,,,...,,"Levothyroxine, sertraline, vitamin B complex, ...",,,,Injection site pruritus,Injection site swelling,Injection site warmth,Listless,Night sweats
2,794160,TX,79.0,F,,,,,,,...,,ATORVASTATIN; CELEBREX; XYZAL; D-3; ELIQUIS; A...,SINUSITIS; ATHROSCLEROSIS; DIABETES; HYPERCHOL...,,ACE INHIBITORS,Asthenia,Chills,Fatigue,Influenza like illness,Injection site erythema
3,794160,TX,79.0,F,,,,,,,...,,ATORVASTATIN; CELEBREX; XYZAL; D-3; ELIQUIS; A...,SINUSITIS; ATHROSCLEROSIS; DIABETES; HYPERCHOL...,,ACE INHIBITORS,Injection site swelling,Lip blister,Lip swelling,Pain,
4,794163,MA,65.0,F,,,,,,,...,,"Extra strength arthritis Tylenol, Simvastatin,...",,,"Chocolate, nuts, legumes",Myalgia,Nausea,Pyrexia,,
5,794163,MA,65.0,F,,,,,,,...,,"Extra strength arthritis Tylenol, Simvastatin,...",,,"Chocolate, nuts, legumes",Abdominal pain,Arthralgia,Dizziness,Fatigue,Headache
6,794168,MA,43.0,M,,,,,,,...,,probiotics; magnesium,,,NKDA,Vomiting,,,,
7,794168,MA,43.0,M,,,,,,,...,,probiotics; magnesium,,,NKDA,Dizziness,Ear pain,Labyrinthitis,Migraine,Nausea
8,794168,MA,43.0,M,,,,,,,...,,probiotics; magnesium,,,NKDA,Nuclear magnetic resonance imaging normal,Tinnitus,Vertigo,Vestibular migraine,Viral infection
9,794175,,66.0,F,,,,,,,...,,none,none,,none,Injected limb mobility decreased,Injection site pain,Loss of consciousness,,
