### Model VAERS Dataset

In [1]:
# Name of the BigQuery dataset that the later cells fill with the modeled VAERS tables.
dataset_id = "vaers_modeled"
# Create the dataset in the US location with the bq CLI; {dataset_id} is filled in by IPython.
!bq --location=US mk --dataset {dataset_id}

Dataset 'studied-brand-266702:vaers_modeled' successfully created.


### Split staging Events table into separate entities: Patient table and Adverse_Event table 

#### Create Patient table

In [3]:
%%bigquery
-- Build the Patient entity from the staging Events table.
-- The patient attributes are de-duplicated first and the surrogate key is assigned
-- afterwards: a DISTINCT placed next to ROW_NUMBER() can never remove a row, because
-- every row already carries a unique number.
-- NOTE(review): CAST(... AS INT64) rounds fractional ages if AGE_YRS is a FLOAT64 in
-- staging -- confirm that losing the fraction is intended.
create or replace table vaers_modeled.Patient as
select
    ROW_NUMBER() OVER(ORDER BY VAERS_ID) as PATIENT_ID,
    VAERS_ID,
    STATE,
    AGE_YRS,
    SEX,
    ALLERGIES
from (
    select DISTINCT
        VAERS_ID,
        STATE,
        CAST(AGE_YRS AS INT64) AS AGE_YRS,
        SEX,
        ALLERGIES
    from vaers_staging.Events
)

In [6]:
%%bigquery
-- Preview the Patient table (all columns, in table order).
SELECT
    PATIENT_ID,
    VAERS_ID,
    STATE,
    AGE_YRS,
    SEX,
    ALLERGIES
FROM vaers_modeled.Patient

Unnamed: 0,PATIENT_ID,VAERS_ID,STATE,AGE_YRS,SEX,ALLERGIES
0,229,794528,,,U,
1,261,794563,,,M,
2,340,794652,,,M,
3,643,795089,,,M,
4,2002,796889,,,U,
...,...,...,...,...,...,...
44339,39016,843090,WY,72.0,F,
44340,40995,845432,WY,73.0,M,Penicillin
44341,8433,805451,WY,78.0,M,Penicillin; MSG
44342,38184,842145,WY,80.0,F,"Morphine, Percodan, Demerol, Darvocet, codeine"


#### Create Adverse_Event table

In [5]:
%%bigquery
-- Build the Adverse_Event entity: the event/outcome attributes of each VAERS report,
-- copied unchanged from the staging Events table.
-- "create or replace" keeps the cell re-runnable, matching the Patient cell; a plain
-- "create table" fails once the table already exists.
create or replace table vaers_modeled.Adverse_Event as
select
    VAERS_ID,
    ONSET_DATE,
    RECOVD,
    DIED,
    DATEDIED,
    L_THREAT,
    OFC_VISIT,
    ER_VISIT,
    ER_ED_VISIT,
    HOSPITAL,
    HOSPDAYS,
    X_STAY,
    DISABLE,
    BIRTH_DEFECT,
    OTHER_MEDS,
    CUR_ILL,
    HISTORY,
    PRIOR_VAX
from vaers_staging.Events

In [6]:
%%bigquery
-- Preview the Adverse_Event table (all columns, in table order).
SELECT
    VAERS_ID,
    ONSET_DATE,
    RECOVD,
    DIED,
    DATEDIED,
    L_THREAT,
    OFC_VISIT,
    ER_VISIT,
    ER_ED_VISIT,
    HOSPITAL,
    HOSPDAYS,
    X_STAY,
    DISABLE,
    BIRTH_DEFECT,
    OTHER_MEDS,
    CUR_ILL,
    HISTORY,
    PRIOR_VAX
FROM vaers_modeled.Adverse_Event

Unnamed: 0,VAERS_ID,ONSET_DATE,RECOVD,DIED,DATEDIED,L_THREAT,OFC_VISIT,ER_VISIT,ER_ED_VISIT,HOSPITAL,HOSPDAYS,X_STAY,DISABLE,BIRTH_DEFECT,OTHER_MEDS,CUR_ILL,HISTORY,PRIOR_VAX
0,794804,,N,,,,True,,,True,,,,,,,,
1,795533,,N,,,,True,,,True,,,,,,,,
2,795798,,U,,,,,,,,,,,,,,,
3,796623,,U,,,,True,,,,,,,,,,,
4,798695,,Y,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44339,839265,2019-10-06,Y,,,,True,,,,,,,,"I was in a car accident in June, needed surger...",See item #9 re: the car accident and low B12,"IBS, sensitive to drugs and foods, low blood p...",generally pain in the arm after certain shots
44340,838596,2019-10-06,N,,,,,,,,,,,,Adderall 10 mg Toujeo 80U Short-acting insulin...,Well-managed T2D. No recent medication changes,Type-2 diabetes Bipolar disorder type 2 Adult ADD,
44341,839769,2019-10-06,U,,,,,,,,,,,,"Lorazepam 0.1 as needed, Not every day:Multi-V...",Patient was fine the day of vaccine but had ba...,Had surgery on left arm (detached bicep and ro...,
44342,848745,2019-10-06,N,,,,True,,True,True,24.0,,True,,aspirin 81 MG tab Sig: � Take 81 mg by mou...,Patient had no previously been seen in clinic ...,Hypertension HLD (hyperlipidemia) Allergic...,


### Generate a Primary Key for the Symptom table. No other changes are made to the table, as Symptom is already an entity by itself.

In [7]:
%%bigquery
-- Build the Symptom entity: staging Symptoms plus a generated surrogate key.
-- The window needs an explicit ORDER BY: "OVER(VAERS_ID)" is parsed as a reference to a
-- named window called VAERS_ID, which is not defined, so the statement is rejected.
-- One report can span several symptom rows, so the symptom columns are added as
-- tie-breakers to keep SYMPTOM_ID as reproducible as the data allows.
-- "create or replace" keeps the cell re-runnable.
create or replace table vaers_modeled.Symptom as
select
    ROW_NUMBER() OVER(
        ORDER BY VAERS_ID, SYMPTOM1, SYMPTOM2, SYMPTOM3, SYMPTOM4, SYMPTOM5
    ) as SYMPTOM_ID,
    VAERS_ID,
    SYMPTOM1,
    SYMPTOM2,
    SYMPTOM3,
    SYMPTOM4,
    SYMPTOM5
from vaers_staging.Symptoms

In [8]:
%%bigquery
-- Preview the Symptom table (all columns, in table order).
SELECT
    SYMPTOM_ID,
    VAERS_ID,
    SYMPTOM1,
    SYMPTOM2,
    SYMPTOM3,
    SYMPTOM4,
    SYMPTOM5
FROM vaers_modeled.Symptom

Unnamed: 0,SYMPTOM_ID,VAERS_ID,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,26,794308,Product administered to patient of inappropria...,,,,
1,31,794344,Exposure during pregnancy,,,,
2,44,794402,Pain,,,,
3,56,794493,Vertigo,,,,
4,270,795388,Adverse event,,,,
...,...,...,...,...,...,...,...
55255,54995,844760,Hypoaesthesia,Injected limb mobility decreased,Injection site pain,Paraesthesia,Product administered at inappropriate site
55256,55040,834189,Arthralgia,Immediate post-injection reaction,Musculoskeletal pain,Nausea,Pain
55257,55157,833481,Erythema,Extensive swelling of vaccinated limb,Injection site erythema,Injection site swelling,Injection site warmth
55258,55204,849460,Lymphocyte percentage decreased,Mean cell haemoglobin concentration normal,Mean cell haemoglobin normal,Mean cell volume normal,Mean platelet volume increased


### Split staging Vaccines table into separate entities: Vaccine and Manufacturer

#### Create Vaccine table

In [7]:
%%bigquery
-- Build the Vaccine entity: one row per distinct (VAX_NAME, VAX_TYPE) pair found in
-- the staging Vaccines table, with a generated surrogate key.
-- VAX_TYPE is a tie-breaker in the ORDER BY so that VAX_ID stays reproducible even if
-- one name appears with more than one type.
-- "create or replace" keeps the cell re-runnable.
create or replace table vaers_modeled.Vaccine as
select
    ROW_NUMBER() OVER(ORDER BY VAX_NAME, VAX_TYPE) as VAX_ID,
    VAX_NAME,
    VAX_TYPE
from vaers_staging.Vaccines
group by VAX_NAME, VAX_TYPE

In [23]:
%%bigquery
-- Preview the Vaccine table, alphabetically by vaccine name.
SELECT
    VAX_ID,
    VAX_NAME,
    VAX_TYPE
FROM vaers_modeled.Vaccine
ORDER BY VAX_NAME

Unnamed: 0,VAX_ID,VAX_NAME,VAX_TYPE
0,1,ADENOVIRUS (NO BRAND NAME),ADEN
1,2,"ADENOVIRUS TYPES 4 & 7, LIVE, ORAL (NO BRAND N...",ADEN_4_7
2,3,ANTHRAX (BIOTHRAX),ANTH
3,4,ANTHRAX (NO BRAND NAME),ANTH
4,5,BCG (NO BRAND NAME),BCG
...,...,...,...
120,121,YELLOW FEVER (STAMARIL),YF
121,122,YELLOW FEVER (YF-VAX),YF
122,123,ZOSTER (NO BRAND NAME),VARZOS
123,124,ZOSTER (SHINGRIX),VARZOS


#### Create Manufacturer table

In [15]:
%%bigquery
-- Build the Manufacturer entity: one row per distinct VAX_MANU value found in the
-- staging Vaccines table, numbered alphabetically to form the surrogate key.
-- "create or replace" keeps the cell re-runnable, matching the Patient cell.
create or replace table vaers_modeled.Manufacturer as
select
    ROW_NUMBER() OVER(ORDER BY VAX_MANU) as MANU_ID,
    VAX_MANU
from vaers_staging.Vaccines
group by VAX_MANU

In [17]:
%%bigquery
-- Preview the Manufacturer table, alphabetically by manufacturer name.
SELECT
    MANU_ID,
    VAX_MANU
FROM vaers_modeled.Manufacturer
ORDER BY VAX_MANU

Unnamed: 0,MANU_ID,VAX_MANU
0,1,AVENTIS PASTEUR
1,2,"BERNA BIOTECH, LTD."
2,3,CONNAUGHT LABORATORIES
3,4,CSL LIMITED
4,5,DYNAVAX TECHNOLOGIES CORPORATION
5,6,EMERGENT BIOSOLUTIONS
6,7,GLAXOSMITHKLINE BIOLOGICALS
7,8,INTERCELL AG
8,9,MASS. PUB HLTH BIOL LAB
9,10,"MEDIMMUNE VACCINES, INC."


### Join attributes from staging Events, staging Vaccines, modeled Vaccine and modeled Manufacturer to form new entity: Vaccination

#### The Vaccination table will also serve as the junction table for the modeled Vaccine and Manufacturer tables

In [51]:
%%bigquery
-- Build the Vaccination entity: one row per distinct vaccine administration reported
-- for a VAERS event. It also serves as the junction table between Vaccine and
-- Manufacturer.
--   * Vaccine is matched on (VAX_NAME, VAX_TYPE), the same pair that table is grouped
--     by, so a name that exists with several types cannot fan out into extra rows.
--     IS NOT DISTINCT FROM keeps rows with a NULL VAX_TYPE matched.
--   * The ORDER BY lists every grouped column: one event can carry several vaccines,
--     so ordering by VAERS_ID alone lets VACCINATION_ID vary from run to run.
--   * "create or replace" keeps the cell re-runnable.
create or replace table vaers_modeled.Vaccination as
select
    ROW_NUMBER() OVER(
        ORDER BY e.VAERS_ID, mv.VAX_ID, m.MANU_ID, v.VAX_ROUTE, v.VAX_SITE,
                 e.VAX_DATE, e.V_ADMINBY, e.V_FUNDBY
    ) as VACCINATION_ID,
    e.VAERS_ID,
    e.VAX_DATE,
    mv.VAX_ID,
    m.MANU_ID,
    e.V_ADMINBY,
    e.V_FUNDBY,
    v.VAX_ROUTE,
    v.VAX_SITE
from vaers_staging.Events e
    inner join vaers_staging.Vaccines v
        on e.VAERS_ID = v.VAERS_ID
    left join vaers_modeled.Vaccine mv
        on mv.VAX_NAME = v.VAX_NAME
        and mv.VAX_TYPE IS NOT DISTINCT FROM v.VAX_TYPE
    left join vaers_modeled.Manufacturer m
        on m.VAX_MANU = v.VAX_MANU
group by e.VAERS_ID, e.VAX_DATE, mv.VAX_ID, m.MANU_ID, e.V_ADMINBY, e.V_FUNDBY, v.VAX_ROUTE, v.VAX_SITE

In [54]:
%%bigquery
-- Preview the Vaccination table (all columns, in table order).
SELECT
    VACCINATION_ID,
    VAERS_ID,
    VAX_DATE,
    VAX_ID,
    MANU_ID,
    V_ADMINBY,
    V_FUNDBY,
    VAX_ROUTE,
    VAX_SITE
FROM vaers_modeled.Vaccination

Unnamed: 0,VACCINATION_ID,VAERS_ID,VAX_DATE,VAX_ID,MANU_ID,V_ADMINBY,V_FUNDBY,VAX_ROUTE,VAX_SITE
0,425,794649,2018-12-27,33,21,UNK,,UN,UN
1,1086,795341,2019-01-03,44,21,PVT,,IM,RA
2,1131,795400,,81,21,UNK,,UN,UN
3,1283,795541,2019-01-03,94,21,UNK,,,
4,1401,795669,2019-01-11,122,17,MIL,,SC,LA
...,...,...,...,...,...,...,...,...,...
55937,40558,834723,2017-04-01,125,11,PHM,,,
55938,48791,842609,,125,11,MIL,,OT,
55939,55479,851363,2017-11-03,125,11,UNK,,OT,
55940,55702,852054,2017-06-23,125,11,UNK,,OT,


### Identify Primary Key (PK) for each modeled table

#### Patient table : PK is PATIENT_ID

In [7]:
%%bigquery
-- Row count of Patient; compared against the distinct PATIENT_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Patient

Unnamed: 0,total_records
0,44344


In [8]:
%%bigquery
-- Distinct PATIENT_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT PATIENT_ID) AS distinct_id
FROM vaers_modeled.Patient

Unnamed: 0,distinct_id
0,44344


#### Adverse_Event table: PK is VAERS_ID

In [70]:
%%bigquery
-- Row count of Adverse_Event; compared against the distinct VAERS_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Adverse_Event

Unnamed: 0,total_records
0,44344


In [72]:
%%bigquery
-- Distinct VAERS_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VAERS_ID) AS distinct_id
FROM vaers_modeled.Adverse_Event

Unnamed: 0,distinct_id
0,44344


#### Symptom table: PK is SYMPTOM_ID

In [73]:
%%bigquery
-- Row count of Symptom; compared against the distinct SYMPTOM_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Symptom

Unnamed: 0,total_records
0,55260


In [74]:
%%bigquery
-- Distinct SYMPTOM_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT SYMPTOM_ID) AS distinct_id
FROM vaers_modeled.Symptom

Unnamed: 0,distinct_id
0,55260


#### Vaccine table: PK is VAX_ID

In [75]:
%%bigquery
-- Row count of Vaccine; compared against the distinct VAX_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccine

Unnamed: 0,total_records
0,125


In [76]:
%%bigquery
-- Distinct VAX_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VAX_ID) AS distinct_id
FROM vaers_modeled.Vaccine

Unnamed: 0,distinct_id
0,125


#### Manufacturer table: PK is MANU_ID

In [77]:
%%bigquery
-- Row count of Manufacturer; compared against the distinct MANU_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Manufacturer

Unnamed: 0,total_records
0,21


In [78]:
%%bigquery
-- Distinct MANU_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT MANU_ID) AS distinct_id
FROM vaers_modeled.Manufacturer

Unnamed: 0,distinct_id
0,21


#### Vaccination table: PK is VACCINATION_ID

In [79]:
%%bigquery
-- Row count of Vaccination; compared against the distinct VACCINATION_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccination

Unnamed: 0,total_records
0,55942


In [81]:
%%bigquery
-- Distinct VACCINATION_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VACCINATION_ID) AS distinct_id
FROM vaers_modeled.Vaccination

Unnamed: 0,distinct_id
0,55942


### Check for presence of duplicate records in each modeled table

In [88]:
%%bigquery
-- Row count of Patient; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Patient

Unnamed: 0,total_records
0,44344


In [87]:
%%bigquery
-- Count the rows of Patient that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Patient
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,44344


In [89]:
%%bigquery
-- Row count of Adverse_Event; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Adverse_Event

Unnamed: 0,total_records
0,44344


In [90]:
%%bigquery
-- Count the rows of Adverse_Event that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Adverse_Event
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,44344


In [91]:
%%bigquery
-- Row count of Symptom; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Symptom

Unnamed: 0,total_records
0,55260


In [93]:
%%bigquery
-- Count the rows of Symptom that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Symptom
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,55260


In [94]:
%%bigquery
-- Row count of Vaccine; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccine

Unnamed: 0,total_records
0,125


In [95]:
%%bigquery
-- Count the rows of Vaccine that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Vaccine
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,125


In [96]:
%%bigquery
-- Row count of Manufacturer; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Manufacturer

Unnamed: 0,total_records
0,21


In [97]:
%%bigquery
-- Count the rows of Manufacturer that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Manufacturer
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,21


In [98]:
%%bigquery
-- Row count of Vaccination; compared against the count of fully distinct rows.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccination

Unnamed: 0,total_records
0,55942


In [99]:
%%bigquery
-- Count the rows of Vaccination that remain after whole-row de-duplication.
with deduplicated as (
    select distinct *
    from vaers_modeled.Vaccination
)
select count(*) as distinct_records
from deduplicated

Unnamed: 0,distinct_records
0,55942


### >> No duplicates found in any modeled table

### Check for referential integrity violations 

In [101]:
%%bigquery
-- Count Patient rows whose VAERS_ID has no matching row in Adverse_Event.
SELECT COUNT(*) AS count_of_missing_PK
FROM vaers_modeled.Patient AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Adverse_Event AS e
    WHERE e.VAERS_ID = p.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [1]:
%%bigquery
-- Count Symptom rows whose VAERS_ID has no matching row in Adverse_Event.
SELECT COUNT(*) AS count_of_missing_PK
FROM vaers_modeled.Symptom AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Adverse_Event AS e
    WHERE e.VAERS_ID = s.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [2]:
%%bigquery
-- Count Vaccination rows whose VAERS_ID has no matching row in Adverse_Event.
SELECT COUNT(*) AS count_of_missing_PK
FROM vaers_modeled.Vaccination AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Adverse_Event AS e
    WHERE e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [3]:
%%bigquery
-- Count Vaccination rows whose VAX_ID has no matching row in Vaccine
-- (rows with a NULL VAX_ID are counted as well).
SELECT COUNT(*) AS count_of_missing_PK
FROM vaers_modeled.Vaccination AS vt
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Vaccine AS v
    WHERE v.VAX_ID = vt.VAX_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [4]:
%%bigquery
-- Count Vaccination rows whose MANU_ID has no matching row in Manufacturer
-- (rows with a NULL MANU_ID are counted as well).
SELECT COUNT(*) AS count_of_missing_PK
FROM vaers_modeled.Vaccination AS vt
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Manufacturer AS m
    WHERE m.MANU_ID = vt.MANU_ID
)

Unnamed: 0,count_of_missing_PK
0,0


### >> No referential integrity violations found in any modeled table

## Beam pipeline

### Standardize data in Adverse_Event table using Beam pipeline (Direct Runner)

In [3]:
# Run the Adverse_Event Beam script. Per the log below it runs with DirectRunner, reads
# 50 rows of vaers_modeled.Adverse_Event and creates vaers_modeled.Adverse_Event_Beam.
%run Adverse_Event_beam.py

  experiments = p.options.view_as(DebugOptions).experiments or []
INFO:apache_beam.runners.direct.direct_runner:Running pipeline with DirectRunner.
INFO:apache_beam.io.gcp.bigquery_tools:Using location 'US' from table <TableReference
 datasetId: 'vaers_modeled'
 projectId: 'studied-brand-266702'
 tableId: 'Adverse_Event'> referenced by query SELECT * FROM vaers_modeled.Adverse_Event limit 50


Current RECOVD:  N
New RECOVD:  False
Current RECOVD:  N
New RECOVD:  False
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None

INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.10 seconds.
INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.10 seconds.
INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.10 seconds.
INFO:apache_beam.io.gcp.bigquery_tools:Created table studied-brand-266702.vaers_modeled.Adverse_Event_Beam with schema <TableSchema
 fields: [<TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAERS_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'ONSET_DATE'
 type: 'DATE'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'RECOVD'
 type: 'BOOLEAN'>, <TableFieldSchema
 field

### Verify presence of Primary Key in Beam result table

#### Adverse_Event_Beam table: PK is VAERS_ID

In [4]:
%%bigquery
-- Row count of Adverse_Event_Beam; compared against the distinct VAERS_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Adverse_Event_Beam

Unnamed: 0,total_records
0,50


In [5]:
%%bigquery
-- Distinct VAERS_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VAERS_ID) AS distinct_id
FROM vaers_modeled.Adverse_Event_Beam

Unnamed: 0,distinct_id
0,50


#### Adverse_Event_Beam table is a parent table and does not have any Foreign Key

### Standardize data in Vaccination table using Beam pipeline (Direct Runner)

In [8]:
# Run the Vaccination Beam script. Per the log below it runs with DirectRunner, reads
# 50 rows of vaers_modeled.Vaccination and creates vaers_modeled.Vaccination_Beam.
%run Vaccination_beam.py

  experiments = p.options.view_as(DebugOptions).experiments or []
INFO:apache_beam.runners.direct.direct_runner:Running pipeline with DirectRunner.
INFO:apache_beam.io.gcp.bigquery_tools:Using location 'US' from table <TableReference
 datasetId: 'vaers_modeled'
 projectId: 'studied-brand-266702'
 tableId: 'Vaccination'> referenced by query SELECT * FROM vaers_modeled.Vaccination limit 50


Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None SC LA
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK SC LA
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None UN UN
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None UN UN
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None UN None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  PVT UN LA
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  PVT UN LA
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None IM LA
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK IM LA
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None SYR RA
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK SYR RA
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None None None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None IM AR
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK IM AR
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None IM LA
New V_FUNDBY

INFO:apache_beam.io.gcp.bigquery_tools:Created table studied-brand-266702.vaers_modeled.Vaccination_Beam with schema <TableSchema
 fields: [<TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VACCINATION_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAERS_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_DATE'
 type: 'DATE'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'MANU_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'V_ADMINBY'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'V_FUNDBY'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_ROUTE'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_SITE'
 type: 'STRING'>]>. Result: <Table
 creationTime: 1583449439522
 etag: 'CDjDjnSwfPA2QtdRzMgI2g=='
 id: 'studied-bra

### Verify presence of Primary Key in Beam result table

#### Vaccination_Beam table: PK is VACCINATION_ID

In [11]:
%%bigquery
-- Row count of Vaccination_Beam; compared against the distinct VACCINATION_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccination_Beam

Unnamed: 0,total_records
0,50


In [12]:
%%bigquery
-- Distinct VACCINATION_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VACCINATION_ID) AS distinct_id
FROM vaers_modeled.Vaccination_Beam

Unnamed: 0,distinct_id
0,50


#### Vaccination_Beam table: FK is VAERS_ID from Adverse_Event table

In [20]:
%%bigquery
-- Count Vaccination_Beam rows whose VAERS_ID has no matching row in Adverse_Event.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Adverse_Event AS e
    WHERE e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam table: FK is VAX_ID from Vaccine table

In [26]:
%%bigquery
-- Count Vaccination_Beam rows whose VAX_ID has no matching row in Vaccine.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Vaccine AS va
    WHERE va.VAX_ID = v.VAX_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam table: FK is MANU_ID from Manufacturer table

In [24]:
%%bigquery
-- Count Vaccination_Beam rows whose MANU_ID has no matching row in Manufacturer.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Manufacturer AS m
    WHERE m.MANU_ID = v.MANU_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


### Standardize data in Adverse_Event table using Beam pipeline (Dataflow Runner)

In [1]:
# Run the Dataflow variant of the Adverse_Event Beam script (runner per the section heading).
# NOTE(review): presumably writes vaers_modeled.Adverse_Event_Beam_DF, which the cells
# below query -- confirm against the script.
%run Adverse_Event_beam_dataflow.py

  kms_key=transform.kms_key))


### Verify presence of Primary Key in Beam result table

#### Adverse_Event_Beam_DF table: PK is VAERS_ID

In [6]:
%%bigquery
-- Row count of Adverse_Event_Beam_DF; compared against the distinct VAERS_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Adverse_Event_Beam_DF

Unnamed: 0,total_records
0,44344


In [7]:
%%bigquery
-- Distinct VAERS_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VAERS_ID) AS distinct_id
FROM vaers_modeled.Adverse_Event_Beam_DF

Unnamed: 0,distinct_id
0,44344


#### Adverse_Event_Beam_DF table is a parent table and does not have any Foreign Key

### Standardize data in Vaccination table using Beam pipeline (Dataflow Runner)

In [4]:
# Run the Dataflow variant of the Vaccination Beam script (runner per the section heading).
# NOTE(review): presumably writes vaers_modeled.Vaccination_Beam_DF, which the cells
# below query -- confirm against the script.
%run Vaccination_beam_dataflow.py

  kms_key=transform.kms_key))


### Verify presence of Primary Key in Beam result table

#### Vaccination_Beam_DF table: PK is VACCINATION_ID

In [1]:
%%bigquery
-- Row count of Vaccination_Beam_DF; compared against the distinct VACCINATION_ID count.
SELECT
    COUNT(*) AS total_records
FROM vaers_modeled.Vaccination_Beam_DF

Unnamed: 0,total_records
0,55942


In [2]:
%%bigquery
-- Distinct VACCINATION_ID values; a match with the row count means the key is unique and never NULL.
SELECT
    COUNT(DISTINCT VACCINATION_ID) AS distinct_id
FROM vaers_modeled.Vaccination_Beam_DF

Unnamed: 0,distinct_id
0,55942


#### Vaccination_Beam_DF table: FK is VAERS_ID from Adverse_Event table

In [3]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose VAERS_ID has no matching row in Adverse_Event.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam_DF AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Adverse_Event AS e
    WHERE e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam_DF table: FK is VAX_ID from Vaccine table

In [4]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose VAX_ID has no matching row in Vaccine.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam_DF AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Vaccine AS va
    WHERE va.VAX_ID = v.VAX_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam_DF table: FK is MANU_ID from Manufacturer table

In [5]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose MANU_ID has no matching row in Manufacturer.
SELECT COUNT(*) AS count_of_invalid_FK
FROM vaers_modeled.Vaccination_Beam_DF AS v
WHERE NOT EXISTS (
    SELECT 1
    FROM vaers_modeled.Manufacturer AS m
    WHERE m.MANU_ID = v.MANU_ID
)

Unnamed: 0,count_of_invalid_FK
0,0
