### Model FAERS Dataset

In [1]:
dataset_id = "faers_modeled"
!bq --location=US mk --dataset {dataset_id}

Dataset 'studied-brand-266702:faers_modeled' successfully created.


In [124]:
# Remove limit on number of columns shown
import pandas as pd
pd.set_option('display.max_columns', None)

### Split Demographic table into separate entities: Patient table, Manufacturer table, Adverse_Event table and Case table 

#### Create Patient table

In [98]:
%%bigquery
create or replace table faers_modeled.Patient as
select ROW_NUMBER() OVER(ORDER BY PRIMARYID) as PATIENT_ID, PRIMARYID as CASE_ID, SEX, AGE, AGE_COD as AGE_UNIT, AGE_GRP as AGE_GROUP, WT as WEIGHT, WT_COD as WEIGHT_UNIT 
from faers_staging.Demographic

In [141]:
%%bigquery
select * from faers_modeled.Patient

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT
0,171,68158312,M,32,YR,,,
1,206,69718655,F,85,YR,,,
2,376,77051356,F,18,YR,,,
3,1131,96488145,F,32,YR,,64.919998,KG
4,1177,97467892,M,17,YR,,,
...,...,...,...,...,...,...,...,...
394061,363876,157503071,M,84,YR,E,,
394062,368632,157557171,M,84,YR,,,
394063,387912,157786831,F,84,YR,,89.000000,KG
394064,389348,158222071,,84,YR,,,


#### Create Manufacturer table

In [145]:
%%bigquery
create or replace table faers_modeled.Manufacturer as
select ROW_NUMBER() OVER(ORDER BY MFR_SNDR) as MANU_ID, MFR_SNDR as DRUG_MANU 
from faers_staging.Demographic
group by MFR_SNDR

In [148]:
%%bigquery
select * from faers_modeled.Manufacturer
order by DRUG_MANU

Unnamed: 0,MANU_ID,DRUG_MANU
0,1,3M
1,2,AAA PHARMA
2,3,ABBVIE
3,4,ACADIA PHARMACEUTICALS
4,5,ACCESS
...,...,...
466,467,XIROMED
467,468,XTTRIUM
468,469,YUNG SHIN PHARMACEUTICAL
469,470,ZO SKIN HEALTH


#### Create Adverse_Event table by combining with Outcome table

In [151]:
%%bigquery
create or replace table faers_modeled.Adverse_Event as
select ROW_NUMBER() OVER(ORDER BY d.PRIMARYID) as EVENT_ID, d.PRIMARYID as CASE_ID, EVENT_DT_NUM as EVENT_DATE, OCCR_COUNTRY as COUNTRY, OUTC_COD as OUTCOME
from faers_staging.Demographic d left join faers_staging.Outcome o on d.primaryid = o.primaryid

In [152]:
%%bigquery
select * from faers_modeled.Adverse_Event

Unnamed: 0,EVENT_ID,CASE_ID,EVENT_DATE,COUNTRY,OUTCOME
0,1,36703554,,BE,CA
1,4832,115186895,2000-01-11,FR,CA
2,5937,118586296,,FR,CA
3,6931,121660918,,FR,CA
4,14666,133369907,2004-11-02,FR,CA
...,...,...,...,...,...
463265,462636,1460936610,2018-07-02,CA,OT
463266,462712,1464829211,2017-10-23,FR,OT
463267,462770,1469008910,,PL,OT
463268,462923,1484172811,2017-12-29,BE,OT


In [159]:
%%bigquery
select count(distinct case_id) from faers_modeled.Adverse_Event

Unnamed: 0,f0_
0,394066


#### Create Case (Report) table

In [153]:
%%bigquery
create or replace table faers_modeled.Case as
select PRIMARYID as CASE_ID, I_F_CODE as STATUS, REPT_COD as TYPE, REPT_DT_NUM as CASE_DATE, FDA_DT_NUM as FDA_DATE, MFR_DT_NUM as MANU_DATE, MANU_ID, TO_MFR as MANU_NOTIFD, OCCP_COD as REPORTER_OCCP, REPORTER_COUNTRY 
from faers_staging.Demographic d join faers_modeled.Manufacturer m on d.MFR_SNDR = m.DRUG_MANU 

In [154]:
%%bigquery
select * from faers_modeled.Case

Unnamed: 0,CASE_ID,STATUS,TYPE,CASE_DATE,FDA_DATE,MANU_DATE,MANU_ID,MANU_NOTIFD,REPORTER_OCCP,REPORTER_COUNTRY
0,156066741,I,DIR,,2018-11-06,,162,N,OT,US
1,156117181,I,DIR,,2018-11-07,,162,N,MD,US
2,156809361,I,DIR,,2018-11-20,,162,N,OT,US
3,157079191,I,DIR,,2018-11-28,,162,N,PH,US
4,157170581,I,DIR,,2018-11-29,,162,N,,US
...,...,...,...,...,...,...,...,...,...,...
394061,155370741,I,DIR,2018-05-05,2018-10-16,,162,N,CN,US
394062,158618321,I,DIR,2018-05-09,2018-12-31,,162,N,OT,US
394063,158632541,I,DIR,2018-05-09,2018-12-31,,162,N,OT,US
394064,155042791,I,DIR,2018-05-10,2018-10-09,,162,N,CN,US


### Split Drug table into separate entities: Drug table (unique drug records), Active_Ingredient table and Administration (drug dispensing) table 

#### Create Drug table (with unique drug entries)

In [181]:
%%bigquery
create or replace table faers_modeled.Drug as 
select ROW_NUMBER() OVER() as DRUG_ID, DRUGNAME as DRUG_NAME
from faers_staging.Drug
group by DRUGNAME

In [187]:
%%bigquery
select * from faers_modeled.Drug
order by DRUG_ID

Unnamed: 0,DRUG_ID,DRUG_NAME
0,1,ANTIHEMOPHILIC FACTOR
1,2,TAXOTERE
2,3,DIGOXIN.
3,4,PREDNISOLONE.
4,5,PRAVASTATIN.
...,...,...
61839,61840,EFAVIRENZ MYLAN
61840,61841,"RIFADINE IV 600 MG, POUDRE ET SOLVANT POUR SOL..."
61841,61842,LITO
61842,61843,Ofloxacine


#### Create Active_Ingredient table

In [347]:
%%bigquery
create or replace table faers_modeled.Active_Ingredient as 
select ROW_NUMBER() OVER(ORDER BY PROD_AI) as INGREDIENT_ID, PROD_AI as ACTIVE_INGREDIENT
from faers_staging.Drug
group by PROD_AI

In [348]:
%%bigquery
select * from faers_modeled.Active_Ingredient

Unnamed: 0,INGREDIENT_ID,ACTIVE_INGREDIENT
0,109,ACETAMINOPHEN\CAFFEINE CITRATE
1,143,ACETAMINOPHEN\CODEINE\DOXYLAMINE
2,146,ACETAMINOPHEN\DEXTROMETHORPHAN HYDROBROMIDE\DO...
3,153,ACETAMINOPHEN\DEXTROMETHORPHAN HYDROBROMIDE\PS...
4,154,ACETAMINOPHEN\DEXTROMETHORPHAN\PSEUDOEPHEDRINE
...,...,...
5778,5527,TRANYLCYPROMINE SULFATE
5779,5578,TRIMETHOPRIM SULFATE
5780,5623,UMBRALISIB
5781,5691,VILDAGLIPTIN


#### Create Administration (administering of drug) table

In [343]:
%%bigquery
create or replace table faers_modeled.Administration as 
select ROW_NUMBER() OVER(ORDER BY PRIMARYID) as ADMIN_ID, PRIMARYID as CASE_ID, DRUG_ID, INGREDIENT_ID, DRUG_SEQ, ROLE_COD as DRUG_ROLE, LOT_NUM as DRUG_LOT, ROUTE as DRUG_ROUTE, DOSE_FORM, DOSE_FREQ, DOSE_AMT, DOSE_UNIT, CUM_DOSE_CHR as CUM_DOSE, CUM_DOSE_UNIT, DECHAL, RECHAL  
from faers_staging.Drug d left join faers_modeled.Drug d2 on d.DRUGNAME = d2.DRUG_NAME
    left join faers_modeled.Active_Ingredient ai on d.PROD_AI = ai.ACTIVE_INGREDIENT

In [344]:
%%bigquery
select * from faers_modeled.Administration

Unnamed: 0,ADMIN_ID,CASE_ID,DRUG_ID,INGREDIENT_ID,DRUG_SEQ,DRUG_ROLE,DRUG_LOT,DRUG_ROUTE,DOSE_FORM,DOSE_FREQ,DOSE_AMT,DOSE_UNIT,CUM_DOSE,CUM_DOSE_UNIT,DECHAL,RECHAL
0,864,68090726,11649,4481,32,C,,Unknown,,,,,,,U,
1,1345,70848306,69,3723,33,C,,Oral,,,60.0,MG,,,U,
2,3022,80189603,3226,4852,34,C,,Unknown,,,,,,,U,
3,3028,80189603,11762,1133,36,C,,Unknown,,,,,,,U,
4,3037,80189603,2507,4994,28,C,,Intravenous (not otherwise specified),,,10.0,ML,,,U,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546830,1536501,1427960118,671,2024,21,SS,,Unknown,,,,,,,U,
1546831,1537882,1434453010,1056,4505,21,C,,,,,,,,,D,
1546832,1538814,1441227316,2588,567,21,SS,UNKNOWN,Unknown,FILM-COATED TABLET,,5.0,MG,,,N,
1546833,1542634,1472865711,3770,2185,21,C,,Oral,UNKNOWN,,95.0,MG,,,,


### Generate Primary Key for Reaction table and join with Case table for Foreign Key

In [230]:
%%bigquery
create or replace table faers_modeled.Reaction as 
select ROW_NUMBER() OVER(ORDER BY CASE_ID) as REACTION_ID, CASE_ID, PT as REACTION, DRUG_REC_ACT as RECUR_REACTION
from faers_staging.Reaction r left join faers_modeled.Case c on r.PRIMARYID = c.CASE_ID 

In [231]:
%%bigquery
select * from faers_modeled.Reaction

Unnamed: 0,REACTION_ID,CASE_ID,REACTION,RECUR_REACTION
0,55,39776825,Swelling,
1,208,59983173,Haematemesis,
2,233,60789672,Hypotension,
3,242,61160272,Anuria,
4,257,61160272,Liver function test increased,
...,...,...,...,...
1250973,1145651,157561591,Wrong technique in product usage process,
1250974,1146993,157567781,Wrong technique in product usage process,
1250975,1148989,157577141,Wrong technique in product usage process,
1250976,1160363,157628701,Wrong technique in product usage process,


### Create Diagnosis table from Indication table and join with Administration table for Foreign Key

In [242]:
%%bigquery
create or replace table faers_modeled.Diagnosis as 
select ROW_NUMBER() OVER(ORDER BY ADMIN_ID) as DIAGNOSIS_ID, ADMIN_ID, INDI_PT as DIAGNOSIS
from faers_staging.Indication i left join faers_modeled.Administration a on i.PRIMARYID = a.CASE_ID and i.INDI_DRUG_SEQ = a.DRUG_SEQ 

In [243]:
%%bigquery
select * from faers_modeled.Diagnosis

Unnamed: 0,DIAGNOSIS_ID,ADMIN_ID,DIAGNOSIS
0,224,298,Prophylaxis against graft versus host disease
1,329,430,Sedation
2,397,526,Congestive cardiomyopathy
3,743,1027,Colorectal cancer metastatic
4,892,1239,Renal transplant
...,...,...,...
1064659,1064040,1545857,Product used for unknown indication
1064660,1064076,1545924,Product used for unknown indication
1064661,1064145,1546028,Product used for unknown indication
1064662,1064477,1546544,Product used for unknown indication


### Identify Primary Key (PK) for each modeled table

#### Patient table : PK is PATIENT_ID

In [244]:
%%bigquery
select count(*) as total_records from faers_modeled.Patient

Unnamed: 0,total_records
0,394066


In [245]:
%%bigquery
select count(distinct PATIENT_ID) as distinct_id from faers_modeled.Patient

Unnamed: 0,distinct_id
0,394066


#### Manufacturer table : PK is MANU_ID

In [246]:
%%bigquery
select count(*) as total_records from faers_modeled.Manufacturer

Unnamed: 0,total_records
0,471


In [247]:
%%bigquery
select count(distinct MANU_ID) as distinct_id from faers_modeled.Manufacturer

Unnamed: 0,distinct_id
0,471


#### Case table : PK is CASE_ID

In [248]:
%%bigquery
select count(*) as total_records from faers_modeled.Case

Unnamed: 0,total_records
0,394066


In [249]:
%%bigquery
select count(distinct CASE_ID) as distinct_id from faers_modeled.Case

Unnamed: 0,distinct_id
0,394066


#### Adverse_Event table : PK is EVENT_ID

In [250]:
%%bigquery
select count(*) as total_records from faers_modeled.Adverse_Event

Unnamed: 0,total_records
0,463270


In [251]:
%%bigquery
select count(distinct EVENT_ID) as distinct_id from faers_modeled.Adverse_Event

Unnamed: 0,distinct_id
0,463270


#### Drug table : PK is DRUG_ID

In [252]:
%%bigquery
select count(*) as total_records from faers_modeled.Drug

Unnamed: 0,total_records
0,61844


In [253]:
%%bigquery
select count(distinct DRUG_ID) as distinct_id from faers_modeled.Drug

Unnamed: 0,distinct_id
0,61844


#### Active_Ingredient table : PK is INGREDIENT_ID

In [254]:
%%bigquery
select count(*) as total_records from faers_modeled.Active_Ingredient

Unnamed: 0,total_records
0,5783


In [255]:
%%bigquery
select count(distinct INGREDIENT_ID) as distinct_id from faers_modeled.Active_Ingredient

Unnamed: 0,distinct_id
0,5783


#### Administration table : PK is ADMIN_ID

In [256]:
%%bigquery
select count(*) as total_records from faers_modeled.Administration

Unnamed: 0,total_records
0,1546835


In [257]:
%%bigquery
select count(distinct ADMIN_ID) as distinct_id from faers_modeled.Administration

Unnamed: 0,distinct_id
0,1546835


#### Reaction table : PK is REACTION_ID

In [258]:
%%bigquery
select count(*) as total_records from faers_modeled.Reaction

Unnamed: 0,total_records
0,1250978


In [259]:
%%bigquery
select count(distinct REACTION_ID) as distinct_id from faers_modeled.Reaction

Unnamed: 0,distinct_id
0,1250978


#### Diagnosis table : PK is DIAGNOSIS_ID

In [260]:
%%bigquery
select count(*) as total_records from faers_modeled.Diagnosis

Unnamed: 0,total_records
0,1064664


In [261]:
%%bigquery
select count(distinct DIAGNOSIS_ID) as distinct_id from faers_modeled.Diagnosis

Unnamed: 0,distinct_id
0,1064664


### Check for referential integrity violations 

In [262]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Patient p left join faers_modeled.Case c on p.CASE_ID = c.CASE_ID
where c.CASE_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [264]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Adverse_Event a left join faers_modeled.Case c on a.CASE_ID = c.CASE_ID
where c.CASE_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [351]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Administration a left join faers_modeled.Case c on a.CASE_ID = c.CASE_ID
where c.CASE_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [272]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Reaction r left join faers_modeled.Case c on r.CASE_ID = c.CASE_ID
where c.CASE_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [263]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Case c left join faers_modeled.Manufacturer m on c.MANU_ID = m.MANU_ID
where m.MANU_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [271]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Diagnosis d left join faers_modeled.Administration a on d.ADMIN_ID = a.ADMIN_ID
where a.ADMIN_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [265]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Administration a left join faers_modeled.Drug d on a.DRUG_ID = d.DRUG_ID
where d.DRUG_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [352]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Administration a left join faers_modeled.Active_Ingredient ai on a.INGREDIENT_ID = ai.INGREDIENT_ID
where ai.INGREDIENT_ID is null

Unnamed: 0,count_of_missing_PK
0,0


### Part 1: Data Transformation

![image.png](attachment:image.png)

#### Transformation 1: Standardize age in unit of years on FAERS Patient table using SQL

##### Step 1: Create intermediate table to convert age in months to years (1 year has 12 months)

In [47]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_1 as
select *, CAST(FLOOR(AGE/12) as INT64) as AGE_YRS 
from faers_modeled.Patient
where AGE_UNIT = 'MON' and AGE is not null 

In [61]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_1 
order by AGE_YRS desc
limit 5

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,333152,157159021,M,1169,MON,,,,97
1,107835,154761773,M,1138,MON,,67.599998,KG,94
2,172311,155429181,F,1087,MON,,63.5,KG,90
3,174281,155449802,M,1087,MON,,63.5,KG,90
4,66049,153440393,M,1066,MON,,63.5,KG,88


##### Step 2: Create intermediate table to convert age in weeks to years (1 year has approximately 52.14 weeks)

In [49]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_2 as
select *, CAST(FLOOR(AGE/52.14) as INT64) as AGE_YRS 
from faers_modeled.Patient
where AGE_UNIT = 'WK' and AGE is not null 

In [62]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_2 
order by AGE_YRS desc
limit 5

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,299378,156787721,F,4058,WK,,108.900002,KG,77
1,311617,156921091,F,3918,WK,,95.300003,KG,75
2,72197,153866612,F,3743,WK,,74.800003,KG,71
3,88075,154551062,M,3754,WK,,99.800003,KG,71
4,68655,153618282,F,3708,WK,,84.800003,KG,71


##### Step 3: Create intermediate table to convert age in days to years (1 year has 365 days)

In [51]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_3 as
select *, CAST(FLOOR(AGE/365) as INT64) as AGE_YRS 
from faers_modeled.Patient
where AGE_UNIT = 'DY' and AGE is not null 

In [63]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_3 
order by AGE_YRS desc
limit 5

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,293787,156725961,F,34926,DY,,63.5,KG,95
1,277787,156552871,F,34465,DY,,68.0,KG,94
2,372072,157597251,M,33628,DY,,,,92
3,207752,155805301,F,33534,DY,,,,91
4,58812,152886243,F,33131,DY,,,,90


##### Step 4: Create intermediate table to convert age in hours to years (1 year has 365 x 24 hours)

In [53]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_4 as
select *, CAST(FLOOR(AGE/24/365) as INT64) as AGE_YRS 
from faers_modeled.Patient
where AGE_UNIT = 'HR' and AGE is not null 

In [64]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_4
order by AGE_YRS desc
limit 5

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,90852,154582931,M,1,HR,,0.8,KG,0
1,86450,154532711,,3,HR,,,,0
2,173485,155441451,M,1,HR,,3.42,KG,0
3,86152,154529621,M,1,HR,,0.8,KG,0
4,75872,154092492,,1,HR,,0.8,KG,0


##### Step 5: Create intermediate table to convert age in decades to years (1 decade has 10 years)

In [66]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_5 as
select *, CAST(FLOOR(AGE*10) as INT64) as AGE_YRS 
from faers_modeled.Patient
where AGE_UNIT = 'DEC' and AGE is not null 

In [67]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_5
order by AGE_YRS desc
limit 5

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,366463,157532431,F,10,DEC,E,,,100
1,104496,154726761,F,10,DEC,E,,,100
2,22358,143393573,F,10,DEC,,,,100
3,289439,156678922,,10,DEC,,,,100
4,214240,155873141,,10,DEC,,,,100


##### Step 6: Create final table based on Patient_SQL_1 + Patient_SQL_2 + Patient_SQL_3 + Patient_SQL_4 + Patient_SQL_5 + age in years & null ages from original Patient table 

In [59]:
%%bigquery
create or replace table faers_modeled.Patient_SQL_Final as
(select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient_SQL_1
union all
 select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient_SQL_2
union all
 select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient_SQL_3
union all
 select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient_SQL_4
union all
 select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient_SQL_5
union all
 select PATIENT_ID, CASE_ID, SEX, AGE as AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT
 from faers_modeled.Patient
 where AGE is null or AGE_UNIT = 'YR'
)

In [68]:
%%bigquery
# Check number of rows of final table
select count(*) as number_of_final_records from faers_modeled.Patient_SQL_Final

Unnamed: 0,number_of_final_records
0,394066


In [69]:
%%bigquery
# Check number of rows of initial table
select count(*) as number_of_initial_records from faers_modeled.Patient

Unnamed: 0,number_of_initial_records
0,394066


##### Number of records before and after transformation match -> All records retained

### Part 2: Verify BigQuery tables have valid Primary Key and Foreign Key 

#### Patient_SQL_Final table : PK is PATIENT_ID

In [70]:
%%bigquery
select count(*) as total_records from faers_modeled.Patient_SQL_Final

Unnamed: 0,total_records
0,394066


In [71]:
%%bigquery
select count(distinct PATIENT_ID) as distinct_id from faers_modeled.Patient_SQL_Final

Unnamed: 0,distinct_id
0,394066


#### Patient_SQL_Final table : FK is CASE_ID

In [72]:
%%bigquery
select count(*) as count_of_missing_PK
from faers_modeled.Patient_SQL_Final p left join faers_modeled.Case c on p.CASE_ID = c.CASE_ID
where c.CASE_ID is null

Unnamed: 0,count_of_missing_PK
0,0


In [40]:
%%bigquery
# Check if transformation done correctly
select * from faers_modeled.Patient_SQL_5
order by AGE_YRS desc

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT,AGE_YRS
0,289439,156678922,,10,DEC,,,,100
1,214240,155873141,,10,DEC,,,,100
2,104496,154726761,F,10,DEC,E,,,100
3,333021,157157552,F,10,DEC,,,,100
4,290607,156692001,F,10,DEC,E,,,100
...,...,...,...,...,...,...,...,...,...
1613,271428,156485122,F,1,DEC,C,,,10
1614,382370,157721021,F,1,DEC,,,,10
1615,309489,156897882,M,1,DEC,,,,10
1616,373711,157616451,M,1,DEC,,,,10


In [9]:
%%bigquery
select p.* 
from faers_modeled.Patient p inner join faers_modeled.Adverse_Event e on p.CASE_ID = e.CASE_ID
where AGE is not null and COUNTRY = 'US'

Unnamed: 0,PATIENT_ID,CASE_ID,SEX,AGE,AGE_UNIT,AGE_GROUP,WEIGHT,WEIGHT_UNIT
0,1484,101771493,F,18,YR,,,
1,2137,106905644,F,23,YR,,71.279999,KG
2,6260,125138594,M,88,YR,E,,
3,8669,129979573,M,89,YR,E,,
4,8770,130149705,F,13,YR,,41.000000,KG
...,...,...,...,...,...,...,...,...
156800,233759,156082281,F,87,YR,E,47.169998,KG
156801,255313,156312581,M,87,YR,,,
156802,309967,156903101,F,87,YR,,,
156803,383583,157735971,M,87,YR,,,


In [8]:
%%bigquery
select * 
from faers_modeled.Adverse_Event
where COUNTRY = 'US'

Unnamed: 0,EVENT_ID,CASE_ID,EVENT_DATE,COUNTRY,OUTCOME
0,145408,154839901,2018-07-23,US,RI
1,149612,154877891,2018-03-16,US,RI
2,150667,154887471,2018-05-03,US,RI
3,150785,154888481,2018-08-13,US,RI
4,242337,155742391,,US,RI
...,...,...,...,...,...
289050,459900,1210013511,2015-08-15,US,OT
289051,459918,1214739312,2014-11-15,US,OT
289052,460820,1317941810,2011-04-15,US,OT
289053,461285,1361160912,2017-05-24,US,OT


In [1]:
%%bigquery
select AGE_YRS
from vaers_modeled.Patient
group by AGE_YRS
order by AGE_YRS

Unnamed: 0,AGE_YRS
0,
1,0.0
2,1.0
3,2.0
4,3.0
...,...
100,99.0
101,100.0
102,102.0
103,103.0
