## hypertension diagnosis before CH admission query 

In [1]:
library(ggplot2)
library(bigrquery)
library(tidyverse)
library(ggplot2)
library(lubridate)
library(stats)

ProjectId = "yhcr-prd-bradfor-bia-core"

targetdb2 <- 'yhcr-prd-bradfor-bia-core.CB_FDM_PrimaryCare'
targetdb2 <-gsub(' ','',targetdb2)

targetdb3 <- 'yhcr-prd-bradfor-bia-core.CB_2172'
targetdb3 <- gsub(' ','',targetdb3)

targetdb4 <- 'yhcr-prd-bradfor-bia-core.CB_LOOKUPS'
targetdb4 <- gsub(' ','',targetdb4)


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [2]:
sqlhtn <- paste('


with diagnosis_by_code as 
(
  with a as (
  SELECT 
  person_id,
  dateevent,
  row_number() over(partition by person_id order by dateevent) code_sequence
  FROM  ',targetdb2,'.tbl_srcode
  WHERE snomedcode in(SELECT SNOMEDCT_CONCEPTID FROM ',targetdb4,'.tbl_EFI2_Codelist where deficit = "Hypertension" AND Otherinstructions IS NULL) 
AND person_id in(select person_id from ',targetdb3,'.care_home_cohort_v1)),

  snomed as (
  SELECT 
  m.person_id,
  m.first_episodestartdate,
  a.dateevent as hypertension_date,
  FROM ',targetdb3,'.care_home_cohort_v1 m
  LEFT JOIN a ON
  a.person_id= m.person_id AND a.dateevent < m.first_episodestartdate and code_sequence = 1),

cohort as (
select
person_id,
case when hypertension_date is not null then "yes"
when hypertension_date is null then "no"
end as hypertension_code
from snomed)

select
*
from cohort
where hypertension_code = "yes"
order by person_id),

##here we have the script for identifying hypetension by numeric value taken from cb_2172_hypertension_numeric

diagnosis_by_value as (
  (with a as (
SELECT person_id, ctv3text,cast(numericvalue as bignumeric) as numericvalue_1, idappointment, dateevent, extract(date from dateevent) as date_record
,case 
when snomedcode in("400975005","163031004","407557002", "407555005") then "diastolic"
when snomedcode in("163033001","163030003","163034007","163035008","407554009","407556006","400974009","163020007") then "systolic"
when snomedcode in("413605002","198000000000000","314462001") then "home diastolic"
when snomedcode in("314449000","413606001","1.98E+14") then "home systolic"
else NULL end as bp_reading
 FROM ',targetdb2,'.tbl_srcode where person_id  in (select person_id from ',targetdb3,'.care_home_cohort_v1)
order by dateeventrecorded), 

b as (
  select a.*,
  c.first_episodestartdate
  from a
  left join ',targetdb3,'.care_home_cohort_v1  c
  on a.person_id = c.person_id
  where bp_reading is not null
  ),
#select for the minimum systlic and diastolic value on a given date 
d as(
  select 
  person_id, date_record,bp_reading,
  min(numericvalue_1) as min_bp
  from b
  where dateevent < first_episodestartdate and (datetime_diff(first_episodestartdate, dateevent, year) <= 2) and (numericvalue_1 > 10)
  group by person_id, date_record, bp_reading
  order by person_id, date_record),
#create a new variable of the preceding and following systolic and distolic value 
e as (select
  person_id,
  bp_reading,
  min_bp,
  row_number() over (partition by person_id,bp_reading order by min_bp desc) as bp_reading_seq,
  date_record
from d
order by person_id
),

max_value as (
  select
  *
  from e 
  where bp_reading_seq =1
),

min_value as (
  select
  *
  from e 
  where bp_reading_seq =2
),

min_max_join as (
  select
  max_value.person_id, 
  max_value.bp_reading,
  max_value.min_bp as first_reading,
  min_value.min_bp as second_reading
  from max_value 
  inner join min_value on
  max_value.person_id = min_value.person_id and max_value.bp_reading = min_value.bp_reading
  order by person_id),

htn_code as (
  select
  person_id, case when bp_reading = "diastolic" and (first_reading > 90) and (second_reading > 90) then "yes"
  when bp_reading = "systolic" and (first_reading >140) and (second_reading > 140) then "yes"
  when bp_reading = "home systolic" and (first_reading > 135) and (second_reading > 135) then "yes"
  when bp_reading = "home diastolic" and (first_reading > 85) and (second_reading> 85) then "yes" else "false"
  end as hypertension_numeric
  from min_max_join
  order by person_id)

  select
  distinct * 
  from htn_code
  where hypertension_numeric = "yes")),
#then we union the two together

union_htn as (
select
* from diagnosis_by_code

union distinct

select
* from diagnosis_by_value)

select ee.person_id,ee.first_episodestartdate, case when union_htn.hypertension_code = "yes" then "yes" else "no" end as hypertension from ',targetdb3,'.care_home_cohort_v1 ee left join union_htn on ee.person_id = union_htn.person_id
;',sep="")

tblhtn <-bq_project_query(ProjectId,sqlhtn)
data_htn <- bq_table_download(tblhtn)

In [3]:
data_htn %>% n_distinct()

In [4]:
data_htn_factor <- data_htn
data_htn_factor$hypertension.factor <- as.factor(data_htn_factor$hypertension)
data_htn_factor <- data_htn_factor%>%  select(-hypertension)

In [5]:
library(finalfit)
data_htn_factor %>% summary_factorlist(dependent= NULL ,explanatory="hypertension.factor")

No dependent variable(s) provided; defaulting to single-level factor



label,levels,all
<chr>,<chr>,<chr>
hypertension.factor,no,766 (27.4)
,yes,2028 (72.6)


In [6]:
data_htn_factor

person_id,first_episodestartdate,hypertension.factor
<int>,<dttm>,<fct>
13556264,2017-02-17,no
12824748,2016-09-06,yes
12405253,2019-04-17,no
12959867,2018-04-23,yes
12939436,2019-11-13,yes
13524173,2018-11-21,yes
12611808,2018-07-31,yes
12520526,2018-06-27,yes
12816739,2019-09-03,yes
13692315,2017-04-03,no


In [8]:
mybq = bq_table(project='yhcr-prd-bradfor-bia-core', dataset='CB_2172', table='cb_2172_hypertension_combined')
bq_table_upload(x=mybq, values= data_htn, fields= NULL,create_disposition='CREATE_IF_NEEDED', 
             write_disposition='WRITE_TRUNCATE')