## Care home identification notebook


This notebook is the latest version (as of 17/08/2024) of the code for identifying care home residents in Connected Bradford, written after some issues with previous scripts were identified. It is the version that will be used in the final analysis. Keeping it in Jupyter notebook format also enables it to be easily uploaded to GitHub and made freely available.

In [None]:
library(bigrquery)
library(tidyverse) 

In [None]:
# project id passed to bq_project_query() for every query in this notebook
ProjectId <- "yhcr-prd-bradfor-bia-core"

# fully qualified dataset names pasted into the SQL below; gsub() strips any
# stray spaces so the pasted table references stay valid
# targetdb1: dataset the final cohort table is read back from
targetdb1 <- gsub(" ", "", "yhcr-prd-bradfor-bia-core.CB_2172")

# targetdb2: dataset holding tbl_NEC_Deaths
targetdb2 <- gsub(" ", "", "yhcr-prd-bradfor-bia-core.CB_FDM_DeathCertificates")

# targetdb3: dataset holding tbl_adultsocialcare_services and person
targetdb3 <- gsub(" ", "", "yhcr-prd-bradfor-bia-core.CB_FDM_AdultSocialCare")



In [None]:
#first query selects every individual with a nursing or residential care home admission, keeping their first admission where it started before 31/12/2019; no age restriction is applied at this stage

In [None]:
# care home cohort query (uses analytic functions), one row per person:
#   a/b : first Nursing/Residential service row per person (earliest start date)
#   c/d : the Nursing/Residential row ranked first when end dates are sorted descending
#   e   : first start date joined to that end date
#   f   : admission_length = days between the two dates
#   h   : adds birth/death datetime, ethnicity and gender from the person table
#   g   : adds age_admission and keeps admissions that started before 2019-12-31
#
# age_admission is the number of COMPLETED years at admission. DATE_DIFF(..., YEAR)
# on its own only counts calendar-year boundaries (born 1954-12-31, admitted
# 2019-01-01 would give 65), so one year is subtracted when the admission falls
# before the birthday in the admission year. This matters for the >= 65 filter
# applied later. A missing birth date still yields a NULL age.
sql1 <- paste0(
  'with a  as (
SELECT person_id, 
tbl_adultsocialcare_services_start_date, 
servicetype, 
FinalisedPSR, 
row_number() over (partition by person_id order by tbl_adultsocialcare_services_start_date) as care_seq 
from ', targetdb3, '.tbl_adultsocialcare_services 
where ServiceType in("Nursing","Residential") 
),
b as (
select 
* 
from a
where care_seq = 1
order by person_id, tbl_adultsocialcare_services_start_date),

c as 
(SELECT person_id,
tbl_adultsocialcare_services_end_date, 
servicetype, 
row_number() over (partition by person_id order by tbl_adultsocialcare_services_end_date desc) as care_end_seq 
from ', targetdb3, '.tbl_adultsocialcare_services 
where ServiceType in("Nursing","Residential") )
,

d as (select
* from c 
where care_end_seq = 1),

e as (
select 
b.person_id,
b.tbl_adultsocialcare_services_start_date as first_episodestartdate,
d.tbl_adultsocialcare_services_end_date as last_episodestopdate,
b.ServiceType as admission_service,
b.finalisedPSR
from b 
LEFT join d on
b.person_id= d.person_id
order by person_id),

f as (
Select 
e.*,
datetime_diff(e.last_episodestopdate,e.first_episodestartdate,day) as admission_length
from e
),

h as (
SELECT
f.*,
g.birth_datetime,
FORMAT_DATETIME("%d-%b-%y",first_episodestartdate) as date_of_admission,
g.death_datetime,
g.ethnicity_source_value,
g.gender_source_value
FROM f 
LEFT JOIN ', targetdb3, '.person g on
f.person_id = g.person_id),

g as (
select
DATE_DIFF(DATE(first_episodestartdate), DATE(birth_datetime), YEAR)
  - IF(FORMAT_DATE("%m%d", DATE(first_episodestartdate)) < FORMAT_DATE("%m%d", DATE(birth_datetime)), 1, 0) as age_admission,
h.*,
from h
where first_episodestartdate < "2019-12-31T00:00:00"
order by age_admission desc)

select
* from g

;'
)

# run the query and download the result
sql_tbl <- bq_project_query(ProjectId, sql1)
sql_data <- bq_table_download(sql_tbl)


In [None]:
#convert vectors to date

In [None]:
# replace death_datetime with a date-only death_date column
sql_data <- sql_data %>%
  mutate(death_date = as_date(death_datetime)) %>%
  select(-death_datetime)

In [None]:
# date-only copy of first_episodestartdate (the original column is kept)
sql_data <- sql_data %>%
  mutate(episodestartdate = as_date(first_episodestartdate))


In [None]:
# date-only copy of last_episodestopdate (the original column is kept)
sql_data <- sql_data %>%
  mutate(episodestopdate = as_date(last_episodestopdate))


In [None]:
# replace birth_datetime with a date-only birth_date column
sql_data <- sql_data %>%
  mutate(birth_date = as_date(birth_datetime)) %>%
  select(-birth_datetime)

In [None]:
#remove those individuals whose admission date was identified as erroneous (i.e. these individuals were actually admitted into a care home prior to 2016 but had no start date, so the date of upload was used instead, creating some skew). This has been confirmed with the individual who built the social care FDM in Connected Bradford

In [None]:
# drop admissions recorded on the three erroneous start dates
# (rows with a missing episodestartdate are dropped as well, because the
# comparison evaluates to NA)
sql_data <- sql_data %>%
  filter(
    episodestartdate != "2016-04-04",
    episodestartdate != "2016-07-25",
    episodestartdate != "2016-08-22"
  )

In [None]:
# number of distinct individuals left after removing the erroneous start dates.
# n_distinct() takes vectors, not column names: passing the string 'person_id'
# only recycles a constant next to the whole data frame, so the previous call
# counted distinct rows rather than distinct person_id values.
n_distinct(sql_data$person_id)

In [None]:
#how many aged less than 65 

In [None]:
# individuals younger than 65 at admission, and how many distinct people that is
sql_data_below_65 <- filter(sql_data, age_admission < 65)
sql_data_below_65 %>%
  pull(person_id) %>%
  n_distinct()

In [None]:
# individuals aged 65 or over at admission (rows with a missing age are not kept),
# followed by the number of distinct rows in that subset
sql_data_above_sixtyfive <- filter(sql_data, age_admission >= 65)
n_distinct(sql_data_above_sixtyfive)


In [None]:
#here we bring in the mortality data to reduce the number of missing death dates

In [None]:
# death certificate lookup for people whose person record has no death_datetime:
#   a : ranks each person's tbl_NEC_Deaths rows by dod, then reg_date (latest first)
#   b : keeps the top-ranked rows and numbers them so ties collapse to one row
# the final select returns one row per person with dod and reg_date
sql_death <- paste0(
  '
with a as (SELECT person_id,
dod,
tbl_NEC_Deaths,
reg_date,
rank() over(partition by person_id order by dod desc, reg_date desc) as rank_seq
from ', targetdb2, '.tbl_NEC_Deaths
where person_id in (SELECT person_id from ', targetdb3, '.person where death_datetime is null)),

b as (
select
*,
row_number() over(partition by person_id order by person_id) as rank_seq_2
from a 
where rank_seq = 1)

select 
person_id, dod,reg_date from b 
where rank_seq_2 = 1 
order by person_id 
;'
)

# run the query, download the result and show it
tbl_death <- bq_project_query(ProjectId, sql_death)
ch_death <- bq_table_download(tbl_death)

print(ch_death)

In [None]:
# date_death = date of death, falling back to the registration date when dod is missing.
# coalesce() keeps the date class of the columns; ifelse() strips it and returns
# bare numbers, which are only re-read correctly afterwards if they happen to be
# day counts (they would not be for date-time columns).
ch_death <- ch_death %>%
  mutate(date_death = coalesce(dod, reg_date))

In [None]:
# make sure date_death is stored as a Date
ch_death <- ch_death %>%
  mutate(date_death = as_date(date_death))

In [None]:
## join death certificate tibble to the deathdate in the master cohort table 

In [None]:
# keep only the join key and the derived death date, then attach it to the 65+ cohort
ch_death_filter <- select(ch_death, person_id, date_death)
sql_data_above_sixtyfive <- sql_data_above_sixtyfive %>%
  left_join(ch_death_filter, by = "person_id")

In [None]:
# dod = death date from the person table, falling back to the death certificate date;
# the two source columns are dropped once they have been combined
sql_data_above_sixtyfive <- sql_data_above_sixtyfive %>%
  mutate(dod = coalesce(death_date, date_death)) %>%
  select(-c(death_date, date_death))


In [None]:
# mortality = time from admission (episodestartdate) to death (dod); NA when dod is missing
sql_data_above_sixtyfive <- sql_data_above_sixtyfive %>%
  mutate(
    mortality = dod - episodestartdate
  )

In [None]:
# keep people with no recorded death date, or who died more than 42 days after admission
alive_morethan_6weeks <- sql_data_above_sixtyfive %>%
  filter(is.na(dod) | mortality > 42)

In [None]:
# number of distinct rows that survived the 42-day mortality filter
n_distinct(alive_morethan_6weeks)

In [None]:
## next we keep only residents whose recorded stay lasted longer than 42 days
# (deaths within 42 days of admission were already excluded when alive_morethan_6weeks was built)
# NOTE(review): rows with a missing episodestopdate make this comparison NA and are
# dropped by filter() - confirm that excluding residents with no recorded stop date is intended
long_stay_resident <- alive_morethan_6weeks %>%
  filter(episodestopdate > episodestartdate + ddays(42))


In [None]:
#second, of those remaining, we calculate and remove those who had a recorded length of stay of 42 days or less.
#the two figures combined (number who died within 42 days and number discharged alive) give the total number filtered at this stage.

In [None]:
# number of distinct rows left after the length-of-stay filter
n_distinct(long_stay_resident)

In [None]:
#we identified anyone with missing primary care data and removed them; this is most likely because, whilst they are receiving social care funding from the Bradford local authority, they are not registered with a GP contributing to Connected Bradford.

In [None]:
# people in the adult social care person table with no matching person_id in the
# primary care tbl_srcode table (left join, then keep rows where the match is null)
sql_missing <- '
select
distinct b.person_id as primary_care_person
from `yhcr-prd-bradfor-bia-core.CB_FDM_AdultSocialCare.person` b
left join (select person_id,
  from `yhcr-prd-bradfor-bia-core.CB_FDM_PrimaryCare.tbl_srcode`
  where person_id in(select person_id from `yhcr-prd-bradfor-bia-core.CB_FDM_AdultSocialCare.person`)) a
  on b.person_id = a.person_id
  where a.person_id is null

                     ;'

# run the query and keep the downloaded result under the same name as before
tbl_missing <- bq_project_query(ProjectId, sql_missing) %>%
  bq_table_download()


In [None]:
# final cohort: long-stay residents who are not in the missing-primary-care list
care_home_cohort_final <- long_stay_resident %>%
  filter(!person_id %in% tbl_missing$primary_care_person)
n_distinct(care_home_cohort_final)

# members of the final cohort with no recorded date of death
care_home_ghosts <- filter(care_home_cohort_final, is.na(dod))

In [None]:
# number of distinct rows in the final cohort without a death date
n_distinct(care_home_ghosts)

In [None]:
# inspect column names and types before uploading
str(care_home_cohort_final)

In [None]:
# send the final care home cohort back to BigQuery as a table so it can be run through the other queries
# show the BigQuery field definitions derived from the data frame
as_bq_fields(care_home_cohort_final)

# store mortality as a plain number rather than a time difference
care_home_cohort_final <- care_home_cohort_final %>%
  mutate(mortality = as.numeric(mortality))

In [None]:
# destination table for the cohort
mybq <- bq_table(
  project = "yhcr-prd-bradfor-bia-core",
  dataset = "CB_2172",
  table = "care_home_cohort_v1"
)

# upload, creating the table if needed and replacing any existing contents
bq_table_upload(
  x = mybq,
  values = care_home_cohort_final,
  create_disposition = "CREATE_IF_NEEDED",
  write_disposition = "WRITE_TRUNCATE"
)

In [None]:
# read the uploaded cohort back, dropping finalisedPSR and the two date-only episode
# columns and returning first_episodestartdate cast to DATETIME
sql_ch <- paste0(
  '
select
* except(first_episodestartdate,finalisedPSR,episodestartdate,episodestopdate), cast(first_episodestartdate as DATETIME) as first_episodestartdate
from ', targetdb1, '.care_home_cohort_v1

                     ;'
)

# run the query and download the result
tbl_ch <- bq_project_query(ProjectId, sql_ch) %>%
  bq_table_download()

