transform_tables_from_safe_haven.Rmd

---
title: "Transform tables exported from Safe Haven for results paper"
author: "Jan Savinc"
date: "15/06/2020"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
# Show the source code of every chunk alongside its output in the knitted document
knitr::opts_chunk$set(echo = TRUE)
```

# Libraries required

```{r}
library(tidyverse)  # for data manipulation
library(openxlsx)  # for writing excel files
library(readxl)  # for reading excel files
```

# Read tables

```{r}
# icd_chapters <- read_csv(file = "./processed_ICD_codes/map_icd_chapter_block_code.csv") %>%
#   mutate(
#     title = gsub(pattern = "Chapter"))

## Chapter titles in the order in which they should appear in the output tables;
## used further down as factor levels when sorting on `chapter_equivalent`.
order_icd_chapters <-
  c(
    "Infectious and parasitic diseases",
    "Neoplasms",
    "Endocrine, nutritional and metabolic diseases, and immunity disorders",
    "Diseases of the blood and blood-forming organs",
    "Diseases of the nervous system and the sense organs",
    "Mental disorders",
    "Diseases of the circulatory system",
    "Diseases of the respiratory system",
    "Diseases of the digestive system",
    "Diseases of the genitourinary system",
    "Complications of pregnancy, childbirth, and the puerperium",
    "Diseases of the skin and subcutaneous tissue",
    "Diseases of the musculoskeletal system and connective tissue",
    "Congenital anomalies",
    "Certain conditions originating in the perinatal period",
    "Symptoms, signs, and ill-defined conditions",
    "Injury and poisoning",
    "Supplementary classification of factors influencing health status and contact with health services"
  )

## First batch of exported frequency tables. Every sheet ends with a caption
## row, which is removed straight after reading. The names attached to the sheet
## vectors below become the element names of `tables`.
tables <- local({
  read_sheets <- function(workbook, sheets) {
    map(
      .x = sheets,
      .f = function(sheet_name) {
        read_excel(path = workbook, sheet = sheet_name) %>%
          slice(-n())  # last row is the caption, not data
      }
    )
  }

  c(
    ## MVR & Schnitzer codes (the "Mother died, N individuals" sheet is superseded)
    read_sheets(
      workbook = "../Safe Haven Exports/2019-12-18/1617-0228_Output released/Frequency_tables_maltreatment_and_mothers_death_by_age_sex_cohort.xlsx",
      sheets = c(
        mvr = "MVR codes, N individuals",
        schnitzer = "Schnitzer codes, N individuals"
      )
    ),
    ## individuals with MH diagnoses
    read_sheets(
      workbook = "../Safe Haven Exports/2019-12-18/1617-0228_Output released/Freq_tables_individuals_with_MH_diag_by_age_sex_cohort.xlsx",
      sheets = c(
        mh_main_and_other_diag = "SMR01 & 04, MAIN & OTHER DIAG",
        mh_main_diag_only = "SMR01 & 04, MAIN DIAG"
      )
    ),
    ## episode tables: the 2020-11-12 export supersedes the 2019-12-18 workbook
    ## Freq_tables_episodes_by_icd_chapter_age_cohort.xlsx
    read_sheets(
      workbook = "../Safe Haven Exports/2020-11-12/Frequency_tables_episodes.xlsx",
      sheets = c(
        icd_main_and_other_diag = "Any diagnosis",
        icd_main_diag_only = "Main diagnosis only"
      )
    )
  )
})

## Third sheet of the episode descriptives workbook, read as-is (no caption row removed)
tables$episodes_prior_to_death <-
  read_excel(
    path = "../Safe Haven Exports/Episode descriptives/Descriptives_number_of_episodes_admissions_only.xlsx",
    sheet = 3
  )

## Descriptive summary of the cohort; the final row holds a footnote and is dropped
tables_descriptives_cohort <-
  read_excel(
    path = "../Safe Haven Exports/Descriptive summary of cohort & individuals with any records/Descriptive_summary_cohort_by_whether_they_had_any_records_prior_to_death.xlsx",
    sheet = 1
  ) %>%
  slice(-n())

## CONSORT diagram data, with one label corrected: the exclusion entry for
## "no hospital records prior to death before age 18" with N > 1000 should read
## 'controls' rather than 'cases'. Rows where the test cannot be evaluated
## (missing values) keep their original note.
consort_data <-
  read_csv(file = "../Safe Haven Exports/CONSORT_diagram_data.csv") %>%
  mutate(
    note = if_else(
      coalesce(
        note == "Excluded: cases with no hospital records prior to death before age 18" & N > 1000,
        FALSE
      ),
      "Excluded: controls with no hospital records prior to death before age 18",
      note
    )
  )


## Later exports appended to `tables`. The final row of every sheet read here
## is a caption and is dropped. Elements are added in the same order as before.
tables <- local({
  read_sheet <- function(workbook, sheet) {
    read_excel(path = workbook, sheet = sheet) %>%
      slice(-n())  # last row is the caption, not data
  }
  read_sheets <- function(workbook, sheets) {
    map(.x = sheets, .f = function(sheet_name) read_sheet(workbook, sheet_name))
  }

  collected <- tables

  ## CCS categories
  collected <- c(
    collected,
    read_sheets(
      workbook = "../Safe Haven Exports/ResultsResults_paper_1_tables_2020_07_30/Frequency_table_ccs_categories.xlsx",
      sheets = c(
        ccs_main_and_other_diag = "CCS categories, any position",
        ccs_main_diag_only = "CCS, main diagnosis only"
      )
    )
  )

  ## poisonings and self-harm
  collected$poisonings_and_sh <-
    read_sheet(
      workbook = "../Safe Haven Exports/ResultsResults_paper_1_tables_2020_07_30/Frequency_table_poisonings_and_self_harm.xlsx",
      sheet = 1
    )

  ## geographic indicators at death
  collected <- c(
    collected,
    read_sheets(
      workbook = "../Safe Haven Exports/Exports 2020-10-07/Results_paper_1_tables/Frequency_table_geographic_indicators_at_death.xlsx",
      sheets = c(
        distribution_geo_carstairs_quintile = "Carstairs, quintiles",
        distribution_geo_simd_quintile = "SIMD, quintiles",
        distribution_geo_urban_rural_4_quintile = "Urban-rural,aggregated"
      )
    )
  )

  ## CCS categories from the 2020-09-14 export
  collected$ccs_any_diagnosis_excluding_death_episodes <-
    read_sheet(
      workbook = "../Safe Haven Exports/Results_paper_1_tables, 2020-09-14/Frequency_table_ccs_categories.xlsx",
      sheet = 1
    )

  ## MVR / Schnitzer / CCS for individuals with records prior to death
  collected <- c(
    collected,
    read_sheets(
      workbook = "../Safe Haven Exports/Results_paper_1_tables, 2020-09-14/Frequency_table_MVR_Schnitzer_CCS_index_episodes_individuals_with_records_prior_to_death.xlsx",
      sheets = c(
        mvr_individuals_index_epi = "MVR,ind. with pre-death records",
        schnitzer_individuals_index_epi = "Schnitzer,ind.w.pre-death recs",
        ccs_main_diag_individuals_index_epi = "CCS,main diag w.pre-death recs.",
        ccs_any_diag_individuals_index_epi = "CCS,any diag w.pre-death recs."
      )
    )
  )

  collected$poisonings_by_intent <-
    read_sheet(
      workbook = "../Safe Haven Exports/Results_paper_1_tables, 2020-09-14/Frequency_table_poisonings_by_intent.xlsx",
      sheet = 1
    )

  ## same workbook as `tables_descriptives_cohort`
  collected$not_in_work <-
    read_sheet(
      workbook = "../Safe Haven Exports/Descriptive summary of cohort & individuals with any records/Descriptive_summary_cohort_by_whether_they_had_any_records_prior_to_death.xlsx",
      sheet = 1
    )

  collected$maternal_death <-
    read_sheet(
      workbook = "../Safe Haven Exports/2020-10-30/Frequency_table_mothers_death_individuals_with_records_prior_to_death.xlsx",
      sheet = 1
    )

  collected$schnitzer_excluding_dental_caries <-
    read_sheet(
      workbook = "../Safe Haven Exports/Schnitzer_codes_investigated_2020-07-21/Frequency_table_schnitzer_codes_without_dental_caries.xlsx",
      sheet = "Individuals"
    )

  collected
})
```


# Convert to long format for easier transforming


```{r}
tables_long <- list()

## MVR & Schnitzer tables: stack every column whose name contains whitespace
## into a Type / n_prop pair, then strip the " N (%)" and " (%)" suffixes that
## the column names carried. (The "mother_died" table used to be handled here
## too but has been superseded.)
tables_long[c("mvr", "schnitzer")] <-
  map(
    .x = tables[c("mvr", "schnitzer")],
    .f = function(wide_tbl) {
      stacked <-
        pivot_longer(
          wide_tbl,
          cols = matches("\\s"),
          names_to = "Type",
          values_to = "n_prop"
        )
      mutate(
        stacked,
        Type = Type %>%
          str_remove_all(fixed(" N (%)")) %>%
          str_remove_all(fixed(" (%)"))
      )
    }
  )

## The remaining tables need no reshaping and are carried over unchanged
unchanged_tables <-
  c(
    "mh_main_and_other_diag",
    "mh_main_diag_only",
    "ccs_main_and_other_diag",
    "ccs_main_diag_only",
    "poisonings_and_sh",
    "distribution_geo_carstairs_quintile",
    "distribution_geo_simd_quintile",
    "distribution_geo_urban_rural_4_quintile"
  )
tables_long[unchanged_tables] <- tables[unchanged_tables]
```


# Transform tables

## Helper functions

```{r}
## Helper: relabel the age groups ">=0, <1" and ">=1, <10" as the single group
## ">=0, <10"; every other value of `age_group` is left untouched. Rows are not
## aggregated here - only the label changes.
group_age_groups_0_to_10 <- function(data_tbl) {
  youngest_groups <- c(">=0, <1", ">=1, <10")
  mutate(
    data_tbl,
    age_group = if_else(age_group %in% youngest_groups, ">=0, <10", age_group)
  )
}

## Helper: format counts as "n (p%)" text.
##   numerator, denominator  counts and their totals
##   threshold               counts below this are shown as "N<threshold"
##   accuracy                rounding passed on to scales::percent()
## Returns a character vector; NA wherever numerator or denominator is NA.
num_and_prop <- function(numerator, denominator, threshold=10, accuracy=0.01) {
  is_missing <- is.na(numerator) | is.na(denominator)
  is_small <- numerator < threshold
  share <- scales::percent(numerator / denominator, accuracy = accuracy)

  case_when(
    is_missing ~ NA_character_,
    is_small ~ paste0("N<", threshold),
    TRUE ~ paste0(numerator, " (", share, ")")
  )
}
```

## Various tables

```{r}
transformed <- list()

## MVR, all types combined; one row per separate age group, one column per case x sex
transformed$mvr_all_types <-
  tables_long$mvr %>%
  filter(
    Type == "All types",  # all types combined only
    age_group != "<18"    # separate age groups only, not the combined one
  ) %>%
  select(-Type) %>%
  arrange(case, sex, age_group) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "MVR") %>%
  select(indicator, everything())

## Schnitzer, all types combined; Schnitzer is only defined for ages 0-10, so
## both the combined "<18" group and the ">=10, <18" group are left out
transformed$schnitzer_all_types <-
  tables_long$schnitzer %>%
  filter(
    Type == "All types",
    !age_group %in% c("<18", ">=10, <18")
  ) %>%
  select(-Type) %>%
  arrange(case, sex, age_group) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "Schnitzer") %>%
  select(indicator, everything())

## Schnitzer codes excluding dental caries. The count is taken from the leading
## digits of the `neglect` column and N (%) is recomputed against the number of
## individuals flagged "No" for `has_no_records_prior_to_death`.
transformed$schnitzer_excluding_dental_caries <-
  tables$schnitzer_excluding_dental_caries %>%
  mutate(n = str_extract(neglect, pattern = "^\\d+\\s") %>% as.integer()) %>%
  left_join(
    # The denominators are derived here directly from the cohort descriptives:
    # the `denominators` list is only created in a later chunk, so referring to
    # it at this point fails when the document is run from top to bottom.
    tables_descriptives_cohort %>%
      select(case, sex, has_no_records_prior_to_death, denominator = N) %>%
      filter(has_no_records_prior_to_death == "No"),
    by = c("case","sex")
  ) %>%
  mutate(
    n_prop = num_and_prop(numerator = n, denominator = denominator),
    indicator = "Schnitzer, excl. dental caries"
    ) %>%
  select(indicator, age_group, case, sex, n_prop) %>%
  arrange(case, sex, age_group) %>%
  pivot_wider(names_from = c("case","sex"), values_from = "n_prop")
  

# transformed$mother_died <-
#   tables_long$mother_died %>%
#   filter(age_group != "all") %>%  # only show separate age groups, not combined
#   select(-Type) %>%
#   arrange(case, sex, age_group) %>%
#   pivot_wider(names_from = c("case","sex"), values_from = "n_prop", values_fill = list(n_prop = "<=10")) %>%
#   arrange(age_group) %>%
#   mutate(indicator = "Mother died") %>%
#   relocate(indicator)

## Maternal death; case x sex combinations missing for an age group are shown as "<=10"
transformed$maternal_death <-
  tables$maternal_death %>%
  select(case, sex, age_group, n_prop) %>%
  arrange(case, sex, age_group) %>%
  pivot_wider(
    names_from = c(case, sex),
    values_from = n_prop,
    values_fill = list(n_prop = "<=10")
  ) %>%
  arrange(age_group) %>%
  mutate(indicator = "Maternal death") %>%
  select(indicator, everything())

## MH, main diagnosis only; separate age groups only, not the combined "<18"
transformed$mh_main_diag <-
  tables_long$mh_main_diag_only %>%
  filter(age_group != "<18") %>%
  select(-denominator) %>%
  arrange(case, sex, age_group) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "MH (main diagnosis)") %>%
  select(indicator, everything())

## Stack the indicator tables ("maternal_death" replaces the superseded "mother_died")
table_mvr_schnitzer_mh <-
  transformed[
    c(
      "mvr_all_types",
      "schnitzer_all_types",
      "schnitzer_excluding_dental_caries",
      "maternal_death",
      "mh_main_diag"
    )
  ] %>%
  bind_rows()

## ICD chapter tables for the combined "<18" age group: one row per chapter,
## one column per case x sex (sexes ordered both, male, female), with rows
## sorted by the position of the chapter in `order_icd_chapters`.
transformed[c("icd_main_diag", "icd_main_and_other_diag")] <-
  map(
    .x = tables[c("icd_main_diag_only", "icd_main_and_other_diag")],
    .f = function(icd_tbl) {
      icd_tbl %>%
        filter(age_group == "<18") %>%
        select(case, sex, chapter_equivalent, n_prop_episodes) %>%
        arrange(case, factor(sex, levels = c("both", "male", "female"))) %>%
        pivot_wider(names_from = c(case, sex), values_from = n_prop_episodes) %>%
        arrange(factor(chapter_equivalent, levels = order_icd_chapters))
    }
  )

## Main-diagnosis and any-diagnosis columns side by side, told apart by suffix
table_icd <-
  full_join(
    x = rename_with(
      transformed$icd_main_diag,
      ~ paste0(.x, "_main_diag"),
      .cols = -matches("chapter_equivalent")
    ),
    y = rename_with(
      transformed$icd_main_and_other_diag,
      ~ paste0(.x, "_any_diag"),
      .cols = -matches("chapter_equivalent")
    ),
    by = "chapter_equivalent"
  )

## Main-diagnosis CCS is not very interesting: cases have higher numbers, but
## they are difficult to compare because controls have extremely low numbers
transformed$ccs_main_diag <-
  tables$ccs_main_diag_only %>%
  filter(age_group == "<18") %>%
  select(case, sex, ccs_category_description, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop)

## CCS, any diagnosis position, under 18s; a number of categories are left out
transformed$ccs_main_and_other_diag <-
  tables$ccs_main_and_other_diag %>%
  filter(age_group == "<18") %>%
  select(case, sex, ccs_category_description, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  filter(
    str_detect(
      ccs_category_description,
      "Adjustment|Impulse|Delirium|Screening|Personality|Schizophrenia|usually diagnosed|Developmental",
      negate = TRUE
    )
  ) %>%
  rename(`CCS Category` = ccs_category_description)

## Same table for the "18+" age group; no categories are left out here
transformed$ccs_main_and_other_diag_over_18s <-
  tables$ccs_main_and_other_diag %>%
  filter(age_group == "18+") %>%
  select(case, sex, ccs_category_description, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  rename(`CCS Category` = ccs_category_description)

## Poisonings only, under 18s
transformed$poisonings_and_sh <-
  tables_long$poisonings_and_sh %>%
  filter(
    age_group == "<18",
    criterion == "Poisoning (960-979, T36-T50)"
  ) %>%
  select(case, sex, n_prop, Diagnosis = criterion) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop)

## CCS from the 2020-09-14 export: everything except the "18+" age group,
## sexes shown separately; no categories are left out
transformed$ccs_any_diagnosis_excluding_death_episodes <-
  tables$ccs_any_diagnosis_excluding_death_episodes %>%
  filter(
    age_group != "18+",
    sex != "both"
  ) %>%
  select(case, sex, ccs_category_description, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  rename(`CCS Category` = ccs_category_description)

## MVR: every age group except the combined "<18" (so "18+" is kept)
transformed$mvr_individuals_index_epi <-
  tables$mvr_individuals_index_epi %>%
  filter(!age_group %in% "<18") %>%
  select(case, sex, age_group, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "MVR") %>%
  select(indicator, everything())

## Schnitzer: both "<18" and "18+" are dropped
transformed$schnitzer_individuals_index_epi <-
  tables$schnitzer_individuals_index_epi %>%
  filter(!age_group %in% c("<18", "18+")) %>%
  select(case, sex, age_group, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "Codes suggestive of neglect (Schnitzer et al., 2011)") %>%
  select(indicator, everything())

## CCS, any diagnosis: every age group except the combined "<18"
transformed$ccs_any_diag_individuals_index_epi <-
  tables$ccs_any_diag_individuals_index_epi %>%
  filter(!age_group %in% "<18") %>%
  select(case, sex, age_group, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "CCS (any diagnosis)") %>%
  select(indicator, everything())

## CCS, any diagnosis: the "18+" age group on its own
transformed$ccs_any_diag_individuals_index_epi_over_18 <-
  tables$ccs_any_diag_individuals_index_epi %>%
  filter(age_group %in% "18+") %>%
  select(case, sex, age_group, n_prop) %>%
  pivot_wider(names_from = c(case, sex), values_from = n_prop) %>%
  mutate(indicator = "CCS (any diagnosis)") %>%
  select(indicator, everything())

## Stack the main adversity indicators into a single table
transformed$main_adversities <-
  bind_rows(
    transformed[
      c(
        "mvr_individuals_index_epi",
        "schnitzer_individuals_index_epi",
        "ccs_any_diag_individuals_index_epi",
        "maternal_death"
      )
    ]
  )

## Poisonings by external cause: one row per age group and cause, one column per
## case x sex. All age groups are kept (the "18+" filter is deliberately off).
transformed$poisonings_by_intent <-
  tables$poisonings_by_intent %>%
  # filter(age_group!="18+") %>%
  select(case, sex, age_group, poisoning_external_cause, n_prop) %>%
  # Sort by sex in the order both, female, male so that pivot_wider() creates
  # the columns in that order. The previous call,
  # factor(levels="both","female","male"), never looked at the `sex` column:
  # "female" was taken as the data and "male" as the label, giving a single NA.
  arrange(case, factor(sex, levels = c("both", "female", "male"))) %>%
  pivot_wider(names_from=c("case","sex"), values_from="n_prop") %>%
  arrange(poisoning_external_cause) %>%
  relocate(poisoning_external_cause)

## Proportion not in work, by case, sex and whether individuals had records
## prior to death, plus a "Both" row per case x sex that pools the two
## record groups.
transformed$not_in_work <-
  tables$not_in_work %>%
  filter(!is.na(proportion_not_in_work)) %>%
  select(case,sex,has_no_records_prior_to_death,N,proportion_not_in_work) %>%
  # Reconstruct the count from N and the proportion. Round before converting:
  # as.integer() alone truncates, so floating-point error (e.g. 100 * 0.29 is
  # 28.999...) would lose one person.
  mutate(N_not_in_work = as.integer(round(N * proportion_not_in_work))) %>%
  (function(tbl) {
    bind_rows(
      tbl,
      # pooled rows: sum the counts, then recompute the proportion from the sums
      tbl %>%
        group_by(case,sex) %>%
        summarise(
          has_no_records_prior_to_death = "Both",
          N = sum(N),
          N_not_in_work = sum(N_not_in_work),
          .groups = "drop"
        ) %>%
        mutate(
          proportion_not_in_work = N_not_in_work / N
          )
    )
  }) %>%
  mutate(
    percent_not_in_work = scales::percent(proportion_not_in_work, accuracy=0.1)
  )
```


## Denominators


```{r}
## Denominators by case, sex and whether individuals had records prior to
## death, taken from the N column of the cohort descriptives
denominators <-
  list(
    individuals_with_any_records_prior_to_death =
      select(
        tables_descriptives_cohort,
        case, sex, has_no_records_prior_to_death, denominator = N
      )
  )

## Denominators by case and sex only: the record groups are summed together
denominators$individuals_lifetime <-
  denominators$individuals_with_any_records_prior_to_death %>%
  group_by(case, sex) %>%
  summarise(across(denominator, sum), .groups = "drop")

# TODO: export these from SH!
# denominators$individuals_with_records_alive_by_age_group <-
#   NULL

```


## Table of geographical distributions

We exported data disaggregated by sex and by whether individuals had records prior to death; there were small numbers for females in the Carstairs data, so Carstairs quintiles 4 & 5 were merged for those, and we merge them for the computations as well.
The SIMD & urban-rural distributions can be aggregated more simply!

```{r}
## The list must exist before elements can be assigned into it; its only other
## initialisation sits in a commented-out chunk at the end of the document.
if (!exists("transformed_recalculated")) {
  transformed_recalculated <- list()
}

## SIMD & urban-rural: add rows that pool the two record groups
## (has_no_records_prior_to_death = "Both"), then rows that additionally pool
## the sexes (sex = "Both"), and recompute N (%) for every row.
transformed_recalculated[c("distribution_geo_simd_quintile", 
"distribution_geo_urban_rural_4_quintile")] <-
  tables_long[c("distribution_geo_simd_quintile", 
"distribution_geo_urban_rural_4_quintile")] %>%
  map(.x = ., .f = function(data_tbl) {
    # pooled over has_no_records_prior_to_death, within case x sex x indicator
    recalculated_chunk_records <-
      data_tbl %>%
      select(-n_prop) %>%  # we'll recalculate the proportion
      group_by(case, sex, Indicator, Measure) %>%
      summarise(
        has_no_records_prior_to_death = "Both",
        N = sum(N),
        denominator = sum(denominator),
        .groups = "drop"
      )
    # the pooled rows above, further pooled over sex
    recalculated_chunk_sexes <-
      recalculated_chunk_records %>%
      group_by(case, has_no_records_prior_to_death, Indicator, Measure) %>%
      summarise(
        sex = "Both",
        N = sum(N),
        denominator = sum(denominator),
        .groups = "drop"
      )
    # original rows first, then the two sets of pooled rows
    bind_rows(
      data_tbl,
      recalculated_chunk_records,
      recalculated_chunk_sexes
    ) %>%
      mutate(n_prop = num_and_prop(numerator = N, denominator = denominator))
  })

## Carstairs: same approach, except that indicators "4" and "5" are merged into
## "4 & 5" in the pooled rows (the original, unmerged rows are kept as exported).
transformed_recalculated$distribution_geo_carstairs_quintile <- 
  tables_long$distribution_geo_carstairs_quintile %>%
  (function(data_tbl) {
    recalculated_chunk_records <-
      data_tbl %>%
      select(-n_prop) %>%  # we'll recalculate the proportion
      mutate(Indicator = if_else(Indicator %in% c("4","5"), "4 & 5", Indicator)) %>%
      group_by(case, sex, Indicator, Measure) %>%
      summarise(
        has_no_records_prior_to_death = "Both",
        N = sum(N),
        # merging "4" and "5" puts several rows with the same denominator into
        # one group, so only distinct denominator values are summed
        # NOTE(review): this would under-count if two record groups happened to
        # share the same denominator - confirm against the exported data
        denominator = sum(unique(denominator)),
        .groups = "drop"
      )
    recalculated_chunk_sexes <-
      recalculated_chunk_records %>%
      group_by(case, has_no_records_prior_to_death, Indicator, Measure) %>%
      summarise(
        sex = "Both",
        N = sum(N),
        denominator = sum(denominator),
        .groups = "drop"
      )
    bind_rows(
      data_tbl,
      recalculated_chunk_records,
      recalculated_chunk_sexes
    ) %>%
      mutate(n_prop = num_and_prop(numerator = N, denominator = denominator))
  })
```


# Save transformed tables

```{r}
## Main adversities table. This deliberately overwrites the file that used to
## hold `table_mvr_schnitzer_mh`.
write.xlsx(
  x = transformed$main_adversities,
  file = "../Publications/Results paper 1/Table_MVR_Schnitzer_MH.xlsx"
)

## Episodes by ICD chapter
write.xlsx(
  x = table_icd,
  file = "../Publications/Results paper 1/Table_ICD_chapter_episodes.xlsx"
)

## CCS tables (`transformed$ccs_main_and_other_diag` is no longer written out)
write.xlsx(
  x = transformed$ccs_any_diagnosis_excluding_death_episodes,
  file = "../Publications/Results paper 1/Table_CCS_diagnoses_any_position_removed_death_epi.xlsx"
)
write.xlsx(
  x = transformed$ccs_main_and_other_diag_over_18s,
  file = "../Publications/Results paper 1/Table_CCS_diagnoses_main_or_other_over_18s.xlsx"
)

## Poisonings
write.xlsx(
  x = transformed$poisonings_and_sh,
  file = "../Publications/Results paper 1/Table_poisonings_incl_death_episodes_under_18s.xlsx"
)
write.xlsx(
  x = transformed$poisonings_by_intent,
  file = "../Publications/Results paper 1/Table_poisonings_by_intent.xlsx"
)

## Geographic indicators: a named list, so each table goes to its own sheet
write.xlsx(
  x = set_names(
    transformed_recalculated[
      c(
        "distribution_geo_carstairs_quintile",
        "distribution_geo_simd_quintile",
        "distribution_geo_urban_rural_4_quintile"
      )
    ],
    nm = c("Carstairs, quintiles", "SIMD, quintiles", "Urban-rural,aggregated")
  ),
  file = "../Publications/Results paper 1/Table_distribution_geographic_indicators.xlsx"
)

## Not in work
write.xlsx(
  x = transformed$not_in_work,
  file = "../Publications/Results paper 1/Table_not_in_work.xlsx"
)
```


## Copying to clipboard for use with excel files

```{r}
if (interactive()) {  # run only in interactive mode

  ## "<Sex> N=<denominator>" labels for the under-18 ICD tables, first for the
  ## main-diagnosis table and then for the any-diagnosis table. Each call
  ## writes to the clipboard, replacing what the previous call put there.
  walk(
    .x = tables[c("icd_main_diag_only", "icd_main_and_other_diag")],
    .f = function(icd_tbl) {
      icd_tbl %>%
        filter(age_group == "<18", sex != "both") %>%
        distinct(case, sex, denominator) %>%
        arrange(case, desc(sex)) %>%
        mutate(
          text = paste0(
            str_to_sentence(sex),
            " N=",
            format(denominator, trim = TRUE, big.mark = ",")
          )
        ) %>%
        pull(text) %>%
        write.table(
          "clipboard",
          sep = "\t",
          row.names = FALSE,
          col.names = FALSE,
          quote = TRUE
        )
    }
  )

  ## Schnitzer table excluding dental caries, written without a header row
  transformed$schnitzer_excluding_dental_caries %>%
    write.table("clipboard", sep = "\t", row.names = FALSE, col.names = FALSE)

}
```


# Checking outputs

## ICD chapter classification counts not adding up to 100%

```{r}
# TODO: the original table counted codes rather than episodes; and doublecounted some MAIN diag codes also

## For each ICD table, sum the counts parsed from `n_prop` per case, age group
## and sex, and show them next to the exported denominator.
if (interactive()) {

  walk(
    .x = tables[c("icd_main_and_other_diag", "icd_main_diag_only")],
    .f = function(icd_tbl) {
      icd_tbl %>%
        # suppressed cells ("<=10") carry no usable count
        # NOTE(review): the transformations earlier in this document read
        # `n_prop_episodes` from these tables - confirm `n_prop` is also present
        mutate(n = if_else(n_prop=="<=10", NA_real_,parse_number(str_extract(n_prop, pattern="^\\d+")))) %>%
        group_by(case, age_group, sex) %>%
        summarise(sum_n=sum(n,na.rm = TRUE),denom=unique(denominator), .groups = "drop") %>%
        # print explicitly: inside a braced block only the last value is shown
        # automatically, so the first table's result used to be discarded
        print()
    }
  )

}
```


# Recalculating counts across age/gender

# Mothers' adversity tables

```{r}
## Mothers' adversity exports; the last row of each sheet is dropped, as for
## the other exported tables
tables$mothers_adversity_mvr <- read_excel(path = "../Safe Haven Exports/2020-04-07/Mothers_health/Freq_table_indivi_whose_mothers_had_MVR_codes_coarse.xlsx", sheet = 1) %>% slice(-nrow(.))

tables$mothers_adversity_ccs <- read_excel(path = "../Safe Haven Exports/2020-04-07/Mothers_health/Freq_table_indiv_whose_mothers_had_mental_health_codes.xlsx", sheet = 1) %>% slice(-nrow(.))

## MVR: SMR01 rows only, reshaped so that every sex x case combination is a column
tables_long$mothers_adversity_mvr <-
  tables$mothers_adversity_mvr %>%
  filter(dataset=="SMR01") %>% pivot_longer(cols=c(case,control), names_to="case", values_to = "n_prop") %>%
  ## Note: we can't recalculate using the smaller denominator because we don't know if the individuals with maternal adversity were also the ones with hospital records!
  # left_join(denominators$individuals_with_any_records_prior_to_death %>% filter(has_no_records_prior_to_death=="No"), by=c("case","sex")) %>%
  # mutate(
  #   n = str_extract(n_prop, "^(\\d+)\\s"),
  #   n = as.numeric(n),
  #   n_prop2 = num_and_prop(numerator = n, denominator = denominator)
  #   ) %>%
  pivot_wider(names_from = c(sex, case), values_from=n_prop) %>%
  select(-dataset)

## CCS: "any position" rows only; counts are parsed from the leading digits of
## `n_prop`, summed over timespans, and N (%) is recomputed against the
## lifetime denominators.
tables_long$mothers_adversity_ccs <-
  tables$mothers_adversity_ccs %>%
  filter(position=="any position") %>%
  pivot_longer(cols=c(case,control), names_to="case", values_to = "n_prop") %>%
    mutate(
    n = str_extract(n_prop, "^(\\d+)\\s"),
    n = as.numeric(n),
    ) %>%
  (function(data_tbl) {
    # `.groups` (plural) is the summarise() argument; the former `.group`
    # created a literal ".group" column and left the result grouped
    bind_rows(
      data_tbl %>% filter(timespan %in% c("birth","lifetime")) %>% group_by(sex,case) %>% summarise(n=sum(n,na.rm=TRUE), timespan="lifetime", .groups="drop"),
      data_tbl %>% filter(timespan %in% c("birth","after birth")) %>% group_by(sex,case) %>% summarise(n=sum(n,na.rm=TRUE), timespan="birth or after", .groups="drop")
    )
  }) %>%
  left_join(denominators$individuals_lifetime, by=c("case","sex")) %>%
  # num_and_prop() has no default for `denominator`, so it must be passed explicitly
  mutate(n_prop2 = num_and_prop(numerator = n, denominator = denominator)) %>%  # calling it n_prop2 so it can be checked against previous n_prop
  # NOTE(review): `n` and `denominator` are still present here and therefore act
  # as identifying columns in pivot_wider(), so rows are not collapsed to one
  # per timespan - confirm whether they should be dropped first
  pivot_wider(names_from = c(sex, case), values_from=n_prop2)

  
```


# Obsolete approaches


### Recalculated age group tables transformed


```{r}
# NOTE: this is now dealt with using newer data exported from SH


# tables_long$mvr_recalculated <-
#   tables_long$mvr %>%
#   mutate(
#     n = str_replace(n_prop, pattern="<=10", replacement=NA_character_),
#     n = str_replace(n, pattern=" \\(.*\\)", replacement = ""), 
#     n = as.numeric(n)
#     ) %>%
#   group_age_groups_0_to_10() %>%
#   group_by(
#     case, sex, age_group, Type
#   ) %>%
#   summarise(
#     n = sum(n), .groups="drop"
#   ) %>%
#   filter(age_group!="18+") %>%
#   left_join(denominators$individuals_with_any_records_prior_to_death %>% filter(has_no_records_prior_to_death=="No") %>% select(-has_no_records_prior_to_death), by = c("case","sex")) %>%
#   mutate(n_prop = num_and_prop(numerator = n, denominator = denominator))
# 
# tables_long$schnitzer_recalculated <-
#   tables_long$schnitzer %>%
#   mutate(
#     n = str_replace(n_prop, pattern="<=10", replacement=NA_character_),
#     n = str_replace(n, pattern=" \\(.*\\)", replacement = ""), 
#     n = as.numeric(n)
#     ) %>%
#   group_age_groups_0_to_10() %>%
#   group_by(
#     case, sex, age_group, Type
#   ) %>%
#   summarise(
#     n = sum(n), .groups="drop"
#   ) %>%
#   filter(age_group==">=0, <10") %>%
#   left_join(denominators$individuals_with_any_records_prior_to_death %>% filter(has_no_records_prior_to_death=="No") %>% select(-has_no_records_prior_to_death), by = c("case","sex")) %>%
#   group_by(case,age_group,Type) %>%  # the following infers female counts from both & male where female is NA, however, ideally we would ahve worked this out a a previous stage
#   mutate(
#     n_both = n[sex=="both"], 
#     n_male = n[sex=="male"],
#     n = if_else(!is.na(n_both) & !is.na(n_male) & is.na(n) & sex=="female", n_both-n_male, n)
#   ) %>%
#   mutate(n_prop = num_and_prop(numerator = n, denominator = denominator))
# 
# tables_long$mh_main_diag_only_recalculated <-
#   tables_long$mh_main_diag_only %>%
#   select(-denominator) %>%
#   mutate(
#     n = str_replace(n_prop, pattern="<=10", replacement=NA_character_),
#     n = str_replace(n, pattern=" \\(.*\\)", replacement = ""), 
#     n = as.numeric(n)
#     ) %>%
#   group_age_groups_0_to_10() %>%
#   group_by(
#     case, sex, age_group
#   ) %>%
#   summarise(
#     n = sum(n), .groups="drop"
#   ) %>%
#   filter(age_group!="18+") %>%
#   left_join(denominators$individuals_with_any_records_prior_to_death %>% filter(has_no_records_prior_to_death=="No") %>% select(-has_no_records_prior_to_death), by = c("case","sex")) %>%
#   mutate(n_prop = num_and_prop(numerator = n, denominator = denominator))

```

```{r}
# Note: this is now dealt with by having exported new data

# transformed_recalculated <- list()
# 
# transformed_recalculated$mvr_all_types <-
#   tables_long$mvr_recalculated %>%
#   filter(Type == "All types") %>%  # only show all types combined
#   filter(age_group != "<18") %>%  # only show separate age groups, not combined
#   select(-Type,-n,-denominator) %>%
#   arrange(case, sex, age_group) %>%
#   pivot_wider(names_from = c("case","sex"), values_from = "n_prop") %>%
#   mutate(indicator = "MVR") %>%
#   relocate(indicator)
# 
# transformed_recalculated$schnitzer_all_types <-
#   tables_long$schnitzer_recalculated %>%
#   filter(Type == "All types") %>%  # only show all types combined
#   filter(age_group %in% c(">=0, <10")) %>%  # Schnitzer only defined for 0-10
#   select(-Type,-n,-denominator) %>%
#   arrange(case, sex, age_group) %>%
#   pivot_wider(names_from = c("case","sex"), values_from = "n_prop") %>%
#   mutate(indicator = "Schnitzer") %>%
#   relocate(indicator)
# 
# transformed_recalculated$mh_main_diag <-
#   tables_long$mh_main_diag_only_recalculated %>%
#   select(-n,-denominator) %>%
#   filter(age_group != "<18") %>%  # only show separate age groups, not combined
#   arrange(case, sex, age_group) %>%
#   pivot_wider(names_from = c("case","sex"), values_from = "n_prop") %>%
#   mutate(indicator = "MH (main diagnosis)") %>%
#   relocate(indicator)
# 
# table_mvr_schnitzer_mh <-
#   bind_rows(
#     transformed_recalculated[c("mvr_all_types", "schnitzer_all_types", "mother_died", "mh_main_diag")]
#   )
```