# 04_describe

## Description of data

Loading libraries

In [None]:
#| message: false

library(tidyverse)
library(knitr)
library(gridExtra)
source("99_proj_func.R")

Loading data

In [None]:
#| message: false

# Read the gzip-compressed, tab-separated data file into data_clean_aug
data_clean_aug <- read_tsv(file = "data/03_dat_aug.tsv.gz")

Creating the descriptive table

In [None]:
#| message: false
#| warning: false

# Build the descriptive table: one row per disease while it is assembled,
# transposed at the end so that each disease becomes a column.
# Counted variables and their percentages are generated using the project
# helpers N_into_table() and Perc(), respectively.

# Overall number of patients for each disease
Tab_desc <- data_clean_aug |>
  group_by(Disease) |>
  summarise(N = n())

# Recipient gender
Tab_desc <- N_into_table(data_clean_aug, "Recipientgender", Tab_desc)
Tab_desc <- Perc(Tab_desc, Female, "Female patients")
Tab_desc <- Perc(Tab_desc, Male, "Male patients")

# Median donor age and recipient age (rounded to whole numbers)
age <- data_clean_aug |>
  group_by(Disease) |>
  summarise("Median donor age" = round(median(Donorage), digits = 0),
            "Median recipient age" = round(median(Recipientage), digits = 0))
Tab_desc <- full_join(Tab_desc, age, join_by(Disease))

# Antigen mismatches (including patients without this information)
Tab_desc <- N_into_table(data_clean_aug, "Antigen", Tab_desc)
Tab_desc <- Perc(Tab_desc, `0 MM`, "0 antigen mismatches")
Tab_desc <- Perc(Tab_desc, `1 MM`, "1 antigen mismatch")
Tab_desc <- Perc(Tab_desc, `2 MM`, "2 antigen mismatches")
Tab_desc <- Perc(Tab_desc, `3 MM`, "3 antigen mismatches")
Tab_desc <- Perc(Tab_desc, `NA`, "No information on antigen mismatch")

# Relapses
Tab_desc <- N_into_table(data_clean_aug, "Relapse", Tab_desc)
Tab_desc <- Perc(Tab_desc, Yes, "Relapse")
Tab_desc <- Perc(Tab_desc, No, "No relapse")

# Acute graft vs host (column IIIV)
Tab_desc <- N_into_table(data_clean_aug, "IIIV", Tab_desc)
Tab_desc <- Perc(Tab_desc, Yes, "Acute graft vs host")
Tab_desc <- Perc(Tab_desc, No, "No acute graft vs host")

# TODO: add information on time until developing acute graft vs host
# (column timetoaGvHD_III_IV); not implemented yet.

# Survival status
Tab_desc <- N_into_table(data_clean_aug, "survival_status", Tab_desc)
Tab_desc <- Perc(Tab_desc, Alive, "Alive patients")
Tab_desc <- Perc(Tab_desc, Dead, "Deceased patients")

# Median follow-up time for alive patients.
# Diseases without any alive patient get NA through the full join.
FU_time <- data_clean_aug |>
  group_by(Disease) |>
  filter(survival_status == "Alive") |>
  summarise("Median follow-up time for alive patients" = median(survival_time))
Tab_desc <- full_join(Tab_desc, FU_time, join_by(Disease))

# Median survival time for deceased patients
surv_time <- data_clean_aug |>
  group_by(Disease) |>
  filter(survival_status == "Dead") |>
  summarise("Median survival time for deceased patients" = median(survival_time))
Tab_desc <- full_join(Tab_desc, surv_time, join_by(Disease))

# Replace NAs for counted variables with 0 (0.0 %).
# NOTE(review): the column list is hard-coded; extend it if another counted
# variable turns out to be missing for some disease.
Tab_desc <- Tab_desc |>
  replace_na(list("1 antigen mismatch" = "0 (0.0 %)",
                  "3 antigen mismatches" = "0 (0.0 %)",
                  "No information on antigen mismatch" = "0 (0.0 %)",
                  "Alive patients" = "0 (0.0 %)"))

# Transpose table so that the variables become rows.
# t() coerces the mixed-type tibble to a character matrix, and numeric columns
# are passed through format(), which left-pads values to a common width
# (a one-digit count next to three-digit counts gets leading blanks).
# Strip that padding so the exported cells contain only the values;
# NA entries stay NA.
Tab_desc_transp <- t(Tab_desc)
Tab_desc_transp[] <- trimws(Tab_desc_transp)

# Set names of transposed table: variables as rows, diseases as columns
rownames(Tab_desc_transp) <- colnames(Tab_desc)
colnames(Tab_desc_transp) <- Tab_desc$Disease

# Remove disease row, which is now used as column names in the table.
# drop = FALSE keeps the matrix shape even with a single disease column.
Tab_desc_transp <- Tab_desc_transp[rownames(Tab_desc_transp) != "Disease", ,
                                   drop = FALSE]

# Export table (comma-separated, despite the .txt extension)
write.csv(Tab_desc_transp, file = "Descriptive_table.txt")

Table 1. Description of the patients included in the data

A large number of ALL, AML, chronic and non-malignant patients were
included in the data. Only 9 lymphoma patients were included, and
findings on lymphoma patients should be interpreted carefully. A
majority of the included patients were male across all the investigated
diseases. Neither median donor age nor median recipient age varied to a
great extent among the diseases, but it should be noted that the median
recipient ages included both pre-adolescent ages (ALL and
non-malignant patients) and adolescent ages (AML and
lymphoma patients). Zero antigen mismatches with the donor was clearly
the most common for AML, lymphoma, and non-malignant patients, while for
ALL and chronic patients 0 and 2 antigen mismatches were found in close
to equal amounts. Only a minority of patients (\<18 %) experienced
relapse, except in the lymphoma group, in which almost half of the
patients experienced relapse. Between half and two-thirds (46.9 - 64.4
%) of the patients experienced acute graft vs host disease. For all
diseases, a majority of the patients survived their disease (54.5 - 62.5
%), except for lymphoma patients, all of whom died.
Median follow-up time for surviving patients was between 1301 and 1867
days across the diseases, excluding lymphoma, which did not have any surviving
patients. Median survival time for deceased patients was shortest for lymphoma
patients (67 days) and longest for AML patients (274 days).

<br>

Generating table for presentation

In [None]:
# Rows of the descriptive table to keep for the presentation.
# The labels must match the row names of Tab_desc_transp exactly (i.e. the
# column names given to Tab_desc when it is built); %in% silently skips any
# label without a matching row.
rows_pres <- c("N", "Male patients", "Median recipient age", "Relapse",
               "Deceased patients",
               "Median survival time for deceased patients",
               "Median follow-up time for alive patients")

# Make a label mismatch visible instead of silently losing a row
rows_missing <- setdiff(rows_pres, rownames(Tab_desc_transp))
if (length(rows_missing) > 0) {
  warning("Rows not found in Tab_desc_transp: ",
          paste(rows_missing, collapse = ", "),
          call. = FALSE)
}

# Filter rows for the presentation table; drop = FALSE keeps a matrix even
# if only a single row matches
Tab_pres <- Tab_desc_transp[rownames(Tab_desc_transp) %in% rows_pres, ,
                            drop = FALSE]

# Export table
write.csv(Tab_pres, file = "Descriptive_table_present.txt")

<br>

Table 2. Part of Table 1 selected for the project presentation.