# 03_augment

### Load libraries

In [None]:
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

### Load and augment data

In [None]:
# Read the cleaned data set and replace the numeric codes of the categorical
# variables with human-readable labels, then drop the derived age columns.
# Any value not listed in a `case_when()` below (including NA) becomes NA,
# and every recoded column ends up as a character column.
data_clean_aug <- read_tsv(
  file = "data/02_dat_clean.tsv.gz",
  show_col_types = FALSE
) |>
  # NOTE(review): columns are picked by position, so this step depends on the
  # column order of 02_dat_clean.tsv.gz -- confirm that order is stable.
  mutate(across(c(1:2, 4:22, 24:28, 37), factor)) |>
  mutate(
    Recipientgender = case_when(
      Recipientgender == 0 ~ "Female",
      Recipientgender == 1 ~ "Male"
    ),
    # Label spelling corrected (was "Pheripheral blood"); downstream code that
    # matches on the old spelling must be updated accordingly.
    Stemcellsource = case_when(
      Stemcellsource == 0 ~ "Bone marrow",
      Stemcellsource == 1 ~ "Peripheral blood"
    ),
    IIIV = case_when(
      IIIV == 0 ~ "No",
      IIIV == 1 ~ "Yes"
    ),
    Gendermatch = case_when(
      Gendermatch == 0 ~ "Other",
      Gendermatch == 1 ~ "Female to Male"
    ),
    # ABO codes: 0, 1, -1 and 2 map to the labels "0", "A", "B" and "AB".
    DonorABO = case_when(
      DonorABO == 0 ~ "0",
      DonorABO == 1 ~ "A",
      DonorABO == -1 ~ "B",
      DonorABO == 2 ~ "AB"
    ),
    RecipientABO = case_when(
      RecipientABO == 0 ~ "0",
      RecipientABO == 1 ~ "A",
      RecipientABO == -1 ~ "B",
      RecipientABO == 2 ~ "AB"
    ),
    RecipientRh = case_when(
      RecipientRh == 0 ~ "-",
      RecipientRh == 1 ~ "+"
    ),
    ABOmatch = case_when(
      ABOmatch == 0 ~ "Matched",
      ABOmatch == 1 ~ "Mismatched"
    ),
    DonorCMV = case_when(
      DonorCMV == 0 ~ "Absent",
      DonorCMV == 1 ~ "Present"
    ),
    RecipientCMV = case_when(
      RecipientCMV == 0 ~ "Absent",
      RecipientCMV == 1 ~ "Present"
    ),
    # NOTE(review): the two labels differ in capitalisation ("Low Risk" vs
    # "High risk") -- confirm which form is intended before harmonising.
    Riskgroup = case_when(
      Riskgroup == 0 ~ "Low Risk",
      Riskgroup == 1 ~ "High risk"
    ),
    Txpostrelapse = case_when(
      Txpostrelapse == 0 ~ "No",
      Txpostrelapse == 1 ~ "Yes"
    ),
    Diseasegroup = case_when(
      Diseasegroup == 0 ~ "Nonmalignant",
      Diseasegroup == 1 ~ "Malignant"
    ),
    HLAmatch = case_when(
      HLAmatch == 0 ~ "10/10",
      HLAmatch == 1 ~ "9/10",
      HLAmatch == 2 ~ "8/10",
      HLAmatch == 3 ~ "7/10"
    ),
    HLAmismatch = case_when(
      HLAmismatch == 0 ~ "Matched",
      HLAmismatch == 1 ~ "Mismatched"
    ),
    # Antigen and Alel codes start at -1, which is labelled "0 MM".
    Antigen = case_when(
      Antigen == -1 ~ "0 MM",
      Antigen == 0 ~ "1 MM",
      Antigen == 1 ~ "2 MM",
      Antigen == 2 ~ "3 MM"
    ),
    Alel = case_when(
      Alel == -1 ~ "0 MM",
      Alel == 0 ~ "1 MM",
      Alel == 1 ~ "2 MM",
      Alel == 2 ~ "3 MM",
      Alel == 3 ~ "4 MM"
    ),
    # NOTE(review): code 6 has no label here and would become NA -- confirm
    # against the codebook that 6 does not occur in the data.
    HLAgrI = case_when(
      HLAgrI == 0 ~ "0 MM",
      HLAgrI == 1 ~ "1 MM (antigen)",
      HLAgrI == 2 ~ "1 MM (allele)",
      HLAgrI == 3 ~ "1 MM (DRB1-allele)",
      HLAgrI == 4 ~ "2 MM (2 allele / allele+antigen)",
      HLAgrI == 5 ~ "2 MM (2 allele + antigen)",
      HLAgrI == 7 ~ "Mismatched"
    ),
    Relapse = case_when(
      Relapse == 0 ~ "No",
      Relapse == 1 ~ "Yes"
    ),
    # NOTE(review): unlike the other Yes/No variables, 0 is labelled "Yes"
    # here -- confirm this reversed coding against the data set codebook.
    aGvHDIIIIV = case_when(
      aGvHDIIIIV == 0 ~ "Yes",
      aGvHDIIIIV == 1 ~ "No"
    ),
    extcGvHD = case_when(
      extcGvHD == 0 ~ "No",
      extcGvHD == 1 ~ "Yes"
    ),
    survival_status = case_when(
      survival_status == 0 ~ "Alive",
      survival_status == 1 ~ "Dead"
    )
  ) |>
  select(-c(Recipientage10, Recipientageint))
  

### Write augmented data to tsv
write_tsv(
  x = data_clean_aug,
  file = "data/03_dat_aug.tsv.gz"  # readr compresses output for a .gz path
)