# Preprocess

## [ps] rename feature and sample coverage tables

In [{}]:
# Re-key every omic's feature table: join it to that omic's sample metadata
# on "Sample", overwrite Sample with Internal_BioSample_ID, and keep only the
# FlaCluster_Rep, Sample and Abundance columns. The result is a list named by
# AVAILABLE_OMICS.
flapro = setNames(
    lapply(AVAILABLE_OMICS, function(omic) {
        joined = inner_join(flapro[[omic]], meta_samples[[omic]], by = "Sample")
        rekeyed = mutate(joined, Sample = Internal_BioSample_ID)
        select(rekeyed, FlaCluster_Rep, Sample, Abundance)
    }),
    AVAILABLE_OMICS
)

In [{}]:
# Re-key every omic's sample coverage table: join it to that omic's sample
# metadata on "Sample", overwrite Sample with Internal_BioSample_ID, and keep
# only the Sample and Reads1 columns. The result is a list named by
# AVAILABLE_OMICS.
sample_coverage = setNames(
    lapply(AVAILABLE_OMICS, function(omic) {
        joined = inner_join(sample_coverage[[omic]], meta_samples[[omic]], by = "Sample")
        rekeyed = mutate(joined, Sample = Internal_BioSample_ID)
        select(rekeyed, Sample, Reads1)
    }),
    AVAILABLE_OMICS
)

## [ps] metadata - samples

In [{}]:
# Standardise each omic's sample metadata: use Internal_BioSample_ID as the
# Sample key and keep only Sample, Participant_ID, Group, week_num and
# visit_num. The per-omic tables stay separate in this step (list named by
# AVAILABLE_OMICS).
meta_samples = setNames(
    lapply(AVAILABLE_OMICS, function(omic) {
        rekeyed = mutate(meta_samples[[omic]], Sample = Internal_BioSample_ID)
        select(rekeyed, Sample, Participant_ID, Group, week_num, visit_num)
    }),
    AVAILABLE_OMICS
)
meta_samples

In [{}]:
# Stack the per-omic metadata tables into a single table, then drop rows
# that are exact duplicates of one another.
meta_samples = distinct(do.call(rbind, meta_samples[AVAILABLE_OMICS]))
meta_samples

In [{}]:
# Turn Participant_ID into a factor whose levels follow the sort order of
# Group, so that participants of the same Group sit next to each other on a
# plot axis.
meta_samples = mutate(
    meta_samples,
    Participant_ID = factor(
        Participant_ID,
        levels = unique(Participant_ID[order(Group)])
    )
)
meta_samples

In [{}]:
# Sampling timeline: week_num of every sample, one x position per
# Participant_ID (ordered by the Participant_ID factor levels), coloured by Group.
# p.dims is a helper defined elsewhere; presumably it sets the plot width and
# height. TODO confirm.
p.dims(15, 3)
ggplot(meta_samples, aes(x = Participant_ID, y = week_num, color = Group)) +
    geom_point() +
    geom_line() +
    scale_color_futurama() +
    theme_minimal() +
    # rotate the participant labels so they do not overlap
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

In [{}]:
# Keep only the Sample, Participant_ID, Group and week_num columns.
meta_samples = select(meta_samples, Sample, Participant_ID, Group, week_num)
meta_samples

In [{}]:
# Attach Age and Sex from hmp_metadata_original, matched on Participant_ID.
# This is an inner join, so samples whose Participant_ID has no match in the
# lookup are dropped.
# NOTE(review): a participant with more than one distinct (Age, Sex) pair in
# hmp_metadata_original would have its samples duplicated here. Confirm that
# the lookup has one row per participant.
meta_samples = inner_join(
    meta_samples,
    distinct(select(hmp_metadata_original, Participant_ID, Age, Sex)),
    by = "Participant_ID"
)

In [{}]:
# Relabel "nonIBD" as "HC" in Group, then make Group a factor with levels
# ordered HC, UC, CD. Any value outside those three levels becomes NA.
meta_samples = mutate(
    meta_samples,
    Group = factor(
        str_replace(Group, "nonIBD", "HC"),
        levels = c("HC", "UC", "CD")
    )
)
meta_samples

In [{}]:
# Report how many samples lack a recorded Sex, then keep only those that have one
nrow(filter(meta_samples, is.na(Sex)))
meta_samples = filter(meta_samples, !is.na(Sex))

# Same for Age: report the number missing, then drop them
nrow(filter(meta_samples, is.na(Age)))
meta_samples = filter(meta_samples, !is.na(Age))

# Number of samples remaining after both filters
nrow(meta_samples)

In [{}]:
# Number of distinct participants
nrow(distinct(meta_samples, Participant_ID))
# Participants per Group: each (Participant_ID, Group) pair is counted once
table(select(distinct(meta_samples, Participant_ID, Group), Group))

### [ps] Create derived factor(s)

In [{}]:
# DiseaseScore is a quantitative approximation of disease severity:
# HC = 0, UC = 1, CD = 2 (UC is considered normally less severe than CD).
# A missing Group, or any Group other than those three, yields NA.
# case_when() replaces the nested ifelse() chain: it reads as a flat lookup
# and always returns a double column, even when every Group is NA or the
# table is empty (nested ifelse() returns logical in those cases).
meta_samples = meta_samples %>%
    mutate(DiseaseScore = case_when(
        Group == "HC" ~ 0,
        Group == "UC" ~ 1,
        Group == "CD" ~ 2,
        TRUE ~ NA_real_
    ))
meta_samples