In [None]:
library(ggplot2)
library(ggalluvial)
library(ggforce)  # provides facet_zoom(), which plot() calls
library(ggsci)

In [None]:
filter_data <- function(df) {
    # Filter a frequency table down to the rows that should be plotted.
    #
    # Args:
    #   df: data.frame with 'AKI' and 'PostCKD' label columns and a numeric
    #       'Freq' column holding the patient count of each row.
    #
    # Returns:
    #   df without the rows whose 'PostCKD' or 'AKI' is 'No Information'
    #   (or NA), and without the rows whose 'Freq' is 5 or less.
    #
    # Side effects:
    #   Prints one line per filtering step with the number of patients
    #   removed, then one line with the number of patients left.

    # Keep the rows flagged TRUE and report how many patients were dropped.
    # NA flags are treated as "drop": indexing rows with NA would otherwise
    # insert all-NA rows into the result.
    drop_rows <- function(data, keep, reason) {
        keep <- !is.na(keep) & keep
        removed <- sum(data[!keep, 'Freq'], na.rm = TRUE)
        cat(sprintf('Removing %s patients %s\n', removed, reason))
        data[keep, , drop = FALSE]
    }

    # remove no information
    df <- drop_rows(df, df[['PostCKD']] != 'No Information', 'with no post CKD info')
    df <- drop_rows(df, df[['AKI']] != 'No Information', 'with no AKI info')

    # remove small cells with only 5 patients or less
    df <- drop_rows(df, df[['Freq']] > 5, 'in small cells')

    cat(sprintf('%s patients left\n', sum(df[['Freq']], na.rm = TRUE)))
    df
}

plot <- function(df) {
    # Build the three-axis alluvial diagram (PreCKD -> AKI -> PostCKD).
    #
    # Args:
    #   df: data.frame with 'PreCKD', 'AKI', 'PostCKD' and 'Prop' columns;
    #       'Prop' is mapped to the y aesthetic.
    #
    # Returns:
    #   The assembled ggplot object; nothing is drawn or saved here.

    # repr.plot.* are session options, set on purpose without restoring:
    # the figure is rendered by the caller after this function returns.
    options(repr.plot.width=8, repr.plot.height=8)

    axis_titles <- c("Pre-treatment CKD Grade", "Worst AKI Stage", "Post-treatment CKD Grade")
    # Four JAMA palette colours, reversed.
    flow_colours <- rev(pal_jama()(4))

    # Flows are coloured by the PostCKD value; strata are unfilled
    # boxes labelled with their stratum value.
    flows <- ggplot(df, aes(y = Prop, axis1 = PreCKD, axis2 = AKI, axis3 = PostCKD)) +
        geom_alluvium(aes(fill = PostCKD)) +
        geom_stratum(fill = NA) +
        geom_label(stat = "stratum", aes(label = after_stat(stratum)))

    scaled <- flows +
        scale_fill_manual(values = flow_colours) +
        scale_x_discrete(limits = axis_titles)

    themed <- scaled +
        theme_classic() +
        theme(legend.position = 'none', axis.text = element_text(size = 10)) +
        labs(y = 'Percentage of Patients')

    # Add a zoomed panel covering y values 0 to 30.
    themed + facet_zoom(ylim = c(0, 30))
}

In [None]:
# Load the raw 'dev' counts, filter them, convert the counts to
# percentages, then build, save and display the alluvial plot.
df <- read.csv('data/ckd_aki_alluvial_raw_dev.csv')
colnames(df) <- c('PreCKD', 'AKI', 'PostCKD', 'Freq')
# Check whether the table is in alluvia form; the result is shown as cell output.
is_alluvia_form(df, silent = TRUE)
df <- filter_data(df)
# Each row's share of the remaining patients, in percent.
df[['Prop']] <- df[['Freq']] / sum(df[['Freq']]) * 100
image <- plot(df)
# Make sure the output directory exists so the save cannot fail on a
# missing folder; this is a no-op when it is already there.
dir.create('data/alluvial', recursive = TRUE, showWarnings = FALSE)
# 'filename' is spelled out instead of relying on partial matching of 'file'.
ggsave(filename = 'data/alluvial/alluvial_dev.png', plot = image, width = 8, height = 8, dpi = 300)
image

In [None]:
# Load the raw 'test' counts, filter them, convert the counts to
# percentages, then build and save the alluvial plot.
df <- read.csv('data/ckd_aki_alluvial_raw_test.csv')
colnames(df) <- c('PreCKD', 'AKI', 'PostCKD', 'Freq')
df <- filter_data(df)
# Each row's share of the remaining patients, in percent.
df[['Prop']] <- df[['Freq']] / sum(df[['Freq']]) * 100
image <- plot(df)
# Make sure the output directory exists so the save cannot fail on a
# missing folder; this is a no-op when it is already there.
dir.create('data/alluvial', recursive = TRUE, showWarnings = FALSE)
# 'filename' is spelled out instead of relying on partial matching of 'file'.
ggsave(filename = 'data/alluvial/alluvial_test.png', plot = image, width = 8, height = 8, dpi = 300)