# Run analysis of validation data

In [None]:
%load_ext rpy2.ipython

### Import packages

In [None]:
%%R
library(reshape2)
library(cluster) 
library(factoextra)
library(NbClust)
library(rjson)

library(gridExtra)
library(grid)
library(ggplot2)
library(ggfortify)

### Let's set the experiment and simulation working directories

In [None]:
%%R
# Directory whose immediate sub-directories are listed as experiment replicas
# by the projection cells further down.
exp_wd <- "./Experiment_out/test2"
# Directory of the simulation run; patterns.json and the per-pattern
# peak_0/data.csv files are read from here.
sim_wd <- "./Simulation_result/test2/Experiment_test2_1"
# Eight fill colours used by the plots. Entry 1 (light grey) is the colour
# picked for a value of 0, which the heat-map legend labels "None".
cbbPalette <- c("#EEEEEE", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

### Inspect a Peak

#### Define Plotting Functions

In [None]:
%%R
# Build the association heat map for one set of modification patterns.
#
# Args:
#   data:        list of patterns. Each pattern is turned into a column vector
#                and its outer product is added to a 10 x 10 accumulator, so
#                every pattern must have length 10 (presumably a 0/1 indicator
#                per modification -- TODO confirm against patterns.json).
#   col_palette: vector of fill colours; entry k + 1 is used for value k.
#
# Returns:
#   A list with two elements:
#     mat     - ggplot tile plot of the accumulated association matrix,
#     colours - 10 x 1 matrix holding, per modification, the sum of the
#               (1-based) indices of the patterns that contain it.
plot_pattern <- function(data, col_palette) {
    acum <- matrix(0, nrow = 10, ncol = 10)
    colour_vec <- matrix(0, nrow = 10, ncol = 1)
    colour_idx <- 1
    for (pattern in data) {
        p <- matrix(pattern)
        # The k-th pattern contributes its outer product weighted by k, so each
        # pattern ends up with its own fill level in the heat map.
        acum <- acum + (p %*% t(p)) * colour_idx
        colour_vec <- colour_vec + p * colour_idx
        colour_idx <- colour_idx + 1
    }

    # melt() on a matrix yields the columns Var1 (row), Var2 (column) and value.
    # Rows are mirrored with (11 - Var1) before being mapped to the x axis.
    mat_plot <- ggplot(data = melt(acum), aes((11 - Var1), Var2, fill = factor(value))) +
        geom_tile() +
        scale_x_continuous("modification", breaks = c(0:10) + 0.5) +
        scale_y_continuous("modification", breaks = c(0:10) + 0.5) +
        scale_fill_manual(values = col_palette, labels = c("None", c(1:10))) +
        guides(fill = guide_legend(title = "Association", title.position = "left")) +
        coord_fixed() +
        theme(
            panel.grid.major = element_line(colour = "black"),
            legend.title = element_text(angle = 90, hjust = 0.5),
            legend.key.height = grid::unit(5, "cm"),
            legend.key.width = grid::unit(1.75, "cm"),
            # legend.margin expects a four-sided margin() object, not a bare unit.
            legend.margin = margin(0.25, 0.25, 0.25, 0.25, unit = "cm"),
            legend.box = "vertical",
            # margin() takes one value per side (t, r, b, l), not a single vector.
            legend.box.margin = margin(10, 10, 10, 10),
            panel.background = element_rect(fill = NA),
            # Draw the panel (and its grid lines) on top of the tiles.
            panel.ontop = TRUE,
            axis.text.x = element_blank(),
            axis.text.y = element_blank(),
            axis.ticks = element_line(size = 0.4),
            panel.border = element_blank()
        )

    list(mat = mat_plot, colours = colour_vec)
}

# Extract the legend grob from a ggplot object.
#
# Args:
#   a.gplot: a ggplot object that draws a legend.
#
# Returns:
#   The first grob of the rendered plot whose name is "guide-box".
#
# Raises:
#   An error if the rendered plot contains no grob named "guide-box"
#   (e.g. the legend has been switched off).
g_legend <- function(a.gplot) {
    tmp <- ggplot_gtable(ggplot_build(a.gplot))
    # vapply keeps the result a character vector even if a grob has no name.
    grob_names <- vapply(
        tmp$grobs,
        function(x) if (is.null(x$name)) "" else as.character(x$name)[1],
        character(1)
    )
    leg <- which(grob_names == "guide-box")
    if (length(leg) == 0) {
        stop("g_legend(): no 'guide-box' grob found; does the plot have a legend?",
             call. = FALSE)
    }
    # Take the first match: `[[` with several indices would index recursively.
    tmp$grobs[[leg[1]]]
}
                        
# Write the association figure for one pattern to
# "association_pattern_<patt_no>.pdf" in the current working directory.
#
# The page has three columns: the heat-map legend, the association heat map
# (see plot_pattern) and ten stacked position histograms, one per
# modification 0..9, read from <sim_wd>/pattern_<patt_no>/peak_0/data.csv.
#
# Args:
#   patt_no:  zero-based pattern number; selects patterns[[patt_no + 1]] and
#             the "pattern_<patt_no>" directory below the global sim_wd.
#   patterns: list of pattern sets, one entry per pattern number.
#   palette:  vector of fill colours used for the heat map and the histograms.
#
# Returns:
#   The value of dev.off() after the PDF device has been closed.
plot_association <- function(patt_no, patterns, palette) {
    patt_name <- paste0("pattern_", patt_no)
    peak_file <- file.path(sim_wd, patt_name, "peak_0", "data.csv")
    if (!file.exists(peak_file)) {
        stop("plot_association(): cannot find ", peak_file, call. = FALSE)
    }
    peak_data <- read.csv(peak_file)

    # Column 1: legend, column 2: heat map, column 3: histograms (grobs 3..12).
    layout_mat <- matrix(0, nrow = 10, ncol = 3)
    layout_mat[, 1] <- rep(1, 10)
    layout_mat[, 2] <- rep(2, 10)
    layout_mat[, 3] <- c(3:12)

    patt_plt <- plot_pattern(patterns[[patt_no + 1]], palette)
    colours <- patt_plt$colours
    mat_plt <- patt_plt$mat

    plts <- list()
    plts[[1]] <- g_legend(mat_plt)
    plts[[2]] <- mat_plt + theme(legend.position = "none")

    for (i in 0:9) {
        # colours holds 0 for "no pattern", hence the + 1 to reach palette[1].
        plts[[i + 3]] <- ggplot(subset(peak_data, modification == i), aes(x = position)) +
            geom_histogram(colour = "white", fill = palette[colours[i + 1] + 1], bins = 150) +
            xlim(c(0, 2200)) +
            ylim(c(0, 20))
    }

    pdf(paste0("association_pattern_", patt_no, ".pdf"), width = 31, height = 15)
    # Make sure the device is closed even if drawing fails half-way through.
    device_open <- TRUE
    on.exit(if (device_open) dev.off(), add = TRUE)

    grid.arrange(grobs = plts,
                 layout_matrix = layout_mat, widths = c(2, 15, 14))
    grid.rect(gp = gpar(fill = NA))

    device_open <- FALSE
    dev.off()
}

#### Plot Patterns for Peak

In [None]:
%%R
# Load the pattern definitions of the simulation run and write one
# association figure per pattern number 0..3.
patterns <- fromJSON(file = paste0(sim_wd, "/", "patterns.json"))
for (i in 0:3) {
    plot_association(patt_no = i, patterns = patterns, palette = cbbPalette)
}

### Auxiliary Functions

In [None]:
%%R
# Expand a vector of pairwise values into a symmetric 10 x 10 matrix.
#
# Args:
#   x: values for the 45 off-diagonal pairs; they fill the upper triangle
#      row by row (x[1] -> [1, 2], x[2] -> [1, 3], ..., x[45] -> [9, 10]).
#
# Returns:
#   A symmetric 10 x 10 matrix with a zero diagonal.
make_mat <- function(x) {
    half <- matrix(0, nrow = 10, ncol = 10)
    # Column-major fill of the strict lower triangle; adding the transpose
    # mirrors the values into the upper triangle.
    half[lower.tri(half)] <- x
    half + t(half)
}

# Leading eigenvector of the symmetric normalised graph Laplacian built from a
# vector of pairwise values.
#
# The 45 values in x are scaled to unit Euclidean norm and placed in the
# off-diagonal of a symmetric 10 x 10 weight matrix W. With D the diagonal
# matrix of column sums of W, the function forms
#   L = D^(-1/2) (D - W) D^(-1/2)
# and returns the first eigenvector reported by eigen(), i.e. the one that
# belongs to the largest eigenvalue (eigen() sorts eigenvalues decreasingly).
#
# Args:
#   x: numeric vector with the 45 pairwise values of a 10-node graph.
#
# Returns:
#   Numeric vector of length 10.
#
# NOTE(review): despite the plural name only a single eigenvector is returned.
# The original also accumulated eigenvalue-weighted eigenvectors in a local
# that was never used; that dead code has been dropped. The sign of an
# eigenvector is arbitrary, which downstream comparisons should keep in mind.
get_evecs <- function(x) {
    w <- matrix(0, 10, 10)
    w[lower.tri(w, diag = FALSE)] <- x / sqrt(sum(x^2))
    w <- t(w)
    w <- w + t(w)

    deg <- colSums(w)
    inv_sqrt_deg <- diag(sqrt(1 / deg))
    L <- inv_sqrt_deg %*% (diag(deg) - w) %*% inv_sqrt_deg

    eigen(L)$vectors[, 1]
}


### Plot Eigen-Projections

In [None]:
%%R
# For every replica directory below exp_wd: collect the MMD distance tables of
# all its patterns, map each row (columns 2..46) to a Laplacian eigenvector
# with get_evecs(), run a PCA on those eigenvectors and plot the projection,
# with points filled by pattern.
replicas <- list.dirs(path = exp_wd, full.names = TRUE, recursive = FALSE)

# NOTE(review): par() configures base graphics only; the ggplot figures below
# are drawn with grid, so these settings presumably have no visible effect.
par(mfrow=c(3,2)) 
par(mar=c(1,1,1,1))

for (replica in replicas) {
    pattern_dirs <- list.dirs(path = replica, full.names = TRUE, recursive = FALSE)
    if (length(pattern_dirs) == 0) {
        warning("No pattern directories found in ", replica, "; skipping", call. = FALSE)
        next
    }

    # Read each table once and bind them in a single call instead of growing a
    # data frame inside the loop.
    dist_tables <- lapply(pattern_dirs, function(pattern_dir) {
        dists <- read.csv(file = paste0(pattern_dir, '/', 'MMD_dists.csv'), header = TRUE, sep = ",")
        dists$pattern <- basename(pattern_dir)
        dists
    })
    exp_df <- do.call(rbind, dist_tables)

    # One eigenvector per row; apply() returns them as columns, hence t() below.
    evecs <- apply(as.matrix(exp_df[, 2:46]), 1, get_evecs)
    pca <- prcomp(t(evecs), scale. = FALSE)

    # Kept under its own name so the `patterns` list loaded from patterns.json
    # is not overwritten.
    pattern_labels <- factor(exp_df$pattern)
    plt <- autoplot(pca) +
        geom_point(shape = 23, aes(fill = pattern_labels), color = "darkred", size = 3) +
        labs(fill = "Pattern") +
        ggtitle(basename(replica))
    plot(plt)
}

### Plot PCA Projections

In [None]:
%%R
# For every replica directory below exp_wd: collect the MMD distance tables of
# all its patterns, run a PCA directly on the distance columns (2..46) and
# plot the projection, with points filled by pattern.
replicas <- list.dirs(path = exp_wd, full.names = TRUE, recursive = FALSE)

# NOTE(review): par() configures base graphics only; the ggplot figures below
# are drawn with grid, so these settings presumably have no visible effect.
par(mfrow=c(3,2)) 
par(mar=c(1,1,1,1))

for (replica in replicas) {
    pattern_dirs <- list.dirs(path = replica, full.names = TRUE, recursive = FALSE)
    if (length(pattern_dirs) == 0) {
        warning("No pattern directories found in ", replica, "; skipping", call. = FALSE)
        next
    }

    # Read each table once and bind them in a single call instead of growing a
    # data frame inside the loop.
    dist_tables <- lapply(pattern_dirs, function(pattern_dir) {
        dists <- read.csv(file = paste0(pattern_dir, '/', 'MMD_dists.csv'), header = TRUE, sep = ",")
        dists$pattern <- basename(pattern_dir)
        dists
    })
    exp_df <- do.call(rbind, dist_tables)

    pca <- prcomp(exp_df[, 2:46], scale. = FALSE)

    # Kept under its own name so the `patterns` list loaded from patterns.json
    # is not overwritten.
    pattern_labels <- factor(exp_df$pattern)
    plt <- autoplot(pca) +
        geom_point(shape = 23, aes(fill = pattern_labels), color = "darkred", size = 3) +
        labs(fill = "Pattern") +
        ggtitle(basename(replica))
    plot(plt)
}