# Decoder analysis

Before running this notebook, the `decoder.slurm` jobs need to be run and their output needs to be collated into a single CSV file.

In [None]:
# Attach a package given its name as a string, without conflict warnings or
# startup messages.
#
# pkg: package name as a character string (passed with character.only = TRUE).
# Returns whatever library() returns (invisibly).
#
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change (or break) these arguments.
import <- function(pkg) {
    library(pkg, warn.conflicts = FALSE, quietly = TRUE, character.only = TRUE)
}
import("repr")
import("stringr")
import("tidyr")
import("dplyr")
import("ggplot2")
import("lme4")
import("emmeans")

In [None]:
# Cap how many rows/columns of a table the notebook prints.
options(repr.matrix.max.cols = 15, repr.matrix.max.rows = 20)

# Theme fragment that removes the legend; add it to a plot with `+`.
no.legend <- theme(legend.position = "none")

# Shared theme tweaks: small (6-7 pt) text, thin axis lines and ticks,
# and facet strips drawn outside the axes without a background box.
my.theme <- theme(
    legend.text = element_text(size = 6),
    legend.title = element_text(size = 6),
    plot.title = element_text(size = 7, hjust = 0.5),
    axis.line = element_line(linewidth = 0.25),
    axis.ticks = element_line(linewidth = 0.25),
    axis.ticks.length = unit(0.05, "cm"),
    axis.title = element_text(size = 7),
    axis.text = element_text(size = 6),
    strip.placement = "outside",
    strip.text = element_text(size = 7),
    strip.background = element_blank()
)

# Session-wide geom defaults: small white-filled circles (shape 21) for
# points and thinner lines.
update_geom_defaults(
    "point",
    list(fill = "white", shape = 21, size = 0.8)
)
update_geom_defaults(
    "line",
    list(linewidth = 0.4)
)

In [None]:
# Load the collated decoder predictions from the build directory.
# The cells below read the columns dataset, motif, n_units, seed and score.
predictions <- data.table::fread("../build/decoder_predictions.csv")

In [None]:
# Exploratory view: smoothed score against ensemble size (log10 x-axis),
# one panel per motif and one curve per dataset.
ggplot(predictions, aes(x = n_units, y = score, color = dataset)) +
    facet_wrap(~ motif) +
    stat_smooth() +
    scale_x_log10()

In [None]:
# Notebook display size (inches) and resolution for the figure below.
options(repr.plot.width = 2.2, repr.plot.height = 2.2, repr.plot.res = 450)

# A single dodge shared by the points and the interquartile bars, so each bar
# stays centred on its point (they previously used 0.051 and 0.05).
score_dodge <- position_dodge(width = 0.05)

# Summary figure: for each dataset and ensemble size, the median (point) and
# interquartile range (bar) across seeds of the mean score.
p <- predictions |>
    # Step 1: mean score within each dataset x ensemble size x seed.
    group_by(dataset, n_units, seed) |>
    # Drop only `seed`, leaving one group per dataset x ensemble size.
    summarize(score = mean(score), .groups = "drop_last") |>
    # Step 2: median and quartiles of those per-seed means.
    summarize(
        y = median(score),
        ymin = quantile(score, 0.25),
        ymax = quantile(score, 0.75),
        .groups = "drop"
    ) |>
    ggplot(aes(n_units, y, color = dataset)) +
    geom_point(position = score_dodge, size = 1.5) +
    geom_linerange(aes(ymin = ymin, ymax = ymax), position = score_dodge) +
    scale_x_log10("Ensemble size") +
    scale_y_continuous("Prediction score (R^2)") +
    theme_classic() + my.theme + no.legend
p

In [None]:
# Write the summary figure `p` to a PDF (dimensions in inches).
pdf("../figures/decoder_accuracy.pdf", width = 2.1, height = 2.1)
# Close the device even when rendering fails; otherwise the pdf device stays
# current and later plots are drawn into the file instead of being displayed.
# invisible() keeps the cell from echoing the plot object a second time.
invisible(tryCatch(print(p), finally = dev.off()))

In [None]:
# Wilcoxon test of `score` between the levels of `dataset`.
#
# df: data frame with a numeric `score` column and a `dataset` grouping column.
# Returns the "htest" object produced by wilcox.test().
pred_model <- function(df) {
    stats::wilcox.test(score ~ dataset, data = df)
}

# Run pred_model() separately for every ensemble size below 1000 and collect
# the tidied test results into one table, one row per ensemble size.
fm <- predictions |>
    filter(n_units < 1000) |>
    group_by(n_units) |>
    nest() |>
    # Fit and tidy in one step; transmute() keeps the grouping column n_units,
    # so the table holds exactly n_units and stats.
    transmute(stats = purrr::map(data, \(d) broom::tidy(pred_model(d)))) |>
    unnest(cols = stats) |>
    arrange(n_units)
fm

In [None]:
# Linear mixed model on ensemble sizes below 1000: fixed effects of dataset,
# ensemble size (treated as a factor) and their interaction, with a random
# intercept for each motif. Note that this reuses the name `fm`.
fm <- predictions |>
    filter(n_units < 1000) |>
    mutate(n_units = factor(n_units)) |>
    lmer(score ~ dataset * n_units + (1 | motif), data = _)
summary(fm)

In [None]:
# Median score per dataset for ensembles of 927 units.
predictions |>
    filter(n_units == 927) |>
    group_by(dataset) |>
    summarize(score = median(score))

In [None]:
# Ratio of two scores.
# NOTE(review): 0.28 and 0.24 are presumably the rounded per-dataset medians
# from the previous cell; confirm, and confirm which dataset is the numerator.
0.28 / 0.24