# Noise (and signal) correlations

This notebook analyzes the noise and signal correlation data calculated by the `scripts/pairwise_correlations.py` script.

In [None]:
# Attach a package given its name as a string, suppressing startup chatter.
# `pkg` is a length-one character vector; `character.only = TRUE` makes
# library() treat it as a name held in a variable rather than a bare symbol.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
import <- function(pkg) {
  library(pkg, warn.conflicts = FALSE, quietly = TRUE, character.only = TRUE)
}
import("repr")
import("stringr")
import("tidyr")
import("dplyr")
import("ggplot2")
import("lme4")
import("emmeans")

In [None]:
# Limit how much of a data frame the notebook prints.
options(repr.matrix.max.rows = 20, repr.matrix.max.cols = 15)

# Shared theme tweaks added on top of theme_classic() by the plots below.
my.theme <- theme(
  # text sizes
  plot.title = element_text(size = 8, hjust = 0.5),
  axis.title = element_text(size = 8),
  axis.text = element_text(size = 6),
  legend.title = element_text(size = 6),
  legend.text = element_text(size = 5),
  strip.text = element_text(size = 8),
  # thin axis lines and ticks
  axis.line = element_line(linewidth = 0.25),
  axis.ticks = element_line(linewidth = 0.25),
  # facet strips: no background box, placed outside the axes
  strip.background = element_blank(),
  strip.placement = "outside"
)

# Add this to a plot to hide its legend.
no.legend <- theme(legend.position = "none")

# Session-wide geom defaults: small white-filled points (shape 21) and thin lines.
update_geom_defaults("line", list(linewidth = 0.25))
update_geom_defaults("point", list(fill = "white", shape = 21, size = 1.1))
update_geom_defaults("line", list(linewidth=0.25))


## Loading data and first steps of analysis

In [None]:
# Metadata: one table describing birds, one describing recording sites.
birds <- data.table::fread("../inputs/bird_metadata.csv")
sites <- data.table::fread("../inputs/recording_metadata.csv")

# Recording sites joined to their bird's metadata, restricted to the three
# labelled depth areas and to recordings flagged with an "okay" stimulus amplitude.
all_sites <- sites |>
  # the bird identifier is the leading alphanumeric run of the site name
  mutate(bird = str_match(site, "[:alnum:]+")[, 1]) |>
  inner_join(birds, by = "bird") |>
  filter(
    area %in% c("deep", "intermediate", "superficial"),
    stim_amplitude == "okay"
  ) |>
  mutate(
    # relabel the depth categories, then put L2a/L2b first so it becomes the
    # reference level of the factor
    area = forcats::fct_relevel(
      forcats::fct_recode(
        area,
        "L1/CM" = "superficial",
        "L2a/L2b" = "intermediate",
        "L3/NCM" = "deep"
      ),
      c("L2a/L2b", "L1/CM", "L3/NCM")
    ),
    group = factor(group, levels = c("CR", "PR"))
  )

In [None]:
# Spike-waveform classification for each unit.
# Need to have run `python scripts/unit_waveforms.py -o build inputs/all_units.txt`
unit_spike_features <- (
    data.table::fread("../build/mean_spike_features.csv")
    # anything that is not "wide" or "narrow" (including "") becomes NA and is dropped
    |> mutate(spike=factor(spike, levels=c("wide", "narrow"), exclude=""))
    |> filter(!is.na(spike))
)

# One unit name per line, no header row.
# Need to have run `single-unit-analysis` notebook to identify responsive units
responsive_units <- data.table::fread("../inputs/responsive_units.txt", header=FALSE, col.names=c("unit"))

# Responsive units that also have a spike-type classification, with the
# columns from the channel table plus `spike`.
# Need to have run `scripts/extract_channel.py inputs/all_units.txt > build/unit_channels.csv`
units <- (
    data.table::fread("../build/unit_channels.csv")
    |> semi_join(responsive_units, by="unit")
    |> inner_join(unit_spike_features |> select(unit, spike), by="unit")
)

In [None]:
# Pairwise correlations. Need to have run `batch/pairwise_correlations.sh < inputs/recording_metadata.csv`
# The per-file CSVs under ../build/ are concatenated by the shell: column names
# are read from the first line of the first file that `find` returns, and the
# data rows are every file's contents from line 2 onwards.
# NOTE(review): this assumes every *_correlations.csv has the same columns in
# the same order — confirm against the script that writes them.
header <- data.table::fread(cmd='find ../build/ -name "*_correlations.csv" | head -n1 | xargs head -n1', header=TRUE)
unit_correlations <- tibble(data.table::fread(cmd='find ../build/ -name "*_correlations.csv" | xargs tail -q -n+2', header=FALSE))
names(unit_correlations) <- names(header)

In [None]:
# Table of unit pairs used by the analyses below: one row per pair, annotated
# with each unit's channel and spike type, the recording site's metadata, and
# a connection-type label.
ucorr <- unit_correlations |>
  # keep only pairs for which both the evoked noise correlation and the signal
  # correlation could be calculated (typically fails when responses are too weak)
  filter(!(is.na(evoked_noise) | is.na(signal))) |>
  # attach channel and spike type for each member of the pair; the inner joins
  # also discard pairs involving a unit that is absent from `units`
  inner_join(rename_with(units, \(nm) str_c(nm, "_1")), by = "unit_1") |>
  inner_join(rename_with(units, \(nm) str_c(nm, "_2")), by = "unit_2") |>
  # drop pairs recorded on the same electrode (might change this if we calculate distance)
  filter(channel_1 != channel_2) |>
  # the site name is the leading <alnum>_<digits>_<digits> part of the unit name
  mutate(site = str_match(unit_1, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  inner_join(all_sites, by = "site") |>
  mutate(
    # wide/wide -> "E-E", narrow/narrow -> "I-I", one of each -> "E-I"
    conn_type = ifelse(
      spike_1 == spike_2,
      ifelse(spike_1 == "wide", "E-E", "I-I"),
      "E-I"
    )
  )

In [None]:
# Show the assembled pair table for inspection.
ucorr

In [None]:
# Site-level averages: mean evoked noise correlation for each
# group / area / site / connection-type combination.
ucorr_sites <- (
    ucorr
    |> group_by(group, area, site, conn_type)
    # keep only combinations with more than 5 pairs
    |> filter(n() > 5)
    # drop the grouping explicitly so the result is a plain (ungrouped) table
    # and summarize() does not emit its regrouping message
    |> summarize(evoked_noise_c=mean(evoked_noise_c), .groups="drop")
)
ucorr_sites

### Raw data plots

In [None]:
# Distribution of evoked noise correlations by connection type, one panel per
# area, colored by group; the overlaid point-ranges are mean +/- SE.
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
ggplot(ucorr, aes(x = conn_type, y = evoked_noise_c, colour = group)) +
  facet_grid(cols = vars(area)) +
  geom_violin() +
  stat_summary(
    fun.data = "mean_se",
    fatten = 1.5,
    position = position_dodge(width = 1.0)
  ) +
  theme_classic() +
  my.theme

In [None]:
# Distribution of spontaneous noise correlations by connection type, one panel
# per area, colored by group; the overlaid point-ranges are mean +/- SE.
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
ggplot(ucorr, aes(x = conn_type, y = spont_noise_c, colour = group)) +
  facet_grid(cols = vars(area)) +
  geom_violin() +
  stat_summary(
    fun.data = "mean_se",
    fatten = 1.5,
    position = position_dodge(width = 1.0)
  ) +
  theme_classic() +
  my.theme

### Evoked noise correlation - linear model - pairs

In [None]:
# Full-factorial linear model of evoked noise correlation, fit to individual pairs.
fm_corr <- lm(
  formula = evoked_noise_c ~ area * conn_type * group,
  data = ucorr
)
# Pairwise group contrasts within each connection type x area cell.
contrast(
  emmeans(fm_corr, specs = ~ group | conn_type * area),
  method = "pairwise"
)

In [None]:
# Estimated marginal means with 90% confidence intervals from the current
# `fm_corr`, plotted by connection type with one panel per area.
p <- ggplot(
  data = fm_corr |>
    emmeans(specs = ~ group * conn_type * area) |>
    confint(level = 0.90, type = "response"),
  mapping = aes(x = conn_type, y = emmean, colour = group)
) +
  facet_wrap(vars(area)) +
  geom_pointrange(
    aes(ymin = lower.CL, ymax = upper.CL),
    fatten = 1.5,
    position = position_dodge(width = 0.5)
  ) +
  theme_classic() +
  my.theme
p

### Spontaneous noise correlation - linear model - pairs

In [None]:
# Full-factorial linear model of spontaneous noise correlation, fit to
# individual pairs. Note that this overwrites the previous `fm_corr`.
fm_corr <- lm(
  formula = spont_noise_c ~ area * conn_type * group,
  data = ucorr
)
# Pairwise group contrasts within each connection type x area cell.
contrast(
  emmeans(fm_corr, specs = ~ group | conn_type * area),
  method = "pairwise"
)

In [None]:
# Estimated marginal means with 90% confidence intervals from the current
# `fm_corr`, plotted by connection type with one panel per area.
p <- ggplot(
  data = fm_corr |>
    emmeans(specs = ~ group * conn_type * area) |>
    confint(level = 0.90, type = "response"),
  mapping = aes(x = conn_type, y = emmean, colour = group)
) +
  facet_wrap(vars(area)) +
  geom_pointrange(
    aes(ymin = lower.CL, ymax = upper.CL),
    fatten = 1.5,
    position = position_dodge(width = 0.5)
  ) +
  theme_classic() +
  my.theme
p

### Evoked noise correlation - linear model - sites

In [None]:
# Same full-factorial model of evoked noise correlation, fit to the per-site
# means in `ucorr_sites` rather than to individual pairs. Overwrites `fm_corr`.
fm_corr <- lm(
  formula = evoked_noise_c ~ area * conn_type * group,
  data = ucorr_sites
)
# Pairwise group contrasts within each connection type x area cell.
contrast(
  emmeans(fm_corr, specs = ~ group | conn_type * area),
  method = "pairwise"
)

In [None]:
# Estimated marginal means with 90% confidence intervals from the current
# `fm_corr`, plotted by connection type with one panel per area.
p <- ggplot(
  data = fm_corr |>
    emmeans(specs = ~ group * conn_type * area) |>
    confint(level = 0.90, type = "response"),
  mapping = aes(x = conn_type, y = emmean, colour = group)
) +
  facet_wrap(vars(area)) +
  geom_pointrange(
    aes(ymin = lower.CL, ymax = upper.CL),
    fatten = 1.5,
    position = position_dodge(width = 0.5)
  ) +
  theme_classic() +
  my.theme
p

### With random effects

In [None]:
# Mixed-effects version of the pair-level evoked model: the same fixed effects
# plus a per-site random intercept and random conn_type effect, fit with the
# bobyqa optimizer. Overwrites `fm_corr`.
fm_corr <- lmer(
  formula = evoked_noise_c ~ area * conn_type * group + (1 + conn_type | site),
  data = ucorr,
  control = lmerControl(optimizer = "bobyqa")
)
# Pairwise group contrasts within each connection type x area cell.
contrast(
  emmeans(fm_corr, specs = ~ group | conn_type * area),
  method = "pairwise"
)

In [None]:
# Estimated marginal means with 90% confidence intervals from the mixed model
# in `fm_corr`, plotted by connection type with one panel per area.
p <- (
    fm_corr
    # Request asymptotic (infinite-df) intervals explicitly. The plot below
    # reads the asymp.LCL / asymp.UCL columns, which emmeans only produces in
    # asymptotic mode; otherwise the columns are named lower.CL / upper.CL and
    # the aesthetics would fail to resolve. Leaving the df method to emmeans'
    # defaults makes the column names depend on data size and installed packages.
    |> emmeans(~ group*conn_type*area, lmer.df="asymptotic")
    |> confint(level=0.90, type="response")
    |> ggplot(aes(conn_type, emmean, color=group))
    + facet_wrap(~ area)
    + geom_pointrange(aes(ymin=asymp.LCL, ymax=asymp.UCL), fatten=1.5, position=position_dodge(width=0.5))
    + theme_classic() + my.theme
)
p

### Noise vs signal

In [None]:
# Evoked noise correlation against signal correlation for every pair, with a
# per-group linear fit; rows are connection types, columns are areas.
options(repr.plot.width = 4, repr.plot.height = 4, repr.plot.res = 300)
ggplot(ucorr, aes(x = signal, y = evoked_noise_c, colour = group)) +
  facet_grid(rows = vars(conn_type), cols = vars(area)) +
  geom_point() +
  stat_smooth(method = "lm")

In [None]:
# spontaneous vs evoked
options(repr.plot.width=4, repr.plot.height=4, repr.plot.res = 300)
(
    ucorr
    |> ggplot(aes(evoked_noise_c, spont_noise_c, color=group))
    + facet_grid(conn_type ~ area)
    + geom_point()
    + stat_smooth(method="lm")
)

In [None]:
# Mean +/- SE of noise correlation within three signal-correlation bins:
# (-2, -0.4] "negative", (-0.4, 0.4] "mid", (0.4, 2] "high".
# NOTE(review): `noise_corrected` is not referenced by any other cell in this
# notebook (the others use evoked_noise_c / spont_noise_c) — confirm that the
# column exists in the correlations CSVs.
ggplot(
  data = mutate(
    ucorr,
    signal_group = cut(
      signal,
      breaks = c(-2, -0.4, 0.4, 2),
      labels = c("negative", "mid", "high")
    )
  ),
  mapping = aes(x = signal_group, y = noise_corrected, colour = group)
) +
  facet_grid(rows = vars(conn_type), cols = vars(area)) +
  stat_summary(
    fun.data = "mean_se",
    fatten = 1.5,
    position = position_dodge(width = 0.5)
  ) +
  theme_classic() +
  my.theme