# Noise and signal correlations

This notebook generates the plots of signal and noise correlations in Figure 5. You need to have run `scripts/pairwise_correlations.py` on all the units prior to running this notebook. You also need to have run the `figure4-6_single-unit-stats` notebook to export a list of responsive units.

In [None]:
# Attach a package by name without conflict warnings or startup messages.
#
# pkg: package name as a character string (library() is called with
#      character.only = TRUE, so the value of `pkg` is used, not the symbol).
# Returns the value of the library() call.
#
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change how library() is called.
import <- function(pkg) {
  library(pkg, warn.conflicts = FALSE, quietly = TRUE, character.only = TRUE)
}
import("repr")
import("stringr")
import("tidyr")
import("dplyr")
import("ggplot2")
import("lme4")
import("emmeans")
import("xtable")

In [None]:
# Limit how many rows and columns of a table the notebook front end displays.
options(repr.matrix.max.rows = 20, repr.matrix.max.cols = 15)

# Theme tweaks that the figures in this notebook add on top of theme_classic().
my.theme <- theme(
  # text sizes
  plot.title = element_text(size = 7, hjust = 0.5),
  axis.title = element_text(size = 7),
  axis.text = element_text(size = 6),
  strip.text = element_text(size = 7),
  legend.title = element_text(size = 6),
  legend.text = element_text(size = 6),
  # thin axis lines with short ticks
  axis.line = element_line(linewidth = 0.25),
  axis.ticks = element_line(linewidth = 0.25),
  axis.ticks.length = unit(0.05, "cm"),
  # facet strips: no background box, placed outside the axes
  strip.placement = "outside",
  strip.background = element_blank()
)

# Theme fragment that hides the legend.
no.legend <- theme(legend.position = "none")

# Default appearance of lines and points for all subsequent plots.
update_geom_defaults("line", list(linewidth = 0.4))
update_geom_defaults("point", list(fill = "white", shape = 21, size = 0.8))


## Loading data and first steps of analysis

In [None]:
# Bird and recording-site metadata
birds <- data.table::fread(
  "../datasets/zebf-social-acoustical-ephys/metadata/birds.csv"
)
sites <- data.table::fread(
  "../datasets/zebf-social-acoustical-ephys/metadata/recordings.csv"
)

# Attach bird-level metadata to each recording site. The bird identifier is
# taken as the first alphanumeric run in the site name.
all_sites <- sites |>
  mutate(bird = str_match(site, "[:alnum:]+")[, 1]) |>
  inner_join(birds, by = "bird") |>
  mutate(
    # relabel the area levels, then put L2a/L2b first
    area = forcats::fct_relevel(
      forcats::fct_recode(
        area,
        "L1/CM" = "superficial",
        "L2a/L2b" = "intermediate",
        "L3/NCM" = "deep"
      ),
      c("L2a/L2b", "L1/CM", "L3/NCM")
    ),
    group = factor(group, levels = c("CR", "PR"))
  )

# Need to have run `single-unit-analysis` notebook to identify responsive units
responsive_units <- data.table::fread(
  "../build/responsive_units.txt",
  header = FALSE,
  col.names = c("unit")
)

# Need to have run
# `scripts/extract_channel.py inputs/all_units.tbl > build/unit_channels.csv`
unit_channels <- data.table::fread("../build/unit_channels.csv")

In [None]:
# Master table of units with spike waveform types. Units with an unclassified
# waveform and units that are not in the responsive list are excluded.
all_units <- data.table::fread(
  "../datasets/zebf-social-acoustical-ephys/metadata/mean_spike_features.csv"
) |>
  # values other than "wide"/"narrow" become NA and are dropped next
  mutate(spike = factor(spike, levels = c("wide", "narrow"), exclude = "")) |>
  filter(!is.na(spike)) |>
  # site name is the `<alnum>_<digits>_<digits>` part of the unit name
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  select(unit, site, spike) |>
  inner_join(unit_channels, by = "unit") |>
  inner_join(all_sites, by = "site") |>
  # keep only units listed as responsive
  semi_join(responsive_units, by = "unit")

In [None]:
# Pairwise correlations. Need to have run
# `batch/pairwise_correlations.sh < inputs/recording_metadata.csv`
#
# The column names are read from the header line of the first file found.
header <- data.table::fread(
  cmd = 'find ../build/ -name "*_correlations.csv" | head -n1 | xargs head -n1',
  header = TRUE
)
# The bodies of all files (everything after line 1) are concatenated and read
# without a header, then labelled with the names read above.
unit_correlations <- setNames(
  tibble(
    data.table::fread(
      cmd = 'find ../build/ -name "*_correlations.csv" | xargs tail -q -n+2',
      header = FALSE
    )
  ),
  names(header)
)

In [None]:
# Unit pairs with their signal and noise correlations, annotated with the
# metadata of both members of the pair and of the recording site.
ucorr <- unit_correlations |>
  # drop all comparisons where signal or noise correlation can't be calculated
  # (typically because responses are too weak)
  filter(!is.na(evoked_noise), !is.na(signal)) |>
  # look up channel and spike type for each member of the pair; the inner
  # joins also remove pairs that include a non-responsive unit
  inner_join(rename_with(all_units, \(col) str_c(col, "_1")), by = "unit_1") |>
  inner_join(rename_with(all_units, \(col) str_c(col, "_2")), by = "unit_2") |>
  # exclude pairs on the same electrode
  filter(channel_1 != channel_2) |>
  mutate(site = str_match(unit_1, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  inner_join(all_sites, by = "site") |>
  mutate(
    # mixed pairs (exactly one wide-spiking unit) are BS-NS; otherwise the
    # pair is labelled by the spike type the two units share
    conn_type = ifelse(
      xor(spike_1 == "wide", spike_2 == "wide"),
      "BS-NS",
      ifelse(spike_1 == "wide", "BS-BS", "NS-NS")
    )
  ) |>
  # exclude BS-NS pairs, hard to interpret
  filter(conn_type != "BS-NS")

In [None]:
# Number of pairs by area, condition, and cell type.
# addmargins(c(1, 3)) appends totals over area and over connection type; the
# table is then spread so each area/group combination becomes a column.
df <- xtabs(~ area + group + conn_type, data = ucorr) |>
  addmargins(c(1, 3)) |>
  as.data.frame() |>
  arrange(area) |>
  pivot_wider(
    names_from = c(area, group),
    values_from = Freq,
    values_fill = 0
  )
df

In [None]:
# Emit the pair-count table as LaTeX, with counts shown as whole numbers.
df |>
  xtable(digits = 0) |>
  print(type = "latex")

## Signal correlations - linear model

In [None]:
# Full-factorial linear model of signal correlation on area, pair type, and
# rearing group, followed by joint tests of each model term.
fm_signal_corr <- lm(
  signal ~ area * conn_type * group,
  data = ucorr
)
joint_tests(fm_signal_corr)

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal means of signal correlation with 90% confidence
# intervals, one panel per pair type, groups dodged side by side.
p <- fm_signal_corr |>
  emmeans(~ group * conn_type * area) |>
  confint(level = 0.90, type = "response") |>
  filter(conn_type != "BS-NS") |>
  ggplot(aes(x = area, y = emmean, color = group)) +
  facet_wrap(~ conn_type) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Signal correlation") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
# Render the current plot `p` into a PDF file; dev.off() closes the device.
pdf(file = "../figures/signal_correlations.pdf", width = 2.4, height = 1.75)
print(p)
dev.off()

In [None]:
# Post-hoc pairwise comparison of CR vs PR within each combination of area
# and pair type.
contrast(
  emmeans(fm_signal_corr, ~ group | area * conn_type),
  "pairwise"
)

In [None]:
# Compare BS to NS: pairwise contrast between the pair types.
contrast(
  emmeans(fm_signal_corr, ~ conn_type),
  "pairwise"
)

## Noise correlations - linear model

In [None]:
# Full-factorial linear model of noise correlation on area, pair type, and
# rearing group, followed by joint tests of each model term.
# NOTE(review): the response here is `evoked_noise_c`, whereas the NA filter
# that builds `ucorr` and the signal-vs-noise analysis use `evoked_noise` --
# confirm that the different column is intended.
fm_noise_corr <- lm(
  evoked_noise_c ~ area * conn_type * group,
  data = ucorr
)
joint_tests(fm_noise_corr)

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal means of noise correlation with 90% confidence
# intervals, one panel per pair type, groups dodged side by side.
p <- fm_noise_corr |>
  emmeans(~ group * conn_type * area) |>
  confint(level = 0.90, type = "response") |>
  ggplot(aes(x = area, y = emmean, color = group)) +
  facet_wrap(~ conn_type) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Noise correlation") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
# Render the current plot `p` into a PDF file; dev.off() closes the device.
pdf(file = "../figures/noise_correlations.pdf", width = 2.4, height = 1.75)
print(p)
dev.off()

## Noise vs signal correlations

In [None]:
options(repr.plot.width = 3.0, repr.plot.height = 3.5, repr.plot.res = 450)
# Scatter of noise against signal correlation for every pair, with a linear
# fit per group; rows are areas and columns are pair types.
p <- ggplot(
  ucorr,
  aes(x = signal, y = evoked_noise, color = group, fill = group)
) +
  facet_grid(area ~ conn_type) +
  # tiny, mostly transparent points so dense regions stay readable
  geom_point(size = 0.1, alpha = 0.2, shape = 21) +
  stat_smooth(method = "lm", linewidth = 0.5) +
  scale_x_continuous(name = "Signal correlation") +
  scale_y_continuous(name = "Noise correlation") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
# Render the current plot `p` into a PDF file; dev.off() closes the device.
pdf(
  file = "../figures/signal_vs_noise_correlations.pdf",
  width = 3.0,
  height = 3.5
)
print(p)
dev.off()

We want to determine whether the relationship between noise and signal correlations differs between conditions.
To do this, we fit a linear model with noise correlation as the dependent variable and signal correlation as the main independent variable,
and then test whether the slope for each area and connection type differs between CR and PR.

In [None]:
# Model noise correlation as a function of signal correlation, letting the
# slope vary with pair type, area, and group.
fm_corr_corr <- lm(
  evoked_noise ~ signal * conn_type * area * group,
  data = ucorr
)
# Slopes with respect to `signal` for each group, plus the pairwise group
# contrast, within every area x pair-type combination.
em_corr_corr <- emtrends(
  fm_corr_corr,
  pairwise ~ group | area * conn_type,
  var = "signal"
)
summary(em_corr_corr)$contrasts

In [None]:
options(repr.plot.width = 1.6, repr.plot.height = 3.5, repr.plot.res = 450)
# Estimated slope of noise vs signal correlation with its confidence limits,
# by pair type (x axis) and area (rows), groups dodged side by side.
p <- summary(em_corr_corr)$emtrends |>
  ggplot(aes(x = conn_type, y = signal.trend, color = group)) +
  facet_grid(area ~ .) +
  geom_point(position = position_dodge(width = 0.35), size = 1.5) +
  geom_linerange(
    aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.35)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Slope", limits = c(-0.01, 0.6)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
# Render the current plot `p` into a PDF file; dev.off() closes the device.
pdf(file = "../figures/signal_vs_noise_corr_slope.pdf", width = 1.6, height = 3.1)
print(p)
dev.off()

An alternative approach, not used in the paper, is to quantize the signal correlations into low, middle, and high
groups and then compare noise correlations within each of these groups.

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 3.0, repr.plot.res = 450)
# Mean +/- SE of noise correlation within low / mid / high signal-correlation
# bins, by area (rows) and pair type (columns).
ucorr |>
  # optional restriction, currently disabled:
  # filter(area == "L3/NCM", conn_type != "BS-NS") |>
  mutate(
    # Bins are [-1, -0.4], (-0.4, 0.4], (0.4, 1]. include.lowest = TRUE closes
    # the first bin on the left; with cut()'s defaults a signal correlation of
    # exactly -1 would fall outside every bin and become NA.
    scorr_group = cut(
      signal,
      breaks = c(-1, -0.4, 0.4, 1),
      labels = c("low", "mid", "high"),
      include.lowest = TRUE
    )
  ) |>
  ggplot(aes(scorr_group, evoked_noise_c, color = group)) +
  facet_grid(area ~ conn_type) +
  stat_summary(
    fun.data = "mean_se",
    fatten = 0.1,
    position = position_dodge(width = 0.5)
  ) +
  theme_classic() +
  my.theme +
  no.legend