# Single-unit analysis

This notebook contains the code to generate summary statistic panels in Figures 4 and 6 and the data in Table 1. You need to have calculated motif-level firing rates (`scripts/motif_rates.py`) and discriminability (`scripts/motif_discrim.py`) and unit-level selectivity (`scripts/unit_selectivity.R`) for all the units.

In [None]:
## Attach a package given by name, suppressing startup messages and
## masking warnings.
##   pkg: package name as a character string (character.only=TRUE means the
##        argument is evaluated rather than taken as a literal symbol)
## Returns whatever library() returns (invisibly); errors if the package is missing.
## TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned.
import <- function(pkg) {
    library(pkg, warn.conflicts=FALSE, quietly=TRUE, character.only=TRUE)
}
import("repr")
import("stringr")
import("tidyr")
import("dplyr")
import("ggplot2")
import("lme4")
import("emmeans")

In [None]:
## Limit how much of a table the notebook prints
options(repr.matrix.max.rows=20, repr.matrix.max.cols=15)

## Shared figure styling: small text and thin axis lines, added on top of
## theme_classic() in every panel below
my.theme <- theme(
    ## text sizes
    plot.title=element_text(size=7, hjust=0.5),
    axis.title=element_text(size=7),
    axis.text=element_text(size=6),
    legend.title=element_text(size=6),
    legend.text=element_text(size=6),
    strip.text=element_text(size=7),
    ## axis lines and ticks
    axis.line=element_line(linewidth=0.25),
    axis.ticks=element_line(linewidth=0.25),
    axis.ticks.length=unit(0.05, "cm"),
    ## facet strips: no box, placed outside the axes
    strip.placement="outside",
    strip.background=element_blank()
)

## Add this to a plot to hide its legend
no.legend <- theme(legend.position="none")

## Session-wide geom defaults: thin lines; small fillable circles (shape 21) with white fill
update_geom_defaults("line", list(linewidth=0.4))
update_geom_defaults("point", list(fill="white", shape=21, size=0.8))


## Loading data and first steps of analysis

### Metadata

In [None]:
## Raw metadata tables (birds and recordings)
birds <- data.table::fread("../datasets/zebf-social-acoustical-ephys/metadata/birds.csv")
sites <- data.table::fread("../datasets/zebf-social-acoustical-ephys/metadata/recordings.csv")

## Recording sites joined to the metadata of the bird they came from.
## Sites whose bird is not listed in birds.csv are dropped by the inner join.
all_sites <- sites |>
    ## bird id = first run of alphanumeric characters in the site name
    mutate(bird=str_match(site, "[:alnum:]+")[,1]) |>
    inner_join(birds, by="bird") |>
    ## relabel the area codes, then make L2a/L2b the first (reference) level
    mutate(area=forcats::fct_relevel(
        forcats::fct_recode(area, "L1/CM"="superficial", "L2a/L2b"="intermediate", "L3/NCM"="deep"),
        c("L2a/L2b", "L1/CM", "L3/NCM")
    )) |>
    ## CR is the reference level for group
    mutate(group=factor(group, levels=c("CR", "PR")))

In [None]:
## Master table of units with spike waveform classifications.
## Any unit whose class is not "wide" or "narrow" becomes NA in the factor and is
## excluded; units without a matching site in all_sites are dropped by the join.
all_units <- data.table::fread("../datasets/zebf-social-acoustical-ephys/metadata/mean_spike_features.csv") |>
    mutate(
        spike=factor(spike, levels=c("wide", "narrow"), exclude=""),
        ## site name = first "<alnum>_<digits>_<digits>" match in the unit name
        site=str_match(unit, "[:alnum:]+_\\d+_\\d+")[,1]
    ) |>
    filter(!is.na(spike)) |>
    select(unit, site, spike) |>
    inner_join(all_sites, by="site")

### Tabulation of the number of birds and recording sites for Table 1

In [None]:
## Number of animals in each condition by sex.
## Sites are first collapsed to one row per bird so each animal is counted once;
## addmargins() appends row/column totals.
all_sites |>
    distinct(bird, sex, group) |>
    xtabs(~ sex + group, data=_) |>
    addmargins()

In [None]:
## Number of recording sites by area and condition, with totals
all_sites |>
    xtabs(~ area + group, data=_) |>
    addmargins()

In [None]:
## Number of areas sampled per bird, summarized across birds.
## The area x bird site counts are converted to 0/1 indicators, the sampled
## combinations are kept, and the remaining rows are counted per bird.
all_sites |>
    xtabs(~ area + bird, data=_) |>
    as.data.frame() |>
    mutate(across(where(is.numeric), \(x) as.numeric(x > 0))) |>
    filter(Freq > 0) |>
    group_by(bird) |>
    summarize(n_areas=n()) |>
    summary()

In [None]:
## Number of birds contributing to each area/group cell
## (a bird is counted once per area regardless of how many sites it has there)
all_sites |>
    distinct(area, bird, group) |>
    xtabs(~ area + group, data=_)

In [None]:
## Number of units by area, condition, and cell type.
## Totals are added over area (dim 1) and cell type (dim 3) only, then the table
## is reshaped so each area/group combination is a column and each cell type a row.
all_units |>
    xtabs(~ area + group + spike, data=_) |>
    addmargins(margin=c(1, 3)) |>
    as.data.frame() |>
    arrange(area) |>
    pivot_wider(names_from=c(area, group), values_from=Freq, values_fill=0)

## Firing rates


In [None]:
## load the rate data
## The column names are read from the header line of the first "*_rates.csv" file
## found under ../build/; the data are read from all such files concatenated with
## their header lines stripped (tail -n+2).
## TRUE/FALSE are spelled out because T and F are reassignable variables.
header <- data.table::fread(cmd='find ../build/ -name "*_rates.csv" | head -n1 | xargs head -n1', header=TRUE)
all_motif_rates <- tibble(data.table::fread(cmd='find ../build/ -name "*_rates.csv" | xargs tail -q -n+2', header=FALSE))
## assumes every file has the same columns, in the same order, as the first one
names(all_motif_rates) <- names(header)

## initial cleaning - only responses to highest SNR, omit background segment
## Rows are kept when the background level is -100 dBFS or the row is a silence
## interval; "silence" is made the reference level of the foreground factor.
motif_rates <- all_motif_rates |>
    filter(foreground!="background") |>
    filter(background_dBFS==-100 | foreground=="silence") |>
    mutate(foreground=relevel(factor(foreground), "silence"))
## Pool trials of the same stimulus: spike counts and interval durations are summed
## within each unit/stimulus pair (we can do this because our dependent variable is Poisson).
## Rate estimates are regularized by setting the spontaneous ("silence") count to 1
## for units with no spontaneous spikes.
## NOTE: summarize() drops only the last grouping variable, so the result stays
## grouped by unit (the join preserves this grouping).
motif_rate_summary <- motif_rates |>
    group_by(unit, foreground) |>
    summarize(n_events=sum(n_events), interval=sum(interval_end)) |>
    mutate(n_events=ifelse(foreground=="silence" & n_events == 0, 1, n_events)) |>
    ## attach unit metadata; units missing from all_units (e.g. unclassified) are dropped
    inner_join(all_units, by="unit")

In [None]:
## Proportion of units where we had to add a spike to the spontaneous interval to
## regularize rate estimates, i.e. units with zero spikes summed over all silence intervals.
## Only units present in all_units are counted.
motif_rates |>
    filter(foreground=="silence") |>
    group_by(unit) |>
    summarize(n_events=sum(n_events), interval=sum(interval_end)) |>
    inner_join(all_units, by="unit") |>
    mutate(silent=n_events==0) |>
    xtabs(~ silent, data=_) |>
    prop.table()

### Spontaneous firing rate

Figure 3A

In [None]:
## Poisson GLMM of spontaneous spike counts (silence intervals only).
## log(interval) is an offset, so the fixed effects describe log firing rate;
## each unit gets a random intercept.
fm_spont <- glmer(
    n_events ~ area*spike*group + (1|unit),
    data=motif_rate_summary |> filter(foreground=="silence"),
    family=poisson,
    offset=log(interval),
    control=glmerControl(optimizer="bobyqa")
)

In [None]:
options(repr.plot.width=1.8, repr.plot.height=1.45, repr.plot.res=450)
## Estimated marginal mean spontaneous rate for each group/cell type/area with 90%
## confidence intervals. offset=0 evaluates the model at log(interval)=0, i.e. per
## unit of interval (Hz if intervals are in seconds - TODO confirm).
p <- fm_spont |>
    emmeans(~ group*spike*area, type="response", offset=0) |>
    confint(level=0.90) |>
    ggplot(aes(x=area, y=rate, color=group)) +
    facet_wrap(~ spike) +
    geom_point(size=1.5, position=position_dodge(width=0.5)) +
    geom_linerange(aes(ymin=asymp.LCL, ymax=asymp.UCL), position=position_dodge(width=0.5)) +
    scale_x_discrete(name=NULL) +
    scale_y_log10(name="Spont rate (Hz)", limits=c(0.1, 7.0)) +
    theme_classic() + my.theme + no.legend
p

In [None]:
## Write the current plot `p` to a PDF (dimensions in inches) and close the device
pdf(file="../figures/spont_rate_all_units.pdf", width=2.25, height=2)
print(p)
dev.off()

In [None]:
## Pairwise post-hoc comparisons between groups, computed separately within
## each area/cell type combination
fm_spont |>
    emmeans(~ group | area*spike) |>
    contrast(method="pairwise")

### Evoked rate

Figure 2B

In [None]:
options(repr.plot.width=4, repr.plot.height=2.5, repr.plot.res=300)
## Boxplots of each unit's average evoked rate by area, cell type, and group.
## A unit's rate is the mean over stimuli (silence excluded) of pooled spike count
## divided by pooled interval. The per-unit mean is computed inside an explicit
## group_by()/summarize() so that it does not depend on whatever grouping
## motif_rate_summary happens to carry; with ungrouped input, a bare
## mutate(rate=mean(...)) would silently give every unit the global mean.
p <- motif_rate_summary |>
    filter(foreground!="silence") |>
    group_by(group, area, spike, unit) |>
    summarize(rate=mean(n_events / interval), .groups="drop") |>
    ggplot(aes(area, rate, color=group)) +
    facet_wrap(~ spike) +
    geom_boxplot() +
    scale_y_log10(name="Evoked rate (Hz)") +
    theme_classic() + my.theme
p

In [None]:
## Poisson GLMM of evoked spike counts (all stimuli except silence).
## log(interval) is an offset, so effects are on log firing rate; random
## intercepts for unit and for stimulus (foreground).
fm_evoked <- glmer(
    n_events ~ group*area*spike + (1|unit) + (1|foreground),
    data=motif_rate_summary |> filter(foreground!="silence"),
    family=poisson,
    offset=log(interval),
    control=glmerControl(optimizer="bobyqa")
)

In [None]:
options(repr.plot.width=1.8, repr.plot.height=1.45, repr.plot.res=450)
## Estimated marginal mean evoked rate for each group/cell type/area with 90%
## confidence intervals; offset=0 evaluates the model per unit of interval.
p <- fm_evoked |>
    emmeans(~ group*spike*area, type="response", offset=0) |>
    confint(level=0.90) |>
    ggplot(aes(x=area, y=rate, color=group)) +
    facet_wrap(~ spike) +
    geom_point(size=1.5, position=position_dodge(width=0.5)) +
    geom_linerange(aes(ymin=asymp.LCL, ymax=asymp.UCL), position=position_dodge(width=0.5)) +
    scale_x_discrete(name=NULL) +
    scale_y_log10(name="Evoked rate (Hz)", limits=c(0.1, 7.0)) +
    theme_classic() + my.theme + no.legend
p

In [None]:
## Write the current plot `p` to a PDF (dimensions in inches) and close the device
pdf(file="../figures/evoked_rate_all_units.pdf", width=2.25, height=2)
print(p)
dev.off()

In [None]:
## Pairwise post-hoc comparisons of evoked rate between groups, within each
## area/cell type combination
fm_evoked |>
    emmeans(~ group | area*spike) |>
    contrast(method="pairwise")

### Number of units per site

Figure 3C

In [None]:
## Poisson GLMM of the number of units of each cell type recorded at each site,
## with a random intercept and random cell-type effect per site.
## NOTE(review): a site/cell-type combination with no units produces no row in
## the summary (rather than a count of 0) - confirm this is intended.
fm_units <- all_units |>
    group_by(group, area, spike, site) |>
    summarize(n_units=n()) |>
    glmer(
        n_units ~ area*spike*group + (1 + spike|site),
        data=_,
        family=poisson,
        control=glmerControl(optimizer="bobyqa")
    )

In [None]:
options(repr.plot.width=2.25, repr.plot.height=2, repr.plot.res=450)
## Estimated marginal mean number of units per site for each group/cell type/area,
## back-transformed to the response scale, with 90% confidence intervals
p <- fm_units |>
    emmeans(~ group*spike*area) |>
    confint(level=0.90, type="response") |>
    ggplot(aes(x=area, y=rate, color=group)) +
    facet_wrap(~ spike) +
    geom_point(size=1.5, position=position_dodge(width=0.5)) +
    geom_linerange(aes(ymin=asymp.LCL, ymax=asymp.UCL), position=position_dodge(width=0.5)) +
    scale_x_discrete(name=NULL) +
    scale_y_log10(name="total units per site") +
    theme_classic() + my.theme + no.legend
p

In [None]:
## Write the current plot `p` to a PDF (dimensions in inches) and close the device
pdf(file="../figures/total_units_per_site.pdf", width=2.25, height=2)
print(p)
dev.off()

In [None]:
## Pairwise post-hoc comparisons of units per site between groups, within each
## area/cell type combination
fm_units |>
    emmeans(~ group | area*spike) |>
    contrast(method="pairwise")

## Discriminability

Discriminability is assessed by computing spike distances and training a classifier. The computation is performed by `scripts/motif_discrim.py`, which outputs one file per unit. Only the "clean" condition is used (inaudible noise). We classify cells as responsive if the z-score of the cross-validation is above the one-tailed 95% confidence level.

Discriminability is what we use to decide whether neurons are auditory or not, so the analyses use all neurons. The population analysis is in Figure 6D.

In [None]:
## Significance level for classifying a unit as auditory
z_score_alpha <- 0.05

## One-tailed critical z value for the unit-level (average) score
z_score_global_thresh <- qnorm(1 - z_score_alpha)

## Stricter per-motif threshold (alpha divided by 9).
## This is not used: too easy for individual motifs to sneak over the threshold.
z_score_motif_thresh <- qnorm(1 - z_score_alpha/9)

In [None]:
## Load the discriminability results.
## The column names are read from the header line of the first "*motif_discrim.csv"
## file found under ../build/; the data are read from all such files concatenated
## with their header lines stripped (tail -n+2).
## TRUE/FALSE are spelled out because T and F are reassignable variables.
header <- data.table::fread(cmd='find ../build/ -name "*motif_discrim.csv" | head -n1 | xargs head -n1', header=TRUE)
motif_discrim <- tibble(data.table::fread(cmd='find ../build/ -name "*motif_discrim.csv" | xargs tail -q -n+2', header=FALSE))
## assumes every file has the same columns, in the same order, as the first one
names(motif_discrim) <- names(header)

In [None]:
## Classify units as auditory and join to metadata.
## Only the "_average" rows are kept and the now-constant foreground column is
## removed; a unit is auditory when its z-score exceeds the global one-tailed
## threshold. Units missing from all_units are dropped by the join.
unit_discrim <- motif_discrim |>
    filter(foreground=="_average") |>
    select(!foreground) |>
    mutate(is_auditory=(z_score > z_score_global_thresh)) |>
    inner_join(all_units, by="unit")

In [None]:
## Binomial GLMM of discriminability.
## There are 90 comparisons for each trial, so the proportion-correct score is turned
## into binomial counts (correct, incorrect) by multiplying by 90. Each unit gets a
## random intercept.
fm_discrim_score <- unit_discrim |>
    mutate(n_correct=score * 90) |>
    glmer(
        cbind(n_correct, 90 - n_correct) ~ group*area*spike + (1|unit),
        data=_,
        family=binomial,
        control=glmerControl(optimizer="bobyqa")
    )

In [None]:
options(repr.plot.width=2.4, repr.plot.height=1.9, repr.plot.res=450)
## Estimated marginal mean discriminability (probability scale) for each
## group/cell type/area with 90% confidence intervals
p <- fm_discrim_score |>
    emmeans(~ group*spike*area, type="response") |>
    confint(level=0.90) |>
    ggplot(aes(x=area, y=prob, color=group)) +
    facet_wrap(~ spike) +
    geom_point(size=1.5, position=position_dodge(width=0.5)) +
    geom_linerange(aes(ymin=asymp.LCL, ymax=asymp.UCL), position=position_dodge(width=0.5)) +
    scale_x_discrete(name=NULL) +
    scale_y_continuous(name="Discriminability", limits=c(0,1)) +
    theme_classic() + my.theme + no.legend
p

In [None]:
## Write the current plot `p` to a PDF (dimensions in inches) and close the device
pdf(file="../figures/discriminability_all_units.pdf", width=2.4, height=1.9)
print(p)
dev.off()

In [None]:
## Post-hoc pairwise tests between groups for each cell type / area
fm_discrim_score |>
    emmeans(~ group | area*spike) |>
    contrast(method="pairwise")

In [None]:
## Test whether BS are less discriminable than NS: pairwise contrast between the
## spike classes ("wide" vs "narrow"), averaged over group and area
fm_discrim_score |>
    emmeans(~ spike) |>
    contrast(method="pairwise")

## Selectivity

A very simple definition of selectivity is related to what proportion of stimuli evoke a significant response. We can estimate this as the probability of a binomial random variable. This is Figure 6E.

In [None]:
## Load the per-motif GLM results.
## TRUE is spelled out because T is a reassignable variable.
motif_rate_stats <- data.table::fread("../build/motif_rate_glm.csv", header=TRUE)
## Summarize each unit's average evoked rate (mean of the per-motif estimates) and the
## number of motifs that elicit a significant response (used for selectivity), then
## attach the discriminability/auditory classification and unit metadata.
## Units absent from unit_discrim are dropped by the join.
unit_selectivity <- (
    motif_rate_stats
    |> group_by(unit)
    |> summarize(avg_evoked=mean(estimate), n_responsive=sum(is_responsive))
    |> inner_join(unit_discrim, by="unit")
)

In [None]:
## Binomial GLMM of selectivity, auditory units only.
## The response is (non-responsive, responsive) motif counts out of 10, so the
## modeled probability is the proportion of motifs that do NOT evoke a significant
## response. Assumes every unit was tested with 10 motifs - TODO confirm.
fm_selectivity <- unit_selectivity |>
    filter(is_auditory) |>
    glmer(
        cbind(10 - n_responsive, n_responsive) ~ group*area*spike + (1|unit),
        data=_,
        family=binomial,
        control=glmerControl(optimizer="bobyqa")
    )


In [None]:
options(repr.plot.width=2.4, repr.plot.height=1.9, repr.plot.res=450)
## Estimated marginal mean selectivity (probability scale) for each
## group/cell type/area with 90% confidence intervals
p <- fm_selectivity |>
    emmeans(~ group*spike*area, type="response") |>
    confint(level=0.90) |>
    ggplot(aes(x=area, y=prob, color=group)) +
    facet_wrap(~ spike) +
    geom_point(size=1.5, position=position_dodge(width=0.5)) +
    geom_linerange(aes(ymin=asymp.LCL, ymax=asymp.UCL), position=position_dodge(width=0.5)) +
    scale_x_discrete(name=NULL) +
    scale_y_continuous(name="Selectivity", limits=c(0,1)) +
    theme_classic() + my.theme + no.legend
p

In [None]:
## Write the current plot `p` to a PDF (dimensions in inches) and close the device
pdf(file="../figures/selectivity_auditory_units.pdf", width=2.4, height=1.9)
print(p)
dev.off()

In [None]:
## Pairwise post-hoc tests of selectivity between groups, within each
## area/cell type combination
fm_selectivity |>
    emmeans(~ group | area*spike) |>
    contrast(method="pairwise")

In [None]:
## Post-hoc comparison of BS to NS: pairwise contrast between the spike classes
## ("wide" vs "narrow"), averaged over group and area
fm_selectivity |>
    emmeans(~ spike) |>
    contrast(method="pairwise")