# Single-unit analysis

This notebook contains the code to analyze single-unit data. We want to measure how the following depend on area, spike type, and rearing conditions:

- all units:
    - units per site
    - spontaneous rate
    - evoked rate
- responsive units:
    - responsive units per site
    - selectivity

We are only looking at "clean" responses for this paper.

In [None]:
# Attach a package given its name as a string, without conflict warnings or
# startup chatter.
#   pkg: package name (character scalar)
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be
# reassigned.
import <- function(pkg) {
  library(pkg, warn.conflicts = FALSE, quietly = TRUE, character.only = TRUE)
}
import("repr")
import("stringr")
import("tidyr")
import("dplyr")
import("ggplot2")
import("lme4")
import("emmeans")

In [None]:
# repr display limits for data frames shown in the notebook
options(repr.matrix.max.rows = 20, repr.matrix.max.cols = 15)

# Theme overrides shared by the figures: small text, thin axis lines and ticks,
# facet strips drawn outside the panel with no background.
my.theme <- theme(
  legend.text = element_text(size = 6),
  legend.title = element_text(size = 6),
  plot.title = element_text(size = 7, hjust = 0.5),
  axis.line = element_line(linewidth = 0.25),
  axis.ticks = element_line(linewidth = 0.25),
  axis.ticks.length = unit(0.05, "cm"),
  axis.title = element_text(size = 7),
  axis.text = element_text(size = 6),
  strip.placement = "outside",
  strip.text = element_text(size = 7),
  strip.background = element_blank()
)

# Add this to a plot to hide its legend
no.legend <- theme(legend.position = "none")

# Default look for points (small white-filled circles) and lines
update_geom_defaults("point", list(fill = "white", shape = 21, size = 0.8))
update_geom_defaults("line", list(linewidth = 0.4))


## Loading data and first steps of analysis

### Metadata

In [None]:
# Bird metadata, restricted to rows where behavior is "no"
birds <- data.table::fread("../inputs/bird_metadata.csv") |>
  filter(behavior == "no")

# Recording-site metadata
sites <- data.table::fread("../inputs/recording_metadata.csv")

# Sites belonging to the retained birds. The bird id is the leading alphanumeric
# run of the site name. Only the three named areas with stim_amplitude "okay"
# are kept; areas are relabelled and reordered, and group becomes a CR/PR factor.
all_sites <- sites |>
  mutate(bird = str_match(site, "[:alnum:]+")[, 1]) |>
  inner_join(birds, by = "bird") |>
  filter(
    area %in% c("deep", "intermediate", "superficial"),
    stim_amplitude == "okay"
  ) |>
  mutate(
    area = forcats::fct_relevel(
      forcats::fct_recode(
        area,
        "L1/CM" = "superficial",
        "L2a/L2b" = "intermediate",
        "L3/NCM" = "deep"
      ),
      c("L2a/L2b", "L1/CM", "L3/NCM")
    ),
    group = factor(group, levels = c("CR", "PR"))
  )

In [None]:
## display the retained bird metadata
birds

In [None]:
## number of animals in each condition by sex
## (one row per bird before tabulating; margins add row/column totals)
all_sites |>
  distinct(bird, sex, group) |>
  xtabs(~ sex + group, data = _) |>
  addmargins()

In [None]:
## number of sites by area and condition, with marginal totals
all_sites |>
  xtabs(~ area + group, data = _) |>
  addmargins()

In [None]:
## birds per area/group
## (each bird is counted once per area it was recorded in)
all_sites |>
  distinct(area, bird, group) |>
  xtabs(~ area + group, data = _)

### Unit waveform classifications

Need to have run `python scripts/unit_waveforms.py -o build inputs/all_units.txt`

In [None]:
# Spike-waveform features for every unit
all_unit_spike_features <- data.table::fread("../build/mean_spike_features.csv")

# Keep only units labelled "wide" or "narrow"; any other label (including the
# empty string) becomes NA in the factor and is dropped.
unit_spike_features <- all_unit_spike_features |>
  mutate(spike = factor(spike, levels = c("wide", "narrow"), exclude = "")) |>
  filter(!is.na(spike))

In [None]:
# Inspect units with peak2_t above 1.5; trough_z is the trough voltage scaled by sd_v
all_unit_spike_features |>
  mutate(trough_z = trough_v / sd_v) |>
  filter(peak2_t > 1.5)
  # |> filter(trough_z > -15, trough_v > -400)

In [None]:
### generate tables of pr and cr units - this is used by the pooled decoder analysis
# One row per classified unit with its site (parsed from the unit name) and the
# site metadata. left_join keeps units whose site is not in all_sites; those have
# NA group and therefore land in neither output file.
all_units <- unit_spike_features |>
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  select(unit, site, spike) |>
  left_join(all_sites, by = "site")

# Write one unit name per line, no header (FALSE spelled out; F is reassignable)
all_units |>
  filter(group == "CR") |>
  select(unit) |>
  readr::write_csv("../build/cr_units.txt", col_names = FALSE)
all_units |>
  filter(group == "PR") |>
  select(unit) |>
  readr::write_csv("../build/pr_units.txt", col_names = FALSE)

In [None]:
## display the unit table built above
all_units

In [None]:
## number of units by area, condition, and cell type
## Margins are added over area (dim 1) and spike (dim 3); the table is then
## spread so each area/group combination is a column.
all_units |>
  xtabs(~ area + group + spike, data = _) |>
  addmargins(c(1, 3)) |>
  as.data.frame() |>
  arrange(area) |>
  pivot_wider(names_from = c(area, group), values_from = Freq, values_fill = 0)

### Discriminability

Discriminability is assessed by computing spike distances and training a classifier. The computation is performed by `scripts/motif_discrim.py`.

We classify cells as responsive if the z-score of the cross-validation is above the one-tailed 95% confidence level.

In [None]:
## one-tailed significance level
z_score_alpha <- 0.05
## z-score cutoff for the "_average" classifier score (used for is_auditory)
z_score_global_thresh <- qnorm(1 - z_score_alpha)
## per-motif cutoff: alpha is divided by 9 before converting to a z-score.
## Original note: "this is not used: too easy for individual motifs to sneak over the threshold"
## NOTE(review): despite that note, this threshold IS applied when n_discrim is
## computed from the per-motif z-scores - confirm which is intended.
z_score_motif_thresh <- qnorm(1 - z_score_alpha/9)

In [None]:
## display the global z-score cutoff
z_score_global_thresh

In [None]:
# Concatenate every *motif_discrim.csv under ../build/. Column names are read
# from the header line of the first file found; the data are the remaining lines
# of all files (tail -n+2 strips each header), read without a header row.
# TRUE/FALSE are spelled out because T and F can be reassigned.
header <- data.table::fread(
  cmd = 'find ../build/ -name "*motif_discrim.csv" | head -n1 | xargs head -n1',
  header = TRUE
)
motif_discrim <- tibble(data.table::fread(
  cmd = 'find ../build/ -name "*motif_discrim.csv" | xargs tail -q -n+2',
  header = FALSE
))
names(motif_discrim) <- names(header)

In [None]:
# Per unit, over the individual motifs (the "_average" row is excluded):
#   n_discrim - number of motifs whose z-score exceeds z_score_motif_thresh
#   max_score - largest per-motif score
unit_n_discrim <- motif_discrim |>
  filter(foreground != "_average") |>
  group_by(unit) |>
  summarize(
    n_discrim = sum(z_score > z_score_motif_thresh),
    max_score = max(score)
  )

In [None]:
## classify units as auditory and join to metadata
## A unit is auditory when the z-score of its "_average" row exceeds
## z_score_global_thresh. Spike type and the site name (parsed from the unit
## name) are attached; units without a wide/narrow label are dropped by the join.
unit_discrim <- motif_discrim |>
  filter(foreground == "_average") |>
  select(!foreground) |>
  inner_join(unit_n_discrim, by = "unit") |>
  mutate(is_auditory = (z_score > z_score_global_thresh)) |>
  inner_join(select(unit_spike_features, unit, spike), by = "unit") |>
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1])

In [None]:
## save output for downstream analysis
## One auditory unit name per line: no quoting, no row names, no header.
## (FALSE is spelled out; F is an ordinary variable that can be reassigned.)
unit_discrim |>
  filter(is_auditory) |>
  ungroup() |>
  select(unit) |>
  write.table(
    "../inputs/responsive_units.txt",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )

In [None]:
## this code cell is for picking out examples. It can be hard to find units that are selective but not too selective.
## Lists auditory wide-spiking L3/NCM units whose name starts with "C104_3_1",
## ordered by spikes_mean.
unit_discrim |>
  inner_join(all_sites, by = "site") |>
  filter(
    area == "L3/NCM",
    spike == "wide",
    str_starts(unit, "C104_3_1"),
    is_auditory
  ) |>
  arrange(spikes_mean)

### Firing rates

Firing rates can be summarized across trials by adding up the number of events and the total observation interval. The underlying assumption
is that the spike counts have a Poisson distribution, which is probably not true but is better than the alternative of assuming the rates are normally distributed.

In [None]:
# Concatenate every *_rates.csv under ../build/: column names come from the
# first file's header line, data from all files with their headers stripped.
# TRUE/FALSE are spelled out because T and F can be reassigned.
header <- data.table::fread(
  cmd = 'find ../build/ -name "*_rates.csv" | head -n1 | xargs head -n1',
  header = TRUE
)
motif_rates <- tibble(data.table::fread(
  cmd = 'find ../build/ -name "*_rates.csv" | xargs tail -q -n+2',
  header = FALSE
))
names(motif_rates) <- names(header)

# Keep rows with background_dBFS == -100 plus all silence rows, drop the
# "background" stimulus, and make "silence" the reference level of foreground.
motif_rates <- motif_rates |>
  filter(background_dBFS == -100 | foreground == "silence", foreground != "background") |>
  mutate(foreground = relevel(factor(foreground), "silence"))

# Pool events and observation time per unit and stimulus.
# .groups = "drop_last" is what summarize() does by default here; it is written
# out because later cells (e.g. the per-unit mean rates in the spontaneous and
# evoked rate plots) rely on the result still being grouped by unit.
motif_rate_summary <- motif_rates |>
  group_by(unit, foreground) |>
  summarize(
    n_events = sum(n_events),
    interval = sum(interval_end),
    .groups = "drop_last"
  ) |>
  ## this regularizes rate estimates for silence by adding 1 spike to units with no spontaneous spikes
  mutate(n_events = ifelse(foreground == "silence" & n_events == 0, 1, n_events))

In [None]:
## motif_rate_summary |> filter(foreground=="silence") |> mutate(silent=n_events==0) |> xtabs(~ silent, data=_) |> prop.table()

In [None]:
# Summary of per-stimulus duration (pooled interval divided by 10) for one
# example unit, excluding silence.
motif_rate_summary |>
  filter(unit == "C104_1_1_c10", foreground != "silence") |>
  mutate(duration = interval / 10) |>
  summarize(
    dur_mean = mean(duration),
    dur_sd = sd(duration),
    dur_max = max(duration),
    dur_min = min(duration)
  )

Compute some unit-level rate statistics:

1. `n_responsive` - the number of stimuli that evoked a significant increase in firing rate relative to silence. Significance is assessed using a Poisson GLM.
2. `activity_frac` - the Vinje and Gallant activity fraction across all non-silence stimuli


In [None]:
## compute unit-level rate statistics
# Fit a Poisson GLM of spike count on stimulus for a single unit.
#   df: data frame with columns n_events, foreground and interval
# log(interval) enters as an offset, so coefficients describe log rates.
# Returns the fitted glm object.
rate_model <- function(df) {
  glm(
    n_events ~ foreground,
    family = poisson,
    data = df,
    offset = log(interval)
  )
}

# Per-stimulus response strength for a single unit.
#   df: data frame with columns n_events, interval and foreground, including a
#       "silence" row that supplies the spontaneous rate
# Returns one row per non-silence stimulus with:
#   rate   - n_events / interval
#   rs     - rate minus the spontaneous rate
#   zscore - rs divided by the sd of rate across the non-silence stimuli
response_strength <- function(df) {
  with_rate <- mutate(df, rate = n_events / interval)
  spont <- with_rate$rate[with_rate$foreground == "silence"]
  with_rate |>
    filter(foreground != "silence") |>
    transmute(rate, rs = rate - spont, zscore = rs / sd(rate))
}

# Activity fraction across the non-silence stimuli of a single unit.
#   df: data frame with columns n_events, interval and foreground
# With rates r_1..r_n this returns a one-row data frame holding
#   activity_frac = (1 - (sum(r / n))^2 / sum(r^2 / n)) / (1 - 1 / n)
activity_fraction <- function(df) {
  evoked <- filter(df, foreground != "silence")
  evoked <- mutate(evoked, rate = n_events / interval)
  summarize(
    evoked,
    activity_frac = (1 - sum(rate / n())^2 / sum(rate^2 / n())) / (1 - 1 / n())
  )
}

# Compare every stimulus against the control (first) level of foreground.
#   mdl: a fitted model with a foreground term, e.g. the result of rate_model()
# Returns one row per treatment-vs-control contrast with:
#   foreground    - first word of the contrast label
#   estimate      - contrast estimate
#   is_responsive - TRUE when the estimate is positive and adj.p.value < 0.05
rate_model_responsive <- function(mdl) {
  vs_control <- contrast(emmeans(mdl, ~ foreground), "trt.vs.ctrl")
  transmute(
    broom::tidy(vs_control),
    foreground = str_extract(contrast, "\\w+"),
    estimate,
    is_responsive = (estimate > 0) & (adj.p.value < 0.05)
  )
}

In [None]:
# Worked example on one unit: fit the rate model and list the
# stimulus-vs-control contrasts.
motif_rate_summary |>
  filter(unit == "C42_4_1_c131") |>
  rate_model() |>
  emmeans(~ foreground) |>
  contrast("trt.vs.ctrl") |>
  broom::tidy()

In [None]:
# this cell takes a long time to run - it would be nice to do this in motif_rates.py but I can't do marginal means in python
# For each unit: compute response strengths, fit the Poisson rate model and
# extract the per-stimulus contrasts, then expand both per-stimulus tables side
# by side.
# NOTE(review): `responsive` and `rate_stats` are unnested in parallel, so their
# rows are matched by position, not by stimulus name - confirm both are always
# in the same stimulus order.
unit_motif_responsive <- motif_rate_summary |>
  group_by(unit) |>
  nest() |>
  mutate(
    rate_stats = purrr::map(data, response_strength),
    model = purrr::map(data, rate_model),
    responsive = purrr::map(model, rate_model_responsive)
  ) |>
  select(unit, responsive, rate_stats) |>
  unnest(cols = c(responsive, rate_stats))

In [None]:
# Per-unit rate statistics:
#   avg_evoked    - mean of the stimulus-vs-control contrast estimates
#   n_responsive  - number of stimuli flagged is_responsive
#   activity_frac - activity fraction of the evoked rates
unit_rate_stats <- unit_motif_responsive |>
  group_by(unit) |>
  summarize(
    avg_evoked = mean(estimate),
    n_responsive = sum(is_responsive),
    activity_frac = (1 - sum(rate / n())^2 / sum(rate^2 / n())) / (1 - 1 / n())
  )

Join the rate and discrimination data together with metadata.

In [None]:
# One row per unit: discriminability, rate statistics and site metadata
unit_summary <- unit_discrim |>
  inner_join(unit_rate_stats, by = "unit") |>
  inner_join(all_sites, by = "site")

# Per site and spike type: total units, units classified auditory by
# discriminability, and units with at least one responsive stimulus
site_summary <- unit_summary |>
  group_by(bird, site, sex, group, area, spike) |>
  summarize(
    n_units = n(),
    n_auditory = sum(is_auditory),
    n_auditory_rate = sum(n_responsive > 0)
  )

# Pooled counts per unit and stimulus, with spike type and site metadata
unit_motif_summary <- motif_rate_summary |>
  inner_join(select(unit_spike_features, unit, spike), by = "unit") |>
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  inner_join(all_sites, by = "site")

In [None]:
# save for plotting elsewhere
# (FALSE is spelled out; F is an ordinary variable that can be reassigned)
write.csv(
  unit_motif_responsive,
  "../build/motif_rate_coefs.csv",
  quote = FALSE,
  row.names = FALSE
)

## Example units

In [None]:
# Candidate example units: auditory narrow-spiking L3/NCM units with score < 1
# and n_discrim == 8, listed by score.
unit_summary |>
  filter(area == "L3/NCM", spike == "narrow", is_auditory) |>
  arrange(n_responsive) |>
  select(
    unit, score, spikes_mean, nonzero_trials,
    n_discrim, activity_frac, n_responsive
  ) |>
  filter(score < 1, n_discrim == 8) |>
  arrange(score)

## Single-Unit statistics

### Number of units per site

Supplementary

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Boxplots of units per site by spike shape, one panel per area, log y axis
p3.1 <- ggplot(site_summary, aes(spike, n_units, fill = group)) +
  facet_wrap(~ area) +
  geom_boxplot(width = 0.5, outlier.size = 1) +
  scale_y_log10(name = "total units per site") +
  scale_x_discrete(name = "spike shape")
p3.1 + theme_classic() + my.theme

In [None]:
# GLMM
# Poisson model of units per site with a random intercept and spike slope per site
fm_units <- glmer(
  n_units ~ area * spike * group + (1 + spike | site),
  data = site_summary,
  family = poisson,
  control = glmerControl(optimizer = "bobyqa")
)
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_units, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 2.25, repr.plot.height = 2, repr.plot.res = 450)
# Estimated marginal means with 90% intervals on the response scale
p <- fm_units |>
  emmeans(~ group * spike * area) |>
  confint(level = 0.90, type = "response") |>
  ggplot(aes(area, rate, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_log10(name = "total units per site") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/total_units_per_site.pdf", width=2.25, height=2)
print(p)
dev.off()

### Spontaneous firing rate

In [None]:
# Mean pooled interval for one example unit, excluding silence and "igmi8fxa"
unit_motif_summary |>
  filter(
    unit == "C104_1_1_c10",
    foreground != "silence",
    foreground != "igmi8fxa"
  ) |>
  summarize(int_mean = mean(interval))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Raw spontaneous rates (silence rows only) by spike shape and area.
# mean() is taken within the existing grouping of unit_motif_summary.
p <- unit_motif_summary |>
  filter(foreground == "silence") |>
  mutate(rate = mean(n_events / interval)) |>
  ggplot(aes(spike, rate, color = group)) +
  facet_wrap(~ area) +
  geom_boxplot() +
  scale_y_continuous(name = "Spontaneous rate (Hz)")
p + theme_classic() + my.theme

In [None]:
# Poisson GLMM of spontaneous (silence) spike counts with log(interval) as an
# offset and a random intercept per unit
fm_spont <- glmer(
  n_events ~ area * spike * group + (1 | unit),
  data = unit_motif_summary |> filter(foreground == "silence"),
  family = poisson,
  offset = log(interval),
  control = glmerControl(optimizer = "bobyqa")
)

In [None]:
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_spont, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal means (response scale, offset fixed at 0) with 90% intervals
p <- fm_spont |>
  emmeans(~ group * spike * area, type = "response", offset = 0) |>
  confint(level = 0.90) |>
  ggplot(aes(area, rate, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_log10(name = "Spont rate (Hz)") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
## NOTE(review): the preview for this figure is sized 1.8 x 1.45 but the PDF is
## written at 2.25 x 2 - confirm which size is intended.
pdf("../figures/spont_rate_all_units.pdf", width=2.25, height=2)
print(p)
dev.off()

### Evoked rate

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Raw evoked rates: mean rate over non-silence stimuli, one value per unit.
# The first mean() is taken within the existing grouping of unit_motif_summary.
p <- unit_motif_summary |>
  filter(foreground != "silence") |>
  mutate(rate = mean(n_events / interval)) |>
  group_by(group, area, spike, unit) |>
  summarize(rate = mean(rate)) |>
  ggplot(aes(area, rate, color = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  scale_y_log10(name = "Evoked rate (Hz)") +
  theme_classic() +
  my.theme
p

In [None]:
# Poisson GLMM of evoked spike counts (non-silence stimuli) with log(interval)
# as an offset and random intercepts for unit and stimulus
fm_evoked <- glmer(
  n_events ~ group * area * spike + (1 | unit) + (1 | foreground),
  data = unit_motif_summary |> filter(foreground != "silence"),
  family = poisson,
  offset = log(interval),
  control = glmerControl(optimizer = "bobyqa")
)

In [None]:
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_evoked, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal means (response scale, offset fixed at 0) with 90% intervals
p <- fm_evoked |>
  emmeans(~ group * spike * area, type = "response", offset = 0) |>
  confint(level = 0.90) |>
  ggplot(aes(area, rate, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_log10(name = "Evoked rate (Hz)") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
## NOTE(review): the preview for this figure is sized 1.8 x 1.45 but the PDF is
## written at 2.25 x 2 - confirm which size is intended.
pdf("../figures/evoked_rate_all_units.pdf", width=2.25, height=2)
print(p)
dev.off()

### Discriminability

Discriminability is what we use to decide whether neurons are auditory or not, so any trends need to be analyzed using all neurons.

In [None]:
## raw classifier scores
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Violin plots of score by area and group, one panel per spike type
p <- ggplot(unit_summary, aes(area, score, fill = group)) +
  facet_wrap(~ spike) +
  geom_violin() +
  my.theme
p

In [None]:
## GLMM
## There are 90 comparisons for each trial, so score can be converted to a binomial RV by multiplying by 90
# Binomial model of (correct, incorrect) counts with a random intercept per unit
fm_discrim_score <- unit_summary |>
  mutate(n_correct = score * 90) |>
  glmer(
    cbind(n_correct, 90 - n_correct) ~ group * area * spike + (1 | unit),
    data = _,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa")
  )
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_discrim_score, ~ group | area * spike), "pairwise")

In [None]:
# used to test whether BS are less discriminable than NS
# (pairwise contrast between spike types, marginal over the other factors)
contrast(emmeans(fm_discrim_score, ~ spike), "pairwise")

In [None]:
# used to report lack of effect for BS neurons
# (pairwise group contrast within each spike type)
contrast(emmeans(fm_discrim_score, ~ group | spike), "pairwise")

In [None]:
options(repr.plot.width = 2.4, repr.plot.height = 1.9, repr.plot.res = 450)
# Estimated marginal probabilities with 90% intervals
p <- fm_discrim_score |>
  emmeans(~ group * spike * area, type = "response") |>
  confint(level = 0.90) |>
  ggplot(aes(area, prob, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Discriminability", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/discriminability_all_units.pdf", width=2.4, height=1.9)
print(p)
dev.off()

In [None]:
## number of discriminable motifs
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Boxplots of n_discrim / 10 by area and group, one panel per spike type
p <- ggplot(unit_summary, aes(area, n_discrim / 10, fill = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  theme_classic() +
  my.theme +
  no.legend
p + my.theme

In [None]:
## GLMM
## Alternatively, how many motifs are discriminable
# Binomial model of (discriminable, not discriminable) motif counts out of 9,
# with a random intercept per unit.
# NOTE(review): the denominator here is 9, while the raw plot of n_discrim
# divides by 10 - confirm the number of motifs.
fm_discrim_n <- unit_summary |>
  glmer(
    cbind(n_discrim, 9 - n_discrim) ~ group * area * spike + (1 | unit),
    data = _,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa")
  )
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_discrim_n, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal probabilities with 90% intervals
p <- fm_discrim_n |>
  emmeans(~ group * spike * area, type = "response") |>
  confint(level = 0.90) |>
  ggplot(aes(area, prob, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "p(discriminable)", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/p_discriminable_all_units.pdf", width=1.8, height=1.45)
print(p)
dev.off()

### Number of auditory units per site

Supplementary

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Auditory units per site (discriminability criterion) by area and group
ggplot(site_summary, aes(area, n_auditory, fill = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  my.theme

In [None]:
# Units per site with at least one responsive stimulus (rate criterion)
ggplot(site_summary, aes(area, n_auditory_rate, fill = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  my.theme

In [None]:
# GLMM
# Poisson model of auditory units per site with a random intercept and spike
# slope per site
fm_aud_units <- glmer(
  n_auditory ~ group * area * spike + (1 + spike | site),
  data = site_summary,
  family = poisson,
  control = glmerControl(optimizer = "bobyqa")
)
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_aud_units, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal means with 90% intervals on the response scale
p <- fm_aud_units |>
  emmeans(~ group * spike * area) |>
  confint(level = 0.90, type = "response") |>
  ggplot(aes(area, rate, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_log10(name = "auditory units per site") +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/aud_units_per_site.pdf", width=1.8, height=1.45)
print(p)
dev.off()

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Proportion of units per site classified as auditory
ggplot(site_summary, aes(area, n_auditory / n_units, fill = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  scale_y_continuous(name = "p(responsive)", limits = c(0, 1)) +
  my.theme

In [None]:
# Binomial model of (auditory, non-auditory) unit counts per site with a random
# intercept and spike slope per site
fm_aud_prop <- glmer(
  cbind(n_auditory, n_units - n_auditory) ~ group * area * spike + (1 + spike | site),
  data = site_summary,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_aud_prop, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal probabilities with 90% intervals on the response scale
p <- fm_aud_prop |>
  emmeans(~ group * spike * area) |>
  confint(level = 0.90, type = "response") |>
  ggplot(aes(area, prob, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "p(auditory)", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/aud_units_proportion_site.pdf", width=1.8, height=1.45)
print(p)
dev.off()

### Selectivity (auditory units only)

A very simple definition of selectivity is related to what proportion of stimuli evoke a significant response.

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Raw selectivity (1 - n_responsive / 10) for auditory units
p <- unit_summary |>
  filter(is_auditory) |>
  ggplot(aes(area, 1 - n_responsive / 10, fill = group)) +
  facet_wrap(~ spike) +
  geom_violin()
p + my.theme

In [None]:
## GLMM
# Binomial model of (non-responsive, responsive) stimulus counts out of 10 for
# auditory units, with a random intercept per unit
fm_selectivity <- unit_summary |>
  filter(is_auditory) |>
  glmer(
    cbind(10 - n_responsive, n_responsive) ~ group * area * spike + (1 | unit),
    data = _,
    family = binomial,
    control = glmerControl(optimizer = "bobyqa")
  )
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_selectivity, ~ group | area * spike), "pairwise")

In [None]:
# compare BS to NS
# (pairwise contrast between spike types, marginal over the other factors)
contrast(emmeans(fm_selectivity, ~ spike), "pairwise")

In [None]:
options(repr.plot.width = 2.4, repr.plot.height = 1.9, repr.plot.res = 450)
# Estimated marginal probabilities with 90% intervals
p <- fm_selectivity |>
  emmeans(~ group * spike * area, type = "response") |>
  confint(level = 0.90) |>
  ggplot(aes(area, prob, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1.5) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Selectivity", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/selectivity_auditory_units.pdf", width=2.4, height=1.9)
print(p)
dev.off()

### Selectivity - activity fraction

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Activity fraction of auditory units by area and group
p <- unit_summary |>
  filter(is_auditory) |>
  ggplot(aes(area, activity_frac, fill = group)) +
  facet_wrap(~ spike) +
  geom_boxplot() +
  my.theme
p

In [None]:
## LMM
# Linear mixed model of activity fraction for auditory units with a random
# intercept per site. It is stored under its own name so that it does not
# overwrite the binomial GLMM `fm_selectivity` fitted above, which the
# selectivity plot cell reads; re-running that cell after this one would
# otherwise plot the wrong model.
fm_activity_frac <- lmer(
  activity_frac ~ group * area * spike + (1 | site),
  data = unit_summary |> filter(is_auditory)
)
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_activity_frac, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 2.6, repr.plot.height = 2.6, repr.plot.res = 450)
# Scatter of classifier score against 1 - n_discrim / 10 for auditory units.
# NOTE(review): the y axis is labelled "Selectivity" but is computed from
# n_discrim, whereas the selectivity plots elsewhere use n_responsive - confirm.
unit_summary |>
  filter(is_auditory) |>
  ggplot(aes(score, 1 - n_discrim / 10, color = group)) +
  facet_grid(area ~ spike) +
  geom_point(alpha = 0.4) +
  # stat_smooth(method="lm", linewidth=0.5) +
  scale_x_continuous("Discriminability", limits = c(0, 1)) +
  scale_y_continuous("Selectivity", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend

## Population sparseness

This is a quick and dirty way to look at sparseness, by calculating the proportion of units in a site that give significant responses to each motif.

In [None]:
# Per-stimulus responses of auditory units, highest rate first
unit_motif_responsive |>
  semi_join(filter(unit_discrim, is_auditory), by = "unit") |>
  group_by(unit) |>
  arrange(desc(rate))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# For each site, spike type and stimulus: how many auditory units were recorded
# and how many of them responded to that stimulus
site_n_responsive <- unit_motif_responsive |>
  # only auditory units?
  semi_join(filter(unit_discrim, is_auditory), by = "unit") |>
  inner_join(select(unit_spike_features, unit, spike), by = "unit") |>
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  inner_join(all_sites, by = "site") |>
  group_by(bird, group, area, spike, site, foreground) |>
  summarize(n_units = n(), n_responsive = sum(is_responsive))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 2.5, repr.plot.res = 300)
# Raw sparseness, summarized within the grouping that site_n_responsive still
# carries: 1 - mean(n_responsive) / mean(n_units)
site_n_responsive |>
  summarize(sparseness = 1 - mean(n_responsive) / mean(n_units)) |>
  ggplot(aes(area, sparseness, fill = group)) +
  facet_grid(~ spike) +
  geom_boxplot() + # stat_summary(fun.data="mean_se")
  theme_classic() +
  my.theme

In [None]:
## GLMM
# Binomial model of (non-responsive, responsive) unit counts with a random
# intercept and area slope per bird
fm_sparseness <- glmer(
  cbind(n_units - n_responsive, n_responsive) ~ group * area * spike + (1 + area | bird),
  data = site_n_responsive,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
# Pairwise group contrast within each area/spike combination
contrast(emmeans(fm_sparseness, ~ group | area * spike), "pairwise")

In [None]:
options(repr.plot.width = 1.8, repr.plot.height = 1.45, repr.plot.res = 450)
# Estimated marginal probabilities with 90% intervals
p <- fm_sparseness |>
  emmeans(~ group * spike * area, type = "response") |>
  confint(level = 0.90) |>
  ggplot(aes(area, prob, color = group)) +
  facet_wrap(~ spike) +
  geom_point(position = position_dodge(width = 0.5), size = 1) +
  geom_linerange(
    aes(ymin = asymp.LCL, ymax = asymp.UCL),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(name = "Sparseness", limits = c(0, 1)) +
  theme_classic() +
  my.theme +
  no.legend
p

In [None]:
## write the current plot object `p` to a PDF (width/height in inches)
pdf("../figures/sparseness_auditory_units.pdf", width=1.8, height=1.45)
print(p)
dev.off()

In [None]:
# Per-stimulus responses with spike type and site metadata, restricted to one
# rearing group.
# NOTE(review): the variable is called cr_wide, but the filter keeps group "PR"
# and the spike/area conditions are commented out - confirm the intended subset.
cr_wide <- unit_motif_responsive |>
  inner_join(select(unit_spike_features, unit, spike), by = "unit") |>
  mutate(site = str_match(unit, "[:alnum:]+_\\d+_\\d+")[, 1]) |>
  inner_join(all_sites, by = "site") |>
  filter(group == "PR") # , spike=="narrow", area=="L3/NCM")

In [None]:
# Per unit: the stimulus with the largest estimate, that estimate, and
# selectivity (1 - fraction of responsive stimuli). Units are then ordered and
# numbered (idx) for use as a plotting position.
cr_wide_best_stim <- cr_wide |>
  group_by(unit) |>
  summarize(
    best_stim = foreground[which.max(estimate)],
    max_resp = max(estimate),
    selectivity = 1 - mean(is_responsive)
  ) |>
  arrange(best_stim, selectivity, max_resp) |>
  # filter(selectivity < 1) |>
  mutate(idx = row_number())
cr_wide_best_stim

### Principal components

Another approach to measuring sparseness is to use principal components analysis to estimate the degree to which neural responses are correlated across stimulus conditions. The responses are organized into a conditions x neurons array. The principal components (row vectors) correspond to groups of neurons that covary. A sparse code should be characterized by a relatively flat eigenspectrum.

In [None]:
# Heat map of z-scored responses: stimuli on x, units (ordered by idx) on y
cr_wide |>
  inner_join(cr_wide_best_stim, by = "unit") |>
  ggplot(aes(foreground, idx)) +
  geom_tile(mapping = aes(fill = zscore)) +
  scale_fill_gradient2() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

In [None]:
options(repr.plot.width = 3, repr.plot.height = 3, repr.plot.res = 300)
# Spread z-scores to a units x stimuli table, drop the unit label, transpose to
# stimuli x units, and run PCA; plot() shows the component variances.
pca <- cr_wide |>
  pivot_wider(id_cols = unit, names_from = foreground, values_from = zscore) |>
  ungroup() |>
  select(-unit) |>
  as.matrix() |>
  t() |>
  prcomp()
plot(pca)

In [None]:
options(repr.plot.width = 3, repr.plot.height = 3, repr.plot.res = 300)
# Same computation as the preceding cell: PCA on the stimuli x units matrix of
# z-scores, followed by a plot of the component variances.
zscore_by_unit <- function(responses) {
  responses |>
    pivot_wider(id_cols = unit, names_from = foreground, values_from = zscore) |>
    ungroup() |>
    select(-unit) |>
    as.matrix()
}
pca <- prcomp(t(zscore_by_unit(cr_wide)))
rm(zscore_by_unit)
plot(pca)

## Noise invariance

Just seeing if there is anything worth reporting in this paper.

In [None]:
## Classifier score as a function of background level, one panel per
## spike type x area; thin lines are individual units, summaries are per group.
## NOTE(review): `unit_level_discrim` is not defined anywhere in the visible part
## of this notebook, so this cell will fail unless it is created elsewhere - confirm.
(
    unit_level_discrim
    |> inner_join(all_sites, by="site")
    |> ggplot(aes(background_dBFS, score))
    + facet_grid(spike ~ area)
    + stat_summary(aes(color=group))
    + geom_line(mapping=aes(color=group, group=unit), alpha=0.1)
    + theme_classic() + my.theme
)