In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import sprawl
from sprawl import plotting, utils

In [2]:
# Timp3 upstream vs. downstream peaks, L6_IT vs. Sst: Fisher's exact test on a 2x2 count table
bam_path = '../../inputs/BICCN_10Xv3_scRNAseq.bam'
upstream_timp3 = ('chr10',86346857,86347517)
dwstream_timp3 = ('chr10',86349055,86349469)

# Records returned for each peak region, stratified by the BAM 'XO' tag.
# NOTE(review): 'XO' presumably carries the cell-type label surfaced in the 'strat' column -- confirm in sprawl.utils.
upstream_counts = utils.bam_read_positions(bam_path, upstream_timp3, stratify_tag='XO')
dwstream_counts = utils.bam_read_positions(bam_path, dwstream_timp3, stratify_tag='XO')

# Number of distinct strata observed in BOTH peaks; sizes the multiple-testing correction below
n = len(set(upstream_counts['strat']).intersection(dwstream_counts['strat']))
n_pairs = (n*(n-1))/2  # number of unordered pairs among the n shared strata

upstream_L6_IT = upstream_counts['strat'].eq('L6_IT').sum()
upstream_Sst = upstream_counts['strat'].eq('Sst').sum()
dwstream_L6_IT = dwstream_counts['strat'].eq('L6_IT').sum()
dwstream_Sst = dwstream_counts['strat'].eq('Sst').sum()

# Rows: upstream / downstream peak; columns: L6_IT / Sst
table = np.array([[upstream_L6_IT, upstream_Sst],[dwstream_L6_IT, dwstream_Sst]])
display(table)

oddsratio,p = scipy.stats.fisher_exact(table, alternative='two-sided')

# Bonferroni: scale by the number of pairwise comparisons, capped at 1 so the result stays a valid probability.
# For very extreme tables p can underflow to 0.0, in which case the corrected value also prints as 0.0.
p_bonferroni = min(p*n_pairs, 1.0)
print(f'Bonferroni corrected p with {n} celltypes is {p_bonferroni}')
print(f'Oddsratio is {oddsratio}')

array([[3222,  513],
       [  55, 2052]])

Bonferroni corrected p with 18 celltypes is 0.0
Oddsratio is 234.3272727272727


In [3]:
# Asic4 upstream vs. downstream peaks, L6_IT vs. L5_IT: Fisher's exact test on a 2x2 count table
bam_path = '../../inputs/BICCN_10Xv3_scRNAseq.bam'
upstream_asic4 = ('chr1',75473924,75474112)
dwstream_asic4 = ('chr1',75474185,75474317)

# Records returned for each peak region, stratified by the BAM 'XO' tag.
# NOTE(review): 'XO' presumably carries the cell-type label surfaced in the 'strat' column -- confirm in sprawl.utils.
upstream_counts = utils.bam_read_positions(bam_path, upstream_asic4, stratify_tag='XO')
dwstream_counts = utils.bam_read_positions(bam_path, dwstream_asic4, stratify_tag='XO')

# Number of distinct strata observed in BOTH peaks; sizes the multiple-testing correction below
n = len(set(upstream_counts['strat']).intersection(dwstream_counts['strat']))
n_pairs = (n*(n-1))/2  # number of unordered pairs among the n shared strata

upstream_L6_IT = upstream_counts['strat'].eq('L6_IT').sum()
upstream_L5_IT = upstream_counts['strat'].eq('L5_IT').sum()
dwstream_L6_IT = dwstream_counts['strat'].eq('L6_IT').sum()
dwstream_L5_IT = dwstream_counts['strat'].eq('L5_IT').sum()

# Rows: upstream / downstream peak; columns: L6_IT / L5_IT
table = np.array([[upstream_L6_IT, upstream_L5_IT],[dwstream_L6_IT, dwstream_L5_IT]])
display(table)

oddsratio,p = scipy.stats.fisher_exact(table, alternative='two-sided')

# Bonferroni: scale by the number of pairwise comparisons, capped at 1 so the result stays a valid probability
p_bonferroni = min(p*n_pairs, 1.0)
print(f'Bonferroni corrected p with {n} celltypes is {p_bonferroni}')
print(f'Oddsratio is {oddsratio}')

array([[494,  63],
       [172, 126]])

Bonferroni corrected p with 14 celltypes is 3.165839166624685e-22
Oddsratio is 5.744186046511628


In [4]:
# Slc32a1 upstream vs. downstream peaks, Lamp5 vs. L23_IT: Fisher's exact test on a 2x2 count table
bam_path = '../../inputs/BICCN_10Xv3_scRNAseq.bam'
upstream_slc32a1 = ('chr2',158615282,158615519)
dwstream_slc32a1 = ('chr2',158615527,158615746)

# Records returned for each peak region, stratified by the BAM 'XO' tag.
# NOTE(review): 'XO' presumably carries the cell-type label surfaced in the 'strat' column -- confirm in sprawl.utils.
upstream_counts = utils.bam_read_positions(bam_path, upstream_slc32a1, stratify_tag='XO')
dwstream_counts = utils.bam_read_positions(bam_path, dwstream_slc32a1, stratify_tag='XO')

# Number of distinct strata observed in BOTH peaks; sizes the multiple-testing correction below
n = len(set(upstream_counts['strat']).intersection(dwstream_counts['strat']))
n_pairs = (n*(n-1))/2  # number of unordered pairs among the n shared strata

# Variables are named after the stratum they actually count (Lamp5, not L6_IT)
upstream_Lamp5 = upstream_counts['strat'].eq('Lamp5').sum()
upstream_L23_IT = upstream_counts['strat'].eq('L23_IT').sum()
dwstream_Lamp5 = dwstream_counts['strat'].eq('Lamp5').sum()
dwstream_L23_IT = dwstream_counts['strat'].eq('L23_IT').sum()

# Rows: upstream / downstream peak; columns: Lamp5 / L23_IT
table = np.array([[upstream_Lamp5, upstream_L23_IT],[dwstream_Lamp5, dwstream_L23_IT]])
display(table)

oddsratio,p = scipy.stats.fisher_exact(table, alternative='two-sided')

# Bonferroni: scale by the number of pairwise comparisons, capped at 1 so the result stays a valid probability
p_bonferroni = min(p*n_pairs, 1.0)
print(f'Bonferroni corrected p with {n} celltypes is {p_bonferroni}')
print(f'Oddsratio is {oddsratio}')

array([[10520,   159],
       [ 4442,   249]])

Bonferroni corrected p with 19 celltypes is 1.0794087961860416e-35
Oddsratio is 3.708851188908617


In [5]:
# Nr2f2 upstream vs. downstream peaks, L23_IT vs. Sst: Fisher's exact test on a 2x2 count table
bam_path = '../../inputs/BICCN_10Xv3_scRNAseq.bam'
upstream_nr2f2 = ('chr7',70351873,70352624)
dwstream_nr2f2 = ('chr7',70353312,70354030)

# Records returned for each peak region, stratified by the BAM 'XO' tag.
# NOTE(review): 'XO' presumably carries the cell-type label surfaced in the 'strat' column -- confirm in sprawl.utils.
upstream_counts = utils.bam_read_positions(bam_path, upstream_nr2f2, stratify_tag='XO')
dwstream_counts = utils.bam_read_positions(bam_path, dwstream_nr2f2, stratify_tag='XO')

# Number of distinct strata observed in BOTH peaks; sizes the multiple-testing correction below
n = len(set(upstream_counts['strat']).intersection(dwstream_counts['strat']))
n_pairs = (n*(n-1))/2  # number of unordered pairs among the n shared strata

upstream_L23_IT = upstream_counts['strat'].eq('L23_IT').sum()
upstream_Sst = upstream_counts['strat'].eq('Sst').sum()
dwstream_L23_IT = dwstream_counts['strat'].eq('L23_IT').sum()
dwstream_Sst = dwstream_counts['strat'].eq('Sst').sum()

# Rows: upstream / downstream peak; columns: L23_IT / Sst
table = np.array([[upstream_L23_IT, upstream_Sst],[dwstream_L23_IT, dwstream_Sst]])
display(table)

oddsratio,p = scipy.stats.fisher_exact(table, alternative='two-sided')

# Bonferroni: scale by the number of pairwise comparisons, capped at 1 so the result stays a valid probability
p_bonferroni = min(p*n_pairs, 1.0)
print(f'Bonferroni corrected p with {n} celltypes is {p_bonferroni}')
print(f'Oddsratio is {oddsratio}')

array([[  84,  485],
       [ 164, 2787]])

Bonferroni corrected p with 19 celltypes is 2.1605193034177367e-10
Oddsratio is 2.9432738244908223


# Spots per cell from the two CZB kidney datasets

In [6]:
# Estimate the mean number of spots per cell, and how many cells reach a spot-count
# threshold, from (at most) the first `num_cells` cells of the kidney sample.
sample = sprawl.HDF5('../../preprocessing/CZB_kidneyliver_preprocessing/finished_outputs/kidney_111921.hdf5')

num_cells = 1000       # maximum number of cells to inspect
count_threshold = 200  # a cell is counted in num_thresh_cells when it has at least this many spots

spot_counts = 0        # running total of spots over the inspected cells
num_thresh_cells = 0   # inspected cells with >= count_threshold spots
cells_seen = 0         # cells actually yielded; may be fewer than num_cells

# range() comes first in zip() so iteration stops after num_cells cells
# without pulling an extra cell from the iterator.
for _,cell in zip(range(num_cells),sample.iter_cells()):
    cell_spot_counts = sum(cell.gene_counts.values())
    spot_counts += cell_spot_counts
    cells_seen += 1

    if cell_spot_counts >= count_threshold:
        num_thresh_cells += 1

# Divide by the number of cells actually visited, not the requested cap, so the
# mean is not biased low when the sample holds fewer than num_cells cells.
avg_spots_per_cell = spot_counts/cells_seen if cells_seen else float('nan')

print('Average spots per cell',avg_spots_per_cell)
print(num_thresh_cells)

Average spots per cell 99.825
129
