In [7]:
from pathlib import Path
import pandas as pd

In [8]:
# Quick aside: estimating RNA yields.

# Load the tidied, long-format FANS data.
# (A bare `df.head()` in the middle of a cell is never rendered -- Jupyter only
# displays a cell's final expression -- so the preview call was dropped.)
df_path = Path("tidied_FANS_data/exp383_tidy_FANS_data_long.csv")
df = pd.read_csv(df_path)

# Fail early, with a readable message, if the file lacks a column that the
# summary below relies on.
_required_columns = {"population", "count_statistic", "count_value"}
_missing_columns = _required_columns - set(df.columns)
assert not _missing_columns, (
    f"{df_path} is missing expected columns: {sorted(_missing_columns)}"
)

# Summary statistics of `count_value` for every (population, count_statistic)
# pair, ordered by statistic and then by population, with the group keys
# restored as ordinary columns.
populationStats = (
    df.groupby(["population", "count_statistic"])["count_value"]
    .agg(["count", "mean", "std", "min", "max"])
    .sort_values(by=["count_statistic", "population"])
    .reset_index()
)

# CAVEAT: this analysis doesn't make sense as it stands. The nuclear RNA
# proportion used here is a proportion of all transcripts rather than a
# proportion by mass, so it cannot properly be applied to a per-cell RNA mass.
#
# Inputs to the estimate:
#   * a table in a review article with estimated picogram quantities of RNA in
#     eukaryotic cells;
#   * a comparison of single-cell and single-nucleus transcriptomes, which found
#     that 20% of RNA is nuclear.
# We can work out some estimated yields using this info.
#
# NOTE(review): both values below are halved (0.2 -> 0.1 and 23.9e-3 -> 0.01195)
# and the reason is not recorded anywhere in this cell. The comment above says
# 20% nuclear, but the code uses 10% -- confirm the halving is intentional.
nucRNAprop = 0.2 / 2
estRNAperCell_ng = 23.9e-3 / 2
estRNAperNuc_ng = estRNAperCell_ng * nucRNAprop

# populationStats has one row per (population, count_statistic) pair. Use only
# the rows of the statistic that appears first in the table, and read the
# population labels and the means from those SAME rows so they cannot get out of
# step. This replaces a hard-coded `[0:4]` slice that silently assumed exactly
# four populations.
# NOTE(review): presumably the first statistic is the estimated total count
# (the output column is named "meanEstTotalCount") -- confirm.
statNames = populationStats["count_statistic"].drop_duplicates()
yieldRows = populationStats[populationStats["count_statistic"].isin(statNames.head(1))]

# Make new df values.
pops = yieldRows["population"].tolist()
means = yieldRows["mean"].tolist()
ngs = [meanCount * estRNAperNuc_ng for meanCount in means]

estRNAYields = pd.DataFrame(
    {
        "population": pops,
        "meanEstTotalCount": means,
        "EstRNA_yield_ng": ngs,
    }
)

# `.style` builds a separate Styler object; formatting it does not change
# estRNAYields itself. The Styler therefore has to be the cell's final
# expression, otherwise the formatted table is thrown away and never shown.
estRNAYields.style.format(
    {
        "meanEstTotalCount": "{:,.0f}",
        "EstRNA_yield_ng": "{:,.1f}",
    }
)

Unnamed: 0,population,meanEstTotalCount,EstRNA_yield_ng
0,NeuN+,4807840.0,5745.368999
1,PU1+,228977.4,273.627996
2,SOX10+,945400.5,1129.753581
3,SOX2+,261685.8,312.714571
