# 25/10/08 Visualisation and analysis of FANS count data.

In this notebook, we will work on the long-format data frame generated in `SortStatsFromPDFSortReports.ipynb`. It combines the meta-data, recorded by hand during sample processing, with the count data obtained post-sorting. 

To do:
- explore the data with a variety of plots and summary statistics
- perform inferential analysis on the variables driving cell number differences between groups.

In [32]:
from pathlib import Path
import pandas as pd

In [33]:
# Render floats with thousands separators and two decimal places.
# This only changes how pandas displays numbers; the underlying data are not affected.
pd.set_option(
    "display.float_format",
    "{:,.2f}".format,
)

In [41]:
# Read the tidied, long-format FANS data from disk and preview the first rows
df_path = Path("tidied_FANS_data/exp383_tidy_FANS_data_long.csv")
df = pd.read_csv(filepath_or_buffer=df_path)
df.head(n=5)

Unnamed: 0,animal_id,group_no,inoculum,inoculation_batch,dpi,sample_mass_mg,date_nuc_prep,incubation_time_hrs,population,count_statistic,count_value
0,918310,1,RML,BATCH4,60,246,2025-05-06,19.0,NeuN+,EstTotalCount,4359464.0
1,917423,19,22L,BATCH3,120,207,2025-05-06,19.67,NeuN+,EstTotalCount,2913658.0
2,918309,4,CBH,BATCH4,60,240,2025-05-07,16.92,NeuN+,EstTotalCount,5017422.0
3,916462,1,RML,BATCH1,60,248,2025-05-08,18.5,NeuN+,EstTotalCount,4324274.0
4,918277,9,RML,BATCH4,90,220,2025-05-08,18.98,NeuN+,EstTotalCount,5102254.0


In [3]:
# Summary statistics of count_value for every (population, count_statistic) pair,
# ordered by statistic first and population second.
summary_funcs = ["count", "mean", "std", "min", "max"]
counts_by_population = df.groupby(["population", "count_statistic"])["count_value"]

populationStats = counts_by_population.agg(summary_funcs)
populationStats = populationStats.sort_values(by=["count_statistic", "population"])
populationStats = populationStats.reset_index()

populationStats

Unnamed: 0,population,count_statistic,count,mean,std,min,max
0,NeuN+,EstTotalCount,72,4807840,1215993.5,2165855,8113432
1,PU1+,EstTotalCount,72,228977,88067.1,99394,520370
2,SOX10+,EstTotalCount,72,945400,264829.4,453511,1497344
3,SOX2+,EstTotalCount,72,261686,82126.2,118225,447223
4,NeuN+,EstTotalCount_per_mg,72,20342,4767.4,9866,30474
5,PU1+,EstTotalCount_per_mg,72,971,371.3,427,2153
6,SOX10+,EstTotalCount_per_mg,72,3997,1056.9,2052,5964
7,SOX2+,EstTotalCount_per_mg,72,1111,359.3,483,2196


In [28]:
# Repeat the summary for the groups of interest: one row per
# (dpi, inoculum, population, count_statistic) combination.
groupStats = (
    df.groupby(["dpi", "inoculum", "population", "count_statistic"])["count_value"]
        .agg(["count", "mean", "std", "min", "max"])
        .sort_values(by=["count_statistic", "population"])
        .reset_index()
)

# Save the per-group summary to disk. The path has to be passed explicitly:
# called with no argument, to_csv() only returns the CSV text and writes nothing.
# index=False leaves out the plain 0..n-1 index created by reset_index().
csvpath = Path("tidied_FANS_data/FANS_population_counts_by_group.csv")
groupStats.to_csv(csvpath, index=False)

# One summary table per sorted population, each re-indexed from 0.
NeuNStats = groupStats[groupStats["population"] == "NeuN+"].reset_index(drop=True)
PU1Stats  = groupStats[groupStats["population"] == "PU1+"].reset_index(drop=True)
SOX10Stats= groupStats[groupStats["population"] == "SOX10+"].reset_index(drop=True)
SOX2Stats = groupStats[groupStats["population"] == "SOX2+"].reset_index(drop=True)

In [42]:
# Per-group (dpi x inoculum) summary statistics for the NeuN+ population
NeuNStats

Unnamed: 0,dpi,inoculum,population,count_statistic,count,mean,std,min,max
0,60,22L,NeuN+,EstTotalCount,6,5296955.0,1147793.42,3555483.0,6638607.0
1,60,CBH,NeuN+,EstTotalCount,6,4468945.33,1212032.46,2165855.0,5433749.0
2,60,ME7,NeuN+,EstTotalCount,6,4422387.17,865644.88,3742073.0,6081797.0
3,60,RML,NeuN+,EstTotalCount,6,4144303.83,605745.56,3027589.0,4851478.0
4,90,22L,NeuN+,EstTotalCount,6,5069785.17,966778.7,3850140.0,6308775.0
5,90,CBH,NeuN+,EstTotalCount,6,4934775.17,1099631.5,3429660.0,6020931.0
6,90,ME7,NeuN+,EstTotalCount,6,3965455.33,505209.64,3510850.0,4944121.0
7,90,RML,NeuN+,EstTotalCount,6,6186290.33,1586578.99,4110742.0,8113432.0
8,120,22L,NeuN+,EstTotalCount,6,5001261.0,1205751.89,2913658.0,6056519.0
9,120,CBH,NeuN+,EstTotalCount,6,4846132.33,1536279.51,3377948.0,7663752.0


In [43]:
# Per-group (dpi x inoculum) summary statistics for the PU1+ population
PU1Stats

Unnamed: 0,dpi,inoculum,population,count_statistic,count,mean,std,min,max
0,60,22L,PU1+,EstTotalCount,6,203900.17,66581.42,131077.0,307741.0
1,60,CBH,PU1+,EstTotalCount,6,173772.17,65429.75,113656.0,285441.0
2,60,ME7,PU1+,EstTotalCount,6,192847.5,46149.97,137344.0,272697.0
3,60,RML,PU1+,EstTotalCount,6,153160.0,28055.45,114186.0,195702.0
4,90,22L,PU1+,EstTotalCount,6,257416.83,65912.37,150372.0,318621.0
5,90,CBH,PU1+,EstTotalCount,6,161572.0,41249.66,99394.0,211080.0
6,90,ME7,PU1+,EstTotalCount,6,197203.5,62335.84,153305.0,317107.0
7,90,RML,PU1+,EstTotalCount,6,328564.33,118443.77,222295.0,520370.0
8,120,22L,PU1+,EstTotalCount,6,237568.0,79642.47,153897.0,349635.0
9,120,CBH,PU1+,EstTotalCount,6,209205.83,90847.12,107237.0,334757.0


In [44]:
# Per-group (dpi x inoculum) summary statistics for the SOX2+ population
SOX2Stats

Unnamed: 0,dpi,inoculum,population,count_statistic,count,mean,std,min,max
0,60,22L,SOX2+,EstTotalCount,6,304220.0,110174.78,157122.0,415765.0
1,60,CBH,SOX2+,EstTotalCount,6,211137.17,45997.09,152305.0,261952.0
2,60,ME7,SOX2+,EstTotalCount,6,266414.33,123406.62,162677.0,432133.0
3,60,RML,SOX2+,EstTotalCount,6,247642.5,14504.53,229649.0,273467.0
4,90,22L,SOX2+,EstTotalCount,6,248651.0,62556.31,189516.0,366102.0
5,90,CBH,SOX2+,EstTotalCount,6,304648.67,42335.23,243713.0,358158.0
6,90,ME7,SOX2+,EstTotalCount,6,275164.33,42152.19,198941.0,323672.0
7,90,RML,SOX2+,EstTotalCount,6,287987.5,57351.2,180974.0,331754.0
8,120,22L,SOX2+,EstTotalCount,6,293363.0,140616.74,122749.0,447223.0
9,120,CBH,SOX2+,EstTotalCount,6,282620.33,94536.72,172787.0,445012.0


In [45]:
# Per-group (dpi x inoculum) summary statistics for the SOX10+ population
SOX10Stats

Unnamed: 0,dpi,inoculum,population,count_statistic,count,mean,std,min,max
0,60,22L,SOX10+,EstTotalCount,6,1014694.17,350048.93,547613.0,1336045.0
1,60,CBH,SOX10+,EstTotalCount,6,622822.33,100659.08,453511.0,758600.0
2,60,ME7,SOX10+,EstTotalCount,6,911172.67,167871.66,660082.0,1138063.0
3,60,RML,SOX10+,EstTotalCount,6,788023.67,163393.5,668362.0,1108861.0
4,90,22L,SOX10+,EstTotalCount,6,945488.0,181983.65,740369.0,1224530.0
5,90,CBH,SOX10+,EstTotalCount,6,986486.17,203406.11,652327.0,1119555.0
6,90,ME7,SOX10+,EstTotalCount,6,851365.17,218523.38,607854.0,1195770.0
7,90,RML,SOX10+,EstTotalCount,6,1161268.83,241742.25,905025.0,1497344.0
8,120,22L,SOX10+,EstTotalCount,6,983508.33,312829.44,467283.0,1306084.0
9,120,CBH,SOX10+,EstTotalCount,6,1031141.67,223509.85,729342.0,1377331.0
