# Saving out SC info for Baptiste

In [1]:
import napari
from macrohet import dataio, tile, tools, visualise, colours
import os, glob
from tqdm.auto import tqdm
from skimage import io
import numpy as np
from macrohet.notify import send_sys_message as notify
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import zarr
import csv

# Colour palette taken from macrohet's `colours` module.
palette = colours.expanded_piyg

# A single theme call is enough: `sns.set` is only an alias of `sns.set_theme`,
# and every call re-applies the whole theme, so a preceding style-only call
# would simply be overridden by this one.
sns.set_theme(style='white', font='Nimbus Sans')

def remove_outliers_iqr(series):
    """Return the entries of ``series`` that lie inside the 1.5 x IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both inclusive),
    where ``Q1``/``Q3`` are the 25th/75th percentiles of ``series`` and
    ``IQR = Q3 - Q1``. Surviving entries keep their original index labels.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to filter.

    Returns
    -------
    pandas.Series
        The subset of ``series`` falling within the fences.
    """
    lower_quartile, upper_quartile = series.quantile(0.25), series.quantile(0.75)
    fence_width = 1.5 * (upper_quartile - lower_quartile)
    # `between` is inclusive at both ends by default, matching >= / <=.
    within_fences = series.between(lower_quartile - fence_width,
                                   upper_quartile + fence_width)
    return series[within_fences]


In [9]:
# Load the pickled DataFrame from the results share.
# NOTE(review): unpickling can execute arbitrary code, so only read pickles
# that come from a trusted source.
sc_df_path = '/mnt/SYNO/macrohet_syno/results/dfs/sc_df.pkl'
df = pd.read_pickle(sc_df_path)

In [10]:
# Preview the loaded frame (the notebook renders a truncated view of rows and columns).
df


Unnamed: 0,ID,Time (hours),Mtb Area (µm),dMtb Area (µm),Mphi Area (µm),dMphi Area (µm),Infection Status,Initial Infection Status,Final Infection Status,x,...,Edge Status,Uptake,dMtb Area between frames (µm),Mtb Area Processed (µm),Time Model (hours),Mtb Area Model (µm),mtb_origin,Doubling Amounts,Doubling Times,r2
138,1.3.2.ND0002,0.0,0.000000,0.000000,4649.351562,2080.574775,,False,False,107.845200,...,False,False,,,,,,,,1.0
139,1.3.2.ND0002,0.5,0.000000,0.000000,4973.001953,2080.574775,,False,False,113.264671,...,False,False,0.000000,,,,,,,1.0
140,1.3.2.ND0002,1.0,0.000000,0.000000,4687.857910,2080.574775,,False,False,116.562256,...,False,False,0.000000,,,,,,,1.0
141,1.3.2.ND0002,1.5,0.000000,0.000000,4209.064453,2080.574775,,False,False,111.890106,...,False,False,0.000000,,,,,,,1.0
142,1.3.2.ND0002,2.0,0.000000,0.000000,4061.855225,2080.574775,False,False,False,114.113556,...,False,False,0.000000,,0.0,0.0,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1312257,998.5.5.ND0003,48.0,0.000000,-0.111742,813.037051,-539.871799,,False,False,868.403076,...,False,False,0.000000,0.0,,,,,,0.78
1312258,998.5.5.ND0003,48.5,0.000000,-0.111742,745.745831,-539.871799,,False,False,868.194641,...,False,False,0.000000,0.0,,,,,,0.78
1312259,998.5.5.ND0003,49.0,0.000000,-0.111742,668.219016,-539.871799,,False,False,872.108582,...,False,False,0.000000,0.0,,,,,,0.78
1312260,998.5.5.ND0003,49.5,0.000000,-0.111742,662.609552,-539.871799,,False,False,871.521606,...,False,False,0.000000,0.0,,,,,,0.78


In [11]:
# Sorted list of the distinct acquisition IDs recorded for experiment ND0003.
sorted(df.loc[df['Experiment ID'] == 'ND0003', 'Acquisition ID'].unique())

[(3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10)]

In [39]:
# Columns written to every per-experiment CSV (loop-invariant, so defined once).
columns_to_save = ['Mtb Area (µm)', 'ID', 'Acquisition ID', 'Compound', 'Concentration', 'Strain', 'mtb_origin', 'r2', 'Doubling Times', 'Doubling Amounts']

# Write one CSV per experiment holding the final-frame measurement of each cell.
for expt_ID in ['PS0000', 'ND0002', 'ND0003']:

    subset_df = df[df['Experiment ID'] == expt_ID]

    # Final time point of each acquisition, broadcast back onto that acquisition's rows.
    # Using the experiment-wide maximum instead silently drops every acquisition whose
    # imaging stopped earlier than the longest one (e.g. ND0003 (5, 3) ends at 74.5 h).
    final_time_per_acquisition = subset_df.groupby('Acquisition ID')['Time (hours)'].transform('max')

    # ensure that only the final frame measurements are included
    subset_df = subset_df[subset_df['Time (hours)'] == final_time_per_acquisition]

    # guard against repeated rows so that each cell ID is exported exactly once
    subset_df = subset_df.drop_duplicates(subset='ID', keep='last')

    # Save the subset DataFrame to a CSV file, including the index
    subset_df.to_csv(f'/mnt/SYNO/macrohet_syno/results/final_frame_mtb_values_csvs/{expt_ID}_final_frame_mtb_values.csv', columns=columns_to_save, index=True)

### Troubleshooting ND0003: acquisitions (5, 3) to (5, 7) drop out of the final-frame subset

In [21]:
# Experiment inspected by the troubleshooting cells in this section.
expt_ID = 'ND0003'

In [19]:
# Restrict the full frame to the rows of the experiment named by `expt_ID`.
subset_df = df.loc[df['Experiment ID'] == expt_ID]

In [20]:
# Latest recorded time point for acquisition (5, 3) of experiment ND0003.
df.loc[(df['Experiment ID'] == 'ND0003') & (df['Acquisition ID'] ==(5, 3)), 'Time (hours)'].max()

74.5

In [25]:
# Distinct acquisition IDs present in the experiment subset, in sorted order.
sorted(subset_df['Acquisition ID'].drop_duplicates())

[(3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10)]

In [26]:
# Keep a single row per cell ID: the last occurrence in the current row order.
subset_df = subset_df[~subset_df.duplicated(subset='ID', keep='last')]

In [27]:
# Distinct acquisition IDs still present once each cell ID is reduced to one row.
sorted(subset_df['Acquisition ID'].drop_duplicates())

[(3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10)]

In [28]:
# Keep only the rows recorded at the latest time point found in the subset.
# NOTE(review): this maximum is taken over the whole subset, not per acquisition.
latest_time = subset_df['Time (hours)'].max()
subset_df = subset_df[subset_df['Time (hours)'] == latest_time]


In [29]:
# Distinct acquisition IDs that survive the latest-time-point filter.
sorted(subset_df['Acquisition ID'].drop_duplicates())

[(3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10)]