# Export single-cell analyses

This notebook extracts the relevant single-cell information from the z-tracks and exports it as a table (pandas DataFrame, saved as CSV/Excel) for others to interrogate.

In [2]:
import fnmatch
import glob
import os
import warnings

import btrack
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from homuncu_loc import dataio

### Find the H5 files containing single-cell tracks

In [6]:
# Point this at the directory where the previous notebook saved the final tracks.
# From memory it is probably 'Analysis/homuncu_loc/segmentation_and_tracking/temp_mask_dir/',
# so double-check the actual location before running.
h5_fns = dataio.find_h5_files(
    '/path/to/directory/with/h5/files'
)
h5_fns

[]

##### Optionally filter h5 filenames to only include a subset

In [23]:
# Keep only the files whose path contains at least one of the listed identifiers.
# Alternative filter: [fn for fn in h5_fns if fn.endswith('sc_data.h5') and 'run1' in fn]
h5_fns = list(
    filter(
        lambda path: any(tag in path for tag in ['194', '195', '196']),
        h5_fns,
    )
)
h5_fns

### Load tracks

In [28]:
# Column headers of the exported single-cell table, in output order:
# the cell identifier, one maximum-intensity column per image channel,
# the Mtb measurements, and the average position of the cell.
cols = (
    ['Cell ID']
    + [f'Max. int. ch{channel}' for channel in range(4)]
    + ['Mtb+', 'N_frames Mtb+', 'Max. Mtb area']
    + ['Average X', 'Average Y']
)

In [None]:
def _track_measurements(track):
    """Summarise one track as a ``{column name: value}`` record.

    Parameters
    ----------
    track
        A track read from the HDF5 file. It must expose ``ID``, ``x``, ``y``
        and a ``'mean_intensity'`` entry that is indexed as
        ``[:, channel]`` (assumed to be frames x channels -- TODO confirm).

    Returns
    -------
    dict
        The cell ID, the NaN-ignoring maximum of the mean intensity for every
        channel present, and the mean x / y position of the track.
    """
    intensity = np.asarray(track['mean_intensity'])
    record = {'Cell ID': track.ID}
    # One column per channel actually present, instead of hard-coding 0-3,
    # so files with fewer channels no longer raise an IndexError.
    for channel in range(intensity.shape[1]):
        record[f'Max. int. ch{channel}'] = np.nanmax(intensity[:, channel])
    # The Mtb measurements are currently disabled; re-enable them here once
    # the tracks carry these properties:
    # record['Mtb+'] = np.nansum(track.properties['mtb_status']) > 0
    # record['N_frames Mtb+'] = np.nansum(track.properties['mtb_status'])
    # record['Max. Mtb area'] = np.nanmax(track.properties['mtb_area'])
    record['Average X'] = np.mean(track.x)
    record['Average Y'] = np.mean(track.y)
    return record


# Export one CSV of single-cell measurements per tracks file.
# Running total of exported cells (not called `sum`, which would shadow the builtin).
n_cells_total = 0
# Files whose tracks could not be loaded; these are skipped, not exported.
error_fns = []
for tracks_fn in tqdm(h5_fns):
    with btrack.io.HDF5FileHandler(tracks_fn, 'r') as hdf:
        try:
            # only keep tracks longer than 3 entries
            tracks = [t for t in hdf.tracks if len(t) > 3]
        except Exception as err:
            error_fns.append(tracks_fn)
            warnings.warn(f"Skipping {tracks_fn}: could not load tracks ({err!r})")
            # Skip the export: otherwise `tracks` would be undefined, or still
            # hold the previous file's tracks and be saved under this file's name.
            continue
    # Records are keyed by column name, so `columns=cols` selects and orders the
    # columns by name. Any name in `cols` without a measurement (e.g. the
    # disabled Mtb columns) is left empty rather than raising a ValueError.
    df = pd.DataFrame([_track_measurements(track) for track in tracks], columns=cols)
    # Swap only the file extension for the CSV suffix. The result always ends in
    # 'sc_data.csv', so it can never be the tracks file itself.
    output_fn = os.path.splitext(tracks_fn)[0] + 'sc_data.csv'
    # save out
    df.to_csv(output_fn)
    # check on the fly
    print(tracks_fn, '\n', df.head())
    # how many single-cell measurements so far
    n_cells_total += len(df)

print(f'Exported {n_cells_total} single-cell measurements; '
      f'{len(error_fns)} file(s) could not be read.')