# Export single-cell analyses

This notebook is designed to extract the relevant single-cell information from the z-tracks and plot as a pandas dataframe/csv/excel format for others to interrogate.

In [10]:
import btrack
import pandas as pd
import numpy as np
import glob
import os
import fnmatch
from homuncu_loc import dataio
from tqdm.notebook import tqdm
import warnings



### Find files

In [19]:
h5_fns = dataio.find_h5_files('/home/dayn/data/homuncu_loc_temp/results/')#'/mnt/DATA/homuncu_loc/results')

In [21]:
h5_fns = dataio.find_h5_files('/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/')

### Filter h5 filenames to only include a subset

In [23]:
h5_fns =  [fn for fn in h5_fns if any(substring in fn for substring in ['194', '195', '196'])]#[fn for fn in h5_fns if fn.endswith('sc_data.h5') and 'run1' in fn]

In [24]:
h5_fns

['/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_196_z_tracks_masks.h5',
 '/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_195_z_tracks_masks.h5',
 '/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_194_z_tracks_masks.h5']

In [50]:
# pick subset?
h5_fns = ['/home/dayn/data/homuncu_loc_temp/results/image analysis_Nathan/Job_Mtb area/run1_23-01-001_23-01-005/48h pi/20230705_40X_23-01-005A3_Multichannel Z-Stack_20230705_1311_sc_data.h5']

### Load tracks

In [28]:
### define parameters to plot in table
cols = ['Cell ID', 
        f'Max. int. ch0', 
        f'Max. int. ch1', 
        f'Max. int. ch2', 
        f'Max. int. ch3', 
        # 'Mtb+', 
        # 'N_frames Mtb+', 
        # 'Max Mtb area', 
        'Average X', 
        'Average Y'
        ]

In [32]:
sum = 0
error_fns = list()
for tracks_fn in tqdm(h5_fns):
    with btrack.io.HDF5FileHandler(tracks_fn, 'r') as hdf:
        try:
            tracks = [t for t in hdf.tracks if len(t) > 3]
        except:
            error_fns.append(tracks_fn)
    # now plot into dataframe 
    df = pd.DataFrame([(track.ID, 
                        np.nanmax(track['mean_intensity'][:,0]),
                        np.nanmax(track['mean_intensity'][:,1]),
                        np.nanmax(track['mean_intensity'][:,2]),
                        np.nanmax(track['mean_intensity'][:,3]), 
                        # True if np.nansum(track.properties['mtb_status']) > 0 else False, 
                        # np.nansum(track.properties['mtb_status']), 
                        # np.nanmax(track.properties['mtb_area']), 
                        np.mean(track.x), 
                        np.mean(track.y)) 
                       for track in tracks], 
                      columns=cols)
    # create fn
    output_fn = tracks_fn.replace('.h5', 'sc_data_subset.csv')
    if output_fn == tracks_fn:
        warnings.warn("Warning: The output file name is the same as the tracks file name. This will result in overwriting the tracks file.")
    # save out
    df.to_csv(output_fn)
    # check on the fly 
    print(tracks_fn, '\n', df.head())
    # how many sc measreuments 
    sum += len(df)

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO][2023/08/31 05:37:22 PM] Opening HDF file: /mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_196_z_tracks_masks.h5...


OSError: Unable to open file (file signature not found)

In [30]:
tracks_fn

'/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_194_z_tracks_masks.h5'

In [31]:
output_fn

'/mnt/DATA/homuncu_loc/run0/DAPI-SPC-PDPN-ZO1/Day7_breath/20x_21-12-029A_A3456_Multichannel Z-Stack_20220818_194_z_tracks_masks.h5'

In [21]:
error_fns

[]

In [37]:
df.head()

Unnamed: 0,Cell ID,Max. int. ch0,Max. int. ch1,Max. int. ch2,Max. int. ch3,Mtb+,N_frames Mtb+,Max Mtb area,Average X,Average Y
0,57,727.278381,203.123306,291.497131,115.380356,False,0.0,0.0,785.089651,1235.473489
1,64,288.051758,121.796593,156.532761,113.914146,False,0.0,0.0,1252.164185,1325.526123
2,63,982.594971,178.015472,351.196564,117.629204,False,0.0,0.0,620.731316,1356.047065
3,60,1510.125366,222.231125,521.146973,129.199387,False,0.0,0.0,978.83844,1247.325787
4,58,211.873657,118.079422,155.936829,110.143578,False,0.0,0.0,773.87339,1225.832886


In [40]:
df


Unnamed: 0,Cell ID,Max. int. ch0,Max. int. ch1,Max. int. ch2,Max. int. ch3,Mtb+,N_frames Mtb+,Max Mtb area,Average X,Average Y
0,57,727.278381,203.123306,291.497131,115.380356,False,0.0,0.0,785.089651,1235.473489
1,64,288.051758,121.796593,156.532761,113.914146,False,0.0,0.0,1252.164185,1325.526123
2,63,982.594971,178.015472,351.196564,117.629204,False,0.0,0.0,620.731316,1356.047065
3,60,1510.125366,222.231125,521.146973,129.199387,False,0.0,0.0,978.838440,1247.325787
4,58,211.873657,118.079422,155.936829,110.143578,False,0.0,0.0,773.873390,1225.832886
...,...,...,...,...,...,...,...,...,...,...
893,2763,534.337769,159.370544,253.306107,113.354431,False,0.0,0.0,717.612048,1429.155811
894,2761,529.067627,200.244675,253.753616,117.835777,False,0.0,0.0,1128.260376,1456.089502
895,2796,500.577271,139.696594,196.759094,111.697731,False,0.0,0.0,861.499182,1542.807776
896,2797,461.022430,199.231262,445.917786,110.722771,False,0.0,0.0,2094.540632,1916.780622
