# In [1]:
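# For each video, write a CSV that lists, for every frame, the number of pixels above each
# threshold set for each channel (the 0 threshold gives the total number of pixels holding image data).
# Then write a summary CSV with the maximum GFP pixel count at each threshold and the frame where
# that maximum occurs, plus the SPY pixel counts above each threshold at frames 0 and 20.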

import os
import numpy as np
import pandas as pd
from skimage import io
import time

def _count_above(channel, thresholds):
    """Count, for every frame, the pixels strictly above each threshold.

    Args:
        channel: Array whose first axis indexes frames (one channel of a video).
        thresholds: Sequence of threshold values.

    Returns:
        Integer array of shape (n_frames, n_thresholds); entry [f, i] is the
        number of pixels in frame f whose value is greater than thresholds[i].
    """
    counts = np.zeros((channel.shape[0], len(thresholds)), dtype=np.int64)
    # Work one frame at a time so only a frame-sized boolean mask is allocated.
    for frame_idx, frame in enumerate(channel):
        for th_idx, th in enumerate(thresholds):
            counts[frame_idx, th_idx] = np.count_nonzero(frame > th)
    return counts


def threshold(folder, *, gfp_thresholds=(0, 850, 900, 1000, 1100),
              spy_thresholds=(0, 350, 400, 500), spy_channel=0, gfp_channel=-2):
    """Count above-threshold pixels per frame for every video file in *folder*.

    Every regular file directly inside *folder* is read with ``io.imread`` and
    indexed as a 4-D array: the first axis is the frame and the last axis is
    the channel (the two middle axes are presumably the image rows/columns).

    Two kinds of CSV are written to ``<folder>/thresh`` (created if missing):

    * ``<video name without extension>_thresh.csv`` - one row per frame with
      the columns ``frame``, ``gfp<th>`` for each GFP threshold and
      ``spy<th>`` for each SPY threshold. With a threshold of 0 the column is
      the number of pixels that actually hold image data (value > 0).
    * ``thresh_summ.csv`` - one row per video with the maximum GFP count at
      each threshold (``maxgfp<th>``), the frame at which that maximum first
      occurs (``frame.maxgfp<th>``), and the SPY counts at frame 0
      (``t0spy<th>``) and frame 20 (``t20spy<th>``).

    Args:
        folder: Directory containing the video files.
        gfp_thresholds: Thresholds applied to the GFP channel.
        spy_thresholds: Thresholds applied to the SPY channel.
        spy_channel: Index of the SPY channel along the last axis.
        gfp_channel: Index of the GFP channel along the last axis.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Sort so the row order of the summary CSV is reproducible;
    # os.listdir returns entries in arbitrary order.
    videos = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    # Create the output folder if it doesn't already exist.
    processed_folder = os.path.join(folder, 'thresh')
    os.makedirs(processed_folder, exist_ok=True)

    # One summary row per video (max GFP counts/frames, SPY at t0 and t20).
    folder_statistics = []

    for video in videos:
        current = os.path.join(folder, video)
        print(f'Now processing: {current}')
        start_time = time.perf_counter()

        image = io.imread(current)
        spy = image[:, :, :, spy_channel]
        gfp = image[:, :, :, gfp_channel]
        n_frames = image.shape[0]

        gfp_counts = _count_above(gfp, gfp_thresholds)
        spy_counts = _count_above(spy, spy_thresholds)

        print('done counting, now writing csvs')
        # splitext copes with extensions of any length ('.tif', '.tiff', none).
        stem = os.path.splitext(video)[0]
        csv_filename = os.path.join(processed_folder, f'{stem}_thresh.csv')
        per_frame = {'frame': np.arange(n_frames)}
        per_frame.update(
            {f'gfp{th}': gfp_counts[:, i] for i, th in enumerate(gfp_thresholds)}
        )
        per_frame.update(
            {f'spy{th}': spy_counts[:, i] for i, th in enumerate(spy_thresholds)}
        )
        pd.DataFrame(per_frame).to_csv(csv_filename, index=False)

        # Max GFP count per threshold and the first frame where it occurs.
        if n_frames > 0:
            gfp_max_counts = gfp_counts.max(axis=0).tolist()
            gfp_max_frames = gfp_counts.argmax(axis=0).tolist()
        else:
            # No frames: report zero counts and -1 as a "no frame" marker.
            gfp_max_counts = [0] * len(gfp_thresholds)
            gfp_max_frames = [-1] * len(gfp_thresholds)

        # SPY counts at frames 0 and 20; zeros when the frame does not exist.
        no_spy = [0] * len(spy_thresholds)
        t0_spy_counts = spy_counts[0].tolist() if n_frames > 0 else no_spy
        t20_spy_counts = spy_counts[20].tolist() if n_frames > 20 else no_spy

        folder_statistics.append(
            [video, *gfp_max_counts, *gfp_max_frames, *t0_spy_counts, *t20_spy_counts]
        )
        elapsed = time.perf_counter() - start_time
        print(f'Time taken to analyze {video}: {elapsed:.2f} seconds')

    # Write the accumulated statistics to a summary CSV for this folder.
    # The thresholds are defined outside the loop, so this also works
    # (header only) when the folder contains no files.
    summary_csv_filename = os.path.join(processed_folder, 'thresh_summ.csv')
    summary_df = pd.DataFrame(folder_statistics, columns=[
        'source',
        *[f'maxgfp{th}' for th in gfp_thresholds],
        *[f'frame.maxgfp{th}' for th in gfp_thresholds],
        *[f't0spy{th}' for th in spy_thresholds],
        *[f't20spy{th}' for th in spy_thresholds],
    ])
    summary_df.to_csv(summary_csv_filename, index=False)
    print('Finished processing all files.')

# In [None]:
# Usage example.
#
# Channel layout: GFP is channel index -2 in every video; SPY is channel index 0 in most.
# The guard keeps the (slow, disk-writing) run from starting when this file is imported.
if __name__ == '__main__':
    folder = 'D:/0_all_whole/spynuc/d7d8/'

    threshold(folder)