In [20]:
import os
# Remember the directory the notebook was launched from; the configuration
# cell derives the data location from it.
current_directory = os.getcwd()
print("Current Working Directory: " + current_directory)

from Tusc5ImageUtils import *
import numpy as np
from urllib.parse import urlparse
import matplotlib.pyplot as plt
import matplotlib as mpl
from skimage import exposure
mpl.rcParams['figure.dpi'] = 300
from cellpose import utils, io, plot, models, denoise
from scipy.ndimage import binary_erosion, binary_fill_holes, center_of_mass
from scipy.signal import find_peaks
import subprocess
import pandas as pd
import seaborn as sns
import ast
import csv
import re
import nd2
from skimage.measure import regionprops
from datetime import datetime

Current Working Directory: x:\Lab Analysis PC\ImageAnalysis


In [21]:
### Directory Configuration ###

# Name of the folder holding the .nd2 files. The folder is looked up inside
# the parent directory of this notebook's working directory.
target_folder_name = '080524_081824_combined'
parent_directory = os.path.dirname(current_directory)

# Cell Fluorescence Retrieval

In [22]:
# .nd2 files whose z-stack is stored flipped (top and bottom swapped).
# The processing loops reverse axis 0 of the image for every file named here.
backward_files = [
    '3604R_GLUT1_WGA_0001.nd2',
    '3604R_GLUT1_WGA_0003.nd2',
    '3654L_GLUT1_WGA_0001.nd2',
    '3654L_GLUT1_WGA_0003.nd2',
]

## 1. Set the min and max for the DAPI projections

In [23]:
# Set the min and max z-slice used for each file's DAPI max projection.
#
# Presets are recorded in file_mp_configurations.csv. If a file has several
# rows, the most recent (last) row is the one that is used.

if not parent_directory or not target_folder_name:
    raise ValueError('Make sure to input target folder name and parent directory.')

csv_file_path = 'file_mp_configurations.csv'
mp_configs_df = pd.read_csv(csv_file_path)


target_folder_path = os.path.join(parent_directory, target_folder_name)
nd2_files = [f for f in os.listdir(target_folder_path) if f.endswith('.nd2')]

# Number of rows appended to the CSV during this run. The trace-extraction
# cell further down also reads this name, so it stays defined here.
counter = 0

for nd2_file in nd2_files:

    print(f'Processing {nd2_file}')

    # Files that already have a preset need no work in this cell, so the
    # image stack is not loaded for them at all.
    if nd2_file in mp_configs_df['file_name'].values:
        file_mp_df = mp_configs_df.loc[mp_configs_df['file_name'] == nd2_file].iloc[-1]
        z0, z1 = file_mp_df['z0'], file_mp_df['z1']
        continue

    # 1) Download Image Data
    nd2_path = os.path.join(target_folder_path, nd2_file)

    # The context manager closes the file even if reading or conversion fails.
    with nd2.ND2File(nd2_path) as nd2_handle:
        z_sep = nd2_handle.voxel_size().z
        image = to_8bit(nd2_handle.asarray())

    # Flip image if image is backwards
    if nd2_file in backward_files:
        image = np.flip(image, axis=0)

    # 2) Choose the projection range on the DAPI channel (channel index 0)
    DAPI_stack = image[:, 0, :, :].copy()
    z0, z1 = run_max_projector_app(DAPI_stack)

    # Row to record: file_name, z0, z1, timestamp
    mp_config = [nd2_file, z0, z1, datetime.now().strftime("%H:%M_%d_%m_%Y")]

    # Only insert a separator when the file's last line is not terminated;
    # otherwise the new row would be glued onto it. Writing one
    # unconditionally leaves stray blank lines in the CSV.
    with open(csv_file_path, mode='rb') as existing:
        existing.seek(0, os.SEEK_END)
        if existing.tell() > 0:
            existing.seek(-1, os.SEEK_END)
            needs_separator = existing.read(1) not in (b'\n', b'\r')
        else:
            needs_separator = False

    with open(csv_file_path, mode='a', newline='') as csv_handle:
        if needs_separator:
            csv_handle.write('\n')
        csv.writer(csv_handle).writerow(mp_config)

    counter += 1

Processing 3420L_GLUT1_647_WGA_594_0001.nd2
Processing 3420L_GLUT1_647_WGA_594_0002.nd2
Processing 3420L_GLUT1_647_WGA_594_0003.nd2
Processing 3420RA_GLUT1_647_WGA_594_0001.nd2
Processing 3420RA_GLUT1_647_WGA_594_0002.nd2
Processing 3420RA_GLUT1_647_WGA_594_0003.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_004.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3529RC_GLUT1_647_WGA_594_0001.nd2
Processing 3529RD_GLUT1_647_WGA_594_0001.nd2
Processing 3529RD_GLUT1_647_WGA_594_0002.nd2
Processing 3529RD_GLUT1_647_WGA_594_0003.nd2
Processing

In [24]:
# Rip detection: for every .nd2 file not yet listed in rip_df.csv, segment the
# DAPI max projection and record which nuclei masks rip_identifier reports as
# lying in a rip. Results are written back to rip_df.csv after each file.

if not parent_directory or not target_folder_name:
    raise ValueError('Make sure to input target folder name and parent directory.')

skip_rip_detection = False # NEW (True or False)

target_folder_path = os.path.join(parent_directory, target_folder_name)
nd2_files = [f for f in os.listdir(target_folder_path) if f.endswith('.nd2')]

model_path_dapi = os.path.join(parent_directory, 'ImageAnalysis/cellpose_models/T5_DAPI_V4')
deblur_model = denoise.CellposeDenoiseModel(gpu=True, model_type= model_path_dapi, restore_type="deblur_cyto3")

rip_df = pd.read_csv('rip_df.csv')

for nd2_file in nd2_files:

    # rip_df rows are keyed by the name without its extension (that is what
    # the skip branch below stores), so the bare name has to be checked too.
    # Comparing only the '.nd2' name never matches and reprocesses every file.
    file_stem = nd2_file.split('.')[0]
    processed_names = rip_df['file_name'].values
    if nd2_file in processed_names or file_stem in processed_names:
        print(f'{nd2_file} already processed, skipping.')
        continue

    if skip_rip_detection:
        # Record the file with an empty mask list so it counts as processed.
        file_name = file_stem
        masks_in_rip = []
        new_row = pd.DataFrame({'file_name': [file_name], 'masks_in_rip': [masks_in_rip]})
        rip_df = pd.concat([rip_df, new_row], ignore_index=True)
        rip_df.to_csv('rip_df.csv', index=False)

        print('Skipping rip detection... ensure this is on purpose.')

        continue

    # If not present, proceed with processing
    # 1) Download Image Data
    print(f'Processing {nd2_file}')
    nd2_path = os.path.join(target_folder_path, nd2_file)

    # The context manager closes the file even if reading or conversion fails.
    with nd2.ND2File(nd2_path) as nd2_handle:
        z_sep = nd2_handle.voxel_size().z
        image = to_8bit(nd2_handle.asarray())

    # Flip image if image is backwards
    if nd2_file in backward_files:
        image = np.flip(image, axis=0)

    # 2) DAPI max projection, Deblur, Segment
    DAPI_stack = image[:, 0, :, :].copy()

    # NOTE(review): this projection uses the fixed slice range 5:30, while the
    # trace-extraction cell projects over the per-file z0:z1 read from
    # file_mp_configurations.csv. merge_rip_ids later matches the two results
    # on mask_id, so confirm the two segmentations are meant to differ.
    mp_DAPI = max_proj(DAPI_stack[5:30].copy())

    DAPI_masks, flows, styles, image_deblurred = deblur_model.eval(auto_brightness_contrast(mp_DAPI), diameter=None, channels=[0, 0])
    image_deblurred = image_deblurred[:, :, 0]  # resulting image has one channel, but it still needs to be indexed

    # Drop outlier nuclei before looking for rips
    coords_3d = nuclei_centers_of_mass(DAPI_stack, DAPI_masks)
    filtered_coords_3d, filtered_idxs = remove_outliers_local(coords_3d, num_closest_points=15, z_threshold=2)
    filtered_DAPI_masks = extract_masks(DAPI_masks, filtered_idxs)
    DAPI_masks = filtered_DAPI_masks.copy()

    coords_2d = [(i[0], i[1]) for i in filtered_coords_3d]
    in_rip_dict = rip_identifier(nd2_file, image, DAPI_masks, coords_2d)

    # in_rip_dict maps a file name to its masks_in_rip; add one row per entry
    for file_name, masks_in_rip in in_rip_dict.items():
        new_row = pd.DataFrame({'file_name': [file_name], 'masks_in_rip': [masks_in_rip]})
        rip_df = pd.concat([rip_df, new_row], ignore_index=True)

    # Persist after every file so an interrupted run keeps its progress
    rip_df.to_csv('rip_df.csv', index=False)

Processing 3420L_GLUT1_647_WGA_594_0001.nd2
Processing 3420L_GLUT1_647_WGA_594_0002.nd2
Processing 3420L_GLUT1_647_WGA_594_0003.nd2
Processing 3420RA_GLUT1_647_WGA_594_0001.nd2
Processing 3420RA_GLUT1_647_WGA_594_0002.nd2
Processing 3420RA_GLUT1_647_WGA_594_0003.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_004.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_001.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_002.nd2
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_003.nd2
Processing 3529RC_GLUT1_647_WGA_594_0001.nd2
Processing 3529RD_GLUT1_647_WGA_594_0001.nd2
Processing 3529RD_GLUT1_647_WGA_594_0002.nd2
Processing 3529RD_GLUT1_647_WGA_594_0003.nd2
Processing

## 2. Extract Traces

- This step takes a folder containing `.nd2` files and returns a DataFrame of traces.

In [25]:
import csv

# Trace extraction: for every .nd2 file, segment nuclei on the DAPI max
# projection, segment the matching cell in the WGA channel around each nucleus,
# and collect the per-cell traces plus eGFP intensity into all_data.

### Setting paths ###
if not parent_directory or not target_folder_name:
    raise ValueError('Make sure to input target folder name and parent directory.')

target_folder_path = os.path.join(parent_directory, target_folder_name)


nd2_files = [f for f in os.listdir(target_folder_path) if f.endswith('.nd2')]

### Models ###
# Setting path for models
model_path_dapi = os.path.join(parent_directory, 'ImageAnalysis/cellpose_models/T5_DAPI_V4')
model_path_wga = os.path.join(parent_directory, 'ImageAnalysis/cellpose_models/T5_WGA_V2')

# Setting the DAPI (deblur) and WGA models
deblur_model = denoise.CellposeDenoiseModel(gpu=True, model_type= model_path_dapi, restore_type="deblur_cyto3")
wga_model = models.CellposeModel(gpu=True, pretrained_model=model_path_wga)

### Reading the DAPI min and max recorded for each image stack ###
csv_file_path = 'file_mp_configurations.csv'
mp_configs_df = pd.read_csv(csv_file_path)


all_data = pd.DataFrame()
unid_counter = 0

# One DataFrame per file; they are concatenated into all_data after the loop
per_file_frames = []

for nd2_file in nd2_files:

    # 1) Download Image Data

    print(f'Processing {nd2_file}')
    nd2_path = os.path.join(target_folder_path, nd2_file)

    # The context manager closes the file even if reading or conversion fails.
    with nd2.ND2File(nd2_path) as nd2_handle:
        z_sep = nd2_handle.voxel_size().z
        image = to_8bit(nd2_handle.asarray())

    # Flip image if image is backwards
    if nd2_file in backward_files:
        image = np.flip(image, axis=0)

    # 2) DAPI max projection, Deblur, Segment
    DAPI_stack = image[:, 0, :, :].copy()

    # Check if the nd2_file is already in mp_configs_df
    if nd2_file in mp_configs_df['file_name'].values:
        # Use the most recent row; coerce to int because the values are used
        # as slice bounds and a float column would make the slice fail.
        file_mp_df = mp_configs_df.loc[mp_configs_df['file_name'] == nd2_file].iloc[-1]
        z0, z1 = int(file_mp_df['z0']), int(file_mp_df['z1'])

    else:
        # If the file is not in the dataframe, run the max projector app and get z0 and z1
        z0, z1 = run_max_projector_app(DAPI_stack)

        # Row to record: file_name, z0, z1, timestamp
        mp_config = [nd2_file, z0, z1, datetime.now().strftime("%H:%M_%d_%m_%Y")]

        # Insert a separator only when the file's last line is not terminated.
        # This check is self-contained, so the cell does not rely on a
        # `counter` variable left behind by an earlier cell.
        with open(csv_file_path, mode='rb') as existing:
            existing.seek(0, os.SEEK_END)
            if existing.tell() > 0:
                existing.seek(-1, os.SEEK_END)
                needs_separator = existing.read(1) not in (b'\n', b'\r')
            else:
                needs_separator = False

        with open(csv_file_path, mode='a', newline='') as csv_handle:
            if needs_separator:
                csv_handle.write('\n')
            csv.writer(csv_handle).writerow(mp_config)

    mp_DAPI = max_proj(DAPI_stack[z0:z1].copy())

    # 3) Deblur and Segment DAPI max projection
    DAPI_masks, flows, styles, image_deblurred = deblur_model.eval(auto_brightness_contrast(mp_DAPI), diameter=None, channels=[0, 0])
    image_deblurred = image_deblurred[:, :, 0]  # resulting image has one channel, but it still needs to be indexed


    # DAPI filtering and eGFP identification
    coords_3d = nuclei_centers_of_mass(DAPI_stack, DAPI_masks)
    filtered_coords_3d, filtered_idxs = remove_outliers_local(coords_3d, num_closest_points=15, z_threshold=2)
    filtered_DAPI_masks = extract_masks(DAPI_masks, filtered_idxs)
    DAPI_masks = filtered_DAPI_masks.copy()

    ## Quick view
    ## plt.imshow(plot.mask_overlay(to_8bit(image_deblurred), DAPI_masks))
    ## plt.axis('off')
    ## plt.show()

    #Identifying cells in rip
    ##coords_2d = [(i[0], i[1]) for i in filtered_coords_3d]
    ##in_rip_dict = rip_identifier(nd2_file, image, DAPI_masks, coords_2d)

    # List for eGFP identification later
    eGFP_fluorescence_list = []

    # Initialize a list to accumulate cell data
    file_data_list = []

    # --- Individual cell ---

    # 5) Segmentation of WGA channel
    mask_idxs = np.delete(np.unique(DAPI_masks), 0) - 1

    total_masks = len(mask_idxs)  # Total number of masks to process
    masks_found = 0  # Counter for the number of masks found

    for mask_id in mask_idxs:

        single_mask = extract_masks(DAPI_masks, mask_id)
        diam = get_mask_diameter(single_mask)
        expansion = 50

        sq_stacks = get_sq_stacks(image, single_mask)

        # Running the model of the expanded squares
        expanded_sq_WGA, z_level = extract_square_proj_expand(image, single_mask, expansion)

        expanded_mask, flows, styles = wga_model.eval(expanded_sq_WGA, diameter=diam, channels=[0, 0])

        # Removing 0-pixel boundary and finding the largest mask in the array
        WGA_mask = remove_boundary(expanded_mask, expansion)

        # Only background left: no cell was segmented for this nucleus
        if len(np.unique(WGA_mask)) == 1:
            continue

        # Several candidate cells: keep the one closest_mask_2d selects
        elif len(np.unique(WGA_mask)) > 2:
            WGA_mask = closest_mask_2d(single_mask, WGA_mask)

        masks_found += 1  # Increment the masks found counter

        # Z-axis profile
        trace_results = get_traces(sq_stacks, WGA_mask)

        # eGFP extraction
        # The area is the number of mask pixels. Summing the label array
        # would scale the area by the label value whenever that label is not 1.
        eGFP_sum = np.sum(sq_stacks[1, z_level, :, :][(WGA_mask.astype(bool))])
        eGFP_sum_per_area = eGFP_sum / np.count_nonzero(WGA_mask)

        eGFP_fluorescence_list.append((mask_id, eGFP_sum_per_area))

        # --- Organizing data ---

        # 6) Converting trace results into a pd dataframe
        cell_data = organize_data(trace_results, mask_id)

        # 7) Adding file information
        djid, eye, file_base = extract_information(nd2_file)

        # One x value per z-slice, spaced by the file's z voxel size
        nested_array = np.array(range(image.shape[0])) * z_sep
        cell_data['X_vals'] = [nested_array for i in range(len(cell_data))]
        cell_data['file_name'] = file_base
        cell_data['DJID'] = djid
        cell_data['Eye'] = eye
        cell_data['eGFP_Value'] = False
        cell_data['eGFP_Raw_Intensity'] = eGFP_sum_per_area

        # Adding in_rip information
        cell_data['in_rip'] = False

        ##for file_name, mask_ids in in_rip_dict.items():
        ##    if file_name == file_base and mask_id in mask_ids:
        ##        cell_data['in_rip'] = True

        # Accumulating cell data
        file_data_list.append(cell_data)

    # Print the number of masks found out of the total possible masks
    print(f'- Masks found: {masks_found}/{total_masks}')

    # pd.concat raises on an empty list, so a file without any segmented
    # cell is skipped; it should not abort the whole run.
    if not file_data_list:
        print('- No cells segmented in the WGA channel; skipping file.')
        continue

    # Concatenating the list into a single DataFrame for the file
    file_data = pd.concat(file_data_list, ignore_index=True)

    # Processing eGFP data for the entire file
    eGFP_idxs = np.array([i[0] for i in eGFP_fluorescence_list])
    eGFP_vals = np.array([i[1] for i in eGFP_fluorescence_list])

    # Normalizing eGFP values
    eGFP_vals_normal = normalize(eGFP_vals)

    # Setting cells above .2 as positive
    eGFP_pos_idxs = eGFP_idxs[eGFP_vals_normal > .2]
    file_data.loc[file_data['mask_id'].isin(eGFP_pos_idxs), 'eGFP_Value'] = True

    # Accumulating data at the all_data level
    per_file_frames.append(file_data)

# Build all_data once, avoiding repeated concatenation onto a growing frame
if per_file_frames:
    all_data = pd.concat(per_file_frames, ignore_index=True)

Processing 3420L_GLUT1_647_WGA_594_0001.nd2




- Masks found: 89/113
Processing 3420L_GLUT1_647_WGA_594_0002.nd2
- Masks found: 67/97
Processing 3420L_GLUT1_647_WGA_594_0003.nd2
- Masks found: 54/79
Processing 3420RA_GLUT1_647_WGA_594_0001.nd2




- Masks found: 79/102
Processing 3420RA_GLUT1_647_WGA_594_0002.nd2
- Masks found: 17/98
Processing 3420RA_GLUT1_647_WGA_594_0003.nd2
- Masks found: 66/107
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 81/126
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_002.nd2




- Masks found: 42/126
Processing 3527LC_dark_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 73/116
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 87/133
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_002.nd2
- Masks found: 107/153
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 114/135
Processing 3527LD_dark_WGA594_GLUT1_647_TUSC5_004.nd2
- Masks found: 99/118
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 113/169
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_002.nd2
- Masks found: 90/114
Processing 3527RA_light_WGA594_GLUT1_647_TUSC5_003.nd2




- Masks found: 89/148
Processing 3529RC_GLUT1_647_WGA_594_0001.nd2
- Masks found: 114/127
Processing 3529RD_GLUT1_647_WGA_594_0001.nd2




- Masks found: 83/109
Processing 3529RD_GLUT1_647_WGA_594_0002.nd2
- Masks found: 68/73
Processing 3529RD_GLUT1_647_WGA_594_0003.nd2
- Masks found: 69/81
Processing 3574LC_dark_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 133/159
Processing 3574LC_dark_WGA594_GLUT1_647_TUSC5_002.nd2
- Masks found: 119/120
Processing 3574LC_dark_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 141/156
Processing 3574LD_dark_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 39/78
Processing 3574LD_dark_WGA594_GLUT1_647_TUSC5_002.nd2




- Masks found: 78/141
Processing 3574LD_dark_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 30/121
Processing 3574RA_light_WGA594_GLUT1_647_TUSC5_001.nd2
- Masks found: 37/132
Processing 3574RA_light_WGA594_GLUT1_647_TUSC5_002.nd2




- Masks found: 54/159
Processing 3574RA_light_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 85/132
Processing 3574RB_light_WGA594_GLUT1_647_TUSC5_001.nd2




- Masks found: 57/149
Processing 3574RB_light_WGA594_GLUT1_647_TUSC5_002.nd2




- Masks found: 119/173
Processing 3574RB_light_WGA594_GLUT1_647_TUSC5_003.nd2
- Masks found: 92/155


In [26]:
# Code to flag which cells lie inside a rip, using the mask ids recorded in rip_df

def merge_rip_ids(df_rip, df_masks):
    """
    Flag the rows of df_masks whose mask is listed as lying in a rip.

    Parameters:
    df_rip (pd.DataFrame): DataFrame containing 'file_name' and 'masks_in_rip'.
        Each 'masks_in_rip' entry is a list of mask ids, or the string form of
        such a list (as read back from a CSV). Entries that are neither, such
        as NaN, are ignored.
    df_masks (pd.DataFrame): DataFrame containing 'file_name' and 'mask_id'
        (single mask values).

    Returns:
    pd.DataFrame: A copy of df_masks with an 'in_rip' column that is True where
        the row's file_name matches a df_rip row and its mask_id is in that
        row's masks_in_rip, and False elsewhere. df_masks is not modified.
    """
    # Work on a copy so the caller's DataFrame is left untouched
    df_result = df_masks.copy()

    # Every row starts as "not in a rip"
    df_result['in_rip'] = False

    for file_name, masks_in_rip in zip(df_rip['file_name'], df_rip['masks_in_rip']):
        # Lists come back from a CSV as strings such as "[1, 2]"
        if isinstance(masks_in_rip, str):
            masks_in_rip = ast.literal_eval(masks_in_rip)

        # isin() only accepts list-like values; skip NaN and other scalars
        if not pd.api.types.is_list_like(masks_in_rip):
            continue

        mask_condition = (df_result['file_name'] == file_name) & (df_result['mask_id'].isin(masks_in_rip))

        # Set 'in_rip' to True where matches are found
        df_result.loc[mask_condition, 'in_rip'] = True

    return df_result

# Entries loaded from rip_df.csv are strings such as "[1, 2]"; turn those back
# into real lists and leave every other value as it is.
rip_df['masks_in_rip'] = rip_df['masks_in_rip'].map(
    lambda entry: ast.literal_eval(entry) if isinstance(entry, str) else entry
)

# Replace all_data with a copy that carries the in_rip flag
all_data = merge_rip_ids(rip_df, all_data)  # optionally: .query('in_rip == True')

# Exporting Data

In [27]:
# Work on an independent copy so preparing the export leaves all_data untouched
export_df = all_data.copy(deep=True)
export_df.shape

(10340, 10)

In [28]:
# Which in_rip values occur, then the cell count per file and rip status.
# The row count is divided by 4 - presumably each cell contributes four rows;
# TODO confirm against organize_data.
print(export_df['in_rip'].unique())
export_df.groupby(['file_name', 'in_rip']).size().div(4).reset_index(name='Number of Cells')

[False  True]


Unnamed: 0,file_name,in_rip,Number of Cells
0,3420L_GLUT1_647_WGA_594_0001,False,89.0
1,3420L_GLUT1_647_WGA_594_0002,False,67.0
2,3420L_GLUT1_647_WGA_594_0003,False,39.0
3,3420L_GLUT1_647_WGA_594_0003,True,15.0
4,3420RA_GLUT1_647_WGA_594_0001,False,66.0
5,3420RA_GLUT1_647_WGA_594_0001,True,13.0
6,3420RA_GLUT1_647_WGA_594_0002,False,17.0
7,3420RA_GLUT1_647_WGA_594_0003,False,51.0
8,3420RA_GLUT1_647_WGA_594_0003,True,15.0
9,3527LC_dark_WGA594_GLUT1_647_TUSC5_001,False,81.0


In [None]:
## Code for quick examination of any single cell
#export_df_temp = export_df.copy()
#export_df_temp['Cell_unid'] = export_df.groupby(['file_name', 'mask_id']).ngroup()
#plot_single_cell(export_df_temp.query('Cell_unid == 10'), prominence =15, distance = 20)

In [29]:
# Flatten the per-row X/Y sequences into comma-separated strings for the CSV.
# Values that are already strings are left alone: without that guard,
# re-running this cell would join an already-flattened string character by
# character and silently corrupt the column.
for column in ('X_vals', 'Y_vals'):
    export_df[column] = export_df[column].apply(
        lambda values: values if isinstance(values, str) else ', '.join(map(str, values))
    )
# export_df = export_df.loc[export_df['file_name'] != '3607R_GLUT1_WGA_0001'] RT: not sure why this is removed 9/22/24.

### VVV ATTENTION VVV

raw_data_csv_name = 'time_series_old_batch_new'

# to_csv does not create missing directories, so make sure the folder exists
os.makedirs('raw_data_folder', exist_ok=True)
export_df.to_csv('raw_data_folder/' + raw_data_csv_name + '.csv', index = False) # Carefully modify export csv name