In [1]:
import ops
import glob
import os
from ops.preprocessing_smk import *

def find_and_parse_file(pattern, well='A1', cycle=None, channels=None, channel_parse=False):
    """
    Find one file per channel that matches ``pattern`` and parse its metadata.

    For every channel, the ``{cycle}``, ``{well}`` and ``{channel}``
    placeholders in ``pattern`` are filled in with ``str.format`` and the
    result is expanded with ``glob.glob``. The first match (in sorted order,
    so the choice is reproducible) is handed to ``parse_file`` and the
    resulting description is printed. Missing files and parsing errors are
    reported on stdout instead of being raised, so one bad channel does not
    stop the remaining channels from being checked.

    This function reads the module-level names ``parse_function_home`` and
    ``parse_function_dataset`` at call time; they must be defined before the
    function is called.

    Args:
        pattern (str): File pattern to search for. May contain the
            ``{cycle}``, ``{well}`` and ``{channel}`` placeholders.
        well (str): Well identifier (default: 'A1').
        cycle (int): Cycle number for SBS images (default: None).
        channels (str or list): Channel(s) to process. A single string is
            treated as a one-element list; ``None`` checks the pattern once
            with an empty channel name (default: None).
        channel_parse (bool): Forwarded to ``parse_file`` as its ``channels``
            argument (default: False).

    Returns:
        dict: Maps each channel whose file was found and parsed without error
        to the value returned by ``parse_file``. Channels with no matching
        file, or whose parsing raised, are omitted; the dict is empty if
        nothing could be parsed.
    """
    if channels is None:
        channels = ['']  # Check the pattern once, with an empty channel name
    elif isinstance(channels, str):
        channels = [channels]  # Convert single channel string to a list

    parsed = {}
    for channel in channels:
        # Replace placeholders in the pattern
        filled_pattern = pattern.format(cycle=cycle, well=well, channel=channel)

        # glob.glob is called without recursive=True, so '**' in the pattern
        # matches a single path component, exactly like '*'. Sorting makes
        # "the first match" independent of directory listing order.
        matching_files = sorted(glob.glob(filled_pattern))

        if not matching_files:
            print(f"No files found matching pattern for channel {channel}: {filled_pattern}")
            continue

        # Select the first matching file
        file_to_parse = matching_files[0]
        print(f"Found file for channel {channel}: {file_to_parse}")

        # Parse the file; report (rather than raise) failures so the
        # remaining channels are still checked.
        try:
            file_description = parse_file(
                file_to_parse,
                home=parse_function_home,
                dataset=parse_function_dataset,
                channels=channel_parse,
            )
        except Exception as e:
            print(f"Error parsing file for channel {channel}: {e}")
            continue

        print(f"File description for channel {channel}:")
        print(file_description)
        print("-" * 50)
        parsed[channel] = file_description

    return parsed


### Check that the file patterns match your data layout

The following cell contains crucial variables that need to be set according to your specific experimental setup and data organization. Please review and modify these variables as needed before proceeding with the analysis.

- `PH_CHANNELS` and `SBS_CHANNELS`: The channel names for Phenotyping and Sequencing By Synthesis images
- `SBS_INPUT_PATTERN` and `PH_INPUT_PATTERN`: The file naming conventions and directory structures for SBS and PH images
- `parse_function_home` and `parse_function_dataset`: The base directory and dataset name for the parsing function

Ensure these variables accurately reflect your experimental setup to guarantee correct data processing and analysis.

Acceptable ND2 File Format:
The parsing functions expect ND2 files to follow these naming conventions:
1. Cycle information (for SBS only) should be in a subdirectory named '/c{number}/' in the file path.
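2. Well information should be present as 'Wells-XX_' or 'WellXX_' in the filename. Note that the `Well{well}*` prefix used in the input patterns in the next cell only matches the 'WellXX_' form; change it to `Wells-{well}*` if your files use 'Wells-XX_'.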
3. For multi-tile experiments, tile information should be present as 'Points-####' in the filename.
4. Channel information should be present as 'Channel{name}_' in the filename.
5. Phenotype images should have 'input_ph' in the file path.
6. SBS images should have 'input_sbs' in the file path.

Example acceptable filenames:

With tile information:
- SBS: /lab/example/screens/dataset/input_sbs/c1/acquisition_date_folder/Wells-A1_Points-0001_ChannelDAPI_Seq0000.nd2
- PH:  /lab/example/screens/dataset/input_ph/acquisition_date_folder/Wells-A1_Points-0001_ChannelDAPI_Seq0000.nd2

Without tile information:
- SBS: /lab/example/screens/dataset/input_sbs/c1/acquisition_date_folder/Wells-A1_ChannelDAPI_Seq0000.nd2
- PH:  /lab/example/screens/dataset/input_ph/acquisition_date_folder/Wells-A1_ChannelDAPI_Seq0000.nd2

In [2]:
# --- Parse function parameters ------------------------------------------------
# Read by find_and_parse_file when it calls parse_file.
parse_function_home = "/lab/barcheese01/screens"
parse_function_dataset = "aconcagua"

# --- Channel names ------------------------------------------------------------
# PH = Phenotyping images, SBS = Sequencing By Synthesis images.
PH_CHANNELS = [
    'DAPI_1x1-GFP_1x1',
    'A594_1x1',
    'AF750_1x1',
]
# The first SBS entry is the multichannel acquisition; the remaining entries
# are the individual channels.
SBS_CHANNELS = [
    'DAPI-CY3_30p_545-A594_30p-CY5_30p-CY7_30p',
    'CY3_30p_545',
    'A594_30p',
    'CY5_30p',
    'CY7_30p',
]

# --- Input file patterns ------------------------------------------------------
# {cycle}, {well} and {channel} are filled in by find_and_parse_file.
SBS_INPUT_PATTERN = '/lab/barcheese01/screens/aconcagua/input_sbs/c{cycle}/**/Well{well}*Channel{channel}_Seq*.nd2'
PH_INPUT_PATTERN = '/lab/barcheese01/screens/aconcagua/input_ph/**/**/Well{well}*Channel{channel}_Seq*.nd2'

# --- Pattern checks -----------------------------------------------------------
# SBS, multichannel file (cycle 1)
print("\nTesting SBS_INPUT_PATTERN with multichannel:")
sbs_multi_parsed = find_and_parse_file(
    SBS_INPUT_PATTERN,
    cycle=1,
    channels=SBS_CHANNELS[0],
    channel_parse=True,
)

# SBS, one file per individual channel (cycle 2)
print("\nTesting SBS_INPUT_PATTERN with individual channels:")
sbs_parsed = find_and_parse_file(
    SBS_INPUT_PATTERN,
    cycle=2,
    channels=SBS_CHANNELS[1:],
    channel_parse=True,
)

# PH, every phenotyping channel
print("\nTesting PH_INPUT_PATTERN:")
ph_parsed = find_and_parse_file(
    PH_INPUT_PATTERN,
    channels=PH_CHANNELS,
    channel_parse=True,
)


Testing SBS_INPUT_PATTERN with multichannel:
Found file for channel DAPI-CY3_30p_545-A594_30p-CY5_30p-CY7_30p: /lab/barcheese01/screens/aconcagua/input_sbs/c1/20200218_141421_172/WellA1_ChannelDAPI-CY3_30p_545-A594_30p-CY5_30p-CY7_30p_Seq0000.nd2
File description for channel DAPI-CY3_30p_545-A594_30p-CY5_30p-CY7_30p:
{'home': '/lab/barcheese01/screens', 'dataset': 'aconcagua', 'ext': 'tif', 'well': 'A1', 'channel': 'DAPI-CY3_30p_545-A594_30p-CY5_30p-CY7_30p', 'mag': '10X', 'tag': 'sbs', 'cycle': 'c1-SBS-1', 'subdir': 'input_sbs_tif'}
--------------------------------------------------

Testing SBS_INPUT_PATTERN with individual channels:
Found file for channel CY3_30p_545: /lab/barcheese01/screens/aconcagua/input_sbs/c2/20200224_101922_191/WellA1_ChannelCY3_30p_545_Seq0000.nd2
File description for channel CY3_30p_545:
{'home': '/lab/barcheese01/screens', 'dataset': 'aconcagua', 'ext': 'tif', 'well': 'A1', 'channel': 'CY3_30p_545', 'mag': '10X', 'tag': 'sbs', 'cycle': 'c2-SBS-2', 'subdir