In [1]:
import ops
import glob
import os
from ops.preprocessing_smk import *

def find_and_parse_file(pattern, well='A1', cycle=None, tiles=None):
    """Find one file per tile matching ``pattern`` and print its parsed description.

    For each tile, the ``{cycle}``, ``{well}`` and ``{tile}`` placeholders in
    ``pattern`` are filled in and the result is globbed. The first match in
    sorted order is passed to ``parse_file``. Matches, descriptions, misses
    and parse errors are printed rather than raised, so a problem with one
    tile does not stop the remaining tiles from being checked.

    Relies on the notebook-level variables ``parse_function_home`` and
    ``parse_function_dataset`` being defined before the function is called.

    Args:
        pattern: Glob pattern containing ``str.format`` placeholders. ``{tile}``
            receives the tile number as a string zero-padded to at least
            three digits. A ``**`` path component matches any number of
            nested directories.
        well: Value substituted for ``{well}``.
        cycle: Value substituted for ``{cycle}``; ignored by patterns that do
            not contain that placeholder.
        tiles: Iterable of integer tile numbers to check. Defaults to ``[1]``.

    Returns:
        dict: Maps each tile whose file was found and parsed without error to
        the value returned by ``parse_file``. Tiles with no matching file or
        with a parse error are omitted.
    """
    if tiles is None:
        tiles = [1]  # Default to tile 1 if no tiles specified

    descriptions = {}
    for tile in tiles:
        # Replace placeholders in the pattern
        filled_pattern = pattern.format(cycle=cycle, well=well, tile=f"{tile:03d}")

        # recursive=True lets '**' span nested directories (without it glob
        # treats '**' like a single '*'). Sorting makes the "first" match
        # deterministic, because glob returns results in arbitrary order.
        matching_files = sorted(glob.glob(filled_pattern, recursive=True))

        if not matching_files:
            print(f"No files found matching pattern for tile {tile}: {filled_pattern}")
            continue

        # Select the first matching file
        file_to_parse = matching_files[0]
        print(f"Found file for tile {tile}: {file_to_parse}")

        # Parse the file; report failures instead of aborting the whole check
        try:
            file_description = parse_file(file_to_parse, home=parse_function_home, dataset=parse_function_dataset)
            print(f"File description for tile {tile}:")
            print(file_description)
            print("-" * 50)
        except Exception as e:
            print(f"Error parsing file for tile {tile}: {e}")
        else:
            descriptions[tile] = file_description

    return descriptions

### Check that the file patterns match the expected naming scheme

The following cell contains crucial variables that need to be set according to your specific experimental setup and data organization. Please review and modify these variables as needed before proceeding with the analysis.

- `PH_CHANNELS` and `SBS_CHANNELS`: The channel names for the phenotyping (PH) and sequencing-by-synthesis (SBS) images, respectively
- `SBS_INPUT_PATTERN` and `PH_INPUT_PATTERN`: The file naming conventions and directory structures for SBS and PH images
- `parse_function_home` and `parse_function_dataset`: The base directory and dataset name for the parsing function

Ensure these variables accurately reflect your experimental setup to guarantee correct data processing and analysis.

**Acceptable ND2 file format**

The parsing functions expect ND2 files to follow these naming conventions:
1. Cycle information (for SBS only) should be in a subdirectory named '/c{number}/' in the file path.
2. Well information should be present as 'Wells-XX_' or 'WellXX_' in the filename.
3. For multi-tile experiments, tile information should be present as 'Points-####' in the filename.
4. Channel information should be present as 'Channel{name}_' in the filename.
5. Phenotype images should have 'input_ph' in the file path.
6. SBS images should have 'input_sbs' in the file path.

Example acceptable filenames:

With tile information:
- SBS: /lab/example/screens/dataset/input_sbs/c1/acquisition_date_folder/Wells-A1_Points-0001_ChannelDAPI_Seq0000.nd2
- PH:  /lab/example/screens/dataset/input_ph/acquisition_date_folder/Wells-A1_Points-0001_ChannelDAPI_Seq0000.nd2

Without tile information:
- SBS: /lab/example/screens/dataset/input_sbs/c1/acquisition_date_folder/Wells-A1_ChannelDAPI_Seq0000.nd2
- PH:  /lab/example/screens/dataset/input_ph/acquisition_date_folder/Wells-A1_ChannelDAPI_Seq0000.nd2

In [2]:
# Base directory and dataset name handed to the parsing function
parse_function_home = "/lab/barcheese01/screens"
parse_function_dataset = "denali"

# Glob templates for the raw ND2 inputs; {cycle}, {well} and {tile} are
# substituted by find_and_parse_file before globbing
SBS_INPUT_PATTERN = '/lab/barcheese01/screens/denali/input_sbs/c{cycle}/*Wells-{well}_Points-{tile:0>3}__Channel*.nd2'
PH_INPUT_PATTERN = '/lab/barcheese01/screens/denali/input_ph/**/*Wells-{well}_Points-{tile:0>3}__Channel*.nd2'

# Smoke-test the SBS template on cycle 1 for two tiles
print("Testing SBS_INPUT_PATTERN:")
sbs_parsed = find_and_parse_file(pattern=SBS_INPUT_PATTERN, cycle=1, tiles=[1, 100])

# Smoke-test the PH template (no cycle placeholder) for three tiles
print("\nTesting PH_INPUT_PATTERN:")
ph_parsed = find_and_parse_file(pattern=PH_INPUT_PATTERN, tiles=[1, 100, 1000])

Testing SBS_INPUT_PATTERN:
Found file for tile 1: /lab/barcheese01/screens/denali/input_sbs/c1/P001_SBS_10x_C1_Wells-A1_Points-001__Channel_Cy7,Cy5,AF594,Cy3_SBS,DAPI_SBS.nd2
File description for tile 1:
{'home': '/lab/barcheese01/screens', 'dataset': 'denali', 'ext': 'tif', 'well': 'A1', 'mag': '10X', 'tag': 'sbs', 'cycle': 'c1-SBS-1', 'subdir': 'input_sbs_tif'}
--------------------------------------------------
Found file for tile 100: /lab/barcheese01/screens/denali/input_sbs/c1/P001_SBS_10x_C1_Wells-A1_Points-100__Channel_Cy7,Cy5,AF594,Cy3_SBS,DAPI_SBS.nd2
File description for tile 100:
{'home': '/lab/barcheese01/screens', 'dataset': 'denali', 'ext': 'tif', 'well': 'A1', 'mag': '10X', 'tag': 'sbs', 'cycle': 'c1-SBS-1', 'subdir': 'input_sbs_tif'}
--------------------------------------------------

Testing PH_INPUT_PATTERN:
Found file for tile 1: /lab/barcheese01/screens/denali/input_ph/20240121_140219_589/P001_Pheno_20x_Wells-A1_Points-001__Channel_AF750,Cy3,GFP,DAPI.nd2
File descri