In [39]:
import pandas as pd
import pickle
import re
import sys

from pathlib import Path

from steinbock import io
from steinbock.preprocessing import imc
# Environment sanity check: show the module search path, then the interpreter in use
print(sys.path, sys.executable, sep="\n")

['/home/T1D_preprocessing', '/opt/conda/lib/python39.zip', '/opt/conda/lib/python3.9', '/opt/conda/lib/python3.9/lib-dynload', '', '/opt/conda/lib/python3.9/site-packages']
/opt/conda/bin/python


In [40]:
# Base data folder and its "spillcomp" sub-folder
folder_data = Path("/home/processing/")
spillcomp_data = folder_data / "spillcomp"

# Create both folders (including missing parents), verify them and report them
for label, directory in (
    ("Data folder:", folder_data),
    ("Spillcomp data folder:", spillcomp_data),
):
    directory.mkdir(parents=True, exist_ok=True)
    assert directory.exists(), f"{directory} does not exist"
    print(label, directory)

# Git folder: the current working directory (expected to contain this notebook)
folder_git = Path.cwd()
assert folder_git.exists(), f"{folder_git} does not exist"
print("Git folder:", folder_git)

Data folder: /home/processing
Spillcomp data folder: /home/processing/spillcomp
Git folder: /home/T1D_preprocessing


In [41]:
# Map every panel name to its CSV file, named panel_<panel name>.csv in the data folder
panels = {
    panel_name: folder_data / f"panel_{panel_name}.csv"
    for panel_name in ("Uncompressed", "Compressed")
}
panels

{'Uncompressed': PosixPath('/home/processing/panel_Uncompressed.csv'),
 'Compressed': PosixPath('/home/processing/panel_Compressed.csv')}

In [42]:
# Columns that every panel file must contain: lookup key -> column header.
# NOTE(review): the key "col_deeepcell" is misspelled; it is kept unchanged here
# because code outside this cell may look it up under that exact name.
panel_cols = dict(
    col_channel="channel",
    col_metal="metal",
    col_name="name",
    col_keep="keep",
    col_deeepcell="deepcell",
)

In [43]:
# Working folders keyed by short name. The first six are sub-folders of the
# data folder that carry the same name as their key.
folders = {
    subdir: folder_data / subdir
    for subdir in (
        "raw", "img", "seg_cells", "masks_cells", "data_cells", "variables"
    )
}
# The spillcomp folder was defined earlier; its image folder sits next to it
folders["spillcomp"] = spillcomp_data
folders["img_spillcomp"] = folder_data / "img_spillcomp"

In [44]:
# Create every configured directory that is not present yet
for folder in folders.values():
    folder.mkdir(exist_ok=True)

# Register the base data and git folders defined earlier
folders.update(data=folder_data, git=folder_git)

# Pickle the folder mapping for use in downstream notebooks
# (despite its .txt extension, the file holds binary pickle data)
folders_file = folder_data / "variables" / "folders.txt"
with folders_file.open("wb") as handle:
    pickle.dump(folders, handle)

In [45]:
# Load, validate and subset every panel, then export the result.
# After this cell, `panels` maps each panel name to a DataFrame (no longer a
# file path) restricted to the rows whose "keep" column equals 1.
for panel_name, panel_path in panels.items():
    print("Panel:", panel_name)

    if isinstance(panel_path, pd.DataFrame):
        # This cell already ran in the current kernel and replaced the path with
        # the loaded panel. Reuse it so that re-running the cell does not crash;
        # re-run the cell that defines `panels` first to reload from disk.
        cur_panel = panel_path
    else:
        # Load the panel file
        if not Path(panel_path).exists():
            raise FileNotFoundError(f"{panel_path} does not exist")
        cur_panel = pd.read_csv(panel_path, sep=',', index_col=False)

    # Make sure that the required columns exist (all missing ones are reported at once)
    missing_cols = [
        col for col in panel_cols.values() if col not in cur_panel.columns
    ]
    if missing_cols:
        raise ValueError(
            f"Column(s) {missing_cols} missing from panel {panel_name}"
        )

    # Subset the panel to the rows flagged to be kept
    cur_panel = cur_panel[cur_panel[panel_cols["col_keep"]] == 1]
    panels[panel_name] = cur_panel

    # Display the panel
    print(cur_panel.head())

# Export the panels for use in downstream scripts
# (despite its .txt extension, the file holds binary pickle data)
with open(folder_data / "variables" / "panels.txt", "wb") as handle:
    pickle.dump(panels, handle)

Panel: Uncompressed
   channel  metal                       name antibody_clone  keep  deepcell  \
0        1  In113                 Histone H3           D1H2     1         1   
1        2  La139               Somatostatin         ICDCLS     1         0   
2        3  Pr141                    insulin          C27C9     1         0   
3        4  Nd143                       CD44            IM7     1         0   
4        5  Nd144  Glucose Transporter GLUT1        EPR3915     1         0   

   clustering  categories shortname  Unnamed: 9  Unnamed: 10 Unnamed: 11  
0           0           0        H3         NaN          NaN         NaN  
1           1           0       SST         NaN          NaN         NaN  
2           1           0       INS         NaN          NaN         NaN  
3           1           0      CD44         NaN          NaN         NaN  
4           1           0     GLUT1         NaN          NaN         NaN  
Panel: Compressed
   channel  metal                    

In [46]:
# Display the final folder mapping, including the "data" and "git" entries added earlier
folders

{'raw': PosixPath('/home/processing/raw'),
 'img': PosixPath('/home/processing/img'),
 'seg_cells': PosixPath('/home/processing/seg_cells'),
 'masks_cells': PosixPath('/home/processing/masks_cells'),
 'data_cells': PosixPath('/home/processing/data_cells'),
 'variables': PosixPath('/home/processing/variables'),
 'spillcomp': PosixPath('/home/processing/spillcomp'),
 'img_spillcomp': PosixPath('/home/processing/img_spillcomp'),
 'data': PosixPath('/home/processing'),
 'git': PosixPath('/home/T1D_preprocessing')}

In [None]:
# For each panel: preprocess the raw files found in
# folders["spillcomp"] / <panel name>, write one TIFF per acquisition to
# folders["img_spillcomp"] / <panel name>, and save the collected image
# metadata as images_<panel name>_spillcomp.csv in the data folder.
for panel_name, panel in panels.items():
    print("Processing", panel_name, "panel")

    # Metadata rows collected for the images of the current panel
    image_info_rows = []

    # Input and output folders
    raw_subdir = folders["spillcomp"] / panel_name
    img_subdir = folders["img_spillcomp"] / panel_name
    img_subdir.mkdir(exist_ok=True)

    # List the .mcd and .txt input files
    # NOTE(review): unzip=True presumably makes steinbock handle zipped inputs — confirm
    cur_mcd_files = imc.list_mcd_files(raw_subdir, unzip=True)
    cur_txt_files = imc.list_txt_files(raw_subdir, unzip=True)

    # Process files; channels are selected by the panel's metal column
    preprocessed_images = imc.try_preprocess_images_from_disk(
        cur_mcd_files, cur_txt_files,
        hpf=None,
        channel_names=panel[panel_cols["col_metal"]],
        unzip=True
    )
    for mcd_file, acquisition, img, matched_txt, recovered in preprocessed_images:
        # Name the image after its source file and the acquisition description
        cur_desc = acquisition.description
        img_file = f"{mcd_file.stem}_{cur_desc}.tiff"
        io.write_image(img, img_subdir / img_file)

        # Save acquisition metadata
        image_info_row = imc.create_image_info(
            mcd_file, acquisition, img, matched_txt, recovered, img_file
        )
        image_info_row["panel"] = panel_name
        image_info_rows.append(image_info_row)

    # One metadata table per panel, written next to the panel files
    image_info = pd.DataFrame(image_info_rows)
    image_meta_file = f"images_{panel_name}_spillcomp.csv"
    image_info.to_csv(folders["data"] / image_meta_file, index=False)

Processing Uncompressed panel
Processing Compressed panel
