In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

# Goal

The goal of this script is to summarize selected regions of interest (here, "boxes") and to export the related data as `.csv` files. At the end of the workflow, the exported `.csv` files will be used as the starting point for IF-IMC registration (script `13_Register_IF_IMC.ipynb`).  
  
Tasks performed by the current pipeline:
1. Import the ROIs defined in FIJI.
2. Sort the ROIs by color of the ROI mask (each ROI has a different color in the mask).
3. Randomize the ROI order (so that ROIs are measured in a random order in IMC).
4. Export a list of ROIs for each slide (for each combination of case and panel).
5. Calculate the area of each ROI and total area per slide.
6. Export a list of ROIs for each case.
7. Export a global summary of all slides (number of ROIs and total area per slide).

## Parameters

***Adapt if needed***   
Enter the input directory (`BASE` folder), list the case IDs and panels.

In [None]:
# Base folder; the processing loop expects one sub-folder per case ID in here
input_dir = Path("/home/ubuntu/Data3/acquisition/Batch4/")

# Panels to process for every case
panels = ("Islet", "Immune")

# Case IDs to process (kept as strings: they are used to build folder and
# file names)
caseList = (
    "6036", "6055", "6090", "6147", "6225", "6228",
    "6303", "6321", "6388", "6396", "6414", "6421", "6428",
    "6437", "6458", "6510", "6519", "6521", "6522", "6526",
    "6532", "6547", "6550", "6553", "6558", "6562", "6563",
)

## Process and summarize ROIs

In [None]:
# Fail early if the base folder is not reachable
assert input_dir.exists(), f"{input_dir} does not exist"

# Prefix of `.csv` files exported by FIJI
tempBoxesfile_prefix = "TEMPBoxes"

In [None]:
# Summarize the ROIs ("boxes") of every case and panel.
#
# For each case and panel this cell:
#   - reads the FIJI export `<prefix>_<panel>.csv` from the case folder,
#   - sorts the ROIs by mask color and assigns a seeded random acquisition order,
#   - writes `<case>_Boxes_<panel>.csv`,
#   - computes the ROI areas and records one summary row.
# For each case it then writes `<case>_Boxes.csv` (all panels side by side),
# and finally `acquisition_summary.csv` in the base folder.
boxesSummary_cols = ["Case", "Panel", "ROINb", "Area"]

# One row per (case, panel). Rows are collected in a plain list and converted
# to a data frame once after the loop, instead of concatenating onto an empty
# data frame at every iteration.
summary_rows = []

# Loop through cases
for case in caseList:
    case_dir = input_dir / case
    assert case_dir.exists(), f"{case_dir} does not exist"
    boxesList = []

    # Loop through panels
    for panel in panels:
        # Import ROIs defined in FIJI
        caseBoxes_fn = case_dir / (tempBoxesfile_prefix + "_" + panel + ".csv")
        assert caseBoxes_fn.exists(), f"{caseBoxes_fn} does not exist"
        boxes = pd.read_csv(caseBoxes_fn)

        # Discard the first column (presumably the row counter written by
        # FIJI - confirm), sort by mask color and use the color as ROI id
        boxes = (
            boxes
            .drop(columns=boxes.columns[0])
            .sort_values(by="Color")
            .rename(columns={"Color": "ROI"})
        )

        # Randomly reorder the ROIs.
        # The seed is reset to the case number for every panel, so panels of
        # the same case with the same number of ROIs get the same permutation.
        # NOTE(review): the column-wise concatenation below aligns panels on
        # "AcqOrder", so this looks intentional - confirm before changing.
        np.random.seed(int(case))
        ROINb = boxes.shape[0]
        boxes["AcqOrder"] = np.random.choice(ROINb, ROINb, replace=False) + 1
        boxes = boxes.sort_values(by="AcqOrder").set_index("AcqOrder")

        # Export `.csv` file with randomly ordered ROIs
        # (written before the area is added, so the file has no "Area" column)
        boxes_fn = "_".join([case, "Boxes", panel]) + ".csv"
        boxes.to_csv(case_dir / boxes_fn)

        # Calculate ROI area and total area of the slide
        boxes["Area"] = boxes["W"] * boxes["H"]
        totalArea = boxes["Area"].sum()

        # Add a suffix corresponding to the current panel to column names,
        # except for the ROI id, which keeps the shared name "ROI"
        roi_panel_column = "_".join(["ROI", panel])
        boxes = (
            boxes
            .add_suffix("_" + panel)
            .rename(columns={roi_panel_column: "ROI"})
        )

        # Append the current panel ROIs to the list of all ROIs
        boxesList.append(boxes)

        # Record the ROI summary for current case and panel
        summary_rows.append([case, panel, ROINb, totalArea])

    # Create a single data frame from the ROI list (aligned on "AcqOrder")
    boxesDF = pd.concat(boxesList, axis=1)

    # Every panel contributes a "ROI" column. `pop` removes all of them and
    # returns a data frame (several panels) or a series (single panel); only
    # the ROI ids of the first panel are kept.
    ROI_cols = boxesDF.pop("ROI")
    if isinstance(ROI_cols, pd.DataFrame):
        ROI_col = ROI_cols.iloc[:, 0]
    else:
        ROI_col = ROI_cols
    boxesDF.insert(0, "ROI", ROI_col)
    boxesDF.insert(0, "Case", int(case))

    # Export the data frame as `.csv`
    boxesDF_fn = "_".join([case, "Boxes"]) + ".csv"
    boxesDF.to_csv(case_dir / boxesDF_fn)

# Build and export the ROI summary as `.csv`
boxesSummary = pd.DataFrame(summary_rows, columns=boxesSummary_cols)
boxesSummary_fn = "acquisition_summary.csv"
boxesSummary.to_csv(input_dir / boxesSummary_fn, index=False)