# Generate min/max representative single-cell images per top two features with highest coefficients  

This is based on the absolute values of the coefficients.

## Import libraries

In [1]:
import pathlib
from pprint import pprint

import cv2
import pandas as pd
from typing import List, Dict

## Define functions

In [2]:
# Function for formatting min/max row data frames into dictionaries
def create_sc_dict(dfs: List[pd.DataFrame], names: List[str]) -> dict:
    """Format lists of data frames and names into a dictionary with the metadata needed to find single-cell images.

    Every row of every data frame becomes one entry keyed ``"<name>_<n>"``, where ``<name>`` is the
    name paired with the data frame and ``<n>`` is the 1-based position of the row within it.

    Args:
        dfs (List[pd.DataFrame]): List of data frames whose rows each describe a single cell. Each must contain
            the columns Metadata_Plate, Metadata_Well, Metadata_Site, Metadata_Nuclei_Location_Center_X
            and Metadata_Nuclei_Location_Center_Y.
        names (List[str]): List of names corresponding to the data frames (same length and order as ``dfs``).

    Raises:
        ValueError: If ``dfs`` and ``names`` do not have the same length.

    Returns:
        dict: Dictionary mapping each key to the plate, well, site, and nucleus center coordinates of a cell.
    """
    # zip() would silently drop the unmatched tail, so reject mismatched inputs explicitly
    if len(dfs) != len(names):
        raise ValueError(
            f"Expected one name per data frame, got {len(dfs)} data frames and {len(names)} names."
        )

    sc_dict = {}
    for df, name in zip(dfs, names):
        # Number the rows from 1 in the order they appear in the data frame
        for rank, (_, row) in enumerate(df.iterrows(), start=1):
            sc_dict[f"{name}_{rank}"] = {
                "plate": row["Metadata_Plate"],
                "well": row["Metadata_Well"],
                "site": row["Metadata_Site"],
                "location_center_x": row["Metadata_Nuclei_Location_Center_X"],
                "location_center_y": row["Metadata_Nuclei_Location_Center_Y"],
            }
    return sc_dict

In [3]:
# Function for generating and saving single-cell crops per channel as PNGs
def generate_sc_crops(
    sc_dict: Dict,
    channel_mapping: Dict[int, str],
    images_dir: pathlib.Path,
    output_img_dir: pathlib.Path,
    crop_size: int,
) -> None:
    """Generate single-cell crops per channel and save them as PNGs in one folder per image set.

    For every entry of ``sc_dict`` and every channel of ``channel_mapping``, the illumination corrected
    image is read, cropped around the cell's center coordinates, resized to ``crop_size`` x ``crop_size``
    and written to ``<output_img_dir>/<key>/<key>_d<channel number>_cropped.png``. Crops that already
    exist on disk are skipped with a printed message, and entries without center coordinates are
    skipped without reading any image.

    Args:
        sc_dict (Dict): Dictionary containing info relevant for finding single-cell crops. Each value must
            provide "well" and "site", and may provide "location_center_x" / "location_center_y".
        channel_mapping (Dict[int, str]): Dictionary mapping channel number to channel name; both are used
            to build the input file names, and every channel in the mapping is processed.
        images_dir (pathlib.Path): Directory where illumination corrected images are found.
        output_img_dir (pathlib.Path): Main directory to save each image set single-cell crops
        crop_size (int): Size of the box in pixels (example: setting crop_size as 250 will make a 250x250 pixel crop
            around the single-cell center coordinates)

    Raises:
        FileNotFoundError: If an input image is missing or cannot be read by OpenCV.
    """
    # Half of the box side; does not depend on the cell or the channel
    half_crop = crop_size // 2

    for key, info in sc_dict.items():
        center_x = info.get("location_center_x")
        center_y = info.get("location_center_y")

        # Without center coordinates there is nothing to crop, so skip before touching the disk
        if center_x is None or center_y is None:
            continue

        # Keep all channels of one image set in a single folder
        key_dir = pathlib.Path(output_img_dir) / key
        key_dir.mkdir(exist_ok=True, parents=True)

        for channel_number, channel in channel_mapping.items():
            output_filename = key_dir / f"{key}_d{channel_number}_cropped.png"

            # Do not re-read and re-crop an image whose crop was already saved
            if output_filename.exists():
                print(f"File {output_filename} already exists. Skipping.")
                continue

            # Build the input file path from well, channel number, site, and channel name
            filename = pathlib.Path(images_dir) / (
                f"{info['well']}_01_{channel_number}_{info['site']}_{channel}_001_illumcorrect.tiff"
            )

            # cv2.imread returns None instead of raising when the file cannot be read
            channel_image = cv2.imread(str(filename), cv2.IMREAD_UNCHANGED)
            if channel_image is None:
                raise FileNotFoundError(f"Could not read image: {filename}")

            # Crop boundaries, clamped to the image extent
            top_left_x = max(int(center_x - half_crop), 0)
            top_left_y = max(int(center_y - half_crop), 0)
            bottom_right_x = min(int(center_x + half_crop), channel_image.shape[1])
            bottom_right_y = min(int(center_y + half_crop), channel_image.shape[0])

            cropped_channel = channel_image[
                top_left_y:bottom_right_y, top_left_x:bottom_right_x
            ]

            # A crop clamped at an image border (or built from an odd crop_size) is smaller than
            # requested, so resize to guarantee a crop_size x crop_size output
            cropped_channel = cv2.resize(cropped_channel, (crop_size, crop_size))

            cv2.imwrite(str(output_filename), cropped_channel)

## Set paths and variables

In [4]:
# Root directory of the cell painting data repository
cell_painting_dir = pathlib.Path(
    "/media/18tbdrive/1.Github_Repositories/nf1_schwann_cell_painting_data"
)

# Corrected Plate 6 images (used for finding single-cells);
# resolve(strict=True) raises if the directory does not exist
corrected_plate_6_dir = pathlib.Path(
    f"{cell_painting_dir}/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_6"
)
images_dir = corrected_plate_6_dir.resolve(strict=True)

# Directory that receives the cropped images (created when missing)
output_img_dir = pathlib.Path("./sc_crops")
output_img_dir.mkdir(exist_ok=True)

# Side length of the square cropping box in pixels
crop_size = 300

# Channel number -> channel name, as used in the image file names
channel_mapping = {
    1: "DAPI",
    2: "GFP",
    3: "CY5",
    4: "RFP",
}

# Parallel lists filled by the selection cells below: the data frames holding the
# selected single-cells and the name describing each of them
list_of_dfs, list_of_names = [], []

## Load in Plate 6 data to generate representative images from both derivatives

In [5]:
# Columns used to join the feature selected profiles with the annotated profiles
merge_keys = ["Metadata_Well", "Metadata_Site", "Metadata_Nuclei_Number_Object_Number"]

# QC normalized + feature selected single-cell profiles
plate_df = pd.read_parquet(
    pathlib.Path(
        f"{cell_painting_dir}/3.processing_features/data/single_cell_profiles/cleaned_sc_profiles/Plate_6_sc_feature_selected.parquet"
    )
)

# QC annotated profiles, reduced to the join keys plus the adjacent-neighbors count
annot_df = pd.read_parquet(
    pathlib.Path(
        f"{cell_painting_dir}/3.processing_features/data/single_cell_profiles/cleaned_sc_profiles/Plate_6_sc_annotated.parquet"
    ),
    columns=[*merge_keys, "Cells_Neighbors_NumberOfNeighbors_Adjacent"],
)

# Attach the neighbors count to each cell and store it under a metadata column name
plate_df = plate_df.merge(annot_df, on=merge_keys, how="inner").rename(
    columns={
        "Cells_Neighbors_NumberOfNeighbors_Adjacent": "Metadata_Number_of_Cells_Neighbors_Adjacent"
    }
)

# Keep every genotype except HET
not_het = plate_df["Metadata_genotype"] != "HET"
plate_df = plate_df.loc[not_het]

print(plate_df.shape)
plate_df.head()

(4398, 1156)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_Well,Metadata_Site,Metadata_number_of_singlecells,Metadata_gene_name,Metadata_genotype,Metadata_Institution,Metadata_seed_density,Metadata_ImageNumber,...,Nuclei_Texture_InverseDifferenceMoment_GFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_CY5_3_03_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_01_256,Metadata_Number_of_Cells_Neighbors_Adjacent
0,B,2,B2,2,129,NF1,WT,iNFixion,1000,88,...,-0.563211,0.201421,-1.206699,-1.416853,-1.184348,0.470736,0.147279,0.217602,0.02618,0
1,B,2,B2,31,129,NF1,WT,iNFixion,1000,90,...,-1.73568,-0.820227,-0.946619,-0.584976,-0.425458,1.356754,1.022222,3.788065,0.026554,0
2,B,2,B2,5,129,NF1,WT,iNFixion,1000,98,...,-1.092363,-0.949758,-1.177671,-1.607254,-0.591539,0.699809,-0.100055,0.193561,-0.088946,1
3,B,2,B2,9,129,NF1,WT,iNFixion,1000,102,...,1.302359,-0.176119,-0.21883,-0.206733,0.769615,1.385889,-0.136975,-0.787444,-0.556767,0
4,B,2,B2,10,129,NF1,WT,iNFixion,1000,67,...,-0.251498,-0.452401,-1.287964,-1.30027,-1.161049,1.151205,-0.261562,-0.274148,-0.117812,3


## Load in feature importance data and determine the top two highest coefficients 

We will be creating image montages for the features with the highest coefficients (ranked by absolute value). The cell below shows the top two ranked features with their signed coefficients.

**Note:** The most positive feature is the most important for predicting the WT genotype, and the most negative feature is the most important for predicting the Null genotype.

In [6]:
# Coefficients of the final QC model
feat_import_df = pd.read_csv(
    pathlib.Path("../supp_figure_7/coeff_results/final_model_coefficients.csv")
)

# Row labels ordered from the largest to the smallest coefficient magnitude
abs_rank_index = (
    feat_import_df["coefficient"].abs().sort_values(ascending=False).index
)
sorted_abs_feat_import_df = feat_import_df.reindex(abs_rank_index)

# First and second rows of the ranking; the coefficients keep their sign
top_row = sorted_abs_feat_import_df.iloc[0]
second_row = sorted_abs_feat_import_df.iloc[1]

top_coeff_feature, top_coeff_value = top_row["feature"], top_row["coefficient"]
second_top_coeff_feature, second_top_coeff_value = (
    second_row["feature"],
    second_row["coefficient"],
)

# Show both features with their signed coefficients
print(f"{top_coeff_feature}: {top_coeff_value}")
print(f"{second_top_coeff_feature}: {second_top_coeff_value}")

Nuclei_RadialDistribution_FracAtD_DAPI_4of4: 2.18162567715766
Nuclei_RadialDistribution_FracAtD_RFP_4of4: -1.9925754702179064


## Filter single-cells to only include isolated cells that are not near the edge of the FOV

In [7]:
# Minimum distance (half of the crop box) a nucleus center must keep from each border
edge_margin = crop_size // 2

nuclei_x = plate_df["Metadata_Nuclei_Location_Center_X"]
nuclei_y = plate_df["Metadata_Nuclei_Location_Center_Y"]

# Cells without any adjacent neighbor
is_isolated = plate_df["Metadata_Number_of_Cells_Neighbors_Adjacent"].isin([0])

# The largest observed center coordinate along each axis serves as the far border
inside_x = (nuclei_x > edge_margin) & (nuclei_x < (nuclei_x.max() - edge_margin))
inside_y = (nuclei_y > edge_margin) & (nuclei_y < (nuclei_y.max() - edge_margin))

filtered_plate_df = plate_df[is_isolated & inside_x & inside_y]

print(filtered_plate_df.shape)
filtered_plate_df.head()

(778, 1156)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_Well,Metadata_Site,Metadata_number_of_singlecells,Metadata_gene_name,Metadata_genotype,Metadata_Institution,Metadata_seed_density,Metadata_ImageNumber,...,Nuclei_Texture_InverseDifferenceMoment_GFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256,Nuclei_Texture_SumEntropy_DAPI_3_02_256,Nuclei_Texture_SumVariance_CY5_3_03_256,Nuclei_Texture_SumVariance_GFP_3_03_256,Nuclei_Texture_SumVariance_RFP_3_01_256,Metadata_Number_of_Cells_Neighbors_Adjacent
0,B,2,B2,2,129,NF1,WT,iNFixion,1000,88,...,-0.563211,0.201421,-1.206699,-1.416853,-1.184348,0.470736,0.147279,0.217602,0.02618,0
1,B,2,B2,31,129,NF1,WT,iNFixion,1000,90,...,-1.73568,-0.820227,-0.946619,-0.584976,-0.425458,1.356754,1.022222,3.788065,0.026554,0
6,B,2,B2,17,129,NF1,WT,iNFixion,1000,74,...,-0.78515,-0.125381,0.454286,0.589137,0.304912,1.709033,-0.040404,2.108857,-0.183114,0
17,B,2,B2,11,129,NF1,WT,iNFixion,1000,68,...,-0.623394,-0.517475,-0.74097,-0.211301,0.170334,0.944783,1.756048,1.941606,-0.125965,0
41,B,2,B2,25,129,NF1,WT,iNFixion,1000,83,...,-0.108615,1.457668,-0.994203,-1.27088,-0.810972,-0.391736,-0.3575,-0.302703,0.34375,0


### Max single-cells for top highest feature (original)

In [8]:
## Get data frame with 3 of the highest-ranked single-cells for WT genotype from iNFixion institution
# NOTE(review): after the descending sort, `.iloc[1:4]` keeps the cells ranked 2nd to 4th, so the
# single highest-valued cell is skipped (the other selection cells use nlargest/nsmallest(3)).
# Confirm that skipping the top cell is deliberate.
max_top_feature_orig = (
    filtered_plate_df[
        (filtered_plate_df["Metadata_genotype"] == "WT")
        & (filtered_plate_df["Metadata_Institution"] == "iNFixion")
    ]
    .sort_values(by=top_coeff_feature, ascending=False)
    .iloc[1:4][
        [
            top_coeff_feature,
            "Metadata_genotype",
            "Metadata_Institution",
            "Metadata_Well",
            "Metadata_Plate",
            "Metadata_Site",
            "Metadata_Number_of_Cells_Neighbors_Adjacent",
            "Metadata_Nuclei_Location_Center_X",
            "Metadata_Nuclei_Location_Center_Y",
        ]
    ]
)

# Append the DataFrame and its name to the lists (kept in the same order in both lists)
list_of_dfs.append(max_top_feature_orig)
list_of_names.append("max_top_feature_orig")

# Show the shape, then display the selected rows as the cell output
print(max_top_feature_orig.shape)
max_top_feature_orig

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_DAPI_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
138,4.064879,WT,iNFixion,B3,Plate_6,7,0,328.529321,684.669367
1,2.565905,WT,iNFixion,B2,Plate_6,31,0,763.729643,610.353837
475,1.386162,WT,iNFixion,B7,Plate_6,2,0,838.003883,581.233935


### Max single-cells for top highest feature (derivative)

In [9]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# WT genotype cells from the MGH institution
wt_mgh_mask = filtered_plate_df["Metadata_genotype"].eq("WT") & filtered_plate_df[
    "Metadata_Institution"
].eq("MGH")

# Three cells with the largest values of the top feature
max_top_feature_deriv = filtered_plate_df.loc[wt_mgh_mask].nlargest(
    n=3, columns=top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(max_top_feature_deriv)
list_of_names.append("max_top_feature_deriv")

print(max_top_feature_deriv.shape)
max_top_feature_deriv

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_DAPI_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
2442,7.672211,WT,MGH,E3,Plate_6,9,0,989.13217,359.405653
2379,7.114671,WT,MGH,E2,Plate_6,31,0,906.489554,604.6415
2350,6.826702,WT,MGH,E2,Plate_6,26,0,204.519246,437.741948


### Min single-cells for top highest feature (original)

In [10]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# Null genotype cells from the iNFixion institution
null_infixion_mask = filtered_plate_df["Metadata_genotype"].eq(
    "Null"
) & filtered_plate_df["Metadata_Institution"].eq("iNFixion")

# Three cells with the smallest values of the top feature
min_top_feature_orig = filtered_plate_df.loc[null_infixion_mask].nsmallest(
    n=3, columns=top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(min_top_feature_orig)
list_of_names.append("min_top_feature_orig")

print(min_top_feature_orig.shape)
min_top_feature_orig

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_DAPI_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
2092,-3.123547,Null,iNFixion,D9,Plate_6,22,0,685.73406,167.747837
1832,-1.574866,Null,iNFixion,D3,Plate_6,28,0,675.220873,566.090323
1819,-1.531352,Null,iNFixion,D3,Plate_6,21,0,176.349373,587.102256


### Min single-cells for top highest feature (derivative)

In [11]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# Null genotype cells from the MGH institution
null_mgh_mask = filtered_plate_df["Metadata_genotype"].eq(
    "Null"
) & filtered_plate_df["Metadata_Institution"].eq("MGH")

# Three cells with the smallest values of the top feature
min_top_feature_deriv = filtered_plate_df.loc[null_mgh_mask].nsmallest(
    n=3, columns=top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(min_top_feature_deriv)
list_of_names.append("min_top_feature_deriv")

print(min_top_feature_deriv.shape)
min_top_feature_deriv

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_DAPI_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
6628,-2.655509,Null,MGH,G9,Plate_6,2,0,921.202077,641.622204
6075,-2.486307,Null,MGH,G5,Plate_6,23,0,893.768813,539.560124
6368,-2.393502,Null,MGH,G7,Plate_6,13,0,211.865729,239.576726


### Max single-cells for the second highest feature (original)

In [12]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    second_top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# Null genotype cells from the iNFixion institution
null_infixion_mask = filtered_plate_df["Metadata_genotype"].eq(
    "Null"
) & filtered_plate_df["Metadata_Institution"].eq("iNFixion")

# Three cells with the largest values of the second-ranked feature
max_second_top_feature_orig = filtered_plate_df.loc[null_infixion_mask].nlargest(
    n=3, columns=second_top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(max_second_top_feature_orig)
list_of_names.append("max_second_top_feature_orig")

print(max_second_top_feature_orig.shape)
max_second_top_feature_orig

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_RFP_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
1894,3.44664,Null,iNFixion,D5,Plate_6,22,0,785.5982,222.985177
1880,2.97872,Null,iNFixion,D5,Plate_6,20,0,598.50139,196.650452
1852,2.845168,Null,iNFixion,D4,Plate_6,27,0,520.226313,381.553871


### Max single-cells for the second highest feature (derivative)

In [13]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    second_top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# Null genotype cells from the MGH institution
null_mgh_mask = filtered_plate_df["Metadata_genotype"].eq(
    "Null"
) & filtered_plate_df["Metadata_Institution"].eq("MGH")

# Three cells with the largest values of the second-ranked feature
max_second_top_feature_deriv = filtered_plate_df.loc[null_mgh_mask].nlargest(
    n=3, columns=second_top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(max_second_top_feature_deriv)
list_of_names.append("max_second_top_feature_deriv")

print(max_second_top_feature_deriv.shape)
max_second_top_feature_deriv

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_RFP_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
6699,6.677409,Null,MGH,G10,Plate_6,34,0,239.776202,374.609675
6220,5.765173,Null,MGH,G6,Plate_6,18,0,977.51374,672.706332
6377,4.916791,Null,MGH,G7,Plate_6,16,0,782.251617,691.458291


### Min single-cells for the second highest feature (original)

In [14]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    second_top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# WT genotype cells from the iNFixion institution
wt_infixion_mask = filtered_plate_df["Metadata_genotype"].eq(
    "WT"
) & filtered_plate_df["Metadata_Institution"].eq("iNFixion")

# Three cells with the smallest values of the second-ranked feature
min_second_top_feature_orig = filtered_plate_df.loc[wt_infixion_mask].nsmallest(
    n=3, columns=second_top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(min_second_top_feature_orig)
list_of_names.append("min_second_top_feature_orig")

print(min_second_top_feature_orig.shape)
min_second_top_feature_orig

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_RFP_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
476,-1.670245,WT,iNFixion,B7,Plate_6,31,0,362.291513,315.209963
575,-1.57907,WT,iNFixion,B8,Plate_6,5,0,371.375657,439.080469
662,-1.369701,WT,iNFixion,B9,Plate_6,8,0,434.563911,574.430864


### Min single-cells for the second highest feature (derivative)

In [15]:
# Feature value plus the metadata needed to locate each selected cell
selected_columns = [
    second_top_coeff_feature,
    "Metadata_genotype",
    "Metadata_Institution",
    "Metadata_Well",
    "Metadata_Plate",
    "Metadata_Site",
    "Metadata_Number_of_Cells_Neighbors_Adjacent",
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
]

# WT genotype cells from the MGH institution
wt_mgh_mask = filtered_plate_df["Metadata_genotype"].eq("WT") & filtered_plate_df[
    "Metadata_Institution"
].eq("MGH")

# Three cells with the smallest values of the second-ranked feature
min_second_top_feature_deriv = filtered_plate_df.loc[wt_mgh_mask].nsmallest(
    n=3, columns=second_top_coeff_feature
)[selected_columns]

# Register the data frame together with its name
list_of_dfs.append(min_second_top_feature_deriv)
list_of_names.append("min_second_top_feature_deriv")

print(min_second_top_feature_deriv.shape)
min_second_top_feature_deriv

(3, 9)


Unnamed: 0,Nuclei_RadialDistribution_FracAtD_RFP_4of4,Metadata_genotype,Metadata_Institution,Metadata_Well,Metadata_Plate,Metadata_Site,Metadata_Number_of_Cells_Neighbors_Adjacent,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y
3762,-2.255719,WT,MGH,E10,Plate_6,16,0,553.980415,496.656973
2795,-1.897099,WT,MGH,E5,Plate_6,22,0,630.478848,590.348335
2710,-1.605949,WT,MGH,E5,Plate_6,29,0,500.475253,268.793083


## Merge feature info into dictionary for processing

In [16]:
# Collect the metadata of every selected cell; the data frames and names are paired by position
sc_dict = create_sc_dict(list_of_dfs, list_of_names)

# Preview the first two entries of the dictionary
first_two_entries = list(sc_dict.items())[:2]
pprint(first_two_entries, indent=4)

[   (   'max_top_feature_orig_1',
        {   'location_center_x': 328.52932098765433,
            'location_center_y': 684.6693672839506,
            'plate': 'Plate_6',
            'site': '7',
            'well': 'B3'}),
    (   'max_top_feature_orig_2',
        {   'location_center_x': 763.7296426479203,
            'location_center_y': 610.3538371411834,
            'plate': 'Plate_6',
            'site': '31',
            'well': 'B2'})]


## Generate single-cell crops 

In [17]:
# Save one cropped PNG per channel for every single-cell collected in sc_dict
generate_sc_crops(sc_dict, channel_mapping, images_dir, output_img_dir, crop_size)