In [1]:
import pandas as pd
import utils
import numpy as np

In [2]:
# Feature group analysed per compartment.
feature_group = "AreaShape"

# Feature groups analysed per imaging channel.
feature_group_list = [
    "Texture",
    "Intensity",
    "RadialDistribution",
]

# Compartments and channels that the similarity tables are broken down by.
compartments = [
    "Cells",
    "Cytoplasm",
    "Nuclei",
]
channels = [
    "DNA",
    "RNA",
    "Mito",
    "ER",
    "AGP",
]

In [3]:
# Gene sets (lists of gene symbols) examined in the ORF profiles. Each inner
# list is analysed as one group; a symbol may appear in more than one set.
orf_genesets = [
    [
        "SLC39A1",
        "ZBTB16",
    ],
    [
        "SARS2",
        "ECH1",
        "DGUOK",
        "MRPS2",
        "LDHAL6B",
        "UQCRFS1",
    ],
    [
        "MYT1",
        "LZTS2",
        "CHRM4",
        "GPR176",
        "TSC22D1",
    ],
    [
        "SPDL1",
        "PAFAH1B1",
        "NDEL1",
        "NDE1",
        "HOOK2",
        "HOOK1",
    ],
    [
        "MYT1",
        "INSYN1",
        "RNF41",
    ],
    [
        "ZFP36L1",
        "PIK3R3",
        "NRBP1",
        "INSYN1",
        "HOXC8",
        "RAB40C",
        "RAB40B",
        "HTR1D",
        "TUBA3D",
    ],
]

# Gene sets examined in the CRISPR profiles.
crispr_genesets = [
    [
        "SLC39A1",
        "ZBTB16",
    ],
    [
        "FOXO3",
        "TGFB1",
    ],
    [
        "SARS2",
        "ECH1",
        "PVR",
        "LAIR1",
        "SLC1A5",
        "UQCRFS1",
    ],
    [
        "MYT1",
        "CHRM4",
        "GPR176",
        "TSC22D1",
    ],
    [
        "PIK3R3",
        "ZFP36L1",
        "HOXC8",
        "NRBP1",
        "RAB40B",
    ],
]

In [4]:
# Gene symbol and perturbation type for each ORF reagent, keyed by JCP2022 id.
orf_metadata_df = pd.read_csv(
    "../00.download-and-process-annotations/output/orf_metadata.tsv.gz",
    usecols=["Metadata_JCP2022", "Metadata_Symbol", "Metadata_pert_type"],
    sep="\t",
)

# Attach the annotations to the ORF profiles. The inner join keeps only the
# profiles whose JCP2022 id is present in the metadata table.
orf_profiles_with_feature_names_df = pd.merge(
    pd.read_parquet("../profiles/profiles_wellpos_cc_var_mad_outlier_orf.parquet"),
    orf_metadata_df,
    on="Metadata_JCP2022",
    how="inner",
)

orf_profiles_with_feature_names_df.head()

Unnamed: 0,Metadata_Source,Metadata_Plate,Metadata_Well,Metadata_JCP2022,Cells_AreaShape_Area,Cells_AreaShape_BoundingBoxArea,Cells_AreaShape_BoundingBoxMaximum_X,Cells_AreaShape_BoundingBoxMaximum_Y,Cells_AreaShape_BoundingBoxMinimum_X,Cells_AreaShape_BoundingBoxMinimum_Y,...,Nuclei_Texture_Variance_RNA_3_00_256,Nuclei_Texture_Variance_RNA_3_01_256,Nuclei_Texture_Variance_RNA_3_02_256,Nuclei_Texture_Variance_RNA_3_03_256,Nuclei_Texture_Variance_RNA_5_00_256,Nuclei_Texture_Variance_RNA_5_01_256,Nuclei_Texture_Variance_RNA_5_02_256,Nuclei_Texture_Variance_RNA_5_03_256,Metadata_Symbol,Metadata_pert_type
0,source_4,BR00117035,A01,JCP2022_905588,1.976969,2.47889,-0.027964,-0.219052,-0.387556,-0.408526,...,-3.679746,-3.644897,-3.663296,-3.716321,-3.636625,-3.645543,-3.727525,-3.679245,CDK9,trt
1,source_4,BR00117036,A01,JCP2022_905588,0.609293,1.348788,-1.335341,-0.904727,-1.493824,-1.208351,...,2.437158,2.377791,2.409654,2.368619,2.390954,2.298647,2.406632,2.438143,CDK9,trt
2,source_4,BR00121558,A01,JCP2022_905588,-0.185963,1.084949,0.525013,-0.442693,0.36696,-0.529556,...,-0.525705,-0.495094,-0.506351,-0.553593,-0.530804,-0.562515,-0.519971,-0.535011,CDK9,trt
3,source_4,BR00121559,A01,JCP2022_905588,1.975644,2.602919,2.095917,-3.154853,1.84804,-3.808197,...,-0.86896,-0.879897,-0.857301,-0.868555,-0.879776,-0.898712,-0.885029,-0.847506,CDK9,trt
4,source_4,BR00121560,A01,JCP2022_905588,-3.07734,-2.134534,-2.492584,-0.933778,-2.360221,-0.558088,...,-0.916399,-0.913091,-0.958685,-0.89614,-0.88787,-0.906178,-0.915392,-0.889229,CDK9,trt


In [5]:
# Gene symbol for each CRISPR reagent, keyed by JCP2022 id. This table carries
# no perturbation type, so the controls are labelled by hand below.
crispr_metadata_df = pd.read_csv(
    "../00.download-and-process-annotations/output/crispr_metadata.tsv.gz",
    usecols=["Metadata_JCP2022", "Metadata_Symbol"],
    sep="\t",
)

# JCP2022 ids of the control reagents and the perturbation type of each.
crispr_controls_df = pd.DataFrame(
    [
        ("JCP2022_805264", "poscon"),
        ("JCP2022_800001", "negcon"),
        ("JCP2022_800002", "negcon"),
    ],
    columns=["Metadata_JCP2022", "Metadata_pert_type"],
    index=[0, 1, 2],
)

# 1. Inner-join the profiles with the gene symbols.
# 2. Left-join the control labels so that non-control rows are kept.
# 3. Every row that is not a listed control becomes a treatment ("trt").
crispr_profiles_with_feature_names_df = pd.merge(
    pd.merge(
        pd.read_parquet(
            "../profiles/profiles_wellpos_cc_var_mad_outlier_crispr.parquet"
        ),
        crispr_metadata_df,
        on="Metadata_JCP2022",
        how="inner",
    ),
    crispr_controls_df,
    on="Metadata_JCP2022",
    how="left",
).fillna(value={"Metadata_pert_type": "trt"})

crispr_profiles_with_feature_names_df.head()

Unnamed: 0,Metadata_Source,Metadata_Plate,Metadata_Well,Metadata_JCP2022,Cells_AreaShape_Area,Cells_AreaShape_BoundingBoxArea,Cells_AreaShape_BoundingBoxMaximum_X,Cells_AreaShape_BoundingBoxMaximum_Y,Cells_AreaShape_BoundingBoxMinimum_X,Cells_AreaShape_BoundingBoxMinimum_Y,...,Nuclei_Texture_Variance_RNA_3_00_256,Nuclei_Texture_Variance_RNA_3_01_256,Nuclei_Texture_Variance_RNA_3_02_256,Nuclei_Texture_Variance_RNA_3_03_256,Nuclei_Texture_Variance_RNA_5_00_256,Nuclei_Texture_Variance_RNA_5_01_256,Nuclei_Texture_Variance_RNA_5_02_256,Nuclei_Texture_Variance_RNA_5_03_256,Metadata_Symbol,Metadata_pert_type
0,source_13,CP-CC9-R1-01,A02,JCP2022_800002,2.218619,2.431635,0.501694,1.198642,0.224633,0.844867,...,-0.889403,-0.903963,-0.883159,-0.895142,-0.891171,-0.866901,-0.905884,-0.872768,non-targeting,negcon
1,source_13,CP-CC9-R1-01,L23,JCP2022_800002,2.185739,2.296984,-1.985822,0.262706,-2.189088,-0.015251,...,1.241002,1.224294,1.235871,1.233204,1.220388,1.22583,1.224827,1.229212,non-targeting,negcon
2,source_13,CP-CC9-R1-01,I23,JCP2022_800002,1.598852,1.733746,1.054698,-1.709091,0.85809,-1.865041,...,2.410808,2.404765,2.402406,2.401428,2.386266,2.419901,2.407083,2.400517,non-targeting,negcon
3,source_13,CP-CC9-R1-01,J02,JCP2022_800002,1.863836,2.242304,0.272562,-2.722119,0.111834,-2.9547,...,2.207796,2.195493,2.204458,2.189717,2.173862,2.194929,2.196221,2.188477,non-targeting,negcon
4,source_13,CP-CC9-R1-01,O23,JCP2022_800002,1.833142,1.89168,2.556046,-1.213637,2.293583,-1.423116,...,3.256251,3.251066,3.248827,3.243864,3.222744,3.243059,3.245875,3.223063,non-targeting,negcon


In [6]:
# Break every feature column name of the ORF profiles into its components
# (one row per feature; see the preview below for the resulting columns).
feature_names = utils.get_featurecols(orf_profiles_with_feature_names_df)

# Build one single-row frame per feature and concatenate them once at the end.
# Concatenating inside the loop re-copies the accumulated frame on every
# iteration, which is quadratic in the number of features.
parsed_feature_frames = [
    pd.DataFrame(utils.parse_cp_features(feature_name), index=[0])
    for feature_name in feature_names
]

# pd.concat raises on an empty list, so fall back to an empty frame when there
# are no feature columns at all.
feature_name_interpretation_df = (
    pd.concat(parsed_feature_frames, ignore_index=True)
    if parsed_feature_frames
    else pd.DataFrame()
)

feature_name_interpretation_df.head()

Unnamed: 0,feature,compartment,feature_group,feature_type,channel
0,Cells_AreaShape_Area,Cells,AreaShape,Area,XNONE
1,Cells_AreaShape_BoundingBoxArea,Cells,AreaShape,BoundingBoxArea,XNONE
2,Cells_AreaShape_BoundingBoxMaximum_X,Cells,AreaShape,BoundingBoxMaximum,XNONE
3,Cells_AreaShape_BoundingBoxMaximum_Y,Cells,AreaShape,BoundingBoxMaximum,XNONE
4,Cells_AreaShape_BoundingBoxMinimum_X,Cells,AreaShape,BoundingBoxMinimum,XNONE


In [7]:
# For each ORF gene set, compare the gene set's consensus profile with the
# consensus of the negative controls on the same plates, restricted to the
# `feature_group` features of one compartment at a time. One markdown table
# (feature group x compartment) is printed per gene set.

# Row labels handed to utils.cosine_similarity (gene set first, controls second).
profile_labels = ["genes", "negcon"]

for geneset in orf_genesets:
    # Nothing in this section depends on the compartment, so it is computed
    # once per gene set rather than once per compartment.
    geneset_profiles = orf_profiles_with_feature_names_df.query(
        "Metadata_Symbol in @geneset"
    )
    plates_with_genes = list(np.unique(geneset_profiles.Metadata_Plate.to_list()))
    negative_control_profiles = orf_profiles_with_feature_names_df.query(
        "Metadata_Plate in @plates_with_genes"
    ).query("Metadata_pert_type=='negcon'")

    # Two-stage consensus for the gene set: first per reagent (JCP2022 id),
    # then across reagents via their shared perturbation type.
    # NOTE(review): assumes utils.consensus is a pure aggregation — confirm.
    geneset_consensus = utils.consensus(
        utils.consensus(geneset_profiles, "Metadata_JCP2022"), "Metadata_pert_type"
    )
    negcon_consensus = utils.consensus(negative_control_profiles, "Metadata_pert_type")

    # Collect one record per compartment and build the frame once afterwards
    # instead of growing it with pd.concat inside the loop.
    similarity_records = []
    for compartment in compartments:
        feature_cols = (
            feature_name_interpretation_df.query("feature_group==@feature_group")
            .query("compartment==@compartment")
            .feature.to_list()
        )

        # Absolute feature values of the two consensus profiles, stacked as
        # the two rows of one array.
        feature_values = np.asarray(
            [
                np.abs(utils.get_featuredata(geneset_consensus[feature_cols])).values[0],
                np.abs(utils.get_featuredata(negcon_consensus[feature_cols])).values[0],
            ]
        )

        # Off-diagonal entry = similarity between gene set and controls.
        cosine_sim = utils.cosine_similarity(profile_labels, feature_values).values[0, 1]

        similarity_records.append(
            {
                "Compartment": compartment,
                "Feature_group": feature_group,
                "Similarity": cosine_sim,
            }
        )

    compartment_feature_group_df = pd.DataFrame(similarity_records).pivot(
        index="Feature_group", columns="Compartment", values="Similarity"
    )

    print('-'.join(geneset))
    print(compartment_feature_group_df.to_markdown())

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SLC39A1-ZBTB16
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.756485 |    0.741052 | 0.580042 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SARS2-ECH1-DGUOK-MRPS2-LDHAL6B-UQCRFS1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.928316 |    0.858894 | 0.704496 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-LZTS2-CHRM4-GPR176-TSC22D1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.781649 |    0.687088 | 0.784338 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SPDL1-PAFAH1B1-NDEL1-NDE1-HOOK2-HOOK1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.446054 |    0.556465 | 0.537324 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-INSYN1-RNF41
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.622954 |    0.666372 | 0.775038 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

ZFP36L1-PIK3R3-NRBP1-INSYN1-HOXC8-RAB40C-RAB40B-HTR1D-TUBA3D
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.524976 |    0.643322 | 0.726768 |


In [8]:
# For each ORF gene set, compare the gene set's consensus profile with the
# consensus of the negative controls on the same plates, restricted to the
# features of one (channel, feature group) pair at a time. One markdown table
# (feature group x channel) is printed per gene set.

# Row labels handed to utils.cosine_similarity (gene set first, controls second).
profile_labels = ["genes", "negcon"]

for geneset in orf_genesets:
    # Nothing in this section depends on the channel or feature group, so it
    # is computed once per gene set rather than once per inner iteration.
    geneset_profiles = orf_profiles_with_feature_names_df.query(
        "Metadata_Symbol in @geneset"
    )
    plates_with_genes = list(np.unique(geneset_profiles.Metadata_Plate.to_list()))
    negative_control_profiles = orf_profiles_with_feature_names_df.query(
        "Metadata_Plate in @plates_with_genes"
    ).query("Metadata_pert_type=='negcon'")

    # Two-stage consensus for the gene set: first per reagent (JCP2022 id),
    # then across reagents via their shared perturbation type.
    # NOTE(review): assumes utils.consensus is a pure aggregation — confirm.
    geneset_consensus = utils.consensus(
        utils.consensus(geneset_profiles, "Metadata_JCP2022"), "Metadata_pert_type"
    )
    negcon_consensus = utils.consensus(negative_control_profiles, "Metadata_pert_type")

    # Collect one record per (channel, feature group) and build the frame once
    # afterwards instead of growing it with pd.concat inside the loop.
    similarity_records = []
    for channel in channels:
        # The loop variable is deliberately NOT called `feature_group`: that
        # name holds the notebook-level setting read by the per-compartment
        # cells, and rebinding it here would silently change their result.
        for channel_feature_group in feature_group_list:
            feature_cols = (
                feature_name_interpretation_df.query(
                    "feature_group==@channel_feature_group"
                )
                .query("channel==@channel")
                .feature.to_list()
            )

            # Absolute feature values of the two consensus profiles, stacked
            # as the two rows of one array.
            feature_values = np.asarray(
                [
                    np.abs(
                        utils.get_featuredata(geneset_consensus[feature_cols])
                    ).values[0],
                    np.abs(
                        utils.get_featuredata(negcon_consensus[feature_cols])
                    ).values[0],
                ]
            )

            # Off-diagonal entry = similarity between gene set and controls.
            cosine_sim = utils.cosine_similarity(
                profile_labels, feature_values
            ).values[0, 1]

            similarity_records.append(
                {
                    "Channel": channel,
                    "Feature_group": channel_feature_group,
                    "Similarity": cosine_sim,
                }
            )

    channel_feature_group_df = pd.DataFrame(similarity_records).pivot(
        index="Feature_group", columns="Channel", values="Similarity"
    )

    print('-'.join(geneset))
    print(channel_feature_group_df.to_markdown())

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SLC39A1-ZBTB16
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.672086 | 0.789707 | 0.770823 | 0.965031 | 0.797988 |
| RadialDistribution | 0.663653 | 0.419407 | 0.63083  | 0.738365 | 0.856948 |
| Texture            | 0.741627 | 0.622982 | 0.569258 | 0.683218 | 0.649612 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SARS2-ECH1-DGUOK-MRPS2-LDHAL6B-UQCRFS1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.541427 | 0.776904 | 0.518479 | 0.762869 | 0.553949 |
| RadialDistribution | 0.908801 | 0.638516 | 0.649865 | 0.775103 | 0.638098 |
| Texture            | 0.628305 | 0.932647 | 0.770487 | 0.909473 | 0.736923 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-LZTS2-CHRM4-GPR176-TSC22D1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.487197 | 0.52347  | 0.644771 | 0.458181 | 0.681677 |
| RadialDistribution | 0.907964 | 0.730055 | 0.5467   | 0.641424 | 0.554975 |
| Texture            | 0.551001 | 0.761822 | 0.559646 | 0.687991 | 0.764027 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SPDL1-PAFAH1B1-NDEL1-NDE1-HOOK2-HOOK1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.530789 | 0.712743 | 0.815348 | 0.713063 | 0.71227  |
| RadialDistribution | 0.378488 | 0.467896 | 0.707991 | 0.650461 | 0.43878  |
| Texture            | 0.601008 | 0.890659 | 0.765041 | 0.732915 | 0.751516 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-INSYN1-RNF41
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.625156 | 0.749395 | 0.468944 | 0.699724 | 0.739519 |
| RadialDistribution | 0.754368 | 0.727735 | 0.552624 | 0.684017 | 0.610879 |
| Texture            | 0.671218 | 0.725173 | 0.723866 | 0.594921 | 0.786448 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

ZFP36L1-PIK3R3-NRBP1-INSYN1-HOXC8-RAB40C-RAB40B-HTR1D-TUBA3D
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.862226 | 0.862558 | 0.477843 | 0.861283 | 0.546821 |
| RadialDistribution | 0.490482 | 0.618996 | 0.73138  | 0.688579 | 0.717832 |
| Texture            | 0.707661 | 0.885056 | 0.716927 | 0.775312 | 0.652885 |


In [9]:
# Restate the shared configuration before the CRISPR analysis. `feature_group`
# in particular must be "AreaShape" here: the per-compartment cell below reads
# it, and any earlier loop that used `feature_group` as its loop variable would
# have left a different value behind.
feature_group = "AreaShape"

# Feature groups analysed per imaging channel.
feature_group_list = [
    "Texture",
    "Intensity",
    "RadialDistribution",
]

# Compartments and channels that the similarity tables are broken down by.
compartments = [
    "Cells",
    "Cytoplasm",
    "Nuclei",
]
channels = [
    "DNA",
    "RNA",
    "Mito",
    "ER",
    "AGP",
]

In [10]:
# Break every feature column name of the CRISPR profiles into its components
# (one row per feature; see the preview below for the resulting columns).
# NOTE: this rebinds `feature_name_interpretation_df`, which earlier held the
# ORF feature names; cells from here on see the CRISPR version.
feature_names = utils.get_featurecols(crispr_profiles_with_feature_names_df)

# Build one single-row frame per feature and concatenate them once at the end.
# Concatenating inside the loop re-copies the accumulated frame on every
# iteration, which is quadratic in the number of features.
parsed_feature_frames = [
    pd.DataFrame(utils.parse_cp_features(feature_name), index=[0])
    for feature_name in feature_names
]

# pd.concat raises on an empty list, so fall back to an empty frame when there
# are no feature columns at all.
feature_name_interpretation_df = (
    pd.concat(parsed_feature_frames, ignore_index=True)
    if parsed_feature_frames
    else pd.DataFrame()
)

feature_name_interpretation_df.head()

Unnamed: 0,feature,compartment,feature_group,feature_type,channel
0,Cells_AreaShape_Area,Cells,AreaShape,Area,XNONE
1,Cells_AreaShape_BoundingBoxArea,Cells,AreaShape,BoundingBoxArea,XNONE
2,Cells_AreaShape_BoundingBoxMaximum_X,Cells,AreaShape,BoundingBoxMaximum,XNONE
3,Cells_AreaShape_BoundingBoxMaximum_Y,Cells,AreaShape,BoundingBoxMaximum,XNONE
4,Cells_AreaShape_BoundingBoxMinimum_X,Cells,AreaShape,BoundingBoxMinimum,XNONE


In [11]:
# For each CRISPR gene set, compare the gene set's consensus profile with the
# consensus of the negative controls on the same plates, restricted to the
# `feature_group` features of one compartment at a time. One markdown table
# (feature group x compartment) is printed per gene set.

# Row labels handed to utils.cosine_similarity (gene set first, controls second).
profile_labels = ["genes", "negcon"]

for geneset in crispr_genesets:
    # Nothing in this section depends on the compartment, so it is computed
    # once per gene set rather than once per compartment.
    geneset_profiles = crispr_profiles_with_feature_names_df.query(
        "Metadata_Symbol in @geneset"
    )
    plates_with_genes = list(np.unique(geneset_profiles.Metadata_Plate.to_list()))
    negative_control_profiles = crispr_profiles_with_feature_names_df.query(
        "Metadata_Plate in @plates_with_genes"
    ).query("Metadata_pert_type=='negcon'")

    # Two-stage consensus for the gene set: first per reagent (JCP2022 id),
    # then across reagents via their shared perturbation type.
    # NOTE(review): assumes utils.consensus is a pure aggregation — confirm.
    geneset_consensus = utils.consensus(
        utils.consensus(geneset_profiles, "Metadata_JCP2022"), "Metadata_pert_type"
    )
    negcon_consensus = utils.consensus(negative_control_profiles, "Metadata_pert_type")

    # Collect one record per compartment and build the frame once afterwards
    # instead of growing it with pd.concat inside the loop.
    similarity_records = []
    for compartment in compartments:
        feature_cols = (
            feature_name_interpretation_df.query("feature_group==@feature_group")
            .query("compartment==@compartment")
            .feature.to_list()
        )

        # Absolute feature values of the two consensus profiles, stacked as
        # the two rows of one array.
        feature_values = np.asarray(
            [
                np.abs(utils.get_featuredata(geneset_consensus[feature_cols])).values[0],
                np.abs(utils.get_featuredata(negcon_consensus[feature_cols])).values[0],
            ]
        )

        # Off-diagonal entry = similarity between gene set and controls.
        cosine_sim = utils.cosine_similarity(profile_labels, feature_values).values[0, 1]

        similarity_records.append(
            {
                "Compartment": compartment,
                "Feature_group": feature_group,
                "Similarity": cosine_sim,
            }
        )

    compartment_feature_group_df = pd.DataFrame(similarity_records).pivot(
        index="Feature_group", columns="Compartment", values="Similarity"
    )

    print('-'.join(geneset))
    print(compartment_feature_group_df.to_markdown())

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SLC39A1-ZBTB16
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.594427 |    0.699604 | 0.548067 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

FOXO3-TGFB1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.729562 |    0.698623 | 0.636117 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SARS2-ECH1-PVR-LAIR1-SLC1A5-UQCRFS1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.831355 |    0.715244 |  0.59991 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-CHRM4-GPR176-TSC22D1
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.731146 |    0.642172 | 0.526139 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PIK3R3-ZFP36L1-HOXC8-NRBP1-RAB40B
| Feature_group   |    Cells |   Cytoplasm |   Nuclei |
|:----------------|---------:|------------:|---------:|
| AreaShape       | 0.465711 |    0.517223 | 0.559276 |


In [12]:
# For each CRISPR gene set, compare the gene set's consensus profile with the
# consensus of the negative controls on the same plates, restricted to the
# features of one (channel, feature group) pair at a time. One markdown table
# (feature group x channel) is printed per gene set.

# Row labels handed to utils.cosine_similarity (gene set first, controls second).
profile_labels = ["genes", "negcon"]

for geneset in crispr_genesets:
    # Nothing in this section depends on the channel or feature group, so it
    # is computed once per gene set rather than once per inner iteration.
    geneset_profiles = crispr_profiles_with_feature_names_df.query(
        "Metadata_Symbol in @geneset"
    )
    plates_with_genes = list(np.unique(geneset_profiles.Metadata_Plate.to_list()))
    negative_control_profiles = crispr_profiles_with_feature_names_df.query(
        "Metadata_Plate in @plates_with_genes"
    ).query("Metadata_pert_type=='negcon'")

    # Two-stage consensus for the gene set: first per reagent (JCP2022 id),
    # then across reagents via their shared perturbation type.
    # NOTE(review): assumes utils.consensus is a pure aggregation — confirm.
    geneset_consensus = utils.consensus(
        utils.consensus(geneset_profiles, "Metadata_JCP2022"), "Metadata_pert_type"
    )
    negcon_consensus = utils.consensus(negative_control_profiles, "Metadata_pert_type")

    # Collect one record per (channel, feature group) and build the frame once
    # afterwards instead of growing it with pd.concat inside the loop.
    similarity_records = []
    for channel in channels:
        # The loop variable is deliberately NOT called `feature_group`: that
        # name holds the notebook-level setting read by the per-compartment
        # cells, and rebinding it here would silently change their result if
        # they were re-run afterwards.
        for channel_feature_group in feature_group_list:
            feature_cols = (
                feature_name_interpretation_df.query(
                    "feature_group==@channel_feature_group"
                )
                .query("channel==@channel")
                .feature.to_list()
            )

            # Absolute feature values of the two consensus profiles, stacked
            # as the two rows of one array.
            feature_values = np.asarray(
                [
                    np.abs(
                        utils.get_featuredata(geneset_consensus[feature_cols])
                    ).values[0],
                    np.abs(
                        utils.get_featuredata(negcon_consensus[feature_cols])
                    ).values[0],
                ]
            )

            # Off-diagonal entry = similarity between gene set and controls.
            cosine_sim = utils.cosine_similarity(
                profile_labels, feature_values
            ).values[0, 1]

            similarity_records.append(
                {
                    "Channel": channel,
                    "Feature_group": channel_feature_group,
                    "Similarity": cosine_sim,
                }
            )

    channel_feature_group_df = pd.DataFrame(similarity_records).pivot(
        index="Feature_group", columns="Channel", values="Similarity"
    )

    print('-'.join(geneset))
    print(channel_feature_group_df.to_markdown())

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SLC39A1-ZBTB16
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.728945 | 0.767571 | 0.771242 | 0.873135 | 0.822642 |
| RadialDistribution | 0.711016 | 0.761158 | 0.826235 | 0.858462 | 0.794492 |
| Texture            | 0.697588 | 0.727041 | 0.827957 | 0.800155 | 0.894299 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

FOXO3-TGFB1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.544389 | 0.688425 | 0.681472 | 0.722339 | 0.670453 |
| RadialDistribution | 0.735871 | 0.840089 | 0.661786 | 0.788626 | 0.665996 |
| Texture            | 0.717921 | 0.733803 | 0.669132 | 0.81236  | 0.572922 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

SARS2-ECH1-PVR-LAIR1-SLC1A5-UQCRFS1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.660504 | 0.714538 | 0.560621 | 0.486821 | 0.566481 |
| RadialDistribution | 0.738251 | 0.433272 | 0.749539 | 0.598352 | 0.745186 |
| Texture            | 0.654579 | 0.705495 | 0.689171 | 0.637149 | 0.639123 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

MYT1-CHRM4-GPR176-TSC22D1
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.800768 | 0.675345 | 0.646027 | 0.722812 | 0.713452 |
| RadialDistribution | 0.710884 | 0.540154 | 0.609774 | 0.65714  | 0.577702 |
| Texture            | 0.612081 | 0.477809 | 0.717486 | 0.797778 | 0.764024 |


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PIK3R3-ZFP36L1-HOXC8-NRBP1-RAB40B
| Feature_group      |      AGP |      DNA |       ER |     Mito |      RNA |
|:-------------------|---------:|---------:|---------:|---------:|---------:|
| Intensity          | 0.815547 | 0.663625 | 0.840245 | 0.863779 | 0.84601  |
| RadialDistribution | 0.533896 | 0.542838 | 0.53368  | 0.673193 | 0.665232 |
| Texture            | 0.697165 | 0.712801 | 0.732557 | 0.711678 | 0.818763 |
