In [None]:
# Every helper called below (load_single_embedding, save_embedding_plot,
# view_UMAP, draw_regions_of_interest, ...) is used without any other import in
# this notebook, so it is expected to come from this star import -- presumably
# together with the `pd`, `pa` and `holoviews` module names.
# TODO(review): confirm and replace with explicit imports.
from MPLearn.embedding_notebook import *
initialize_notebook()

## Load Embedding
Load the 20XX series plates embedded into the `top_hits_plate_scaled_200522a_umap2_2M_15_0.0` embedding

```shell
cd ${experiment_path} &&
    embed_umap \
        --dataset ${dataset} \
        --tag ${tag} \
        --feature_columns raw_data/cell_feature_columns.tsv \
        --no_standardize_features \
        --umap_low_memory \
        --verbose
```

In [None]:
# Root directory of the experiment; every load and save path below is built from it.
experiment_path = '../S25'
# Tag of the reference embedding that the individual plates were embedded into.
embedding_tag = "top_hits_plate_scaled_200522a_umap2_2M_15_0.0"

In [None]:
def load_embedding():
    """Load the top-hit plate embeddings and the 1999B plate into one frame.

    Each plate in ``top_hit_plate_ids`` is read with ``load_single_embedding``
    under the tag
    ``covid19cq1_SARS_{plate_id}_plate_scaled_into_{embedding_tag}``; the
    separately tagged 1999B plate is loaded last. The notebook-level
    ``experiment_path`` and ``embedding_tag`` are read from the enclosing scope.

    Returns:
        The result of ``pd.concat`` over the loaded embeddings, in load order.
    """
    meta_columns = [
        'Image_Metadata_PlateID',
        'Image_Metadata_WellID',
        'Image_Metadata_FieldID',
        'ImageNumber',
        'plate_id',
        'Compound',
        'dose_nM',
        'Cells_Number_Object_Number']

    # NOTE(review): '2010A' used to be listed twice, which loaded that plate
    # twice and duplicated its rows in the concatenated result. If the second
    # entry was meant to be '2011A', add it here.
    top_hit_plate_ids = [
        '2006A', '2007A', '2008A', '2009A',
        '2010A',                   '2012A',
        '2013A', '2014A', '2015A', '2016A',
        '2017A',          '2019A']

    embeddings = [
        load_single_embedding(
            experiment_path=experiment_path,
            embedding_tag=f"covid19cq1_SARS_{plate_id}_plate_scaled_into_{embedding_tag}",
            plate_id=f"covid19cq1_SARS_{plate_id}_plate_scaled",
            cluster_embedding_tag=False,
            meta_columns=meta_columns)
        for plate_id in top_hit_plate_ids]

    # The 1999B plate has its own tag / plate_id naming, so it is loaded
    # separately; it uses the shared experiment_path rather than a second
    # hard-coded copy of the directory.
    embeddings.append(load_single_embedding(
        experiment_path=experiment_path,
        embedding_tag="covid19cq1_SARS_1999B_200523_umap2_into_top_hits_plate_scaled_200522a_15_0.0",
        plate_id="lf_rem_plate_scaled_1999B_2020A",
        cluster_embedding_tag=False,
        meta_columns=meta_columns))

    return pd.concat(embeddings)

embedding = load_embedding()

In [None]:
# Print "<plate_id> <row count>" for every plate present in the combined embedding.
for plate_id in embedding.plate_id.unique():
    rows_on_plate = embedding[embedding.plate_id == plate_id]
    n_rows = rows_on_plate.shape[0]
    print(f"{plate_id} {n_rows}")

In [None]:
# Save a 1000x1000 plot of the complete combined embedding.
full_embedding_fname = "../S25/product/figures/fig3/fig3a_full_embedding.png"
save_embedding_plot(
    embedding=embedding,
    output_fname=full_embedding_fname,
    plot_width=1000,
    plot_height=1000)

In [None]:
%%output size=400
# Display the combined embedding with view_UMAP. The `%%output` cell magic on
# the first line sets size=400 (presumably for this cell's output only --
# confirm against the holoviews magics documentation).
view_UMAP(embedding)

In [None]:
# Overlay the layer returned by draw_regions_of_interest() on the embedding
# view; `regions_of_interest` is the second value that call returns.
# NOTE(review): presumably the regions are drawn by hand in the rendered plot,
# in which case cells that consume `regions_of_interest` depend on interactive
# state rather than on code alone -- confirm.
%output size=400
embedding_plot = view_UMAP(embedding)
path_layer, regions_of_interest = draw_regions_of_interest()
embedding_plot * path_layer

In [None]:
# Inspect the `regions_of_interest` object returned by draw_regions_of_interest().
regions_of_interest

In [None]:
# Write the regions of interest into this embedding's intermediate_data directory.
roi_output_path = (
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    "regions_of_interest.parquet")
save_regions_of_interest(
    regions_of_interest=regions_of_interest,
    output_path=roi_output_path)

In [None]:
# Read back the regions-of-interest file written by the save cell above.
# This used to read /tmp/tmp_regions_of_interest.parquet, a scratch file that
# no cell in this notebook writes, so the cell could not run on a fresh
# checkout.
loaded_regions_of_interest = pa.parquet.read_table(
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    "regions_of_interest.parquet").to_pandas()

In [None]:
# Inspect the table that was read back from parquet.
loaded_regions_of_interest

In [None]:
def load_regions_of_interest(
        source="regions_of_interest.parquet"):
    """Rebuild a FreehandDraw stream from regions of interest stored as parquet.

    Args:
        source: Parquet file to read, passed to ``pa.parquet.read_table``.
            The table must provide ``roi_index``, ``xs`` and ``ys`` columns.

    Returns:
        A ``holoviews.streams.FreehandDraw`` whose ``data`` dict holds, under
        ``'xs'`` and ``'ys'``, one list of values per distinct ``roi_index``
        in order of first appearance.
    """
    regions_of_interest = pa.parquet.read_table(
        source=source).to_pandas()

    xs = []
    ys = []
    for roi_index in regions_of_interest.roi_index.unique():
        # Select the rows of this region once and reuse them for both axes.
        roi_path = regions_of_interest[regions_of_interest.roi_index == roi_index]
        xs.append(roi_path['xs'].to_list())
        # NOTE(review): this previously indexed 'yz'. 'ys' is assumed to be the
        # column that pairs with 'xs' -- confirm against
        # save_regions_of_interest.
        ys.append(roi_path['ys'].to_list())
    return holoviews.streams.FreehandDraw(
        data={'xs': xs, 'ys': ys})

# Load the file written by the save cell above instead of a scratch copy under
# /tmp that no cell in this notebook creates.
z = load_regions_of_interest(
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    "regions_of_interest.parquet")

In [None]:
# Inspect the stream object returned by load_regions_of_interest().
z

In [None]:
# Directory holding the intermediate artifacts of this embedding.
roi_data_dir = f"{experiment_path}/intermediate_data/{embedding_tag}"

# NOTE(review): this reads roi_paths.parquet, whereas the save cell earlier in
# the notebook writes regions_of_interest.parquet -- confirm which file is the
# intended input.
regions_of_interest = load_regions_of_interest(
    source=f"{roi_data_dir}/roi_paths.parquet")

# Compute ROI membership from the two UMAP coordinate columns of every row.
umap_points = embedding[['UMAP_1', 'UMAP_2']]
ROI_membership = get_ROI_membership(
    regions_of_interest=regions_of_interest,
    points=umap_points)

# Store the membership table alongside the other intermediate data.
pa.parquet.write_table(
    table=pa.Table.from_pandas(ROI_membership),
    where=f"{roi_data_dir}/fig3_ROI_membership.parquet")

In [None]:
# View the embedding with view_UMAP_select_condition keyed on the 'plate_id' column.
# NOTE(review): `%output` is a line magic, so size=300 presumably stays in
# effect for later cells until it is changed again -- confirm.
%output size=300
view_UMAP_select_condition(embedding, 'plate_id')

In [None]:
# Same view keyed on 'Compound', restricted to rows with dose_nM > 50.
%output size=100
view_UMAP_select_condition(embedding[embedding.dose_nM > 50], 'Compound')

In [None]:
# Compounds that get an individual embedding plot, grouped by the reason they
# were picked. Names must match the values of the `Compound` column exactly.
compounds_of_interest = [
    # Sigma compounds
    'S1RA',
    'Hydroxychloroquine',
    'Amiodarone (hydrochloride)',

    # look unusual
    'Bosutinib',
    'Niclosamide',
    'Nevirapine',
    'Lomitapide',
    'Nintedanib',
    'MI-503',

    # make worse
    'Olemsartan Medoximil',
    '5-Aminolevulinic Acid (hydrochloride)',
    'Carbinoxamine (maleate)',

    # reduces roi 4 but not roi 1,2, or 3
    'Zanamivir',
    'Chloroxine',
    'Vardenafil (hydrochloride hydrate)',
]

# Only rows with dose_nM > 250 are plotted.
embedding_high = embedding[embedding.dose_nM > 250]

# Computed once up front; the membership check used to rescan the whole
# Compound column on every loop iteration.
known_compounds = set(embedding.Compound.unique())

for compound in compounds_of_interest:
    if compound not in known_compounds:
        print(f"Unrecognized compound: {compound}")
        # There are no rows to plot for a name that is absent from the data,
        # so skip it instead of handing an empty frame to save_embedding_plot.
        continue
    save_embedding_plot(
        embedding=embedding_high[embedding_high.Compound == compound],
        output_fname=f"../S25/product/figures/fig3/fig3a_{compound}_gt250_embedding.png",
        plot_width=250,
        plot_height=250)

In [None]:
# Keep only rows whose Compound is "PC" or "NC" (presumably the positive and
# negative controls -- confirm).
is_pc_or_nc = embedding.Compound.isin(["PC", "NC"])
embedding_PC_NC = embedding[is_pc_or_nc]

# One 400x400 plot of those rows per plate in the full embedding.
for plate_id in embedding.plate_id.unique():
    plate_pc_nc = embedding_PC_NC[embedding_PC_NC.plate_id == plate_id]
    save_embedding_plot(
        embedding=plate_pc_nc,
        output_fname=f"../S25/product/figures/fig3/fig3a_PC_NC_plate_id_{plate_id}_embedding.png",
        plot_width=400,
        plot_height=400)

In [None]:
def load_replica_embeddings():
    """Load replicas 1-6 of the ``top_hits_plate_scaled_200522a`` embedding.

    Replica 1 is stored under the tag without a replica label; replicas 2-6
    carry ``rep{replica}_`` in front of ``umap2``. Each loaded frame gets a
    trailing ``replica`` column holding its replica number.

    Returns:
        The result of ``pd.concat`` over the six frames, in replica order.
    """
    meta_columns = [
        'Image_Metadata_PlateID',
        'Image_Metadata_WellID',
        'Image_Metadata_FieldID',
        'ImageNumber',
        'plate_id',
        'Compound',
        'dose_nM',
        'Cells_Number_Object_Number']

    def load_replica(replica):
        # Load a single replica and tag its rows with the replica number.
        replica_label = "" if replica == 1 else f"rep{replica}_"
        replica_embedding = load_single_embedding(
            experiment_path=experiment_path,
            embedding_tag=f"top_hits_plate_scaled_200522a_{replica_label}umap2_2M_15_0.0",
            plate_id="top_hits_plate_scaled_200522a",
            cluster_embedding_tag=False,
            meta_columns=meta_columns)
        replica_embedding.insert(
            len(replica_embedding.columns), "replica", replica)
        return replica_embedding

    return pd.concat([load_replica(replica) for replica in range(1, 7)])

embedding_replicas = load_replica_embeddings()

In [None]:
# Save one 400x400 plot per replica.
for replica in embedding_replicas.replica.unique():
    in_replica = embedding_replicas.replica == replica
    save_embedding_plot(
        embedding=embedding_replicas[in_replica],
        output_fname=f"../S25/product/figures/fig3/fig3a_replica_{replica}_embedding.png",
        plot_width=400,
        plot_height=400)

In [None]:
def load_num_neighbors_embeddings():
    """Load the ``top_hits_plate_scaled_200522a`` embeddings for 5, 10 and 15 neighbors.

    The neighbor count is the second-to-last component of the embedding tag
    (``..._umap2_2M_{num_neighbors}_0.0``). Each loaded frame gets a trailing
    ``num_neighbors`` column holding that count.

    Returns:
        The result of ``pd.concat`` over the three frames, in the order
        5, 10, 15.
    """
    meta_columns = [
        'Image_Metadata_PlateID',
        'Image_Metadata_WellID',
        'Image_Metadata_FieldID',
        'ImageNumber',
        'plate_id',
        'Compound',
        'dose_nM',
        'Cells_Number_Object_Number']

    def load_for(num_neighbors):
        # Load one embedding and tag its rows with the neighbor count.
        neighbors_embedding = load_single_embedding(
            experiment_path=experiment_path,
            embedding_tag=f"top_hits_plate_scaled_200522a_umap2_2M_{num_neighbors}_0.0",
            plate_id="top_hits_plate_scaled_200522a",
            cluster_embedding_tag=False,
            meta_columns=meta_columns)
        neighbors_embedding.insert(
            len(neighbors_embedding.columns), "num_neighbors", num_neighbors)
        return neighbors_embedding

    return pd.concat([load_for(num_neighbors) for num_neighbors in (5, 10, 15)])

embedding_num_neighbors = load_num_neighbors_embeddings()

In [None]:
# Save one 400x400 plot per neighbor-count setting.
for num_neighbors in embedding_num_neighbors.num_neighbors.unique():
    has_setting = embedding_num_neighbors.num_neighbors == num_neighbors
    save_embedding_plot(
        embedding=embedding_num_neighbors[has_setting],
        output_fname=f"../S25/product/figures/fig3/fig3a_num_neighbors_{num_neighbors}_embedding.png",
        plot_width=400,
        plot_height=400)