In [3]:
from MPLearn.embedding_notebook import *
initialize_notebook()

# Load Embedding
Gather cells for embedding:
```shell
R ~/opt/MPStats/vignettes/SARS-CoV-2/scripts/3_prepare_for_embedding_1005hits.R
```
Total number of cells: 1,955,131. The aim is to keep it under 2M so that the embedding runs in a reasonable amount of memory and time.
```
   Compound               n
   <chr>              <int>
 1 CIRC825            54188
 2 DGD1202            48753
 3 MCTI178            51495
 4 MCTI253            50695
 5 Negative Control 1500000
 6 Positive Control  250000
```

Load the 1005 top hits into the `UMAP_embedding_1005_hits_full` embedding:

```shell
cd ~/opt/MPLearn/vignettes/SARS-CoV-2/S25
~/anaconda3/envs/sextonlab/bin/embed_umap \
     --dataset /home/ubuntu/opt/MPStats/vignettes/SARS-CoV-2/raw_data/covid19primary_1005_hits_Cell_MasterDataTable.parquet \
     --tag UMAP_embedding_1005_hits_full \
     --feature_columns ~/opt/MPStats/vignettes/SARS-CoV-2/raw_data/cell_feature_columns.tsv \
     --umap_low_memory \
     --verbose
```

In [8]:
# Notebook-wide configuration read by the loading and saving cells below:
# the experiment directory, the embedding run to load, and the identifier
# passed as `plate_id` when loading that embedding.
experiment_path = "../S25"
embedding_tag = "UMAP_embedding_1005_hits_full"
plate_id = "covid19primary_1005_hits"

In [9]:
# Metadata columns requested alongside the embedding when it is loaded.
meta_columns = [
    "Image_Metadata_PlateID",
    "Image_Metadata_WellID",
    "Image_Metadata_FieldID",
    "ImageNumber",
    "plate_id",
    "Compound",
    "dose_nM",
    "Cells_Number_Object_Number",
]

# Load the embedding selected by the configuration cell above.
embedding = load_single_embedding(
    experiment_path=experiment_path,
    embedding_tag=embedding_tag,
    plate_id=plate_id,
    cluster_embedding_tag=False,
    meta_columns=meta_columns,
)


In [10]:
# Report how many cells each plate contributes to the embedding.
# The loop variable is deliberately not called `plate_id`: that name holds the
# identifier set in the configuration cell and passed to load_single_embedding,
# so reusing it here would silently overwrite it for any later re-run.
for embedded_plate_id in embedding.plate_id.unique():
    # Counting the True entries of the mask avoids materialising a filtered
    # copy of the frame just to read its row count.
    n_cells = (embedding.plate_id == embedded_plate_id).sum()
    print(f"{embedded_plate_id} {n_cells}")

10050050C 361069
10050250C 401332
10050500C 396157
10051000C 410440
10052000C 386133


In [14]:
# Write a 500x500 plot of the whole loaded embedding to a PNG in the
# plate_1005_hits figures folder.
save_embedding_plot(
    embedding=embedding,
    output_fname="../S25/product/figures/plate_1005_hits/full_embedding.png",
    plot_width=500,
    plot_height=500)

In [11]:
%%output size=400
# Display the UMAP view of the whole loaded embedding (output size set by the
# cell magic above, which must stay on the first line of the cell).
view_UMAP(embedding)

In [13]:
%output size=400
# Overlay the path layer returned by draw_regions_of_interest() on the
# embedding view; the second return value is kept as `regions_of_interest`.
# NOTE(review): the regions presumably come from drawing by hand on the
# rendered plot, so they are not reproducible from code alone - the cells
# below persist them to parquet.
embedding_plot = view_UMAP(embedding)
path_layer, regions_of_interest = draw_regions_of_interest()
embedding_plot * path_layer

In [14]:
# Inspect the stream object holding the region paths (xs/ys coordinate lists).
regions_of_interest

FreehandDraw(data={'xs': [[5.602316685375033, 5.534125243945048, 5.250171819865497, 5.115446275383054, 5.063651745736366, 5.0315679722129785, 5.016770129753251, 5.016770129753251, 5.016770129753251, 5.016770129753251, 5.04879396939257, 5.106271396630108, 5.190918018897349, 5.370808739516367, 5.609538718405387, 5.7940451805128, 5.966300157818375, 6.13855513512395, 6.355737217859935, 6.599201973590431, 6.760683835234545, 6.9451902973419575, 7.142129710733529, 7.342442069934112, 7.479534170422238, 7.653565521458423, 7.880158888823405, 7.995942828694773, 8.096099008295065, 8.179918209996126, 8.254620801998199, 8.31038095697414, 8.357913521531453, 8.365253757499831, 8.284809166437896, 8.062594302270199, 7.824931479483636, 7.595496247115701, 7.215292334881043, 7.010303467892939, 6.805314600904834, 6.643479795276757, 6.506149624082467, 6.4007841910584835, 6.202128337820372, 6.026560348589853, 5.74757810722783, 5.561177068451782, 5.471497664752831, 5.390816667814843, 5.390816667814843, 5.39081

In [16]:
# Persist the regions of interest under this embedding's intermediate_data
# directory so they can be reloaded later.
# NOTE(review): a later cell loads `roi_paths.parquet` from this same
# directory rather than `regions_of_interest.parquet` - confirm which file
# name is the intended one.
save_regions_of_interest(
    regions_of_interest=regions_of_interest,
    output_path=(
        f"{experiment_path}/intermediate_data/"
        f"{embedding_tag}/"
        f"regions_of_interest.parquet"))

In [17]:
# Read the regions back from the path the save cell wrote to, rather than a
# machine-local /tmp scratch copy that no cell in this notebook produces, so
# the round-trip check also works from a fresh kernel or another machine.
loaded_regions_of_interest = pa.parquet.read_table(
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    f"regions_of_interest.parquet").to_pandas()

In [20]:
# Show the reloaded table (columns include roi_index, xs and yz).
loaded_regions_of_interest

Unnamed: 0,roi_index,xs,yz
0,0,5.602317,6.451648
1,0,5.534125,6.327132
2,0,5.250172,5.909977
3,0,5.115446,5.611297
4,0,5.063652,5.380171
5,0,5.031568,5.19222
6,0,5.01677,5.067379
7,0,5.01677,4.915878
8,0,5.01677,4.825085
9,0,5.01677,4.723483


In [24]:
def load_regions_of_interest(
        source="regions_of_interest.parquet"):
    """Rebuild a FreehandDraw stream from a regions-of-interest parquet file.

    The file is read as a long-format table with one row per path vertex and
    the columns ``roi_index``, ``xs`` and ``yz``. Note that the y coordinates
    are stored under the column name ``yz`` in the file; they are exposed as
    ``ys`` in the returned stream.

    Args:
        source: Path of the parquet file to read.

    Returns:
        A ``holoviews.streams.FreehandDraw`` whose ``data`` holds two parallel
        lists, ``xs`` and ``ys``, with one list of coordinates per region.
        Regions appear in the order their ``roi_index`` first occurs in the
        file, and vertices keep their row order.
    """
    roi_vertices = pa.parquet.read_table(
        source=source).to_pandas()

    xs = []
    ys = []
    # A single grouped pass replaces building the same boolean mask over the
    # whole table twice per region. sort=False keeps the regions in order of
    # first appearance, matching iteration over roi_index.unique().
    for _, vertices in roi_vertices.groupby("roi_index", sort=False):
        xs.append(vertices["xs"].to_list())
        ys.append(vertices["yz"].to_list())
    return holoviews.streams.FreehandDraw(
        data={"xs": xs, "ys": ys})
# Reload from the path the save cell wrote to, rather than a machine-local
# /tmp scratch copy that no cell in this notebook produces.
z = load_regions_of_interest(
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    f"regions_of_interest.parquet")

In [25]:
# Display the reconstructed stream for comparison with the original regions.
z

FreehandDraw(data={'xs': [[5.602316685375033, 5.534125243945048, 5.250171819865497, 5.115446275383054, 5.063651745736366, 5.0315679722129785, 5.016770129753251, 5.016770129753251, 5.016770129753251, 5.016770129753251, 5.04879396939257, 5.106271396630108, 5.190918018897349, 5.370808739516367, 5.609538718405387, 5.7940451805128, 5.966300157818375, 6.13855513512395, 6.355737217859935, 6.599201973590431, 6.760683835234545, 6.9451902973419575, 7.142129710733529, 7.342442069934112, 7.479534170422238, 7.653565521458423, 7.880158888823405, 7.995942828694773, 8.096099008295065, 8.179918209996126, 8.254620801998199, 8.31038095697414, 8.357913521531453, 8.365253757499831, 8.284809166437896, 8.062594302270199, 7.824931479483636, 7.595496247115701, 7.215292334881043, 7.010303467892939, 6.805314600904834, 6.643479795276757, 6.506149624082467, 6.4007841910584835, 6.202128337820372, 6.026560348589853, 5.74757810722783, 5.561177068451782, 5.471497664752831, 5.390816667814843, 5.390816667814843, 5.39081

In [27]:
# Input (region outlines) and output (membership table) both live under this
# embedding's intermediate_data directory.
roi_paths_fname = (
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    f"roi_paths.parquet")
roi_membership_fname = (
    f"{experiment_path}/intermediate_data/"
    f"{embedding_tag}/"
    f"fig3_ROI_membership.parquet")

# Reload the stored region outlines.
regions_of_interest = load_regions_of_interest(source=roi_paths_fname)

# Compute region membership for the embedded points from their UMAP coordinates.
ROI_membership = get_ROI_membership(
    regions_of_interest=regions_of_interest,
    points=embedding[["UMAP_1", "UMAP_2"]])

# Persist the membership table for downstream use.
pa.parquet.write_table(
    table=pa.Table.from_pandas(ROI_membership),
    where=roi_membership_fname)

Getting membership for 16 regions of interest
   Getting membership for region 0...
   Getting membership for region 1...
   Getting membership for region 2...
   Getting membership for region 3...
   Getting membership for region 4...
   Getting membership for region 5...
   Getting membership for region 6...
   Getting membership for region 7...
   Getting membership for region 8...
   Getting membership for region 9...
   Getting membership for region 10...
   Getting membership for region 11...
   Getting membership for region 12...
   Getting membership for region 13...
   Getting membership for region 14...
   Getting membership for region 15...


In [13]:
%output size=300
# View the embedding keyed on the `Compound` column
# (presumably one selectable view per compound - see view_UMAP_select_condition).
view_UMAP_select_condition(embedding, 'Compound')

In [11]:
%output size=100
# Same per-`Compound` view, restricted to rows with dose_nM greater than 50.
view_UMAP_select_condition(embedding[embedding.dose_nM > 50], 'Compound')

In [9]:
# Compounds to plot individually, grouped by why they are of interest.
compounds_of_interest = [
    # Sigma compounds
    'S1RA',
    'Hydroxychloroquine',
    'Amiodarone (hydrochloride)',

    # look unusual
    'Bosutinib',
    'Niclosamide',
    'Nevirapine',
    'Lomitapide',
    'Nintedanib',
    'MI-503',

    # make worse
    # NOTE(review): possibly a misspelling of 'Olmesartan Medoxomil' - confirm
    # against the values in the Compound column.
    'Olemsartan Medoximil',
    '5-Aminolevulinic Acid (hydrochloride)',
    'Carbinoxamine (maleate)',

    # reduces roi 4 but not roi 1,2, or 3
    'Zanamivir',
    'Chloroxine',
    'Vardenafil (hydrochloride hydrate)',
]

# Only rows with dose_nM above 250 are plotted.
embedding_high = embedding[embedding.dose_nM > 250]

# Computed once: scanning the Compound column inside the loop would repeat the
# same pass over the whole embedding for every compound.
known_compounds = set(embedding.Compound.unique())

for compound in compounds_of_interest:
    if compound not in known_compounds:
        # Report and skip: plotting a name that matches no rows would write an
        # empty figure under a misleading file name.
        print(f"Unrecognized compound: {compound}")
        continue
    save_embedding_plot(
        embedding=embedding_high[embedding_high.Compound == compound],
        output_fname=f"../S25/product/figures/fig3/fig3a_{compound}_gt250_embedding.png",
        plot_width=250,
        plot_height=250)

In [22]:
# Keep only rows whose Compound is "PC" or "NC" (presumably the positive and
# negative controls).
# NOTE(review): the summary table in the markdown above spells these labels
# out as "Positive Control" / "Negative Control" - confirm which labels this
# embedding's Compound column actually uses.
embedding_PC_NC = embedding[embedding.Compound.isin(["PC", "NC"])]

# One figure per plate. The loop variable is deliberately not called
# `plate_id`: that name holds the identifier set in the configuration cell and
# passed to load_single_embedding, so reusing it would overwrite it.
for control_plate_id in embedding.plate_id.unique():
    save_embedding_plot(
        embedding=embedding_PC_NC[embedding_PC_NC.plate_id == control_plate_id],
        output_fname=f"../S25/product/figures/fig3/fig3a_PC_NC_plate_id_{control_plate_id}_embedding.png",
        plot_width=400,
        plot_height=400)

In [19]:
def load_replica_embeddings():
    """Load the six replica embeddings of the top-hits plate and stack them.

    Replica 1 uses the embedding tag without a replica infix; replicas 2-6
    insert ``rep<N>_`` into the tag. Each loaded embedding gets a ``replica``
    column appended as its last column before everything is concatenated.
    Reads the notebook-level ``experiment_path``.

    Returns:
        The ``pd.concat`` of the per-replica embeddings.
    """
    columns_to_keep = [
        "Image_Metadata_PlateID",
        "Image_Metadata_WellID",
        "Image_Metadata_FieldID",
        "ImageNumber",
        "plate_id",
        "Compound",
        "dose_nM",
        "Cells_Number_Object_Number",
    ]

    per_replica = []
    for replica in range(1, 7):
        # The first replica was saved without a "repN_" infix in its tag.
        replica_label = "" if replica == 1 else f"rep{replica}_"
        tag = f"top_hits_plate_scaled_200522a_{replica_label}umap2_2M_15_0.0"

        replica_embedding = load_single_embedding(
            experiment_path=experiment_path,
            embedding_tag=tag,
            plate_id="top_hits_plate_scaled_200522a",
            cluster_embedding_tag=False,
            meta_columns=columns_to_keep)
        # Append the replica number as the last column.
        replica_embedding.insert(
            len(replica_embedding.columns), "replica", replica)
        per_replica.append(replica_embedding)

    return pd.concat(per_replica)

# Load every replica once; the plotting cell below filters this by replica.
embedding_replicas = load_replica_embeddings()

In [21]:
# Save one 400x400 figure per replica.
for replica in embedding_replicas.replica.unique():
    is_this_replica = embedding_replicas.replica == replica
    replica_fname = f"../S25/product/figures/fig3/fig3a_replica_{replica}_embedding.png"
    save_embedding_plot(
        embedding=embedding_replicas[is_this_replica],
        output_fname=replica_fname,
        plot_width=400,
        plot_height=400)

In [26]:
def load_num_neighbors_embeddings():
    """Load the top-hits plate embeddings for several neighbor counts.

    One embedding is loaded for each of the values 5, 10 and 15, which are
    substituted into the embedding tag. Each loaded embedding gets a
    ``num_neighbors`` column appended as its last column before everything is
    concatenated. Reads the notebook-level ``experiment_path``.

    Returns:
        The ``pd.concat`` of the per-setting embeddings.
    """
    columns_to_keep = [
        "Image_Metadata_PlateID",
        "Image_Metadata_WellID",
        "Image_Metadata_FieldID",
        "ImageNumber",
        "plate_id",
        "Compound",
        "dose_nM",
        "Cells_Number_Object_Number",
    ]

    per_setting = []
    for num_neighbors in (5, 10, 15):
        tag = f"top_hits_plate_scaled_200522a_umap2_2M_{num_neighbors}_0.0"
        setting_embedding = load_single_embedding(
            experiment_path=experiment_path,
            embedding_tag=tag,
            plate_id="top_hits_plate_scaled_200522a",
            cluster_embedding_tag=False,
            meta_columns=columns_to_keep)
        # Append the neighbor count as the last column.
        setting_embedding.insert(
            len(setting_embedding.columns), "num_neighbors", num_neighbors)
        per_setting.append(setting_embedding)

    return pd.concat(per_setting)

# Load every neighbor-count setting once; the plotting cell below filters this
# by num_neighbors.
embedding_num_neighbors = load_num_neighbors_embeddings()

In [29]:
# Save one 400x400 figure per neighbor-count setting.
for num_neighbors in embedding_num_neighbors.num_neighbors.unique():
    has_this_setting = embedding_num_neighbors.num_neighbors == num_neighbors
    setting_fname = f"../S25/product/figures/fig3/fig3a_num_neighbors_{num_neighbors}_embedding.png"
    save_embedding_plot(
        embedding=embedding_num_neighbors[has_this_setting],
        output_fname=setting_fname,
        plot_width=400,
        plot_height=400)