# Viewing images of Varchamp wells
**Author:** Jessica Ewald <br>

The purpose of this script is to query image names by batch, plate, well, site, & channel, download from AWS, and render in a notebook. This will be useful for quick visual sanity checks.

In [3]:
# Imports
import pathlib
import polars as pl
import numpy as np
from sh import aws
import os
from skimage.io import imread

import black
import jupyter_black

jupyter_black.load(
    lab=False,
    line_length=79,
    verbosity="DEBUG",
    target_version=black.TargetVersion.PY310,
)

import warnings
warnings.filterwarnings("ignore")

DEBUG:jupyter_black:config: {'line_length': 79, 'target_versions': {<TargetVersion.PY310: 10>}}


<IPython.core.display.Javascript object>

In [4]:
# get sample metadata
dat = pl.scan_parquet("/dgx1nas1/storage/data/jess/varchamp/sc_data/processed_profiles/Rep_Ctrls/annotated_normalized_featselected.parquet")

meta_cols = [i for i in dat.columns if "Metadata_" in i]

dat = dat.select(meta_cols).collect()

In [35]:
# Specify image parameters

sel_batch = "B4A3R1"
sel_plate = "T4"
site = "05"
well = "D03"
sel_channel = "GFP"

In [36]:
# Define mapping between simple names and folder names
batch_dict = {"B4A3R1": "2023-12-15_B4A3R1",
              "B6A3R2": "2023-12-21_B6A3R2"}

letter_dict = {"A":1, "B":2, "C":3, "D":4, "E":5, "F":6, "G":7, "H":8, "I":9, "J":10, "K":11, "L":12, "M":13, "N":14, "O":15, "P":16}

plate_dict = {"T1": "2023-12-15_B4A3R1_P1T1__2023-12-15T08_49_02-Measurement1",
              "T2": "2023-12-15_B4A3R1_P1T2__2023-12-15T10_35_54-Measurement1",
              "T3": "2023-12-18_B4A3R1_P1T3__2023-12-18T08_29_45-Measurement1",
              "T4": "2023-12-18_B4A3R1_P1T4__2023-12-18T10_17_07-Measurement1"}

channel_dict = {"GFP":"01", "DAPI":"02", "Mito":"03", "AGP":"04", "Brightfield1":"05", "Brightfield2":"06", "Brightfield":"07"}

In [30]:
# get image dir
aws_batches = aws("s3",
                  "ls",
                  f"s3://cellpainting-gallery/cpg0020-varchamp/broad/images/")
aws_batches = aws_batches.replace("                           ", "")
aws_batches = aws_batches.replace("                           ", "")
aws_batches = aws_batches.replace(" ", "")
aws_batches = aws_batches.replace("/", "")
aws_batches = aws_batches.split("PRE")[1:]
aws_batches = list(map(lambda s: s.strip(), aws_batches))

batch = next((s for s in aws_batches if sel_batch in s), None)

'2023-12-15_B4A3R1'

In [37]:
# construct image name and aws path
letter = well[0]
col = well[1:3]
batch = batch_dict[sel_batch]
row = letter_dict[letter]
plate = plate_dict[sel_plate]
channel = channel_dict[sel_channel]

img_nm = f"r{row}c{col}f{site}p01-ch{channel}sk1fk1fl1.tiff"

aws_path = f"s3://cellpainting-gallery/cpg0020-varchamp/broad/images/{batch}/images/{plate}/Images/{img_nm}"
dgx_path = f"/dgx1nas1/storage/data/jess/varchamp/images/{sel_batch}/{sel_plate}"

In [42]:
# construct folder path in DGX
os.makedirs(dgx_path, exist_ok=True) 

if os.path.isfile(f"{dgx_path}/{img_nm}"):
    print("file exists!")
else:
    aws("s3",
        "cp",
        aws_path, 
        f"{dgx_path}/{img_nm}")

# check to see if image already downloaded to DGX. if not, download image to folder on DGX

# read tiff into numpy array

# plot

ErrorReturnCode_1: 

  RAN: /usr/bin/aws s3 cp s3://cellpainting-gallery/cpg0020-varchamp/broad/images/2023-12-15_B4A3R1/images/2023-12-18_B4A3R1_P1T4__2023-12-18T10_17_07-Measurement1/Images/r4c03f05p01-ch01sk1fk1fl1.tiff /dgx1nas1/storage/data/jess/varchamp/images/B4A3R1/T4/r4c03f05p01-ch01sk1fk1fl1.tiff

  STDOUT:


  STDERR:
fatal error: An error occurred (404) when calling the HeadObject operation: Key "cpg0020-varchamp/broad/images/2023-12-15_B4A3R1/images/2023-12-18_B4A3R1_P1T4__2023-12-18T10_17_07-Measurement1/Images/r4c03f05p01-ch01sk1fk1fl1.tiff" does not exist
