# Notebook for finding interesting data on the JUMP-scope project
- Number of images
- Number of sites
- Number of cells
- Number of unique settings

In [1]:
import pandas as pd
import os

In [3]:
match_rep_df = pd.read_csv("/Users/ctromans/image-analysis/jump_scope/jump-scope-analysis/checkpoints/match_rep_df.csv")

In [38]:
# Find the number of images loaded per plate, as listed in the load_data CSV files
# (any file whose name contains "load_data.csv")

def find_trivia(metadata_df, load_data_path):
    """Annotate plate metadata with image counts read from load_data CSV files.

    Recursively walks ``load_data_path``. Every file whose name contains
    ``"load_data.csv"`` is read, and the name of the directory that holds it
    is used as the plate barcode. Rows of ``metadata_df`` whose
    ``Assay_Plate_Barcode`` equals that name receive:

    * ``image_num``: number of CSV rows multiplied by the number of columns
      whose name contains ``"FileName_Orig"``.
    * ``total_image_size_GB``: ``Size_MB * image_num / 1000``.

    Args:
        metadata_df: DataFrame with ``Assay_Plate_Barcode`` and ``Size_MB``
            columns. It is not modified.
        load_data_path: Directory to search for load_data CSV files.

    Returns:
        A copy of ``metadata_df`` with the two columns above. Rows without a
        matching CSV hold NaN, and both columns exist even when no CSV is
        found at all.
    """
    df = metadata_df.copy()

    # Create the output columns up front so callers can rely on them even
    # when the walk finds no load_data CSV. Existing columns are left alone.
    for column in ("image_num", "total_image_size_GB"):
        if column not in df.columns:
            df[column] = float("nan")

    for root, _dirs, files in os.walk(load_data_path):
        for file_name in files:
            if "load_data.csv" not in file_name:
                continue
            path = os.path.join(root, file_name)
            # The plate barcode is the directory holding the CSV; using
            # os.path here avoids hard-coding "/" as the path separator.
            plate = os.path.basename(os.path.dirname(path))
            load_data_df = pd.read_csv(path)
            # One "FileName_Orig*" column per channel.
            num_channels = sum(
                "FileName_Orig" in column for column in load_data_df.columns
            )
            # Total number of images = num_rows * num_channels
            num_images = load_data_df.shape[0] * num_channels
            is_plate = df["Assay_Plate_Barcode"] == plate
            df.loc[is_plate, "image_num"] = num_images
            df.loc[is_plate, "total_image_size_GB"] = (
                df.loc[is_plate, "Size_MB"] * num_images
            ) / 1000
    return df

# Keep rows whose Batch name does not contain "siteSub" and whose
# `sphering` flag is True.
batch_has_site_sub = match_rep_df["Batch"].str.contains("siteSub")
sphering_is_true = match_rep_df["sphering"] == True
no_sub = match_rep_df[~batch_has_site_sub & sphering_is_true]

# Annotate the remaining rows using the load_data CSVs under the given folder.
number_of_images = find_trivia(no_sub, "../jump-scope/load_data_csv/")


In [39]:
# Sum of image_num over every row of `number_of_images`.
# NOTE(review): the table displayed further down shows the same
# Assay_Plate_Barcode on more than one row with identical image_num, so this
# sum may count those plates more than once — confirm whether rows should be
# de-duplicated per plate first.
number_of_images["image_num"].sum()

454638.0

In [40]:
# Sum of total_image_size_GB over every row of `number_of_images`.
# NOTE(review): the table displayed further down shows the same
# Assay_Plate_Barcode on more than one row with identical
# total_image_size_GB, so this sum may count those plates more than once —
# confirm whether rows should be de-duplicated per plate first.
number_of_images["total_image_size_GB"].sum()

6786.468340043271

In [41]:
# Sum of cell_count over every row of `number_of_images`.
# NOTE(review): the table displayed further down shows the same
# Assay_Plate_Barcode on more than one row with identical cell_count, so this
# sum may count those plates more than once — confirm whether rows should be
# de-duplicated per plate first.
number_of_images["cell_count"].sum()

41356132

In [42]:
# Rough total derived from the `sites` column: sum of `sites` over all rows * 384 * 5
# (presumably 384 wells per plate and 5 channels per site — TODO confirm).
# NOTE(review): originally labelled "total number of sites", but the * 5 channel
# factor suggests this is an estimate of the number of images — confirm.
# This is less accurate since some profiles have 4/5/6 channels and perhaps not all
# images are analysed
number_of_images["sites"].sum() * 384 * 5

441600

In [61]:
# Count unique settings: keep only the columns listed in `setting_cols` and
# drop repeated rows, so each remaining row is one distinct combination of
# those values.
setting_cols = [
    "aperture", "Magnification", "Modality", "Binning",
    "Number_of_channels", "z_plane",
    "spinning-disc", "dry-immersion",
    "vs-brightfield", "simultaneous-excitation",
    "sites", "channel_names", "Vendor",
]

setts = number_of_images.loc[:, setting_cols].drop_duplicates()

print(f"{setts.shape[0]} unique settings used")

25 unique settings used


In [55]:
number_of_images.columns

Index(['Vendor', 'Batch', 'Plate_Map_Name', 'Assay_Plate_Barcode', 'Modality',
       'Images_per_well', 'Sites-SubSampled', 'Binning', 'Magnification',
       'Number_of_channels', 'z_plane', 'BF_Zplanes', 'spinning-disc',
       'aperture', 'dry-immersion', 'vs-brightfield',
       'simultaneous-excitation', 'sites', 'Size_MB', 'Size_MB_std',
       'sphering', 'value_95_replicating', 'Percent_Replicating',
       'channel_names', 'brightfield_z_plane_used', 'feature_channels_found',
       'Percent_Matching', 'value_95_matching', 'cell_count', 'image_num',
       'total_image_size_GB'],
      dtype='object')

In [56]:
setts

Unnamed: 0,aperture,Magnification,Modality,Binning,Number_of_channels,z_plane,spinning-disc,dry-immersion,vs-brightfield,simultaneous-excitation,sites,channel_names
0,0.45,10,Confocal,1,6,1,no,dry,,,4,"Actin, DNA, ER, Golgi, Mito, RNA"
2,0.45,10,Confocal,1,6,1,yes,dry,,,4,"Actin, DNA, ER, Golgi, Mito, RNA"
4,0.45,10,Confocal,1,6,3,yes,dry,,,4,"Actin, DNA, ER, Golgi, Mito, RNA"
6,0.75,20,Confocal,1,6,3,yes,dry,,,4,"Actin, DNA, ER, Golgi, Mito, RNA"
8,0.75,20,Confocal,1,6,1,no,dry,,,9,"Actin, DNA, ER, Golgi, Mito, RNA"
10,0.75,20,Confocal,1,6,1,yes,dry,,,9,"Actin, DNA, ER, Golgi, Mito, RNA"
12,0.45,10,Widefield,1,4,1,,dry,,,1,"AGP, DNA, ER, Mito"
20,0.75,20,Widefield,1,4,1,,dry,,,9,"AGP, DNA, ER, Mito"
26,1.0,20,Confocal,1,5,1,,water,,,3,"DNA, ER, Mito, RNA, WGPhalloidin"
34,1.0,20,Confocal,1,5,3,,water,,,3,"DNA, ER, Mito, RNA, WGPhalloidin"


In [24]:
number_of_images

Unnamed: 0,Vendor,Batch,Plate_Map_Name,Assay_Plate_Barcode,Modality,Images_per_well,Sites-SubSampled,Binning,Magnification,Number_of_channels,...,value_95_replicating,Percent_Replicating,channel_names,brightfield_z_plane_used,feature_channels_found,Percent_Matching,value_95_matching,cell_count,image_num,total_image_size_GB
0,MolDev,Scope1_MolDev_10X,JUMP-MOA_compound_platemap,Plate2_PCO_6ch_4site_10XPA,Confocal,4,,1,10,6,...,0.191908,60.000000,"Actin, DNA, ER, Golgi, Mito, RNA",,"Actin, DNA, ER, Golgi, Mito, RNA",23.255814,0.288099,2014937,9210.0,483.307032
1,MolDev,Scope1_MolDev_10X,JUMP-MOA_compound_platemap,Plate2_PCO_6ch_4site_10XPA,Confocal,4,,1,10,6,...,0.269727,53.333333,"Actin, DNA, ER, Golgi, Mito, RNA",,"Actin, DNA, ER, Golgi, Mito, RNA",18.604651,0.320855,2014937,9210.0,483.307032
2,MolDev,Scope1_MolDev_10X,JUMP-MOA_compound_platemap,Plate3_PCO_6ch_4site_10XPA_Crest,Confocal,4,,1,10,6,...,0.269617,62.222222,"Actin, DNA, ER, Golgi, Mito, RNA",,"Actin, DNA, ER, Golgi, Mito, RNA",18.604651,0.398249,2413350,9216.0,483.623555
3,MolDev,Scope1_MolDev_10X,JUMP-MOA_compound_platemap,Plate3_PCO_6ch_4site_10XPA_Crest,Confocal,4,,1,10,6,...,0.329074,45.555556,"Actin, DNA, ER, Golgi, Mito, RNA",,"Actin, DNA, ER, Golgi, Mito, RNA",18.604651,0.389170,2413350,9216.0,483.623555
4,MolDev,Scope1_MolDev_10X_4siteZ,JUMP-MOA_compound_platemap,Plate3_PCO_6ch_4site_10XPA_Crestz,Confocal,4,,1,10,6,...,0.205121,66.666667,"Actin, DNA, ER, Golgi, Mito, RNA",,"Actin, DNA, ER, Golgi, Mito, RNA",23.255814,0.363114,2381443,9198.0,482.680056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Yokogawa_US,Scope1_Yokogawa_US_20X_6Ch_BRO0117059,JUMP-MOA_compound_platemap,BRO0117059_20X,Confocal,9,,1,20,6,...,0.407403,56.666667,"AGP, BrightField, DNA, ER, Mito, RNA",Z08,"AGP, BrightField, DNA, ER, Mito, RNA",20.930233,0.480152,1136319,20724.0,164.847787
104,Yokogawa_US,Scope1_Yokogawa_US_20X_6Ch_BRO01177034,JUMP-MOA_compound_platemap,BRO01177034_20x,Confocal,9,,1,20,6,...,0.206867,58.888889,"AGP, BrightField, DNA, ER, Mito, RNA",Z17,"AGP, BrightField, DNA, ER, Mito, RNA",18.604651,0.318081,1045379,20790.0,166.394513
105,Yokogawa_US,Scope1_Yokogawa_US_20X_6Ch_BRO01177034,JUMP-MOA_compound_platemap,BRO01177034_20x,Confocal,9,,1,20,6,...,0.261967,54.444444,"AGP, BrightField, DNA, ER, Mito, RNA",Z17,"AGP, BrightField, DNA, ER, Mito, RNA",20.930233,0.360108,1045379,20790.0,166.394513
106,Yokogawa_US,Scope1_Yokogawa_US_40X_BRO0117059,JUMP-MOA_compound_platemap,BRO0117059_40x,Confocal,9,,1,40,6,...,0.181967,54.444444,"AGP, BrightField, DNA, ER, Mito, RNA",Z08,"AGP, BrightField, DNA, ER, Mito, RNA",18.604651,0.266650,279900,20706.0,165.060538


In [10]:
df = pd.read_csv("/Users/ctromans/image-analysis/jump_scope/jump-scope/load_data_csv/2020_10_27_Scope1_YokogawaJapan/20201020T134356/load_data_with_illum.csv")

In [11]:
df.columns

Index(['FileName_OrigDNA', 'PathName_OrigDNA', 'FileName_OrigER',
       'PathName_OrigER', 'FileName_OrigRNA', 'PathName_OrigRNA',
       'FileName_OrigAGP', 'PathName_OrigAGP', 'FileName_OrigMito',
       'PathName_OrigMito', 'Metadata_Plate', 'Metadata_Well', 'Metadata_Site',
       'FileName_IllumDNA', 'PathName_IllumDNA', 'FileName_IllumER',
       'PathName_IllumER', 'FileName_IllumRNA', 'PathName_IllumRNA',
       'FileName_IllumAGP', 'PathName_IllumAGP', 'FileName_IllumMito',
       'PathName_IllumMito'],
      dtype='object')

In [12]:
[i for i in df.columns if "FileName_Orig" in i]

['FileName_OrigDNA',
 'FileName_OrigER',
 'FileName_OrigRNA',
 'FileName_OrigAGP',
 'FileName_OrigMito']