In [1]:
import glob
import os
import boto3

In [2]:
# Substrings used to pick example images out of the S3 listing:
# one random site each from 3 DMSO wells.
# Each well is listed in two filename styles (rRRcCCfFF and _<well>_T0001FNNN);
# presumably these come from different instruments - TODO confirm.
file_strings = [
    'r05c07f03',
    '_E07_T0001F003',
    'r11c24f08',
    '_K24_T0001F002',
    'r13c05f01',
    '_M05_T0001F001',
]

In [3]:
# Shared S3 client; later cells pass it to paginate_a_folder and download_file.
s3 = boto3.client('s3')
def paginate_a_folder(s3, bucket_name, prefix,filter_in_list):
    """List the object keys under a prefix that contain any of the given substrings.

    Parameters
    ----------
    s3 : S3 client (or any object exposing ``get_paginator``)
    bucket_name : str
        Bucket to list.
    prefix : str
        Key prefix to list under.
    filter_in_list : iterable of str
        Substrings; a key is kept if it contains at least one of them.

    Returns
    -------
    list of str
        Matching keys, grouped in the order of ``filter_in_list`` and, within
        one filter, in listing order. Each key appears at most once even if it
        matches several filters. Returns ``[]`` (after printing a notice) when
        nothing at all is found under the prefix.
    """
    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
    image_list = []
    for page in pages:
        # A page with no objects carries no "Contents" entry; treat it as
        # empty instead of discarding the keys gathered from other pages.
        image_list.extend(x["Key"] for x in page.get("Contents", []))
    if not image_list:
        print(
            "No files for this prefix"
        )
        return []
    master_list = []
    already_added = set()
    for eachfilter in filter_in_list:
        for key in image_list:
            # Skip keys an earlier filter already selected so that nothing is
            # returned (and later downloaded) twice.
            if eachfilter in key and key not in already_added:
                already_added.add(key)
                master_list.append(key)
    return master_list


def download_file(
    s3, bucket_name, local_file_name, remote_file_name
):
    """Download one S3 object to a local path, best-effort.

    Parameters
    ----------
    s3 : S3 client (or any object exposing ``download_fileobj``)
    bucket_name : str
        Bucket holding the object.
    local_file_name : str
        Destination path; its parent directory must already exist.
    remote_file_name : str
        Key of the object to fetch.

    Returns
    -------
    None
        A ``ClientError`` from the transfer is reported with a printed message
        rather than raised, and the incomplete local file is removed so no
        empty placeholder is left on disk.
    """
    # Import the submodule explicitly so ``botocore.exceptions`` is guaranteed
    # to be loaded regardless of what else has been imported.
    import botocore.exceptions
    try:
        with open(local_file_name, "wb") as f:
            s3.download_fileobj(bucket_name, remote_file_name, f)
    except botocore.exceptions.ClientError:
        print(f"Cannot currently download {remote_file_name}")
        # Opening with "wb" already created the file; delete the partial
        # result (after the handle is closed) so it is not mistaken for data.
        if os.path.exists(local_file_name):
            os.remove(local_file_name)
        return

In [4]:
# Some folder names under ../profiles/ stand for several image batches;
# map each such name to the batch names used in the image listing.
# NOTE(review): the 'Stain5_CondAB_Standard' key lists *_Confocal batches and
# 'Stain5_CondAB_Confocal' lists *_Standard batches - looks swapped; confirm.
# It only matters if just one of the two folders exists, since both lists are
# merged into final_batch_list.
name_mapper = {
    'Stain5_CondC_Standard':['2021_03_03_Stain5_CondC_PE_Standard','2021_02_26_Stain5_CondC_Thermo_Standard'],
    'Stain5_CondC_Confocal':['2021_02_26_Stain5_CondC_Thermo_Confocal','2021_03_03_Stain5_CondC_PE_Confocal'],
    'Stain5_CondAB_Standard':['2021_03_03_Stain5_CondB_Thermo_Confocal','2021_03_03_Stain5_CondB_PE_Confocal',
    '2021_03_04_Stain5_CondA_PE_Confocal','2021_03_04_Stain5_CondA_Thermo_Confocal'],
    'Stain5_CondAB_Confocal':['2021_03_03_Stain5_CondB_PE_Standard','2021_03_03_Stain5_CondB_Thermo_Standard',
    '2021_03_04_Stain5_CondA_PE_Standard','2021_03_04_Stain5_CondA_Thermo_Standard']}
# Entries of ../profiles/ that are skipped entirely.
not_expected = ['Stain5_AZ','2020_10_20_Cell1_restainedReimaged_Bin1Pipeline',
'README.md']

final_batch_list = []

for eachbatch in os.listdir('../profiles/'):
    if eachbatch in not_expected:
        continue
    if eachbatch in name_mapper:
        final_batch_list += name_mapper[eachbatch]
    else:
        final_batch_list.append(eachbatch)

final_batch_list.sort()

for eachbatch in final_batch_list:
    print(f'Paginating {eachbatch}')
    # NOTE(review): the prefix has no trailing '/', so a batch whose name is a
    # prefix of another batch name also lists that other batch's keys - confirm
    # whether a trailing slash should be added.
    image_list = paginate_a_folder(s3,'cellpainting-gallery',
    f'jump-pilot/source_4/images/{eachbatch}',file_strings)
    print(f'{len(image_list)} responsive file names found. Downloading.')
    # The download loop must run once per batch; left outside this loop it
    # would only ever see the image_list of the final batch.
    for eachfile in image_list:
        split = eachfile.split('/')
        # Mirror the last five components of the key under ../example_images.
        local_name = os.path.join('..','example_images',split[-5],split[-4],split[-3],split[-2],split[-1])
        # A missing or zero-byte local file is (re)downloaded; anything with
        # content is assumed complete and skipped.
        download = not (os.path.exists(local_name) and os.path.getsize(local_name) > 0)
        if download:
            os.makedirs(os.path.dirname(local_name),exist_ok=True)
            download_file(s3,'cellpainting-gallery',local_name,eachfile)

Paginating 2020_06_25_Stain2_Batch2_Binned
18 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_Confocal
15 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_MitoCompare
18 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_Multiplane
72 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_Redone
72 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_Repeat
18 responsive file names found. Downloading.
Paginating 2020_06_25_Stain2_Batch2_Standard
90 responsive file names found. Downloading.
Paginating 2020_08_11_Stain3_Bin1
18 responsive file names found. Downloading.
Paginating 2020_08_11_Stain3_HighExp
90 responsive file names found. Downloading.
Paginating 2020_08_11_Stain3_Multiplane
72 responsive file names found. Downloading.
Paginating 2020_08_11_Stain3_Standard
180 responsive file names found. Downloading.
Paginating 2020_08_11_Stain3_Yokogawa
150

In [5]:
# Report, per batch, how many tiff files and plate folders exist locally.
for eachbatch in final_batch_list:
    # The 'Images' subfolder is appended for every batch except those whose
    # name contains "Yokogawa".
    folder_parts = ['..','example_images',eachbatch]
    if "Yokogawa" not in eachbatch:
        folder_parts.append('Images')
    image_folder = os.path.join(*folder_parts)

    if not os.path.exists(image_folder):
        print(f"Could not find {image_folder}")
        continue

    tiff_count = len(glob.glob(image_folder+'/**/**.tif**',recursive=True))
    # Entries without a '.' in their name are counted as plates.
    plate_count = sum(1 for entry in os.listdir(image_folder) if '.' not in entry)
    print(f"{tiff_count} tiff files found in {plate_count} plates in {eachbatch}")


18 tiff files found in 1 plates in 2020_06_25_Stain2_Batch2_Binned
15 tiff files found in 1 plates in 2020_06_25_Stain2_Batch2_Confocal
18 tiff files found in 1 plates in 2020_06_25_Stain2_Batch2_MitoCompare
18 tiff files found in 1 plates in 2020_06_25_Stain2_Batch2_Multiplane
72 tiff files found in 4 plates in 2020_06_25_Stain2_Batch2_Redone
18 tiff files found in 1 plates in 2020_06_25_Stain2_Batch2_Repeat
90 tiff files found in 5 plates in 2020_06_25_Stain2_Batch2_Standard
18 tiff files found in 1 plates in 2020_08_11_Stain3_Bin1
90 tiff files found in 5 plates in 2020_08_11_Stain3_HighExp
18 tiff files found in 1 plates in 2020_08_11_Stain3_Multiplane
180 tiff files found in 10 plates in 2020_08_11_Stain3_Standard
150 tiff files found in 10 plates in 2020_08_11_Stain3_Yokogawa
54 tiff files found in 3 plates in 2020_09_22_Stain4_Bin1
54 tiff files found in 3 plates in 2020_09_22_Stain4_Bray
162 tiff files found in 9 plates in 2020_09_22_Stain4_Bray_HighExp
108 tiff files found in 