# Move processed GCS glider imagery files

The code for processing shadowgraph images (Cutter's code, adapted from the methods of Ohman et al., in us-amlr/amlr-shadowgraph) wrote all processed images out into a single directory. The purpose of this notebook is to copy these files into their own folders, one per image type, so that they can be imported into VIAME-Web-AMLR.

Image types: 
- -ffPCG.png images: Flatfielded images, with Pixel Gamma Correction
- -imgff.png images: Flatfielded images
- .jpgorig-regions.jpg: Original jpg images, with red region bounding boxes pasted onto them

Note that both versions of flatfielded images have had other processing steps applied, such as masking.

In [None]:
from filemgmt.utils import replace_path, create_pre
from filemgmt.gcs import list_blobs_with_prefix, copy_blob_client

# from google.cloud import storage
import multiprocessing as mp
import time
from itertools import repeat

## Set variable names
User tasks: update the variable values in this block as necessary.

In [None]:
# Names of the GCS buckets used by the copy step below: blobs are listed in and
# copied from bucket_source_name, and written to bucket_destination_name.
bucket_source_name      = "amlr-imagery-proc-dev"
bucket_destination_name = "amlr-gliders-imagery-proc-dev"

# Deployment to process; keep exactly one assignment uncommented.
glider_deployment = "ringo-20240312"
# glider_deployment = "amlr07-20221204"
# create_pre is a project helper (filemgmt.utils). Its two return values are
# used as the source path prefix (below) and as the old/new prefix arguments
# to replace_path in the copy step.
# NOTE(review): presumably derived from the deployment name - confirm in filemgmt.
pre_source, pre_destination = create_pre(glider_deployment)

# Common blob-name prefix under which the per-directory image folders live.
file_prefix_base = f"{pre_source}/{glider_deployment}/shadowgraph/images"

# Substring passed to list_blobs_with_prefix to select which image type to
# copy; keep exactly one assignment uncommented.
# file_substr    = "-ffPCG"
# file_substr    = "-imgff"
file_substr    = "jpgorig-regions"

# Directory indices to process; each index is turned into a folder name by
# dir_paste and appended to file_prefix_base in the copy loop.
# NOTE: if changing the directory prefix, you may have to update number of leading 0s in dir_paste
dir_range = range(0, 1) 
def dir_paste(y):
    """Return the image directory name for directory index ``y``.

    The name is the fixed prefix ``Dir0`` followed by ``y`` zero-padded to a
    minimum width of two digits, e.g. ``dir_paste(0)`` gives ``"Dir000"`` and
    ``dir_paste(12)`` gives ``"Dir012"``.
    """
    padded_index = format(y, "02")
    return "Dir0" + padded_index

# Echo the settings chosen above so they can be checked before any files are
# copied. The example directory is printed separately because it is the only
# line that indexes into dir_range.
print(
    f"base: {file_prefix_base}",
    f"substring: {file_substr}",
    f"Directories: {dir_range}",
    sep="\n",
)
print(f"Example directory: {dir_paste(dir_range[0])}")

## Copy files

In [None]:
# Copy every matching blob, one image directory at a time, using a pool of
# worker processes to run the individual copies in parallel.
numcores = mp.cpu_count()
print(f"Running with {numcores} cores")

# storage_client = storage.Client(project = "ggn-nmfs-usamlr-dev-7b99")
# source_bucket = storage_client.bucket(bucket_source_name)
# destination_bucket = storage_client.bucket(bucket_destination_name)

print(f"\nStart time of all: {time.strftime('%Y-%m-%d %H:%M:%S')}")

# Create the worker pool once and reuse it for every directory. Building the
# pool inside the loop would start and tear down `numcores` processes for each
# directory, which is pure overhead.
with mp.Pool(numcores) as pool:
    for z in dir_range:
        file_prefix = f"{file_prefix_base}/{dir_paste(z)}"
        print("------------------------------------------------------")
        print(file_prefix)
        start_time = time.time()

        # Source blob names for this directory.
        # NOTE(review): presumably restricted to names containing file_substr -
        # confirm against filemgmt.gcs.list_blobs_with_prefix.
        file_list_orig = list_blobs_with_prefix(
            bucket_source_name, file_prefix, file_substr=file_substr)

        # One destination name per source name, in the same order, so the two
        # lists can be zipped together below.
        file_list_destination = [
            replace_path(i, file_substr, pre_source, pre_destination)
            for i in file_list_orig
        ]

        print(f"copying {len(file_list_orig)} files with '{file_substr}' " +
            f"with the prefix '{bucket_source_name}/{file_prefix}'")
        print(f"destination list length: {len(file_list_destination)}")

        # In parallel: each task calls copy_blob_client with
        # (source bucket name, source name, destination bucket name,
        # destination name). starmap blocks until every task has finished.
        out_list = pool.starmap(
            copy_blob_client,
            zip(repeat(bucket_source_name), file_list_orig,
                repeat(bucket_destination_name), file_list_destination)
        )

        # # Not in parallel
        # for (i, j) in zip(file_list_orig, file_list_destination):
        #     if destination_bucket.blob(j).exists():
        #         continue
        #         # print(f"skipping {destination_bucket.blob(j).name}")
        #     else:
        #         copy_blob(source_bucket, i, destination_bucket, j)

        print(f"Time is {time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Full z runtime: {(time.time()-start_time)/60} minutes")