In [1]:
'''
------------------------------------------------------------------------------
 This script downloads the descriptors to be used in the tutorial.
 It needs account information:
   - Account name.
   - Account key.
 It needs the blob container information
   - Container name
   - Container sub-directory
------------------------------------------------------------------------------
'''
import os, uuid, sys
import subprocess
import tqdm
import astropy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing
import pyarrow as pa
import pyarrow.parquet as pq
import random

from io import BytesIO
from astropy.io import fits
from functools import partial
from azure.storage.blob import BlockBlobService, PublicAccess
sys.path.append('./')

In [3]:
# Build the BlockBlobService client that every helper in this notebook uses
# to talk to the Blob service of the storage account.
import config_blob_keys as cfg

# Credentials are read from the local config module, never written inline.
account_name, account_key = cfg.AccountName, cfg.AccountKey
block_blob_service = BlockBlobService(
    account_name=account_name,
    account_key=account_key,
)

# NOTE(review): both calls below set the container ACL to
# PublicAccess.Container every time this cell runs -- presumably that makes
# the containers publicly readable; confirm this is intended for a notebook
# that only downloads data.
cont_name_proc = cfg.ContNameProc
block_blob_service.set_container_acl(
    cont_name_proc,
    public_access=PublicAccess.Container,
)

cont_name_proc_cor = cfg.ContNameProcCor
block_blob_service.set_container_acl(
    cont_name_proc_cor,
    public_access=PublicAccess.Container,
)

<azure.storage.blob.models.ResourceProperties at 0x7fdeb253b940>

In [20]:
# Functions to move files in azure cloud

# Create a list "filelist" with the blob content
# inside the "Azure:container/folder" location 
def BlobList(container, folder, filelist, verbose=False):
    """Append the names of the blobs found under ``folder`` to ``filelist``.

    Parameters
    ----------
    container : str
        Name of the container to list.
    folder : str
        Prefix passed to ``list_blobs``; it is also stripped from the start
        of every name before the name is stored.
    filelist : list
        List that receives the names. It is modified in place.
    verbose : bool, optional
        When true, print the full name of every blob found.

    Returns
    -------
    list
        The same ``filelist`` object, after the names have been appended.
    """
    prefix = folder or ''

    for blob in block_blob_service.list_blobs(container, prefix=folder):
        name = str(blob.name)
        # Strip only the leading prefix: str.replace() would also delete any
        # later occurrence of ``folder`` inside the blob name.
        if prefix and name.startswith(prefix):
            name = name[len(prefix):]
        filelist.append(name)
        if verbose:
            print("\t Blob name: " + blob.name)

    return filelist

# Download a file "blobfile" from "container" and save it 
# in the file "locfile"
def DownBlob(container, blobfile, locfile, verbose=False):
    """Fetch the blob ``blobfile`` from ``container`` and write it to ``locfile``.

    When ``verbose`` equals ``True`` a one-line progress message is printed
    before the transfer starts. Nothing is returned.
    """
    announce = verbose == True
    if announce:
        print(' '.join(('Downloading', blobfile, 'to', locfile)))

    block_blob_service.get_blob_to_path(
        container,
        blobfile,
        locfile,
    )

# Uncompress data 
def UnCompress(file, verbose=False):
    """Run the external ``uncompress`` program on ``file``.

    When ``verbose`` equals ``True`` a one-line progress message is printed
    first. The exit status returned by ``subprocess.call`` is discarded, so a
    failed decompression is not reported to the caller.
    """
    announce = verbose == True
    if announce:
        print(' '.join(('Uncompressing', file)))

    command = ['uncompress', file]
    subprocess.call(command)

# Upload file "locfile" to the blob "blobfile" in container
def UpBlob(container, blobfile, locfile, verbose=False):
    """Send the local file ``locfile`` to the blob ``blobfile`` in ``container``.

    When ``verbose`` equals ``True`` a one-line progress message is printed
    first. The upload is requested with ``validate_content=True``. Nothing is
    returned.
    """
    announce = verbose == True
    if announce:
        print(' '.join(('Uploading', locfile, 'to', blobfile)))

    block_blob_service.create_blob_from_path(
        container,
        blobfile,
        locfile,
        validate_content=True,
    )

# Select descriptors file from a list
def SelectArray(path_loc, cont_name, desc_blob_sub_dir, file, verbose=False):
    """Download one file from a blob sub-directory into a local folder.

    Parameters
    ----------
    path_loc : str
        Local directory in which the file is saved.
    cont_name : str
        Name of the container holding the blob.
    desc_blob_sub_dir : str
        Sub-directory (blob name prefix) inside the container.
    file : str
        File name, used both as the last blob path component and as the
        local file name.
    verbose : bool, optional
        Forwarded to ``DownBlob`` so the download can be announced.

    Returns
    -------
    None
        The function returns only once the local file exists.
    """
    # Imported locally: the wait loop below needs ``time`` and the notebook's
    # import cell does not provide it (the original raised NameError here).
    import time

    desc_blob_name = os.path.join(desc_blob_sub_dir, file)
    path_to_file_loc = os.path.join(path_loc, file)

    DownBlob(cont_name, desc_blob_name, path_to_file_loc, verbose)

    # Block until the downloaded file is visible on disk.
    while not os.path.exists(path_to_file_loc):
        time.sleep(0.1)

In [24]:
# Download, for every descriptor file already present under
# <path_loc>/<group>/<sub-dir>/ext<N>, the matching array from the blob
# storage into <path_loc_down>/<group>/<sub-dir>/ext<N>.
ProcBlobSubDirs = ['UVES_BLUE_BIAS','UVES_DIC1B_DFLAT']
# Root of the remote (blob) path
method_dir = 'numpy' # Do not put a '/' at the beginning!!!
# Local folder holding the descriptor files that drive the selection
path_loc = './desc_for_test/method2'
# Local folder that receives the downloaded arrays
path_loc_down = './array_for_test'
random.seed(100)
# Maximum number of files for download (not used by the loop below)
nmax = 30
# The lookup of Garchim-flagged files from UVES_hidden_flag_results.txt is
# disabled, so this list stays empty.
bad_files_garchim = []

# Image type associated with each known blob sub-directory.
IMAGE_TYPE_BY_DIR = {
    'UVES_BLUE_BIAS': 'bias_blue',
    'UVES_RED_BIAS': 'bias_red',
    'UVES_BLUE_WAVE': 'blue_arc_flat',
    'UVES_DIC1B_FLAT': 'blue_arc_flat',
    'UVES_DIC1B_DFLAT': 'blue_arc_flat',
    'UVES_RED_WAVE': 'red_arc_flat',
    'UVES_DIC1R_FLAT': 'red_arc_flat',
}

# Extension number used in the '/ext<N>' path component of each sub-directory.
EXTENSION_BY_DIR = {
    'UVES_BLUE_BIAS': 0,
    'UVES_DIC1B_FLAT': 0,
    'UVES_DIC1B_DFLAT': 0,
    'UVES_RED_BIAS': 1,
    'UVES_BLUE_WAVE': 1,
    'UVES_RED_WAVE': 1,
    'UVES_DIC1R_FLAT': 1,
}

# (message printed, local group folder, source container) for each category.
DOWNLOAD_GROUPS = [
    ('Good files...', 'good', cont_name_proc),
    ('Bad Nicolas files...', 'badn', cont_name_proc_cor),
    ('Bad Garchim files...', 'badg', cont_name_proc),
]


def _array_names(desc_dir):
    """Return the array file names matching the '.npy' descriptors in ``desc_dir``."""
    return [
        name.replace('_desc', '')
        for name in os.listdir(desc_dir)
        if name.endswith('.npy')
    ]


# Loop over the descriptor folders
for proc_blob_sub_dir in ProcBlobSubDirs:

    print('Working on ' + proc_blob_sub_dir + '...')

    # Fail early on an unknown folder instead of silently reusing the
    # extension of the previous iteration (or hitting a NameError).
    if proc_blob_sub_dir not in EXTENSION_BY_DIR:
        raise ValueError('Unknown blob sub-directory: ' + proc_blob_sub_dir)

    image_type = IMAGE_TYPE_BY_DIR[proc_blob_sub_dir]
    Exten = EXTENSION_BY_DIR[proc_blob_sub_dir]
    extension = '/ext' + str(Exten)

    # The remote folder is the same for the three categories
    proc_folder_rem = method_dir + '/' + proc_blob_sub_dir + extension

    for label, group, container in DOWNLOAD_GROUPS:
        print(label)
        group_suffix = '/' + group + '/' + proc_blob_sub_dir + extension
        desc_dir = path_loc + group_suffix
        down_dir = path_loc_down + group_suffix

        # The download target must exist before files are written into it.
        os.makedirs(down_dir, exist_ok=True)

        files_list = _array_names(desc_dir)

        tasks = partial(SelectArray, down_dir, container, proc_folder_rem)
        with multiprocessing.Pool(1) as p:
            result = list(tqdm.tqdm_notebook(p.imap(tasks, files_list), total=len(files_list)))

Working on UVES_BLUE_BIAS...
Good files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Bad Nicolas files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Bad Garchim files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Working on UVES_DIC1B_DFLAT...
Good files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Bad Nicolas files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

Bad Garchim files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))