In [None]:
'''
------------------------------------------------------------------------------
 This script download the descriptors to be used in the tutoial.
 It needs account information:
   - Account name.
   - Account key.
 It needs the blob container information
   - Container name
   - Container sub-directory
------------------------------------------------------------------------------
'''
import os, uuid, sys
import subprocess
import tqdm
import astropy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing
import pyarrow as pa
import pyarrow.parquet as pq
import random

from io import BytesIO
from astropy.io import fits
from functools import partial
from azure.storage.blob import BlockBlobService, PublicAccess

In [None]:
# Create the BlockBlockService that is used to call the Blob service 
# for the storage account
import config_blob_keys as cfg

account_name = cfg.AccountName
account_key = cfg.AccountKey
block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

# The cointainer where the .fits are
cont_name_desc = 'descriptor'
# Set the permission so the blobs are public.
block_blob_service.set_container_acl(cont_name_desc, public_access=PublicAccess.Container)

# The cointainer where the .fits are
cont_name_desc_cor = 'descriptorcorrupt'
# Set the permission so the blobs are public.
block_blob_service.set_container_acl(cont_name_desc_cor, public_access=PublicAccess.Container)

In [70]:
# Functions to move files in azure cloud

# Create a list "filelist" with the blob content
# inside the "Azure:container/folder" location 
def BlobList(container, folder, filelist, verbose=False):
    
    gen = block_blob_service.list_blobs(container, prefix=folder)
    
    for blob in gen:
        file = str(blob.name).replace(folder,'')
        filelist.append(file)
        if verbose == True:
            print("\t Blob name: " + blob.name)
        
    return filelist

# Download a file "blobfile" from "container" and save it 
# in the file "locfile"
def DownBlob(container, blobfile, locfile, verbose=False):
    
    if verbose == True:
        print('Downloading ' + blobfile + ' to ' + locfile)
    
    block_blob_service.get_blob_to_path(container, blobfile, locfile)

# Uncompress data 
def UnCompress(file, verbose=False):
    
    if verbose == True:
        print('Uncompressing ' + file)
    
    subprocess.call(['uncompress', file])
    #os.popen('uncompress ' + file)

# Upload file "locfile" to the blob "blobfile" in container
def UpBlob(container, blobfile, locfile, verbose=False):
    
    if verbose == True:
        print('Uploading ' + locfile + ' to ' + blobfile)
        
    block_blob_service.create_blob_from_path(container, blobfile, locfile, validate_content=True)

# Select descriptors file from a list
def SelectDesc(path_loc, cont_name, desc_blob_sub_dir, file, verbose=False):       
    # Download descriptors
    desc_blob_name = os.path.join(desc_blob_sub_dir,file)
    path_to_file_loc = os.path.join(path_loc, file)
    
    DownBlob(cont_name, desc_blob_name, path_to_file_loc, False)
    
    while not os.path.exists(path_to_file_loc):
        time.sleep(0.1)

In [73]:
# Download the images to ../data/desc_for_test/UVES_*
#DescBlobSubDirs = ['UVES_BLUE_BIAS','UVES_DIC1B_DFLAT']
DescBlobSubDirs = ['UVES_BLUE_WAVE']
# Root local path
method_dir = 'numpy/method2' # Do not put a '/' at the beggining!!!
path_loc = './desc_for_test/method2'
random.seed(100)
# Maximum number of files for dowload
nmax = 30
bad_files_garchim = []

# loop for descriptors folders
for desc_blob_sub_dir in DescBlobSubDirs:
    
    print('Working on ' + desc_blob_sub_dir + '...')
    # Define the image type
    if desc_blob_sub_dir == 'UVES_BLUE_BIAS':
        image_type = 'bias_blue'
    elif desc_blob_sub_dir == 'UVES_RED_BIAS':
        image_type = 'bias_red'
    elif desc_blob_sub_dir == 'UVES_BLUE_WAVE' or desc_blob_sub_dir == 'UVES_DIC1B_FLAT' or desc_blob_sub_dir == 'UVES_DIC1B_DFLAT':
        image_type = 'blue_arc_flat'
    elif desc_blob_sub_dir == 'UVES_RED_WAVE' or desc_blob_sub_dir == 'UVES_DIC1R_FLAT':
        image_type = 'red_arc_flat'
        
    if desc_blob_sub_dir == 'UVES_BLUE_BIAS' or desc_blob_sub_dir == 'UVES_DIC1B_FLAT' or desc_blob_sub_dir == 'UVES_DIC1B_DFLAT':
        Exten = 0
    elif desc_blob_sub_dir == 'UVES_RED_BIAS' or desc_blob_sub_dir == 'UVES_BLUE_WAVE' or desc_blob_sub_dir == 'UVES_RED_WAVE' or desc_blob_sub_dir == 'UVES_DIC1R_FLAT':
        Exten = 1#,2]
        
    extension = '/ext' + str(Exten)
    
    # Take the Garchim bad images name
    PROJECT_DIR = "/data/notebooks/uves_jprieto"
    DATA_DIR = os.path.join(PROJECT_DIR, "data")
    uves_flag_file = os.path.join(DATA_DIR, 'UVES_hidden_flag_results.txt')
    uves_flag_df = pd.read_csv(uves_flag_file, comment='#', sep=';')
    uves_flag_df['filename'] = uves_flag_df['filename']+'_desc.npy'
    corrupted_df = uves_flag_df[(uves_flag_df['image_type'] == image_type) & (uves_flag_df['flag'] == 'CORRUPTED')]
    bad_files_garchim = list(corrupted_df['filename'])     

    # List the good descriptor files
    print('Good files...')
    desc_files_list = []
    desc_folder_rem = method_dir + '/' + desc_blob_sub_dir + extension
    BlobList(cont_name_desc, desc_folder_rem, desc_files_list)
    good_path_loc = path_loc + '/good/' + desc_blob_sub_dir + extension

    # Erase corrupted files from the list
    desc_files_list = [s.replace('/','') for s in desc_files_list if s not in bad_files_garchim]
    nfiles = len(desc_files_list)
    random.shuffle(desc_files_list)
    
    if nfiles>nmax:
        desc_files_list = desc_files_list[:nmax]
    good_files_list = desc_files_list
    
    tasks = partial(SelectDesc, good_path_loc, cont_name_desc, desc_folder_rem)
    with multiprocessing.Pool(1) as p:
        result = list(tqdm.tqdm_notebook(p.imap(tasks, desc_files_list), total=len(desc_files_list)))
    
    # List the bad garchim descriptor files
    print('Bad Garchim files...')
    desc_files_list = []
    desc_folder_rem = method_dir + '/' + desc_blob_sub_dir + extension
    BlobList(cont_name_desc, desc_folder_rem, desc_files_list)
    desc_files_list = [s.replace('/','') for s in desc_files_list]
    desc_files_list = [s for s in desc_files_list if s in bad_files_garchim]
    desc_folder_rem = method_dir + '/' + desc_blob_sub_dir + extension
    badg_path_loc = path_loc + '/badg/' + desc_blob_sub_dir + extension
    nfiles = len(desc_files_list)
    random.shuffle(desc_files_list)
    
    if nfiles>nmax:
        desc_files_list = desc_files_list[:nmax]
    badg_files_list = desc_files_list

    tasks = partial(SelectDesc, badg_path_loc, cont_name_desc, desc_folder_rem)
    with multiprocessing.Pool(1) as p:
        result = list(tqdm.tqdm_notebook(p.imap(tasks, desc_files_list), total=len(desc_files_list)))
    
    # List the bad nicolas descriptor files
    print('Bad Nicolas files...')
    desc_files_list = []
    desc_folder_rem = method_dir + '/' + desc_blob_sub_dir + extension
    BlobList(cont_name_desc_cor, desc_folder_rem, desc_files_list)
    badn_path_loc = path_loc + '/badn/' + desc_blob_sub_dir + extension

    # Erase corrupted files from the list
    desc_files_list = [s.replace('/','') for s in desc_files_list if s not in bad_files_garchim]
    nfiles = len(desc_files_list)
    random.shuffle(desc_files_list)
    
    if nfiles>nmax:
        desc_files_list = desc_files_list[:nmax]
    badn_files_list = desc_files_list
    
    tasks = partial(SelectDesc, badn_path_loc, cont_name_desc_cor, desc_folder_rem)
    with multiprocessing.Pool(1) as p:
        result = list(tqdm.tqdm_notebook(p.imap(tasks, desc_files_list), total=len(desc_files_list)))


Working on UVES_BLUE_WAVE...
Good files...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

FileNotFoundError: [Errno 2] No such file or directory: './desc_for_test/method2/good/UVES_BLUE_WAVE/ext1/UVES.2011-10-31T10:26:00.867_desc.npy'