In [3]:
'''
------------------------------------------------------------------------------
 This script downloads the FITS files to be used in the tutorial.
 It needs the account information:
   - Account name.
   - Account key.
 It needs the blob container information:
   - Container name.
   - Container sub-directory.
------------------------------------------------------------------------------
'''
import multiprocessing
import os, uuid, sys
import random
import subprocess
import time
from functools import partial
from io import BytesIO

import astropy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import tqdm
from astropy.io import fits
from azure.storage.blob import BlockBlobService, PublicAccess
sys.path.append('./')

In [4]:
#!pip install azure-storage-blob
# Account credentials and container names are read from the
# config_blob_keys module.
import config_blob_keys as cfg

account_name, account_key = cfg.AccountName, cfg.AccountKey

# Client object through which every Blob service call in this notebook is made.
block_blob_service = BlockBlobService(
    account_name=account_name,
    account_key=account_key,
)

# Container with the uncompressed files. NOTE(review): the call below sets the
# container ACL to container-level public access on every run -- confirm that
# this is intended.
cont_name_unc = cfg.ContNameUnC
block_blob_service.set_container_acl(
    cont_name_unc,
    public_access=PublicAccess.Container,
)

<azure.storage.blob.models.ResourceProperties at 0x7f11eefcada0>

In [26]:
# Functions to move files in azure cloud

# Fill the list "filelist" with the names of the blobs found
# inside the "Azure:container/folder" location
def BlobList(container, folder, filelist, verbose=False):
    """Append the blob names found under ``folder`` to ``filelist``.

    The blobs of ``container`` whose names start with ``folder`` are listed
    through the module-level ``block_blob_service``. Each name is stored with
    the leading ``folder`` prefix removed.

    Args:
        container: Name of the container to list.
        folder: Prefix passed to the listing call and stripped from each name.
        filelist: List that receives the names; it is modified in place.
        verbose: When truthy, print the full name of every blob.

    Returns:
        The same ``filelist`` object that was passed in.
    """
    for blob in block_blob_service.list_blobs(container, prefix=folder):
        name = str(blob.name)
        # Remove only the leading prefix: str.replace() would also delete any
        # later occurrence of the folder string inside the blob name.
        if folder and name.startswith(folder):
            name = name[len(folder):]
        filelist.append(name)
        if verbose:
            print("\t Blob name: " + blob.name)

    return filelist

# Fetch the blob "blobfile" from "container" and store it
# locally as "locfile"
def DownBlob(container, blobfile, locfile, verbose=False):
    """Download one blob to a local path via ``block_blob_service``.

    Args:
        container: Name of the container holding the blob.
        blobfile: Name of the blob inside the container.
        locfile: Local path the blob is written to.
        verbose: When equal to ``True``, announce the transfer first.
    """
    if verbose == True:
        message = ''.join(['Downloading ', blobfile, ' to ', locfile])
        print(message)

    block_blob_service.get_blob_to_path(container, blobfile, locfile)

# Uncompress data
def UnCompress(file, verbose=False):
    """Run the external ``uncompress`` command on ``file``.

    Args:
        file: Path handed to ``uncompress`` as its only argument.
        verbose: When truthy, announce the file before uncompressing it.

    Returns:
        The exit status of the ``uncompress`` process, so callers can detect
        a failure. Callers that ignore the return value are unaffected.
    """
    if verbose:
        print('Uncompressing ' + file)

    # The argument-list form runs the command without a shell, so the file
    # name is never interpreted by one.
    status = subprocess.call(['uncompress', file])
    if status != 0:
        # Report the failure instead of dropping it silently.
        print('uncompress failed for ' + file + ' (exit status ' + str(status) + ')',
              file=sys.stderr)
    return status

# Send the local file "locfile" to the blob "blobfile" in container
def UpBlob(container, blobfile, locfile, verbose=False):
    """Upload one local file as a blob via ``block_blob_service``.

    The upload is requested with ``validate_content=True``.

    Args:
        container: Name of the destination container.
        blobfile: Name the blob gets inside the container.
        locfile: Path of the local file to upload.
        verbose: When equal to ``True``, announce the transfer first.
    """
    if verbose == True:
        message = ''.join(['Uploading ', locfile, ' to ', blobfile])
        print(message)

    block_blob_service.create_blob_from_path(
        container,
        blobfile,
        locfile,
        validate_content=True,
    )

# Download one .fits file and store it in the local folder that matches
# its original file name
def SelectFits(path_loc, unc_blob_sub_dir, npy_blob_sub_dir, file, verbose=False):
    """Download one blob and file it locally under its ``ORIGFILE`` name.

    The blob ``<unc_blob_sub_dir>/<file>`` is downloaded from the container
    ``cont_name_unc`` to ``<path_loc>/<file>``. The ``ORIGFILE`` keyword is
    then read from the header of the first HDU and the file is moved to
    ``<path_loc>/<folder>/<ORIGFILE>``. ``<folder>`` is the concatenation of
    the entries of ``npy_blob_sub_dir`` that occur in the ``ORIGFILE`` value.

    Args:
        path_loc: Local directory that receives the download.
        unc_blob_sub_dir: Sub-directory of the container holding the blob.
        npy_blob_sub_dir: Candidate local folder names for this kind of file.
        file: Name of the blob inside ``unc_blob_sub_dir``.
        verbose: When truthy, print the original file name.

    Raises:
        FileNotFoundError: If the downloaded file does not show up locally
            within the wait limit.
    """
    wait_limit_s = 30.0  # upper bound for the post-download existence check
    poll_interval_s = 0.1

    # Download the data from uncompressed
    unc_blob_name = os.path.join(unc_blob_sub_dir, file)
    path_to_file_loc = os.path.join(path_loc, file)

    DownBlob(cont_name_unc, unc_blob_name, path_to_file_loc, False)

    # Presumably the download call blocks until the file is written, in which
    # case this loop is skipped. The deadline keeps a missing file from
    # stalling the worker forever.
    deadline = time.monotonic() + wait_limit_s
    while not os.path.exists(path_to_file_loc):
        if time.monotonic() >= deadline:
            raise FileNotFoundError(
                'Download of ' + unc_blob_name + ' did not produce ' + path_to_file_loc)
        time.sleep(poll_interval_s)

    # Read the original file name. The context manager closes the file handle
    # before the file is moved.
    with fits.open(path_to_file_loc) as hdu_list:
        orig_file_name = hdu_list[0].header['ORIGFILE']

    if verbose:
        print('File name = ' + orig_file_name)

    npy_folder = ''.join(name for name in npy_blob_sub_dir if name in orig_file_name)
    target_dir = os.path.join(path_loc, npy_folder)
    # Create the destination folder on demand so that a fresh checkout works.
    os.makedirs(target_dir, exist_ok=True)

    # ORIGFILE comes from the file's own header: keep only its last path
    # component so the move cannot end up outside target_dir.
    new_path_to_loc_file = os.path.join(target_dir, os.path.basename(orig_file_name))

    os.rename(path_to_file_loc, new_path_to_loc_file)

In [27]:
# Download a random sample of images to ./fits_for_test/UVES_*

# Local UVES folders, keyed by the uncompressed blob sub-directory that
# holds the corresponding type of images
NPY_SUBDIRS_BY_UNC_DIR = {
    'bias_blue': ['UVES_BLUE_BIAS'],
    'bias_red': ['UVES_RED_BIAS'],
    'blue_arc_flat': ['UVES_BLUE_WAVE', 'UVES_DIC1B_FLAT', 'UVES_DIC1B_DFLAT'],
    'red_arc_flat': ['UVES_RED_WAVE', 'UVES_DIC1R_FLAT'],
}
# Maximum number of files downloaded from each sub-directory
N_FILES_PER_FOLDER = 100

uncBlobSubDirs = ['bias_blue', 'bias_red', 'blue_arc_flat', 'red_arc_flat']
# Quick test: restrict the run to a single folder
#uncBlobSubDirs = ['blue_arc_flat']
path_loc = './fits_for_test'

# loop for uncompressed folders
for unc_blob_sub_dir in uncBlobSubDirs:

    # A dictionary lookup fails loudly (KeyError) for an unknown folder
    # instead of silently reusing the folders of the previous iteration.
    npy_blob_sub_dir = NPY_SUBDIRS_BY_UNC_DIR[unc_blob_sub_dir]

    # The files are moved into these folders, so they must exist beforehand.
    for npy_folder in npy_blob_sub_dir:
        os.makedirs(os.path.join(path_loc, npy_folder), exist_ok=True)

    # List the uncompressed data
    unc_files_list = []
    unc_folder_rem = unc_blob_sub_dir + '/'

    BlobList(cont_name_unc, unc_folder_rem, unc_files_list)

    # Random sample: shuffle, then keep the first N_FILES_PER_FOLDER names.
    # No seed is set, so the selection differs from run to run.
    random.shuffle(unc_files_list)
    unc_files_list = unc_files_list[:N_FILES_PER_FOLDER]

    print('Working on ' + unc_blob_sub_dir + '...')

    tasks = partial(SelectFits, path_loc, unc_blob_sub_dir, npy_blob_sub_dir)
    with multiprocessing.Pool(1) as p:
        result = list(tqdm.tqdm(p.imap(tasks, unc_files_list), total=len(unc_files_list)))

Working on bias_blue...


100%|██████████| 100/100 [00:23<00:00,  4.34it/s]


Working on bias_red...


100%|██████████| 100/100 [00:55<00:00,  1.92it/s]


Working on blue_arc_flat...


100%|██████████| 100/100 [00:26<00:00,  4.18it/s]


Working on red_arc_flat...


100%|██████████| 100/100 [01:04<00:00,  1.55it/s]
