# Examples
Some example code for working with the ooiopendata storage account.

## Load available keys

In [None]:
import yaml

In [None]:
# Read the credentials from the local secrets file, then show which
# entries are available.
secrets_path = '../secrets/tjcrone.yml'
with open(secrets_path, 'r') as secrets_file:
    keys = yaml.safe_load(secrets_file)
keys.keys()

## List the first ten blobs in the camhd container

In [None]:
from azure.storage.blob import BlobServiceClient

In [None]:
# Authenticate against the storage account with the camhd credential, then
# scope a client to the camhd container.
storage_account_url = 'https://ooiopendata.blob.core.windows.net'
blob_service_client = BlobServiceClient(
    storage_account_url,
    credential=keys['camhd'],
)
container_client = blob_service_client.get_container_client('camhd')

In [None]:
from itertools import islice

# islice stops quietly if the container holds fewer than ten blobs, whereas
# advancing the iterator ten times unconditionally would raise StopIteration.
blob_list = container_client.list_blobs()
for blob in islice(blob_list, 10):
    print(blob.name)

## Upload a file to the testing container

In [None]:
# Same account as before, but authenticated with the testing credential and
# scoped to the testing container.
storage_account_url = 'https://ooiopendata.blob.core.windows.net'
blob_service_client = BlobServiceClient(
    storage_account_url,
    credential=keys['testing'],
)
container_client = blob_service_client.get_container_client('testing')

In [None]:
# Upload the local file to the testing container under the same name,
# replacing any existing blob of that name.
local_file = 'asdf.txt'
blob_client_test = blob_service_client.get_blob_client(container='testing', blob=local_file)

with open(local_file, mode='rb') as upload_stream:
    blob_client_test.upload_blob(upload_stream, overwrite=True)

In [None]:
# Print the name of every blob the current container client can see.
blob_list = container_client.list_blobs()
for entry in blob_list:
    print(entry.name)

## Delete file in testing container

In [None]:
# Delete the blob that blob_client_test points at (the 'asdf.txt' upload in
# the testing container).
blob_client_test.delete_blob()

In [None]:
# List the container again so the effect of the deletion is visible.
blob_list = container_client.list_blobs()
for entry in blob_list:
    print(entry.name)

## Check blob md5 hash

In [None]:
# Build a client for one specific video blob in the camhd container.
storage_account_url = 'https://ooiopendata.blob.core.windows.net'
blob_service_client = BlobServiceClient(
    storage_account_url,
    credential=keys['camhd'],
)
blob_client = blob_service_client.get_blob_client(
    container='camhd',
    blob='CAMHDA301-20200901T034500.mov',
)

In [None]:
# Hex form of the MD5 value found in the blob's content settings.
# NOTE(review): presumably content_md5 is None when no MD5 was stored for the
# blob, in which case .hex() would fail — confirm.
blob_client.get_blob_properties()['content_settings']['content_md5'].hex()

## Get list of CamHD files that are not on Azure

In [None]:
import numpy as np
import requests
from bs4 import BeautifulSoup

In [None]:
def get_camhd_filelist(year):
    """Return the URLs of all CamHD .mov files the raw data server lists for a year.

    Walks the year/month/day directory listings under the CAMHDA301 raw data
    path and collects every link whose target ends in 'mov'.

    Parameters
    ----------
    year : int or str
        Year directory to scan, e.g. 2020.

    Returns
    -------
    list of str
        Absolute file URLs in month, day, then listing order. Empty when no
        listing could be fetched or no matching link was found.
    """
    url_base_year = 'https://rawdata.oceanobservatories.org/files/RS03ASHS/PN03B/06-CAMHDA301/' + str(year)
    ext = 'mov'

    filelist = []
    for month in range(1, 13):
        url_base_month = '%s/%02d' % (url_base_year, month)
        # Skip the per-day requests entirely when the month listing is not available.
        if not requests.get(url_base_month).ok:
            continue
        # Days run 1-31 inclusive. Any day whose listing request does not
        # come back OK (for instance, presumably, a day the month does not
        # have) is skipped by the check below.
        for day in range(1, 32):
            url_base_day = '%s/%02d/' % (url_base_month, day)
            response = requests.get(url_base_day)
            if not response.ok:
                continue
            soup = BeautifulSoup(response.text, 'html.parser')
            for node in soup.find_all('a'):
                href = node.get('href')
                # Anchors without an href attribute yield None here.
                if href and href.endswith(ext):
                    filelist.append(url_base_day + href)
    return filelist

### Get all files from 2020 on raw data server

In [None]:
# Crawl the raw data server for every 2020 CamHD file.
files_server = get_camhd_filelist(2020)
# NOTE(review): the next assignment replaces the crawl result with a single
# hard-coded URL, so the list returned above is discarded. This looks like a
# leftover from testing — confirm whether it should stay.
files_server = ['https://rawdata.oceanobservatories.org/files/RS03ASHS/PN03B/06-CAMHDA301/2020/09/01/CAMHDA301-20200901T094500.mp4.md5']
files_server

### Get list of CamHD files on Azure and filter above list

In [None]:
storage_account_url = 'https://ooiopendata.blob.core.windows.net'
blob_service_client = BlobServiceClient(storage_account_url, credential=keys['camhd'])
container_client = blob_service_client.get_container_client('camhd')
# Collect the name of every blob in the camhd container. The listing must come
# from container_client, the client created on the line above.
blob_list = [blob.name for blob in container_client.list_blobs()]

In [None]:
# Keep the server URLs whose file name (last path segment) has no blob of the
# same name on Azure. files_server is the list built from the raw data server;
# a set makes each membership test constant-time.
blob_names = set(blob_list)
files_to_transfer = [
    file_url for file_url in files_server
    if file_url.split('/')[-1].strip() not in blob_names
]

### Transfer files
It is probably best to transfer the files with external tools such as curl or wget together with rclone.

## Download a NetCDF file from OOI results server

In [None]:
# Location of the NetCDF file on the results server; the local file name is
# the final path segment of that URL.
netcdf_url = 'https://opendap.oceanobservatories.org/async_results/crone@ldeo.columbia.edu/20191022T181459885Z-RS03ASHS-MJ03B-10-CTDPFB304-streamed-ctdpf_optode_sample/deployment0005_RS03ASHS-MJ03B-10-CTDPFB304-streamed-ctdpf_optode_sample_20191022T120127.028053-20191022T180126.225018.nc'
netcdf_filename = netcdf_url.rsplit('/', 1)[-1]

In [None]:
import wget

In [None]:
# Fetch netcdf_url with wget; later cells open the result from the working
# directory as netcdf_filename.
wget.download(netcdf_url);

## Write the original NetCDF file to an Azure blob

In [None]:
from azure.storage.blob import BlockBlobService

In [None]:
if 'botpt' in keys:
    botpt_service = BlockBlobService('ooiopendata', sas_token = keys['botpt'])
else:
    raise Exception('Botpt container key not available')

In [None]:
botpt_service.create_blob_from_path('botpt', netcdf_filename, netcdf_filename, validate_content=True);

In [None]:
for blob in botpt_service.list_blobs('botpt'):
    print(blob.name)

## Load the file into Xarray and save it to ooiopendata as Zarr

In [None]:
import xarray as xr
import zarr
import logging
logging.getLogger("azure.storage").setLevel(logging.CRITICAL)

In [None]:
# Open the downloaded NetCDF file with xarray.
ds = xr.open_dataset(netcdf_filename)

In [None]:
# Zarr store in the botpt container. The prefix is the NetCDF file name with
# its trailing 'nc' replaced by 'zarr' (the '.' before it is kept).
# NOTE(review): account_name/blob_service_kwargs look like the older ABSStore
# calling convention; newer zarr 2.x releases appear to expect a ContainerClient
# passed as client= — confirm against the installed zarr version.
abs_store = zarr.storage.ABSStore('botpt', netcdf_filename[:-2] + 'zarr', account_name='ooiopendata',
                                  blob_service_kwargs={'sas_token':keys['botpt']})

In [None]:
# Write the dataset into the Zarr store created above.
ds.to_zarr(abs_store);

## Remove local file

In [None]:
import os
# Remove the NetCDF file downloaded in this section. local_file still names
# the 'asdf.txt' test upload from earlier in the notebook, which is not the
# file this section works with.
os.remove(netcdf_filename)

## List remote blobs using public interface

In [None]:
import ooiod

In [None]:
%%time
# List the blobs of the camhd container in the ooiopendata account through
# the ooiod package.
blobs = ooiod.blobs.list_blobs('ooiopendata', 'camhd')

In [None]:
# Show the first ten entries of the listing.
blobs[0:10]