# VRTs and Cloud storage with GDAL

Advanced usage describes using WarpedVRT for operations, but in the end GeoTIFF arrays are loaded into memory or written to local disk.

### Some links

https://gist.github.com/sgillies/7e5cd548110a5b4d45ac1a1d93cb17a3

https://github.com/scottyhq/landsat-aws-demo/blob/master/2_Landsat-8-AWS-xarray.ipynb

## Load Libraries

Check that gdal is installed

In [None]:
# Print the version reported by the GDAL command-line tools; the shell reports
# an error here if `gdalinfo` is not on PATH.
!gdalinfo --version

In [None]:
import glob
import json
import os
import shutil
import subprocess

import pandas as pd
from google.cloud import storage
from osgeo import gdal

In [None]:
# Configuration passed to GDAL through environment variables.
# NOTE(review): GS_NO_SIGN_REQUEST presumably makes GDAL send unsigned
# (anonymous) requests to Google Cloud Storage — confirm against the GDAL docs.
os.environ.update({
    'GS_NO_SIGN_REQUEST': 'YES',
    'GDAL_NUM_THREADS': 'ALL_CPUS',
})

Log in to Google Cloud if needed.

In [None]:
!{gcloud auth login}

## Functions

A function to list blobs on the storage bucket 

In [None]:
def list_blobs_with_prefix(bucket_name, prefix, delimiter=None):
    """Print the names of the blobs in a bucket whose names start with a prefix.

    Useful for listing a "folder" such as "public/".

    Args:
        bucket_name: Name of the bucket to list.
        prefix: Leading part of the blob names to match.
        delimiter: Optional delimiter that limits the listing to the "files"
            directly inside the "folder". When omitted, the whole tree under
            the prefix is listed.

    Example, for a bucket holding these blobs:

        a/1.txt
        a/b/2.txt

    prefix='a/' with no delimiter prints both blobs:

        a/1.txt
        a/b/2.txt

    prefix='a/' with delimiter='/' prints only the blob directly under 'a/':

        a/1.txt

    and, in addition, the "subfolders" exposed through the listing's
    ``prefixes`` attribute:

        a/b/

    Returns:
        None. The listing is written to standard output.
    """
    client = storage.Client()

    # Client.list_blobs needs package version 1.17.0 or newer.
    listing = client.list_blobs(bucket_name, prefix=prefix, delimiter=delimiter)

    # The response only arrives while the iterator is being consumed, so the
    # prefixes are read after this loop, not before it.
    print("Blobs:")
    for item in listing:
        print(item.name)

    if not delimiter:
        return

    print("Prefixes:")
    for sub_prefix in listing.prefixes:
        print(sub_prefix)


In [None]:
def dl(lay_name):
    cmd = f'cd tmp | gsutil -m cp -R gs://swhm-image-exports//{lay_name} .'
    !{cmd}

In [None]:
def makevrt(lay_name):
    cmd = f'gdalbuildvrt output.vrt {lay_name}/*'
    !{cmd}
    

In [None]:
def makecog(): 
    cmd = f'''
    gdal_translate output.vrt cog.tif -of COG -co NUM_THREADS=ALL_CPUS -co COMPRESS=LZW -co BIGTIFF=YES
    '''
    !{cmd}
    os.remove('output.vrt')

In [None]:
def ul(lay_name):
    cmd = f'gsutil cp -r cog.tif gs://live_data_layers/rasters/{lay_name}.tif'
    !{cmd}

## Get list of objects in data bucket

In [None]:
# One row per entry of `output`, stored in a single `file_path` column.
# NOTE(review): `output` is not defined in the visible cells — presumably the
# list of gs:// object URIs in the data bucket; confirm where it is produced.
df = pd.DataFrame(output, columns=['file_path'])#.iloc[1:]
#df['folder_name'] = df['file_path'].str.split(BUCKET_NAME, 1,expand = True)
# Same path with the gs:// scheme swapped for /vsigs/.
# NOTE(review): /vsigs/ looks like GDAL's virtual file system prefix for
# Google Cloud Storage — confirm.
df['gdal_path'] = df['file_path'].str.replace('gs://', '/vsigs/') 
df

In [None]:
# Strip the "gs://<bucket>//" prefix from each path, then keep the first
# remaining path component as the layer name.
# `n` is passed by keyword: pandas 2.0 made every `str.split` argument except
# `pat` keyword-only, so the positional form raises TypeError there.
lay_names = df['file_path'].str.split("gs://" + BUCKET_NAME + '//', n=1).str[1]
df['layer_name'] = lay_names.str.split('/', n=1).str[0]
df

In [None]:
# Distinct layer names found in the listing.
lay_names = pd.unique(df["layer_name"])
lay_names

In [None]:
# Object paths that belong to the second layer (index 1), as a plain list.
file_list = df.loc[df['layer_name'] == lay_names[1], 'file_path'].tolist()
file_list


## Loop through layer names

For each layer name do the following: 

    1. download images
    2. make a virtual raster
    3. Save as a Cloud Optimized GeoTIFF 
    4. Upload to cloud storage 
    5. Clean up local files 

In [None]:
# Run the full pipeline for every layer, one layer at a time.
for lay_name in lay_names:
    dl(lay_name)        # 1. download the layer's images
    makevrt(lay_name)   # 2. build output.vrt from the downloaded folder
    makecog()           # 3. write cog.tif and delete output.vrt
    ul(lay_name)        # 4. upload cog.tif to cloud storage
    # 5. Remove the downloaded folder. rmtree also handles nested folders and
    #    dotfiles, which a glob + os.remove + os.rmdir sequence cannot: glob
    #    skips dotfiles and os.remove raises on directories.
    shutil.rmtree(lay_name)
    