In [None]:
# default_exp cli

# CLI

> Command line tools for working with storage.

In [None]:
#export
from storage_tools.core import *
from fastcore.script import *

In [None]:
#export
@call_parse
def upload_dataset(storage_name:Param('Section name in config',str),
                   dataset_name:Param('Dataset name',str),
                   config_name:Param('Path to config',str)='secrets/settings.ini',
                   dataset_version:Param('Dataset version',str)='patch'):
    "Create a new dataset archive and upload it to remote storage"
    # Build the storage client from the `storage_name` section of the config file.
    client=new_storage_client(storage_name,config_name)
    # Whatever the client returns from the upload is reported as the destination.
    uploaded_to=client.upload_dataset(dataset_name,dataset_version)
    print('Dataset uploaded to',uploaded_to)

In [None]:
#export
@call_parse
def download_dataset(storage_name:Param('Section name in config',str),
                     dataset_name:Param('Dataset name',str),
                     config_name:Param('Path to config',str)='secrets/settings.ini',
                     dataset_version:Param('Dataset version',str)='latest',
                     overwrite:Param('If True, delete the dataset and re-download',bool)=False):
    "Download a dataset archive from remote storage"
    # Build the storage client from the `storage_name` section of the config file.
    client=new_storage_client(storage_name,config_name)
    # Whatever the client returns from the download is reported as the local destination.
    downloaded_to=client.download_dataset(dataset_name,dataset_version,overwrite)
    print('Dataset downloaded to',downloaded_to)

If we have a project folder containing

<pre>
project_root
  &angrt; data
    &angrt; mnist
      &angrt; hand_drawn_digits
        &angrt; digit0.png
        &angrt; digit1.png
        &angrt; ...
  &angrt; secrets
    &angrt; settings.ini
  &angrt; main.py
</pre>

where `settings.ini` contains

```
[DEFAULT]
local_path=data

[azure_demo]
storage_client=storage_tools.core.AzureStorageClient
conn_str=<A connection string to an Azure Storage account without credential>
credential=<The credentials with which to authenticate>
container=<The name of a storage container>
```

We can

### Create a new version of a dataset

```
upload_dataset azure_demo mnist/hand_drawn_digits --dataset_version=major
```

### Download the latest version of a dataset

Feel free to delete your local copy of this dataset (from `data`) first, so that it is downloaded again from Azure storage.

```
download_dataset azure_demo mnist/hand_drawn_digits
```

In [None]:
#hide
import shutil
from pathlib import Path
def _rmtree(p):
    try: shutil.rmtree(p)
    except FileNotFoundError: pass

In [None]:
def _make_local_test_data():
    test_files=['a/b/test_data.txt','a/b/more_test_data.txt']
    for i,f in enumerate(test_files):
        f='test/local_path/'+f
        Path(f).parent.mkdir(parents=True,exist_ok=True)
        with open(f, 'w') as _file: _file.write(f'a little bit of data {i}')
    return test_files

In [None]:
# Start from empty folders so that results of earlier runs cannot leak into this one.
for p in ['test/local_path','test/storage_area']: _rmtree(p)

test_files=_make_local_test_data()

def _t(expected,upload_name,version='patch'):
    "Upload `upload_name` as `version` and check that the `expected` archive exists in the storage area"
    upload_dataset('local_test',upload_name,'test/settings.ini',version)
    # `upload_dataset` only prints its result, so verify the upload through the file system.
    archive=Path('test/storage_area')/expected
    assert archive.exists(),f'Expected {archive} after uploading {upload_name!r}'
_t('a.3.0.0.zip','a','3.0.0')
_t('a.3.0.1.zip','a')
# TODO: check zip contents
# Clear any local version folders before exercising the downloads.
_rmtree('test/local_path/a.3.0.0')
_rmtree('test/local_path/a.3.0.1')

download_dataset('local_test','a','test/settings.ini')  # no version given: uses the default, 'latest'
download_dataset('local_test','a','test/settings.ini','3.0.0')
download_dataset('local_test','a','test/settings.ini','3.0.0')  # same request again, immediately after the first
download_dataset('local_test','a','test/settings.ini','3.0.0',True)  # overwrite=True

Dataset uploaded to test\storage_area\a.3.0.0.zip
Dataset uploaded to test\storage_area\a.3.0.1.zip
Dataset downloaded to test\local_path\a.3.0.1
Dataset downloaded to test\local_path\a.3.0.0
Dataset downloaded to test\local_path\a.3.0.0
Dataset downloaded to test\local_path\a.3.0.0


If you have datasets uploaded with `storage-tools 0.0.3` or earlier, you can

### Add standalone manifest files

```
add_standalone_manifest azure_demo mnist/hand_drawn_digits
```

Note: If your dataset does not contain a manifest, this command will not fail ... but you won't get a standalone manifest either.

In [None]:
#export
@call_parse
def add_standalone_manifest(storage_name:Param('Section name in config',str),
                            dataset_name:Param('Dataset name',str),
                            config_name:Param('Path to config',str)='secrets/settings.ini'):
    "Add stand-alone manifest files for datasets created with `storage-tools 0.0.3` and earlier"
    client=new_storage_client(storage_name,config_name)
    # Names of everything the client lists with a name starting with `dataset_name`.
    remote_names=[item.name for item in client.ls(name_starts_with=dataset_name)]
    for ver in client.ls_versions(dataset_name):
        manifest_name=f'{dataset_name}.{ver}.manifest.json'
        # Versions that already have a stand-alone manifest need no work.
        if manifest_name in remote_names: continue
        # Download this version; the result is what `_upload_manifest` is given.
        local_copy=client.download_dataset(dataset_name,ver)
        try:
            uploaded_to=client._upload_manifest(local_copy)
            print('Manifest uploaded to',uploaded_to)
        except FileNotFoundError as err:
            # Report the problem and carry on with the remaining versions.
            print('Manifest not uploaded.',err)

In [None]:
# Remove every a.3.0.x artefact except the dataset archives in "remote storage",
# so that the manifests have to be rebuilt from those archives.
for version_name in ['a.3.0.0','a.3.0.1']:
    _rmtree(f'test/local_path/{version_name}')
    stale_files=[f'test/local_path/{version_name}.zip',
                 f'test/local_path/{version_name}.manifest.json',
                 f'test/storage_area/{version_name}.manifest.json']
    # `unlink` raises if a file is missing, so this also checks the earlier cells left them behind.
    for stale in stale_files: Path(stale).unlink()

add_standalone_manifest('local_test','a','test/settings.ini')

Manifest uploaded to test\storage_area\a.3.0.0.manifest.json
Manifest uploaded to test\storage_area\a.3.0.1.manifest.json


In [None]:
# Final clean-up: remove the folders used by the tests in this notebook.
for test_dir in ('test/local_path','test/storage_area'):
    _rmtree(test_dir)