In [17]:
import tempfile
from datetime import datetime, timezone

import disdat.api as api
from disdat.api import Bundle

# Create example data context
* Remove any existing examples for clean execution

In [18]:
# Name of the data context that every example in this notebook reads and writes.
data_context = 'example-context'
# Make sure the example context is available (presumably created if missing — TODO confirm in disdat docs).
api.context(data_context)
# Remove any existing example bundles so the notebook executes from a clean state.
api.rm(data_context, rm_all=True)

# Let's make a bunch of versions of "example_data" 
* All example bundles will share the same "human name"

In [19]:
# Shared "human name": every example bundle below is stored under this name,
# so each one becomes another version of "example_data".
bundle_name = "example_data"

# Store scalar in a bundle with context manager

In [20]:
# Context-manager form: the bundle is filled in inside the `with` body
# (presumably it is closed/committed on exit — TODO confirm in disdat docs).
with Bundle(data_context, name=bundle_name) as b:
    b.add_data(True)  # the payload is a single scalar value
    b.add_tags({'info':'storing a scalar'})  # the 'info' tag is what the listing cell prints

# Store array in a bundle with constructor

In [5]:
# Constructor form: the data (a list of ints) and the tags are passed directly
# to Bundle(...); no explicit open/close calls are made in this cell.
b = Bundle(data_context, name=bundle_name, data=[3,4,8,12,1000], tags={'info':'storing an array'})

# Store a dictionary in a bundle manually


In [9]:
# Manual form: open the bundle explicitly, fill it in, then close it.
b = Bundle(data_context).open()
b.name = bundle_name  # name is assigned after construction instead of via name=
b.add_data({'jumping':[3.0,4.8], 'jack': [6,8,10]})  # payload is a dict of lists
b.add_tags({'info':'storing a dict'})
# The value returned by close() is rebound to b (presumably the closed bundle — TODO confirm).
b = b.close()

# Store existing local files in a bundle

You can version existing files by storing the local file path (or an S3 path; see below).

Note: This copies the file into the local data context!

* Return the file path as a string in b.data
* Or place the path in an array (or tuple), in an array (or tuple) used as a dictionary value, or in a dataframe column

In [10]:
# Create a throwaway "external" file and version it by storing its path in a bundle.
# The temporary file is managed by a `with` block so it is closed (and removed)
# even if creating or filling the bundle raises.
with tempfile.NamedTemporaryFile() as local_fp:
    local_fp.write(b'an external local file')
    # Flush so the bytes are on disk before the path is handed to the bundle.
    local_fp.flush()

    # The bundle block finishes while the temporary file still exists, so the
    # file can be copied into the local data context before it is removed.
    with Bundle(data_context, name=bundle_name) as b:
        b.add_data(local_fp.name)
        b.add_tags({'info':'added a local file'})

# Store existing S3 files in a bundle

You can version existing S3 files by storing the S3 file path.  

Note: This requires valid AWS credentials.

* If you have a remote context, this copies the S3 file into the remote context.  
* Otherwise it will copy the file into the local context. 

In [16]:
# Full S3 path of an existing object to be versioned.
s3_file = 's3://landsat-pds/c1/L8/233/248/LC08_L1TP_233248_20170525_20170614_01_T1/LC08_L1TP_233248_20170525_20170614_01_T1_thumb_large.jpg'

# Store the S3 path itself as the bundle data; valid AWS credentials are required.
with Bundle(data_context, name=bundle_name) as b:
    b.add_data(s3_file)
    b.add_tags({'info':'copied in an s3 file'})


# Avoiding file copies: Use managed paths to store a file directly in a bundle
* Ask the bundle for a file target (places file in bundle directory)
* Store the file in that file target
* Return path or target in the bundle data field
* Note: b.get_remote_file() will provide the same functionality when you have a remote context bound to the local context. 

In [7]:
# Zero-copy pattern: ask the bundle for managed file targets, write straight
# into them, then record the targets as the bundle's data.
with Bundle(data_context, name=bundle_name) as b:
    f1 = b.get_file("file_1.txt")
    f2 = b.get_file("file_2.txt")
    # Pair each target with its text and write them in order.
    contents = ((f1, "This is our first file!"), (f2, "This is our second file!"))
    for target, text in contents:
        with target.open(mode='w') as f:
            f.write(text)
    b.add_data([f1,f2])
    b.add_tags({'info':'zero copy local file'})

# List bundles with name "example_data"

In [7]:
# List every bundle stored under the shared name: creation time, data and 'info' tag.
for b in api.search(data_context, bundle_name):
    # creation_date is passed as a POSIX timestamp. datetime.utcfromtimestamp()
    # is deprecated since Python 3.12, so convert through an aware UTC datetime
    # and drop tzinfo to keep the same naive-UTC text in the printed output.
    created_utc = datetime.fromtimestamp(b.creation_date, tz=timezone.utc).replace(tzinfo=None)
    print('{}\t{}'.format(b.name, created_utc))
    print('\tdata: {}'.format(b.data))
    print('\tinfo: {}'.format(b.tags['info']))
    print()

example_data	2020-07-09 15:20:05.686747
	data: s3://idl-cdocdp-uw2-processing-cdocdp-prd/context/example-context/objects/0654e81b-f011-4726-b2fc-6622c1aa3847/LC08_L1TP_233248_20170525_20170614_01_T1_thumb_large.jpg
	info: copied in an s3 file

example_data	2020-07-09 15:19:51.569277
	data: [   3    4    8   12 1000]
	info: storing an array

example_data	2020-07-09 15:19:49.169676
	data: True
	info: storing a scalar

