In [1]:
import data_vault

In [2]:
# Open the zip archive at data/storage.zip as the vault; the %vault magics below operate on it.
# --secure defaults to True (see "Open vault arguments" in the Reference section); it is switched
# off here, presumably because no --encryption_variable is given -- TODO confirm.
%open_vault -p data/storage.zip --secure False

In [3]:
from pandas import DataFrame
from random import choice, randint
# Pool of city names that every synthetic row samples from.
cities = ['London', 'Delhi', 'Tokyo', 'Lagos', 'Warsaw', 'Chongqing']
# 10,000 synthetic rows: a random salary in [0, 100] (randint includes both
# ends) paired with a randomly chosen city. On each row the salary is drawn
# before the city.
salaries = DataFrame(
    [dict(salary=randint(0, 100), city=choice(cities)) for _ in range(10000)]
)

In [4]:
salaries.head(3)

Unnamed: 0,salary,city
0,6,Lagos
1,5,Tokyo
2,100,Lagos


## Examples

### Store and import

In [5]:
%vault store salaries in datasets

datasets/salaries


'Stored salaries (None → 5328F82E) at Sunday, 08. Dec 2019 14:25'

In [6]:
%vault import salaries from datasets as salaries_dataset

Reduced memory usage by 87.27%, from 0.79 MB to 0.10 MB.


'Imported salaries_dataset (5328F82E) at Sunday, 08. Dec 2019 14:25'

In [7]:
salaries_dataset.head(3)

Unnamed: 0,salary,city
0,6,Lagos
1,5,Tokyo
2,100,Lagos


In [8]:
# Derive a new frame with a random `age` in [0, 100], drawn once per row in
# row order; `assign` returns a new frame, so `salaries` is left unchanged.
salaries_by_age = salaries.assign(
    age=[randint(0, 100) for _ in range(len(salaries))]
)

In [9]:
salaries_by_age.head(3)

Unnamed: 0,salary,city,age
0,6,Lagos,76
1,5,Tokyo,57
2,100,Lagos,75


In [10]:
%vault store salaries_by_age in datasets

datasets/salaries_by_age


'Stored salaries_by_age (F0DC3487 → 61F0099E) at Sunday, 08. Dec 2019 14:25'

In [11]:
%vault import datasets

'Imported  at Sunday, 08. Dec 2019 14:25'

In [12]:
# uncomment the line below and press <tab> with cursor after the dot (.)
# to see how the autocompletion works on imported folders:

# datasets.

In [13]:
datasets.salaries.head(3)

Reduced memory usage by 87.27%, from 0.79 MB to 0.10 MB.


Unnamed: 0,salary,city
0,6,Lagos
1,5,Tokyo
2,100,Lagos


In [14]:
datasets.salaries_by_age.head(3)

Reduced memory usage by 87.30%, from 0.87 MB to 0.11 MB.


Unnamed: 0,salary,city,age
0,6,Lagos,76
1,5,Tokyo,57
2,100,Lagos,75


To display all members in code:

In [15]:
dir(datasets)

['salaries', 'salaries_by_age']

### Delete

In [16]:
%vault del salaries from datasets

'Deleted datasets/salaries (5328F82E) at Sunday, 08. Dec 2019 14:25'

In [17]:
dir(datasets)

['salaries_by_age']

### Encrypted storage

Normally you would keep your encryption key in a secure location rather than in the notebook itself; it is set inline here only for the sake of demonstration:

In [18]:
%env STORAGE_KEY=SECRET_PASSWORD

env: STORAGE_KEY=SECRET_PASSWORD


In [19]:
# Switch the vault to a second archive. --encryption_variable takes the NAME of the environment
# variable set above (STORAGE_KEY), not the key itself; presumably the vault reads the password
# from it -- confirm against the data_vault documentation.
%open_vault -p data/encrypted_storage.zip --encryption_variable STORAGE_KEY

In [20]:
%vault store salaries_by_age in encrypted_datasets

encrypted_datasets/salaries_by_age


'Stored salaries_by_age (5AC461A9 → 61F0099E) at Sunday, 08. Dec 2019 14:25'

In [21]:
!ls data

encrypted_storage.zip  storage.zip


In [22]:
from zipfile import ZipFile

# Show that the stored member cannot be read with the standard-library zipfile
# module unless a password is supplied.
with ZipFile('data/encrypted_storage.zip') as zip_archive:
    try:
        # Opening an encrypted member without a password raises RuntimeError;
        # if the open unexpectedly succeeds, the handle is closed right away.
        with zip_archive.open('encrypted_datasets/salaries_by_age'):
            pass
    except RuntimeError as e:
        # Only the expected "password required" error is displayed. Anything
        # else (e.g. KeyError for a missing member) propagates instead of
        # being printed as though it were the demonstration.
        print(e)

File 'encrypted_datasets/salaries_by_age' is encrypted, password required for extraction


## Reference

In [23]:
print(data_vault.VaultMagics.__doc__)

The `%vault` magic provides a reproducible caching mechanism for variables exchange between notebooks.

    To open the vault use `%open_vault` magic.
    


In [24]:
print(data_vault.VaultMagics.open_vault.__doc__)

Open a zip archive for the vault. Once opened, all subsequent `%vault` magics operate on this archive.

Open vault arguments:

	 --path, -p, default storage.zip
	 --encryption_variable, -e, default None
	 --secure, -s, default True
	 --optimize_df, -o, default True
	 --timestamp, -t, default True
	 --metadata, -m, default True


In [25]:
print(data_vault.VaultMagics.vault.__doc__)

Perform one of the available actions, print the description and save metadata in the cell.

Vault commands:

# store
	 - store <one_or_many_variables> in <module> as <valid_id> [with <function>]
	 - store <one_or_many_variables> in <module> [with <function> as <valid_id>]
	 - store <one_variable> in <path> [with <function>]
# import
	 - import <valid_id> from <module> as <valid_id> [with <function>]
	 - import <one_or_many_valid_id> from <module> [with <function> as <valid_id>]
	 - import <path> as <valid_id> [with <function>]
	 - import <module> [as <valid_id>]
# del
	 - del <one_variable> from <module> []
	 - del <path> []
# assert
	 - assert <one_variable> in <module> is <hash> [with <hash_method>]
	 - assert <path> is <hash> [with <hash_method>]
