In [None]:
#hide
# from your_lib.core import *

# S3Bz

> Save and load a dictionary to S3 using bz2 compression

## Install

`pip install s3bz`

## How to use

### Create a bucket and make sure that it has transfer acceleration enabled
#### Create a bucket
`aws s3 mb s3://<bucketname>`
#### Enable transfer acceleration
`aws s3api put-bucket-accelerate-configuration --bucket <bucketname> --accelerate-configuration Status=Enabled`

First, import the s3 module

In [None]:
#hide
# import pickle
# KEY = ''
# PW = ''
# keypath = '/Users/nic/.pip-tester-pybz'
# if KEY and PW:
#   with open (keypath, 'wb') as f:
#     pickle.dump({
#         'KEY': KEY,
#         'PW': PW
#     }, f)
# with open(keypath, 'rb') as f:
#   creden = pickle.load(f)
# USER = creden['KEY']
# PW = creden['PW']

In [None]:
#hide
import logging
logging.basicConfig(level=logging.WARNING)
import requests

## import package

In [None]:
from importlib import reload
from s3bz.s3bz import S3

### set up dummy data

In [None]:
#hide
# Target bucket and object key shared by the save/load examples.
bucket, key = 'pybz-test', 'test.dict'
# Placeholder payload; a later cell rebinds sampleDict to generated rows.
sampleDict = dict(test='bool')
# No explicit credentials are supplied to the S3 calls.
# NOTE(review): presumably the library then falls back to the ambient AWS
# credentials -- confirm against the S3 class.
USER = PW = None

In [None]:
#hide
#Dummy Data
from random import randrange
from dataclasses import dataclass
from dataclasses_json import dataclass_json
# Number of synthetic inventory rows generated for the save/load round trip.
numberOfRows = 1000
@dataclass_json
@dataclass
class Inventory:
  """Inventory record whose four fields are all annotated as ``str``.

  The class defines no methods itself; the ``from_dict`` / ``to_dict``
  helpers called on it in this notebook are expected to come from the
  ``dataclass_json`` decorator.
  """
  # NOTE(review): field meanings are not described in this notebook;
  # the names look like product code, branch code and quantities -- confirm.
  ib_prcode:str
  ib_brcode:str
  ib_cf_qty:str
  new_ib_vs_stock_cv:str

# Build `numberOfRows` rows of random, string-valued inventory data. Every raw
# mapping is passed through Inventory.from_dict(...).to_dict(), so each entry
# of sampleDict is a plain dict produced by the Inventory dataclass.
sampleDict = [
  Inventory.from_dict(dict(
    ib_brcode=str(randrange(1000, 1030)),
    ib_prcode=str(randrange(10000, 100000)),
    ib_cf_qty=str(randrange(-10, 1000)),
    new_ib_vs_stock_cv=str(randrange(-10, 1000)),
  )).to_dict()
  for _ in range(numberOfRows)
]
# sampleDict[0]   # uncomment to preview the first generated row

## save object using bz2 compression

In [None]:
# Upload `sampleDict` under `key` in `bucket`; `accelerate=True` asks for the
# transfer-accelerated endpoint, which the bucket must have enabled.
result = S3.save(
    key=key,
    objectToSave=sampleDict,
    bucket=bucket,
    user=USER,
    pw=PW,
    accelerate=True,
)
# Report the outcome with a conditional expression: it works for any
# truthy/falsy return value, whereas indexing a tuple with the result raises
# if the call returns None or an integer other than 0/1.
print('success' if result else 'failed')

success


In [None]:
#hide
# Hidden sanity check: stop here if the S3.save call above reported a falsy result.
assert result, 'saving failed'

## check if an object exists

In [None]:
# Ask about the object that was just saved. The original passed an empty
# string as the key, which does not name that object.
result = S3.exist(key, bucket, user=USER, pw=PW, accelerate=True)
# Conditional expression instead of tuple-indexing by the return value, so a
# None / non-bool return cannot raise here.
print('exist' if result else 'doesnt exist')

exist


## load object with bz2 compression

In [None]:
# Read the object stored under `key` back from `bucket`, then show only its
# first element instead of dumping every row.
result = S3.load(key=key, bucket=bucket, user=USER, pw=PW, accelerate=True)
print(result[0])

{'ib_prcode': '87509', 'ib_brcode': '1017', 'ib_cf_qty': '890', 'new_ib_vs_stock_cv': '99'}


In [None]:
#hide
# Hidden round-trip check: the loaded object must equal the object that was saved.
assert result == sampleDict, f'wrong result {result}, should be {sampleDict}'

## presign download object

In [None]:
# Request a presigned download link for the stored object.
# NOTE(review): `expiry` is presumably a lifetime in seconds -- confirm against S3.presign.
url = S3.presign(key=key, bucket=bucket, expiry=1000, user=USER, pw=PW)
print(url)

https://pybz-test.s3-accelerate.amazonaws.com/test.dict?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1606301851


In [None]:
#hide
# Hidden check: a falsy value (e.g. None or an empty string) means no URL was produced.
assert url

### download using signed link

In [None]:
# Fetch the object through the presigned URL instead of the S3 client.
# The hidden check in the next cell expects the returned value to equal
# `sampleDict`. NOTE(review): so the helper presumably decompresses and
# decodes the payload itself -- confirm.
from s3bz.s3bz import Requests
result = Requests.getContentFromUrl(url)

In [None]:
#hide
# Hidden check: content fetched via the presigned URL must equal the object that was saved.
assert result == sampleDict, 'not returning the correct object'

## File operations

### save without compression

In [None]:
# S3 key for the plain-file examples (rebinds `key` from the earlier sections),
# plus the local source and download locations.
key = 'tmpFile'
inputPath, downloadPath = '/tmp/tmpFile.txt', '/tmp/downloadTmpFile.txt'

# Create the small text file that will be uploaded; no trailing newline is written.
with open(inputPath, mode='w') as f:
  print('hello world', end='', file=f)

In [None]:
# Upload the local file as-is under `key`.
S3.saveFile(key=key, path=inputPath, bucket=bucket)
# Test: the cell output shows whether the key is now present in the bucket.
S3.exist(key, bucket)

True

### load without compression

In [None]:
# Download the object stored under `key` to the local `downloadPath`.
S3.loadFile(key=key, path=downloadPath, bucket=bucket)

In [None]:
# Test: print the downloaded file so it can be compared with what was uploaded.
with open(downloadPath) as f:
  downloaded = f.read()
  print(downloaded)

hello world


### delete

In [None]:
# Remove the uploaded object from the bucket.
result = S3.deleteFile(key, bucket)
# Test: the cell output shows whether the key is still present.
S3.exist(key, bucket)

False

## save and load pandas dataframe

In [None]:
# pandas must be installed separately: it is deliberately left out of the
# package requirements to keep the install size small.
import pandas as pd

df = pd.DataFrame({
    'test': [1, 2, 3, 4, 5],
    'test2': [2, 3, 4, 5, 6],
})
# NOTE(review): arguments are passed bucket-first here, while the other S3
# calls in this notebook pass the key first -- confirm against the
# saveDataFrame / loadDataFrame signatures.
S3.saveDataFrame(bucket, key, df)
S3.loadDataFrame(bucket, key)

Unnamed: 0.1,Unnamed: 0,test,test2
0,0,1,2
1,1,2,3
2,2,3,4
3,3,4,5
4,4,5,6
