In [None]:
# default_exp s3bz

# S3bz
> Helpers for saving and loading compressed objects, files and DataFrames on S3.

In [None]:
#hide
# Download the sample dictionary used as the payload by the timing cells below.
import requests, sys
# A timeout keeps the cell from hanging forever on a dead connection, and
# raise_for_status fails here instead of with an opaque JSON decode error.
response = requests.get('https://tenxor.sh/6pjW', timeout=30)
response.raise_for_status()
sampleDict = response.json()
# sys.getsizeof reports the size of the dict container only (in bytes); shown in MB.
print(sys.getsizeof(sampleDict)/1e6)
bucket = 'pybz-test'
key = 'test.dict'
# Offline alternative to the download above:
# sampleDict = {'test': 'bool'}
USER = None
PW = None

2.621536


In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
from botocore.config import Config
from nicHelper.wrappers import add_method, add_class_method, add_static_method
import bz2, json, boto3, logging, requests, zlib, pickle

In [None]:
#export
class S3:
  '''
  Class-level helpers for moving files, JSON-serialisable objects and
  DataFrames to and from S3.

  Every helper builds a fresh client through `S3.s3` and forwards its extra
  keyword arguments (`region`, `user`, `pw`, `accelerate`) to it.
  '''
  @staticmethod
  def s3(region = 'ap-southeast-1', user = None, pw = None, accelerate = True, **kwargs):
    '''
    create and return s3 client

    region: AWS region name handed to boto3
    user, pw: access key id and secret access key, passed to boto3 unchanged
    accelerate: when truthy, use the transfer-acceleration endpoint
    kwargs: accepted and ignored so callers can pass one shared option dict
    '''
    # A conditional (rather than indexing a tuple with `accelerate`) keeps the
    # log line working for any truthy/falsy value, not just bool/0/1.
    endpoint = 'accelerate' if accelerate else 'standard'
    logging.info(f'using {endpoint} endpoint')
    config = Config(s3={"use_accelerate_endpoint": accelerate,
                        "addressing_style": "virtual"})
    client = boto3.client(
        's3',
        aws_access_key_id= user,
        aws_secret_access_key= pw,
        region_name = region,
        config = config
      )
    return client

  @classmethod
  def saveFile(cls, key, path, bucket = '', **kwargs):
    '''upload the local file at `path` to `bucket` under `key`; returns the upload_file result'''
    client = cls.s3(**kwargs)
    return client.upload_file(path, bucket, key)

  @classmethod
  def loadFile(cls, key, path, bucket = '', **kwargs):
    '''download `key` from `bucket` into the local file `path`; returns the download_file result'''
    client = cls.s3(**kwargs)
    return client.download_file(bucket, key, path)

  @classmethod
  def deleteFile(cls, key, bucket, **kwargs):
    '''delete `key` from `bucket`; returns the delete_object response'''
    client = cls.s3(**kwargs)
    return client.delete_object(Bucket=bucket, Key=key)

  @classmethod
  def save(cls,  key, objectToSave, bucket = '',**kwargs):
    '''
    save an object to s3

    The object is serialised with json.dumps, bz2-compressed and uploaded
    with put_object.

    returns: True on success
    raises: RuntimeError when the response status code is not 200
    '''
    client = cls.s3(**kwargs)
    compressedData = bz2.compress(json.dumps(objectToSave).encode())
    result = client.put_object(Body=compressedData, Bucket=bucket, Key=key)
    statusCode = result['ResponseMetadata']['HTTPStatusCode']
    if statusCode != 200:
      # The previous code raised an undefined name here, which surfaced as a NameError.
      raise RuntimeError(
          f'saving {key!r} to bucket {bucket!r} failed with HTTP status {statusCode}')
    # Only report success once the status has actually been checked.
    logging.info('data was saved to s3')
    return True

  @classmethod
  def exist(cls, key, bucket, **kwargs):
    '''
    return True when list_objects finds anything under Prefix=key

    Note this is a prefix match: a `key` that is only the beginning of an
    existing key also counts as existing.
    '''
    response = cls.s3(**kwargs).list_objects(Bucket=bucket, Prefix=key)
    return 'Contents' in response

  @classmethod
  def load(cls, key, bucket='',fileName = '/tmp/tempFile.bz', **kwargs):
    '''
    load an object written by `save`

    The object is downloaded to `fileName`, bz2-decompressed and parsed as JSON.

    returns: the decoded object, or {} when `exist` reports nothing for `key`
    raises: ValueError when the downloaded file is empty
    '''
    if not cls.exist(key, bucket, **kwargs):
      logging.info('object doesnt exist')
      return {}
    logging.info('object exists, loading')
    client = cls.s3(**kwargs)
    client.download_file(bucket, key, fileName)
    with open(fileName, 'rb') as f:
      allItemsByte = f.read()
    if not allItemsByte: raise ValueError('all data does not exist in the database')
    return json.loads(bz2.decompress(allItemsByte).decode())

  @classmethod
  def presign(cls, key, expiry = 1000, bucket = '',**kwargs):
    '''
    return a presigned get_object url for `key`, valid for `expiry` seconds

    When `exist` reports nothing for `key` the string 'object doesnt exist'
    is returned instead of a url.
    '''
    if not cls.exist(key,bucket=bucket,**kwargs): return 'object doesnt exist'
    client = cls.s3(**kwargs)
    return client.generate_presigned_url(
        'get_object',
        Params={'Bucket': bucket,
                'Key': key},
        ExpiresIn=expiry)

  @classmethod
  def loadDataFrame(cls, bucket, key,path='/tmp/tmpfile.csv',**kwargs):
    '''download `key` to `path` and return it parsed with pandas.read_csv'''
    import pandas as pd
    cls.loadFile(key=key, path=path,bucket=bucket, **kwargs)
    return pd.read_csv(path)

  @classmethod
  def saveDataFrame(cls,bucket,key,df,path='/tmp/tmpfile.csv', **kwargs):
    '''
    write `df` to `path` with df.to_csv and upload that file under `key`

    to_csv is called with its defaults, so the index is written as the first
    CSV column and comes back from `loadDataFrame` as an 'Unnamed: 0' column.
    '''
    df.to_csv(path)
    # Forward kwargs so region/credentials/accelerate are honoured, as in every other helper.
    return cls.saveFile(key,path,bucket=bucket, **kwargs)

## Zlib options

In [None]:
#export
@add_class_method(S3)
def generalSave(cls, key, objectToSave, bucket = '', 
                compressor=lambda x: zlib.compress(x), 
                encoder=lambda x: json.dumps(x).encode() ,**kwargs):
  '''
  save an object to s3 with a pluggable encoder and compressor

  encoder: turns `objectToSave` into bytes (default: JSON)
  compressor: compresses those bytes (default: zlib)
  kwargs: forwarded to `cls.s3` to build the client

  returns: True on success
  raises: RuntimeError when the response status code is not 200
  '''
  s3 = cls.s3(**kwargs)
  compressedData = compressor(encoder(objectToSave))
  result = s3.put_object(Body=compressedData, Bucket=bucket, Key=key)
  statusCode = result['ResponseMetadata']['HTTPStatusCode']
  if statusCode != 200:
    # The previous code raised an undefined name here, which surfaced as a NameError.
    raise RuntimeError(
        f'saving {key!r} to bucket {bucket!r} failed with HTTP status {statusCode}')
  # Only report success once the status has actually been checked.
  logging.info('data was saved to s3')
  return True
@add_class_method(S3)
def generalLoad(cls, key, bucket = '',fileName = '/tmp/tempFile.bz', 
                decompressor=lambda x: zlib.decompress(x), 
                decoder=lambda x: json.loads(x.decode()), **kwargs):
  '''
  load an object from s3 with a pluggable decompressor and decoder

  The object is downloaded to `fileName`, its bytes are passed through
  `decompressor` and then `decoder`. Returns {} when `cls.exist` reports
  nothing for `key`; raises ValueError when the downloaded file is empty.
  '''
  # Bail out early when there is nothing to fetch.
  if cls.exist(key, bucket, **kwargs):
    logging.info('object exists, loading')
  else:
    logging.info('object doesnt exist')
    return {}
  # Fetch the object into the local scratch file.
  cls.s3(**kwargs).download_file(bucket, key, fileName)
  # Read it back and undo compression + encoding.
  with open(fileName, 'rb') as downloaded:
    rawBytes = downloaded.read()
  if rawBytes:
    return decoder(decompressor(rawBytes))
  raise ValueError('all data does not exist in the database')
  
@add_class_method(S3)
def saveZl(cls, key, objectToSave, bucket = '', **kwargs):
  '''
  save an object to s3 as zlib-compressed JSON (the `generalSave` defaults)

  kwargs are forwarded to `generalSave`, so client options such as
  region/user/pw/accelerate reach `cls.s3` instead of being dropped.
  '''
  return cls.generalSave(key, objectToSave, bucket, **kwargs)
@add_class_method(S3)
def loadZl(cls, key, bucket = '',fileName = '/tmp/tempFile.bz', **kwargs):
  '''
  load an object saved by `saveZl` (zlib-compressed JSON, the `generalLoad` defaults)

  kwargs are forwarded to `generalLoad`, so client options such as
  region/user/pw/accelerate reach `cls.s3` instead of being dropped.
  '''
  return cls.generalLoad(key, bucket, fileName, **kwargs)
  
@add_class_method(S3)
def savePklZl(cls, key, objectToSave, bucket = '', **kwargs):
  '''
  save an object to s3 as a zlib-compressed pickle

  kwargs are forwarded to `generalSave`, so client options such as
  region/user/pw/accelerate reach `cls.s3` instead of being dropped.
  '''
  return cls.generalSave(key, objectToSave, bucket,
                         compressor=lambda x: zlib.compress(x),
                         encoder=lambda x: pickle.dumps(x),
                         **kwargs)
  

@add_class_method(S3)
def loadPklZl(cls, key, bucket = '',fileName = '/tmp/tempFile.bz', **kwargs):
  '''
  load an object saved by `savePklZl` (zlib-compressed pickle)

  kwargs are forwarded to `generalLoad`, so client options such as
  region/user/pw/accelerate reach `cls.s3` instead of being dropped.
  '''
  # SECURITY: pickle.loads can execute arbitrary code while unpickling; only
  # use this on objects written by trusted code into a trusted bucket.
  return cls.generalLoad(key, bucket, fileName,
                         decompressor=lambda x: zlib.decompress(x),
                         decoder=lambda x: pickle.loads(x),
                         **kwargs)


In [None]:
# Round-trip sampleDict through S3 twice and time every call:
# first as zlib-compressed JSON (saveZl/loadZl), then as zlib-compressed pickle
# (savePklZl/loadPklZl). Both pairs write to the same bucket and key.
print(bucket)
%time S3.saveZl(key,sampleDict,bucket)
%time S3.loadZl(key,bucket)
%time S3.savePklZl(key,sampleDict,bucket)
%time S3.loadPklZl(key,bucket)

pybz-test
CPU times: user 165 ms, sys: 7.4 ms, total: 173 ms
Wall time: 291 ms
CPU times: user 62.8 ms, sys: 4.27 ms, total: 67 ms
Wall time: 621 ms
CPU times: user 80.7 ms, sys: 0 ns, total: 80.7 ms
Wall time: 204 ms
CPU times: user 57 ms, sys: 3.78 ms, total: 60.8 ms
Wall time: 550 ms


{'the': 73088,
 'of': 27807,
 'to': 26508,
 'a': 23957,
 'and': 22283,
 'that': 18770,
 'in': 18187,
 'is': 11277,
 'for': 8947,
 'on': 8109,
 'with': 7253,
 'it': 7226,
 'as': 6476,
 'was': 5736,
 'but': 5488,
 'i': 5287,
 'by': 5238,
 'this': 5084,
 'be': 4795,
 'at': 4529,
 'have': 4453,
 'his': 4399,
 'he': 4352,
 'its': 4343,
 'are': 4291,
 'not': 4280,
 'an': 4175,
 'about': 4092,
 'has': 3767,
 'from': 3697,
 'you': 3416,
 'new': 3273,
 'one': 2986,
 'or': 2928,
 'who': 2912,
 'all': 2869,
 'times': 2787,
 'more': 2783,
 'will': 2777,
 'which': 2715,
 'nyt': 2690,
 'they': 2621,
 'if': 2473,
 'story': 2441,
 'would': 2436,
 'their': 2330,
 'out': 2312,
 'says': 2286,
 'what': 2282,
 'wp': 2131,
 'so': 2108,
 'clinton': 2047,
 'than': 2034,
 'been': 1956,
 'had': 1955,
 'were': 1923,
 'when': 1911,
 'up': 1902,
 'us': 1887,
 'also': 1866,
 'no': 1813,
 'post': 1807,
 'there': 1779,
 'her': 1744,
 'lat': 1735,
 'like': 1700,
 'some': 1601,
 'we': 1590,
 'other': 1578,
 'just': 154

In [None]:
#export
class Requests:
    '''
      helpers for downloading bz2-compressed JSON content from a url
    '''
    @staticmethod
    def getContentFromUrl( url):
      '''
      GET `url` and return its body decoded as bz2-compressed JSON.

      When the response is not ok, 'error downloading' is printed and the raw
      response body (bytes) is returned instead of a decoded object.
      '''
      response = requests.get(url)
      if response.ok:
        # Happy path: body is bz2-compressed JSON.
        return json.loads(bz2.decompress(response.content))
      print('error downloading')
      return response.content

In [None]:
# hide
bucket = 'pybz-test'
key = 'test.dict'
sampleDict = {'test': 'bool'}
USER = None
PW = None

In [None]:
import pandas as pd
# DataFrame round trip: upload a small two-column frame as CSV, then read it back.
df = pd.DataFrame(dict(test=list(range(1, 6)), test2=list(range(2, 7))))
S3.saveDataFrame(bucket, key, df)
S3.loadDataFrame(bucket, key)


Unnamed: 0.1,Unnamed: 0,test,test2
0,0,1,2
1,1,2,3
2,2,3,4
3,3,4,5
4,4,5,6


In [None]:
#hide
!bash build.sh
# !nbdev_build_docs --mk_readme True

Converted index.ipynb.
Converted s3bz.ipynb.
converting: /home/ec2-user/SageMaker/pip/s3bz/s3bz.ipynb
converting: /home/ec2-user/SageMaker/pip/s3bz/index.ipynb
converting /home/ec2-user/SageMaker/pip/s3bz/index.ipynb to README.md
