In [None]:
#default_exp s3

# S3Cache

## Deals with everything that reads from or writes to the S3 cache for the database

In [None]:
#export
from s3bz.s3bz import S3
from nicHelper.wrappers import add_method, add_class_method, add_static_method
from nicHelper.dictUtil import stripDict, printDict, hashDict, saveStringToFile, loadStringFromFile, saveDictToFile, loadDictFromFile
from nicHelper.exception import errorString
from dict_hash import dict_hash, sha256
from base64 import b64encode, b64decode
import os, logging

In [None]:
#hide
import pickle, os

# Point this development session at the manually provisioned dev resources.
# These are assigned unconditionally so the notebook always targets the dev
# table and buckets regardless of what the surrounding shell exported.
os.environ['DATABASE_TABLE_NAME'] = 'product-table-dev-manual'
os.environ['REGION'] = 'ap-southeast-1'
os.environ['INVENTORY_BUCKET_NAME'] = 'product-bucket-dev-manual'
os.environ['INPUT_BUCKET_NAME'] = 'input-product-bucket-dev-manual'
os.environ['DAX_ENDPOINT'] = 'longtermcluster.vuu7lr.clustercfg.dax.apse1.cache.amazonaws.com:8111'
# SECURITY: the LINE token must never be written into the notebook. Export
# LINEKEY in the shell before starting the kernel; the empty fallback only keeps
# the variable defined. Any token that was committed here earlier should be
# treated as leaked and rotated.
os.environ.setdefault('LINEKEY', '')
REGION = 'ap-southeast-1'

In [None]:
from villaProductDatabase.database import ProductDatabase
import logging
logging.basicConfig(level=logging.INFO)

longtermcluster.vuu7lr.clustercfg.dax.apse1.cache.amazonaws.com:8111
longtermcluster.vuu7lr.clustercfg.dax.apse1.cache.amazonaws.com:8111
longtermcluster.vuu7lr.clustercfg.dax.apse1.cache.amazonaws.com:8111


In [None]:
#export
import os
# Local files used as an on-disk cache: the cached database and the hash that
# identifies which version of the database that cache holds.
DBHASHLOCATION = '/tmp/database.hash'
DBCACHELOCATION = '/tmp/database.cache'
# Resource names come from the environment; each is None when the variable is unset.
DATABASE_TABLE_NAME = os.environ.get('DATABASE_TABLE_NAME')
INVENTORY_BUCKET_NAME = os.environ.get('INVENTORY_BUCKET_NAME')
INPUT_BUCKET_NAME = os.environ.get('INPUT_BUCKET_NAME')
REGION = os.environ.get('REGION') or 'ap-southeast-1'
# NOTE(review): these read the generic USER / PW variables; on most Unix systems
# USER is the login name, so ACCESS_KEY_ID may pick that up -- confirm the intended names.
# `or None` turns an empty string into None.
ACCESS_KEY_ID = os.environ.get('USER') or None
SECRET_ACCESS_KEY = os.environ.get('PW') or None
LINEKEY= os.environ.get('LINEKEY')
  
try:
  DAX_ENDPOINT = os.environ['DAX_ENDPOINT']
  print(DAX_ENDPOINT)
except KeyError as e:
  # Bind the name even when the variable is missing, so later code that tests
  # `if DAX_ENDPOINT` gets a falsy value instead of raising NameError.
  DAX_ENDPOINT = None
  print(f'dax endpoint missing {e}')
  

longtermcluster.vuu7lr.clustercfg.dax.apse1.cache.amazonaws.com:8111


In [None]:
#export
class S3Cache:
  '''
  Empty holder class for the S3 cache helpers.

  The body defines nothing itself; this notebook attaches saveHash, loadHash,
  loadFromS3 and saveAllS3 to it afterwards through add_class_method.
  '''

In [None]:
# Throwaway class for exercising the cache helpers in this notebook: it combines
# S3Cache with ProductDatabase so the class methods can be called on it.
class Tester( S3Cache, ProductDatabase):
  # Settings read by the ProductDatabase base class
  # (presumably PynamoDB-style model settings -- confirm against ProductDatabase).
  class Meta:
    # Table name and region are taken straight from the environment; a missing
    # variable raises KeyError here.
    table_name = os.environ['DATABASE_TABLE_NAME']
    region = os.environ['REGION']
    billing_mode='PAY_PER_REQUEST'
    # A one-element endpoint list when DAX_ENDPOINT is truthy, otherwise None.
    dax_read_endpoints = [DAX_ENDPOINT] if DAX_ENDPOINT else None
    dax_write_endpoints = [DAX_ENDPOINT] if DAX_ENDPOINT else None
  pass
  

## Save and load hash

In [None]:
#export
@add_class_method(S3Cache)
def saveHash(cls , data:dict, key='allData', bucket=INVENTORY_BUCKET_NAME, 
             cachePath=DBCACHELOCATION, hashPath = DBHASHLOCATION):
  '''
  Cache ``data`` on local disk and publish its hash to S3.

  data: dictionary to cache and hash
  key: base name; the hash is stored in S3 under ``<key>-hash``
  bucket: S3 bucket that receives the hash object
  cachePath: local file that receives ``data``
  hashPath: local file that receives the hash string

  Writes ``data`` to ``cachePath``, the hash string to ``hashPath`` and
  ``{'hash': <hash string>}`` to S3. Returns nothing.
  '''
  remoteKey = '{}-hash'.format(key)
  digest = hashDict(data)
  print(f'hashKey is {remoteKey}')

  # local copies first: the data itself, then the hash that identifies it
  print('saving cache file')
  saveDictToFile(data, path=cachePath)
  print('saving hash file')
  saveStringToFile(digest, path=hashPath)

  # finally publish the hash so other readers can validate their own cache
  print('saving hash to s3')
  S3.save(key=remoteKey, objectToSave={'hash': digest}, bucket=bucket)
@add_class_method(S3Cache)
def loadHash(cls,key='allData', bucket=INVENTORY_BUCKET_NAME):
  '''
  Fetch the hash string stored in S3 under ``<key>-hash``.

  key: base name the hash was saved with
  bucket: S3 bucket to read from

  Returns the value of the ``hash`` entry of the loaded object (None when the
  loaded object has no such entry).
  '''
  remoteKey = '{}-hash'.format(key)
  print(f'hashKey is {remoteKey}')
  stored = S3.load(remoteKey, bucket=bucket)
  return stored.get('hash')

In [None]:
# Round trip: the hash published by saveHash must be the one loadHash returns.
testData = {'test':'test'}
S3Cache.saveHash(testData, key='testhash')
loadedHash = S3Cache.loadHash(key='testhash')
assert loadedHash == hashDict(testData), 'hash read back from s3 differs from the hash of the saved data'
loadedHash

INFO:root:using accelerate endpoint
INFO:root:data was saved to s3
INFO:root:using accelerate endpoint


hashKey is testhash-hash
saving cache file
saving hash file
saving hash to s3
hashKey is testhash-hash


INFO:root:object exists, loading
INFO:root:using accelerate endpoint


'h3aBJSFv1xUq9jXtp3bYCXksQYA='

In [None]:
#export
@add_class_method(S3Cache)
def loadFromS3(cls, bucketName= INVENTORY_BUCKET_NAME, key = 'allData',
               hashPath=DBHASHLOCATION, cachePath = DBCACHELOCATION,**kwargs):
  '''
  Return the database stored under ``key``, using the local cache when it is current.

  this is not a real time function, there may be a delay of sync between
  the main dynamodb database and the cache

  bucketName: S3 bucket holding both the ``<key>-hash`` and ``<key>-pklzl`` objects
  key: base name of the stored database
  hashPath: local file holding the hash of the cached copy
  cachePath: local file holding the cached copy
  kwargs: forwarded to S3.loadPklZl; ``user`` (if given) is also logged

  When both local files exist and the local hash equals the hash published in
  ``bucketName``, the local cache is returned. Otherwise the result of
  S3.loadPklZl for ``<key>-pklzl`` is returned.
  '''

  if os.path.exists(hashPath) and os.path.exists(cachePath):
    print('cache exist')
    # Validate against the hash in the bucket we would download from, not the
    # default bucket, so a custom bucketName is honoured consistently.
    if cls.loadHash(key=key, bucket=bucketName) == loadStringFromFile(hashPath):
      return loadDictFromFile(cachePath)
    print('cache has different hash than s3')
  else:
    # Only report a missing cache when it really is missing; a stale cache is
    # already reported above.
    print('cache doesnt exist')
  logging.info(f'loading from {bucketName}')
  logging.info(f'user is {kwargs.get("user")}')

  return S3.loadPklZl(key=f'{key}-pklzl', bucket = bucketName,  **kwargs)

In [None]:
%%time
# Smoke check: load the database with the default key and show its first key.
list(Tester.loadFromS3().keys())[0]

INFO:root:using accelerate endpoint
INFO:root:object doesnt exist
INFO:root:loading from product-bucket-dev-manual
INFO:root:user is None
INFO:root:using accelerate endpoint


cache exist
hashKey is allData-hash
cache has different hash than s3
cache doesnt exist


INFO:root:object exists, loading
INFO:root:using accelerate endpoint


CPU times: user 678 ms, sys: 137 ms, total: 815 ms
Wall time: 1.48 s


'0217153'

## Save to s3 with different options

In [None]:
#export
@add_class_method(S3Cache)
def saveAllS3(cls, objectToSave:dict, bucketName= INVENTORY_BUCKET_NAME, key = 'allData', 
              hashPath = DBHASHLOCATION, cachePath = DBCACHELOCATION, **kwargs):
  '''
  Upload ``objectToSave`` to S3 in every stored format, unless it is unchanged.

  objectToSave: dictionary to store
  bucketName: S3 bucket that receives the objects and the hash
  key: base name; objects are written as ``<key>``, ``<key>-pklzl`` and ``<key>-zl``
  hashPath: local file that receives the hash string (passed to saveHash)
  cachePath: local file that receives the cached copy (passed to saveHash)
  kwargs: accepted for call compatibility; not used here

  The upload is skipped when the hash of ``objectToSave`` equals the hash
  already published for the same ``key`` in ``bucketName``. Returns nothing.
  '''
  # Decide from the object itself and from the hash of *this* key and bucket.
  # Comparing the local hash file with the default 'allData' hash says nothing
  # about whether objectToSave changed.
  # NOTE(review): relies on loadHash returning a non-matching value (e.g. None)
  # when no hash has been published yet -- the notebook logs suggest so; confirm.
  if hashDict(objectToSave) == cls.loadHash(key=key, bucket=bucketName):
    print('the object did not change, skip saving')
    return
  S3.save(key=key, bucket=bucketName, objectToSave=objectToSave)
  S3.savePklZl(key=f'{key}-pklzl',bucket=bucketName, objectToSave=objectToSave)
  S3.saveZl(key=f'{key}-zl',bucket=bucketName, objectToSave=objectToSave)
  # Publish the hash next to the data it describes and refresh the local cache
  # at the paths the caller asked for.
  cls.saveHash(objectToSave, key=key, bucket=bucketName,
               cachePath=cachePath, hashPath=hashPath)
  

In [None]:
# Round trip: what saveAllS3 stores under a key must be what loadFromS3 returns for it.
key = 'testKey'
Tester.saveAllS3(objectToSave={'test':'test'}, key = key)
loadedObject = Tester.loadFromS3(key=key)
assert loadedObject == {'test':'test'}, 'object loaded from the cache/s3 differs from the one saved'
loadedObject

INFO:root:using accelerate endpoint
INFO:root:object doesnt exist
INFO:root:using accelerate endpoint
INFO:root:data was saved to s3
INFO:root:using accelerate endpoint


hashKey is allData-hash


INFO:root:data was saved to s3
INFO:root:using accelerate endpoint
INFO:root:data was saved to s3
INFO:root:using accelerate endpoint
INFO:root:data was saved to s3
INFO:root:using accelerate endpoint


hashKey is testKey-hash
saving cache file
saving hash file
saving hash to s3
cache exist
hashKey is testKey-hash


INFO:root:object exists, loading
INFO:root:using accelerate endpoint


{'test': 'test'}

In [None]:
%%time
# Load the database with the default key and keep it for the size and hash checks in the following cells.
database = Tester.loadFromS3()

INFO:root:loading from product-bucket-dev-manual
INFO:root:user is None
INFO:root:using accelerate endpoint


cache doesnt exist


INFO:root:object exists, loading
INFO:root:using accelerate endpoint


CPU times: user 620 ms, sys: 140 ms, total: 761 ms
Wall time: 1.23 s


In [None]:
import sys
# sys.getsizeof reports only the footprint of the container object itself, not
# of the keys and values it refers to, so this understates the total memory used.
sys.getsizeof(database )

2621536

In [None]:
from nicHelper.dictUtil import hashDict
%time hashDict(database)

CPU times: user 660 ms, sys: 113 ms, total: 773 ms
Wall time: 768 ms


'4dwjGB4O6LKgC8b5VZIWQi1hElU='

In [None]:
# Upload the hash string of the loaded database under the key 'test' in the inventory bucket.
S3.save(key='test',objectToSave=hashDict(database), bucket=INVENTORY_BUCKET_NAME)

INFO:root:using accelerate endpoint
INFO:root:data was saved to s3


True