In [1]:
# Standard library.
# NOTE(review): `io` is not referenced in any of the cells visible here.
import io
import sys

# Third-party helpers.
# NOTE(review): `pprint` is not referenced in any of the cells visible here.
from dotenv import load_dotenv
from rich.pretty import pprint

# Put the parent directory first on the module search path. Presumably this is
# what makes the local `doca` package importable from the notebook's folder —
# confirm. It must run before the `doca` imports below.
sys.path.insert(0,"../")

from doca.utils import document
from doca.utils import event
from doca.utils import storage

# Load variables from the secrets .env file; override=True lets values from the
# file replace variables that are already set in the process environment.
# NOTE(review): the storage backends presumably read credentials from these
# variables — confirm against doca.utils.storage.
load_dotenv("../secrets/.env", override=True)

True

In [2]:
# Read the sample PDF once; the same bytes are attached to every document.
filename = "../README.pdf"
with open(filename, mode="rb") as f:
    content = f.read()

# Lookup tables keyed by provider name: one document and one
# DOCUMENT_INGESTED event per provider.
docs, evs = {}, {}
for provider in ("aws_s3", "gcp_cloud_storage", "azure_storage_blob", "local_file_system"):
    doc = document.new_document(
        provider,
        "DOCA00012",
        "README.pdf",
        document_content=content,
    )
    doc.processor_name = "ingestor"
    docs[provider] = doc
    # The event wraps the freshly built document in a single-element list.
    evs[provider] = event.new_event(event.EventType.DOCUMENT_INGESTED, [doc])


In [None]:
# Dump the S3 ingestion event as 4-space-indented JSON for a visual check.
aws_event = evs["aws_s3"]
print(aws_event.json(indent=4))

In [3]:
# Instantiate the AWS S3 storage backend. The run recorded below logged a
# bucket creation followed by "AWS S3 storage layer initialized", so the
# constructor appears to contact the service — NOTE(review): confirm that
# credentials must already be loaded before this cell runs.
s3 = storage.AWSSimpleStorageService()

[2023-01-31 23:39:04,957] [31883] [INFO] [AWSSimpleStorageService]: Bucket `aurora-mercedez-dealership` created...
[2023-01-31 23:39:04,958] [31883] [INFO] [AWSSimpleStorageService]: AWS S3 storage layer initialized...


In [4]:
# Instantiate the GCP Cloud Storage backend. The run recorded below logged a
# bucket creation followed by "Cloud Storage initialized", so the constructor
# appears to contact the service — NOTE(review): confirm credential requirements.
cloud_storage = storage.GCPCloudStorage()

[2023-01-31 23:39:09,255] [31883] [INFO] [GCPCloudStorage]: Bucket `aurora-mercedez-dealership` created...
[2023-01-31 23:39:09,257] [31883] [INFO] [GCPCloudStorage]: Cloud Storage initialized...


In [6]:
# Instantiate the Azure Storage Blob backend. The run recorded below logged a
# bucket creation followed by "Storage Blob initialized", so the constructor
# appears to contact the service — NOTE(review): confirm credential requirements.
storage_blob = storage.AzureStorageBlob()

[2023-01-31 23:39:40,551] [31883] [INFO] [AzureStorageBlob]: Bucket `aurora-mercedez-dealership` created...
[2023-01-31 23:39:40,552] [31883] [INFO] [AzureStorageBlob]: Storage Blob initialized...


In [7]:
# Instantiate the local file system backend. The run recorded below logged a
# bucket creation followed by "File system store initialized".
# NOTE(review): where the bucket lives on disk is not visible from this cell.
local_file_system = storage.LocalFileSystem()

[2023-01-31 23:40:07,278] [31883] [INFO] [LocalFileSystem]: Bucket `aurora-mercedez-dealership` created...
[2023-01-31 23:40:07,279] [31883] [INFO] [LocalFileSystem]: File system store initialized...


In [10]:
# Storage backend for each provider, keyed by the same provider names used
# for `docs` and `evs`.
stores = {
    "aws_s3": s3,
    "gcp_cloud_storage": cloud_storage,
    "azure_storage_blob": storage_blob,
    "local_file_system": local_file_system,
}

In [11]:
# Save each provider's document into the backend registered under the same
# key. `stores` preserves insertion order, so providers are visited in the
# order aws_s3, gcp_cloud_storage, azure_storage_blob, local_file_system.
for provider, store in stores.items():
    doc = docs[provider]
    doc.save(store)

In [None]:
# Debug aid: show the identity and the printed form of `doc`, i.e. whichever
# document the most recently executed loop left bound to that name.
print(id(doc), doc, sep="\n")