From 848beccf059eb34e63eda65f6c5e9536e3f7cc7c Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 12 Oct 2019 16:37:22 +0100 Subject: [PATCH 01/75] updated .gitignore to ignore .python-version file --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6c3c275c48fb7..919e1b9621a70 100644 --- a/.gitignore +++ b/.gitignore @@ -118,3 +118,6 @@ doc/build/html/index.html doc/tmp.sv env/ doc/source/savefig/ +# pyenv files +.python-version + From d9f7c23636597486cb5e5710560ca9e249ae3a74 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 12 Oct 2019 22:54:07 +0100 Subject: [PATCH 02/75] skeleton framework --- pandas/io/sql.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 44cb399336d62..e941ef6a10b24 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -681,6 +681,18 @@ def _execute_insert_multi(self, conn, keys, data_iter): data = [dict(zip(keys, row)) for row in data_iter] conn.execute(self.table.insert(data)) + def _execute_upsert_update(self): + """Execute an SQL UPSERT, and in cases of key clashes, + over-write records in the Database with incoming records. + """ + pass + + def _execute_upsert_ignore(self): + """Execute an SQL UPSERT, and in cases of key clashes, + keep records in the Database, and ignore incoming records. + """ + pass + def insert_data(self): if self.index is not None: temp = self.frame.copy() @@ -728,6 +740,10 @@ def insert(self, chunksize=None, method=None): exec_insert = self._execute_insert elif method == "multi": exec_insert = self._execute_insert_multi + elif method == "upsert_update": + raise NotImplementedError + elif method == "upsert_ignore": + raise NotImplementedError elif callable(method): exec_insert = partial(method, self) else: From f0726b337be3eb2fd86aa01f6dab42c2917f22e9 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 12 Oct 2019 22:57:36 +0100 Subject: [PATCH 03/75] update docstring and add TODO --- pandas/io/sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e941ef6a10b24..32c9cc440d0c2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -683,7 +683,7 @@ def _execute_insert_multi(self, conn, keys, data_iter): def _execute_upsert_update(self): """Execute an SQL UPSERT, and in cases of key clashes, - over-write records in the Database with incoming records. + overwrite records in the Database with incoming records. """ pass @@ -704,6 +704,7 @@ def insert_data(self): else: temp = self.frame + # TODO: column_names by list comprehension? 
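# A sketch (not part of this patch) of how the upsert stubs above might
# eventually be filled in, assuming SQLAlchemy's PostgreSQL dialect and a
# table with a primary key; `pkeys` is an illustrative name and the final
# patch may take a different approach:
#
# from sqlalchemy.dialects.postgresql import insert
#
# def _execute_upsert_update(self, conn, keys, data_iter):
#     data = [dict(zip(keys, row)) for row in data_iter]
#     stmt = insert(self.table).values(data)
#     pkeys = [c.name for c in self.table.primary_key.columns]
#     stmt = stmt.on_conflict_do_update(
#         index_elements=pkeys,
#         set_={k: stmt.excluded[k] for k in keys if k not in pkeys},
#     )
#     conn.execute(stmt)
#
# _execute_upsert_ignore would use stmt.on_conflict_do_nothing(
# index_elements=pkeys) in place of on_conflict_do_update.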
column_names = list(map(str, temp.columns)) ncols = len(column_names) data_list = [None] * ncols From c742f47358065e524a68e58ca04c2334013c4302 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 20 Oct 2019 20:10:12 +0100 Subject: [PATCH 04/75] scratch functions added --- pandas/io/sql_scratch.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 pandas/io/sql_scratch.py diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py new file mode 100644 index 0000000000000..a28b9432aaa66 --- /dev/null +++ b/pandas/io/sql_scratch.py @@ -0,0 +1,12 @@ +from sqlalchemy import Table, select +from sqlalchemy.engine.base import Connection + + +def get_pkey(table: Table): + return [pkey.name for pkey in table.primary_key.columns.values()] + + +def get_pkey_values(table: Table, conn: Connection): + pkeys = get_pkey(table) + statement = select([table.c[name] for name in pkeys]) + return [row for row in conn.execute(statement)] From d2a0c2ddbeed4e7b48742fb7b1d9dded7977106f Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 20 Oct 2019 20:17:31 +0100 Subject: [PATCH 05/75] generator comment --- pandas/io/sql_scratch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index a28b9432aaa66..702574af6c8d1 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -10,3 +10,5 @@ def get_pkey_values(table: Table, conn: Connection): pkeys = get_pkey(table) statement = select([table.c[name] for name in pkeys]) return [row for row in conn.execute(statement)] + # for row in conn.execute(statement): + # yield row From 686930cfceaab49b38bec318a35e8d2541193199 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 1 Nov 2019 09:29:43 +0000 Subject: [PATCH 06/75] Squashed 'vendor/github.com/V0RT3X4/python_utils/' content from commit 88a5a481b git-subtree-dir: vendor/github.com/V0RT3X4/python_utils git-subtree-split: 88a5a481b5dbec610e762df862fd69918c1b77d4 --- .circleci/config.yml | 35 ++++ .gitignore | 111 +++++++++++ README.md | 108 +++++++++++ aws/README.md | 1 + aws/README.rst | 0 aws/requirements.txt | 5 + aws/setup.py | 50 +++++ aws/tests/__init__.py | 4 + aws/tests/lambda_types/__init__.py | 0 aws/tests/lambda_types/message_eg.py | 19 ++ aws/tests/lambda_types/repeat_eg.py | 19 ++ aws/tests/lambda_types/test_lambda_types.py | 89 +++++++++ .../s3_client_encryption_tests/__init__.py | 4 + .../test_IOAuthDecrypter.py | 22 +++ .../test_IOAuthTagLength.py | 42 ++++ .../test_IODecrypter.py | 94 +++++++++ .../test_decrypt_s3_mime_with_attachment.py | 68 +++++++ .../test_decrypt_s3_object.py | 65 +++++++ .../test_kms_cipher_provider.py | 39 ++++ aws/tests/ses_inbox/__init__.py | 0 aws/tests/ses_inbox/test_get_attachments.py | 0 aws/tests/ses_inbox/test_list_inbox.py | 25 +++ aws/tests/ses_notification_types/__init__.py | 0 .../ses_notification_types/test_action.py | 16 ++ .../test_lambda_record.py | 32 +++ aws/tests/ses_notification_types/test_mail.py | 85 ++++++++ .../test_notification.py | 23 +++ .../ses_notification_types/test_receipt.py | 34 ++++ aws/tests/utils/__init__.py | 0 .../utils/nested_data_classes/__init__.py | 0 .../test_nested_dataclass.py | 36 ++++ aws/vortexa_utils/__init__.py | 5 + aws/vortexa_utils/aws/__init__.py | 4 + aws/vortexa_utils/aws/lambdr/__init__.py | 5 + aws/vortexa_utils/aws/lambdr/types.py | 45 +++++ aws/vortexa_utils/aws/s3/__init__.py | 0 aws/vortexa_utils/aws/s3/client.py | 50 +++++ .../client_side_encryption/IOAuthDecrypter.py | 40 ++++ .../IOAuthDecrypterTagLength.py | 65 +++++++ 
.../s3/client_side_encryption/IODecrypter.py | 61 ++++++ .../s3/client_side_encryption/IONocrypter.py | 38 ++++ .../aws/s3/client_side_encryption/__init__.py | 183 ++++++++++++++++++ .../client_side_encryption/cipher_provider.py | 17 ++ .../aws/s3/client_side_encryption/client.py | 103 ++++++++++ .../decrypt_handeler.py | 121 ++++++++++++ .../aws/s3/client_side_encryption/get.py | 75 +++++++ .../kms_cipher_provider.py | 61 ++++++ aws/vortexa_utils/aws/ses/__init__.py | 4 + .../aws/ses/application_mapper.py | 102 ++++++++++ aws/vortexa_utils/aws/ses/attachments.py | 15 ++ aws/vortexa_utils/aws/ses/inbox.py | 141 ++++++++++++++ .../aws/ses/notification/__init__.py | 0 .../aws/ses/notification/types/__init__.py | 5 + .../aws/ses/notification/types/action.py | 56 ++++++ .../ses/notification/types/lambda_record.py | 18 ++ .../aws/ses/notification/types/mail.py | 44 +++++ .../ses/notification/types/notification.py | 29 +++ .../aws/ses/notification/types/receipt.py | 65 +++++++ .../aws/ses/notification/types/verdicts.py | 43 ++++ aws/vortexa_utils/aws/utils/__init__.py | 4 + .../aws/utils/dataclasses/__init__.py | 1 + .../aws/utils/dataclasses/nested.py | 20 ++ collections/tests/__init__.py | 0 .../tests/collections/types/__init__.py | 0 .../types/test_instance_caching_abc.py | 130 +++++++++++++ .../vortexa_utils/collections/__inti__.py | 0 .../collections/types/__init__.py | 0 .../collections/types/instance_caching_abc.py | 45 +++++ database/README.md | 21 ++ database/README.rst | 28 +++ database/setup.py | 40 ++++ database/tests/__init__.py | 0 database/tests/test_database_factory.py | 16 ++ database/tests/test_querey_cache.py | 21 ++ database/vortexa_utils/__init__.py | 5 + database/vortexa_utils/database/__init__.py | 7 + database/vortexa_utils/database/database.py | 118 +++++++++++ .../database/default_factories.py | 20 ++ .../vortexa_utils/database/query_cache.py | 77 ++++++++ database/vortexa_utils/database/utils.py | 62 ++++++ deployment/setup.py | 20 ++ deployment/vortexa_utils/__init__.py | 5 + deployment/vortexa_utils/portainer/Readme.md | 1 + .../vortexa_utils/portainer/__init__.py | 8 + deployment/vortexa_utils/portainer/api.py | 56 ++++++ deployment/vortexa_utils/portainer/stacks.py | 61 ++++++ .../vortexa_utils/portainer/update_stack.py | 90 +++++++++ docker/pandas/Dockerfile | 25 +++ general/README.rst | 0 general/setup.py | 40 ++++ general/vortexa_utils/__init__.py | 5 + general/vortexa_utils/general/__init__.py | 0 general/vortexa_utils/git.py | 14 ++ logging/README.md | 55 ++++++ logging/setup.py | 38 ++++ logging/vortexa_utils/__init__.py | 1 + logging/vortexa_utils/logging/__init__.py | 1 + .../logging/exception_decorator.py | 12 ++ logging/vortexa_utils/logging/resources.py | 38 ++++ utils/vortexa_utils/utils/__init__.py | 0 .../utils/byte_stream_spliter.py | 31 +++ .../utils/sockets/socket_client.py | 24 +++ .../utils/sockets/socket_server.py | 17 ++ versioning/VERSION | 1 + versioning/setup.py | 34 ++++ versioning/tests/__init__.py | 0 versioning/tests/test_versioner.py | 47 +++++ versioning/vortexa_utils/__init__.py | 5 + .../vortexa_utils/versioning/__init__.py | 10 + .../vortexa_utils/versioning/__main__.py | 9 + versioning/vortexa_utils/versioning/cli.py | 46 +++++ versioning/vortexa_utils/versioning/utils.py | 22 +++ .../vortexa_utils/versioning/versioner.py | 99 ++++++++++ youve_got_mail/README.md | 0 youve_got_mail/README.rst | 0 youve_got_mail/requirements.txt | 2 + youve_got_mail/setup.py | 48 +++++ youve_got_mail/tests/__init__.py | 0 
youve_got_mail/vortexa_utils/__init__.py | 0 .../vortexa_utils/youve_got_mail.py | 43 ++++ 120 files changed, 3944 insertions(+) create mode 100644 .circleci/config.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 aws/README.md create mode 100644 aws/README.rst create mode 100644 aws/requirements.txt create mode 100644 aws/setup.py create mode 100644 aws/tests/__init__.py create mode 100644 aws/tests/lambda_types/__init__.py create mode 100644 aws/tests/lambda_types/message_eg.py create mode 100644 aws/tests/lambda_types/repeat_eg.py create mode 100644 aws/tests/lambda_types/test_lambda_types.py create mode 100644 aws/tests/s3_client_encryption_tests/__init__.py create mode 100644 aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py create mode 100644 aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py create mode 100644 aws/tests/s3_client_encryption_tests/test_IODecrypter.py create mode 100644 aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py create mode 100644 aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py create mode 100644 aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py create mode 100644 aws/tests/ses_inbox/__init__.py create mode 100644 aws/tests/ses_inbox/test_get_attachments.py create mode 100644 aws/tests/ses_inbox/test_list_inbox.py create mode 100644 aws/tests/ses_notification_types/__init__.py create mode 100644 aws/tests/ses_notification_types/test_action.py create mode 100644 aws/tests/ses_notification_types/test_lambda_record.py create mode 100644 aws/tests/ses_notification_types/test_mail.py create mode 100644 aws/tests/ses_notification_types/test_notification.py create mode 100644 aws/tests/ses_notification_types/test_receipt.py create mode 100644 aws/tests/utils/__init__.py create mode 100644 aws/tests/utils/nested_data_classes/__init__.py create mode 100644 aws/tests/utils/nested_data_classes/test_nested_dataclass.py create mode 100644 aws/vortexa_utils/__init__.py create mode 100644 aws/vortexa_utils/aws/__init__.py create mode 100644 aws/vortexa_utils/aws/lambdr/__init__.py create mode 100644 aws/vortexa_utils/aws/lambdr/types.py create mode 100644 aws/vortexa_utils/aws/s3/__init__.py create mode 100644 aws/vortexa_utils/aws/s3/client.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/client.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/get.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py create mode 100644 aws/vortexa_utils/aws/ses/__init__.py create mode 100644 aws/vortexa_utils/aws/ses/application_mapper.py create mode 100644 aws/vortexa_utils/aws/ses/attachments.py create mode 100644 aws/vortexa_utils/aws/ses/inbox.py create mode 100644 aws/vortexa_utils/aws/ses/notification/__init__.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/__init__.py create mode 100644 
aws/vortexa_utils/aws/ses/notification/types/action.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/lambda_record.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/mail.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/notification.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/receipt.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/verdicts.py create mode 100644 aws/vortexa_utils/aws/utils/__init__.py create mode 100644 aws/vortexa_utils/aws/utils/dataclasses/__init__.py create mode 100644 aws/vortexa_utils/aws/utils/dataclasses/nested.py create mode 100644 collections/tests/__init__.py create mode 100644 collections/tests/collections/types/__init__.py create mode 100644 collections/tests/collections/types/test_instance_caching_abc.py create mode 100644 collections/vortexa_utils/collections/__inti__.py create mode 100644 collections/vortexa_utils/collections/types/__init__.py create mode 100644 collections/vortexa_utils/collections/types/instance_caching_abc.py create mode 100644 database/README.md create mode 100644 database/README.rst create mode 100644 database/setup.py create mode 100644 database/tests/__init__.py create mode 100644 database/tests/test_database_factory.py create mode 100644 database/tests/test_querey_cache.py create mode 100644 database/vortexa_utils/__init__.py create mode 100644 database/vortexa_utils/database/__init__.py create mode 100644 database/vortexa_utils/database/database.py create mode 100644 database/vortexa_utils/database/default_factories.py create mode 100644 database/vortexa_utils/database/query_cache.py create mode 100644 database/vortexa_utils/database/utils.py create mode 100644 deployment/setup.py create mode 100644 deployment/vortexa_utils/__init__.py create mode 100644 deployment/vortexa_utils/portainer/Readme.md create mode 100644 deployment/vortexa_utils/portainer/__init__.py create mode 100644 deployment/vortexa_utils/portainer/api.py create mode 100644 deployment/vortexa_utils/portainer/stacks.py create mode 100644 deployment/vortexa_utils/portainer/update_stack.py create mode 100644 docker/pandas/Dockerfile create mode 100644 general/README.rst create mode 100644 general/setup.py create mode 100644 general/vortexa_utils/__init__.py create mode 100644 general/vortexa_utils/general/__init__.py create mode 100644 general/vortexa_utils/git.py create mode 100644 logging/README.md create mode 100644 logging/setup.py create mode 100644 logging/vortexa_utils/__init__.py create mode 100644 logging/vortexa_utils/logging/__init__.py create mode 100644 logging/vortexa_utils/logging/exception_decorator.py create mode 100644 logging/vortexa_utils/logging/resources.py create mode 100644 utils/vortexa_utils/utils/__init__.py create mode 100644 utils/vortexa_utils/utils/byte_stream_spliter.py create mode 100644 utils/vortexa_utils/utils/sockets/socket_client.py create mode 100644 utils/vortexa_utils/utils/sockets/socket_server.py create mode 100644 versioning/VERSION create mode 100644 versioning/setup.py create mode 100644 versioning/tests/__init__.py create mode 100644 versioning/tests/test_versioner.py create mode 100644 versioning/vortexa_utils/__init__.py create mode 100644 versioning/vortexa_utils/versioning/__init__.py create mode 100644 versioning/vortexa_utils/versioning/__main__.py create mode 100644 versioning/vortexa_utils/versioning/cli.py create mode 100644 versioning/vortexa_utils/versioning/utils.py create mode 100644 
versioning/vortexa_utils/versioning/versioner.py create mode 100644 youve_got_mail/README.md create mode 100644 youve_got_mail/README.rst create mode 100644 youve_got_mail/requirements.txt create mode 100644 youve_got_mail/setup.py create mode 100644 youve_got_mail/tests/__init__.py create mode 100644 youve_got_mail/vortexa_utils/__init__.py create mode 100644 youve_got_mail/vortexa_utils/youve_got_mail.py diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000..c44edbe3b610c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,35 @@ +version: 2 +jobs: + build: + working_directory: ~/project + docker: + - image: circleci/python:3.7 + steps: + - checkout + - restore_cache: &restore_cache + keys: + - v1-{{ .Branch }}- + # - run: + # name: "Install Python3" + # command: | + # apk add --no-cache \ + # python3 \ + # libc-dev \ + # gcc + - run: + name: "Test aws" + working_directory: ~/project/aws + command: | + python3.7 -m venv .venv; + . .venv/bin/activate + pip install -U pip + pip install -U -r requirements.txt + nose2 + python setup.py test + - save_cache: &save_cache + key: v1-{{ .Branch }}-{{ epoch }} + paths: + - ~/project/aws/.venv + - ~/project/database/.venv + - ~/project/deployment/.venv + - ~/.cache/pip diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000..2c06c5a32bbdb --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# VSCODE +.vscode + +.idea +*.iml +scratch.py diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..028c6e96fb015 --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# python_utils [![CircleCI](https://circleci.com/gh/V0RT3X4/python_utils.svg?style=svg&circle-token=30fa8fb22fa45a521a5d728e9accde63c242c2b4)](https://circleci.com/gh/V0RT3X4/python_utils) +Python utilities and helper functions/classes/modules + +## Sub Packages + +- [AWS](#aws) +- [Database](#database) +- [Deployment](#deployment) + +## Installation + +Installation is done by using [submodule vendoring](#vendoring). 
+Vendor the package into your project as [below](#vendoring), then you can install
+with
+```
+pip install vendor/github.com/V0RT3X4/python_utils/
+```
+or
+```
+echo vendor/github.com/V0RT3X4/python_utils/ >> requirements.txt
+pip install -r requirements.txt
+```
+
+## Aws
+
+Helper modules for `s3` client-side encryption, `ses` email processing
+(s3 as an inbox), and `lambda` function handler types.
+
+## Database
+
+Database connection helpers that get you a
+[`SQLAlchemy`](https://www.sqlalchemy.org/) connection [`Engine`](https://docs.sqlalchemy.org/en/latest/core/engines_connections.html)
+to an RDS or Redshift database, using
+`aws secretsmanager` for managing connection credentials and rotation, and with
+SSL encryption.
+
+## Deployment
+
+Custom Deployment Jazz
+
+## Installation - Vendoring the subtree
+To install the scripts into your project it is recommended to vendor this module as a `git subtree` as opposed to a `git submodule`. You will have a version of this code in your repo, and you can easily update and push changes back upstream.
+
+To make your life easier, install [git-vendor](https://github.com/brettlangdon/git-vendor).
+
+Then you can vendor the module into your repo and run installation scripts:
+```
+git vendor add python_utils git@github.com:V0RT3X4/python_utils.git master
+```
+
+Finally, you can install the modules you want:
+```
+pip install vendor/github.com/V0RT3X4/python_utils/
+```
+
+To update the reference:
+```
+git vendor update python_utils master
+```
+
+## As Submodule
+
+In the project directory:
+```
+git submodule add \
+    --name github.com/V0RT3X4/python_utils \
+    git@github.com:V0RT3X4/python_utils.git \
+    vendor/github.com/V0RT3X4/python_utils
+```
+
+Subsequently, when you check out the source code (say in
+[circleCI](https://circleci.com) or locally):
+```
+git clone git@github.com:/V0RT3X4/<project>.git
+cd <project>
+git submodule init
+git submodule update --remote
+```
+
+Finally, you can install the modules you want:
+```
+pip install vendor/github.com/V0RT3X4/python_utils/
+```
+
+## Contributing
+To contribute and push changes back upstream, add this repo as a remote:
+```
+git remote add -f python_utils git@github.com:V0RT3X4/python_utils.git
+```
+Push changes in the subtree:
+```
+git subtree push --prefix=vendor/github.com/V0RT3X4/python_utils python_utils some_branch
+```
+
+## [git-vendor](https://github.com/brettlangdon/git-vendor) installation
+
+```
+cd $(mktemp -d) && \
+git clone https://github.com/brettlangdon/git-vendor &> /dev/null && \
+cd git-vendor && \
+sudo make install
+```
+
+or
+
+```
+brew install git-vendor
+```
diff --git a/aws/README.md b/aws/README.md
new file mode 100644
index 0000000000000..f9e28102b5fbf
--- /dev/null
+++ b/aws/README.md
@@ -0,0 +1 @@
+# Vortexa AWS Python Utils
diff --git a/aws/README.rst b/aws/README.rst
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/aws/requirements.txt b/aws/requirements.txt
new file mode 100644
index 0000000000000..34a10a130c16c
--- /dev/null
+++ b/aws/requirements.txt
@@ -0,0 +1,5 @@
+boto3
+pycryptodomex
+nose2
+pandas
+logzero
diff --git a/aws/setup.py b/aws/setup.py
new file mode 100644
index 0000000000000..1e69b1cb89ad6
--- /dev/null
+++ b/aws/setup.py
@@ -0,0 +1,50 @@
+# @Author: richard
+# @Date: 2018-12-04T17:54:43+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-04T20:16:54+00:00
+import os
+import io
+from setuptools import setup, find_packages
+
+namespace = 'vortexa_utils'
+name = 'vortexa_utils_aws'
+version = '1'
+description = 'Vortexa AWS utils helper library'
+
+dependencies = [
+    'boto3',
+    'pycryptodomex'
+]
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, 'README.rst')
+with io.open(readme_filename, encoding='utf-8') as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in find_packages()
+    if package.startswith(namespace)
+]
+
+setup(
+    name=name,
+    version=version,
+    description=description,
+    long_description=readme,
+
+    author='Richard Mathie',
+    author_email='richard.mathie@vortexa.com',
+
+    zip_safe=False,
+    test_suite='nose2.collector.collector',
+    tests_require=['nose2', 'pandas'],
+
+    packages=packages,
+    install_requires=dependencies,
+    extras_require={
+        'pandas': ['pandas']
+    }
+)
diff --git a/aws/tests/__init__.py b/aws/tests/__init__.py
new file mode 100644
index 0000000000000..b0f42e4b71cc9
--- /dev/null
+++ b/aws/tests/__init__.py
@@ -0,0 +1,4 @@
+# @Author: richard
+# @Date: 2018-11-28T18:10:18+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-11-28T18:10:18+00:00
diff --git a/aws/tests/lambda_types/__init__.py b/aws/tests/lambda_types/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/aws/tests/lambda_types/message_eg.py b/aws/tests/lambda_types/message_eg.py
new file mode 100644
index 0000000000000..9cf39d5a99d58
--- /dev/null
+++ b/aws/tests/lambda_types/message_eg.py
@@ -0,0 +1,19 @@
+""" Example #1 """
+import os
+from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext
+
+MSG_TEMPLATE: str = os.environ.get('MSG_TEMPLATE') or 'Hello {} {}!'
+STAGE: str = os.environ.get('stage') or 'dev' + + +def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: + print('Received event {} for stage {}'.format(event, STAGE)) + first_name: str = event.get('first_name') # optional + last_name: str = event.get('last_name') # optional + return { + 'message': get_message(first_name, last_name), + } + + +def get_message(first_name: str = 'John', last_name: str = 'Smith'): + return MSG_TEMPLATE.format(first_name, last_name) diff --git a/aws/tests/lambda_types/repeat_eg.py b/aws/tests/lambda_types/repeat_eg.py new file mode 100644 index 0000000000000..95d5331e8f5f9 --- /dev/null +++ b/aws/tests/lambda_types/repeat_eg.py @@ -0,0 +1,19 @@ +""" Example #2 """ +import os +from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext + +N: int = int(os.environ.get('N') or 10) +STAGE: str = os.environ.get('stage') or 'dev' + + +def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: + print('Received event {} for stage {}'.format(event, STAGE)) + input: str = event['input'] # required + return { + 'output': get_output(input, N), + } + + +def get_output(input: str, num: int): + """ Return the input string repeated N times. """ + return input * num diff --git a/aws/tests/lambda_types/test_lambda_types.py b/aws/tests/lambda_types/test_lambda_types.py new file mode 100644 index 0000000000000..0cdad796b76dd --- /dev/null +++ b/aws/tests/lambda_types/test_lambda_types.py @@ -0,0 +1,89 @@ +import unittest +from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext +from .message_eg import handler as handler_message, get_message +from .repeat_eg import handler as handler_repeat, get_output + + +class TestMessageFunction(unittest.TestCase): + + def setUp(self): + self.context = LambdaContext() + + def test_handler(self) -> None: + event: LambdaDict = { + "first_name": "Alex", + "last_name": "Casalboni", + } + result = handler_message(event, self.context) + self.assertIn('message', result) + + def test_handler_empty(self) -> None: + event: LambdaDict = {} + result = handler_message(event, self.context) + self.assertIn('message', result) + + def test_message_default(self) -> None: + msg = get_message() + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('John', msg) + self.assertIn('Smith', msg) + self.assertTrue(msg.endswith('!')) + + def test_message_firstname(self) -> None: + msg = get_message(first_name='Charlie') + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('Charlie', msg) + self.assertIn('Smith', msg) + self.assertTrue(msg.endswith('!')) + + def test_message_lastname(self) -> None: + msg = get_message(last_name='Brown') + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('John', msg) + self.assertIn('Brown', msg) + self.assertTrue(msg.endswith('!')) + + def test_message(self) -> None: + msg = get_message(first_name='Charlie', last_name='Brown') + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('Charlie', msg) + self.assertIn('Brown', msg) + self.assertTrue(msg.endswith('!')) + + +class TestRepeatFunction(unittest.TestCase): + + def setUp(self): + self.context = LambdaContext() + + def test_handler(self) -> None: + event: LambdaDict = { + "input": "NaN", + } + result = handler_repeat(event, self.context) + self.assertIn('output', result) + self.assertEqual(30, len(result['output'])) + + def test_handler_empty(self) -> None: + event: LambdaDict = {} + with self.assertRaises(KeyError): + 
handler_repeat(event, self.context) + + def test_repeat_empty_string(self) -> None: + output = get_output('', 100) + self.assertIsInstance(output, str) + self.assertEqual(0, len(output)) + + def test_repeat_zero(self) -> None: + output = get_output('hello', 0) + self.assertIsInstance(output, str) + self.assertEqual(0, len(output)) + + def test_repeat(self) -> None: + output = get_output('hello', 10) + self.assertIsInstance(output, str) + self.assertEqual(50, len(output)) diff --git a/aws/tests/s3_client_encryption_tests/__init__.py b/aws/tests/s3_client_encryption_tests/__init__.py new file mode 100644 index 0000000000000..2e9b828ec304c --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-11-28T18:10:35+00:00 +# @Last modified by: richard +# @Last modified time: 2018-11-28T18:10:36+00:00 diff --git a/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py b/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py new file mode 100644 index 0000000000000..bf64d13548ac0 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py @@ -0,0 +1,22 @@ +# @Author: richard +# @Date: 2018-11-28T18:11:28+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T13:06:33+00:00 +from vortexa_utils.aws.s3.client_side_encryption import IOAuthDecrypter +from nose2.tools import params +from .test_IODecrypter import DummyChunksIO, IODecrypterTestCase + + +class IOAuthDecrypter(IODecrypterTestCase): + io_decrypter_class = IOAuthDecrypter.IOAuthDecrypter + + def get_decrypter(self, cypher, io, content_length): + return self.io_decrypter_class(cypher, io, content_length) + + def get_io(self, content_length): + tag_length = 128 + return DummyChunksIO(content_length + tag_length) + + def invalid_decryption(self, content_length): + with self.assertRaises(ValueError): + super().invalid_decryption(content_length) diff --git a/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py b/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py new file mode 100644 index 0000000000000..51685c22d13bd --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py @@ -0,0 +1,42 @@ +import unittest +import io +from vortexa_utils.aws.s3.client_side_encryption.IOAuthDecrypterTagLength \ + import StreamChunker +from nose2.tools import params + + +class StreamChunkerTestCase(unittest.TestCase): + + def get_chunker(self, io, tag_length): + return StreamChunker(io, tag_length) + + def test_tagged(self): + fixture = io.BytesIO(b'1234567890') + chunker = StreamChunker(fixture, 3) + bytes = chunker.read() + self.assertEqual(chunker.tag, b'890') + self.assertEqual(bytes, b'1234567') + + @params(*range(1, 11)) + def test_read_in_chunks(self, chunk): + bytes = b'1234567890' + fixture = io.BytesIO(bytes) + tag_length = 3 + chunker = StreamChunker(fixture, tag_length) + result = [] + index = 0 + while True: + byte = chunker.read(chunk) + if byte == b'': + break + result.append(byte) + self.assertEqual(bytes[index:index + len(byte)], byte) + index += len(byte) + print(result) + self.assertEqual(bytes[-tag_length:], chunker.tag) + self.assertEqual(b''.join(result), bytes[:-tag_length]) + # check that subsuquent reads return nothing and tag is correct + for i in range(10): + byte = chunker.read(chunk) + self.assertEqual(b'', byte) + self.assertEqual(bytes[-tag_length:], chunker.tag) diff --git a/aws/tests/s3_client_encryption_tests/test_IODecrypter.py 
b/aws/tests/s3_client_encryption_tests/test_IODecrypter.py new file mode 100644 index 0000000000000..cadab6acdaeae --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IODecrypter.py @@ -0,0 +1,94 @@ +# @Author: richard +# @Date: 2018-11-28T18:11:28+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T13:07:14+00:00 +from io import IOBase + +from vortexa_utils.aws.s3.client_side_encryption.IODecrypter import IODecrypter +import unittest +from nose2.tools import params + + +class DummyCipher(object): + def __init__(self, valid: bool = True): + self.valid = valid + + def decrypt(self, bytes): + return bytes + + def verify(self, tag): + if not self.valid: + raise ValueError("MAC check failed") + pass + + +class DummyChunksIO(IOBase): + _DEFAULT_CHUNK_SIZE = 1024 + + def __init__(self, size): + self.bytes_read = 0 + self.size = size + + def read(self, chunk=-1): + if chunk < 0: + chunk = self.size - self.bytes_read + else: + chunk = min(chunk, abs(self.size - self.bytes_read)) + self.bytes_read += chunk + return b' ' * chunk + + def __iter__(self): + """Return an iterator to yield 1k chunks from the raw stream. + """ + return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) + + def iter_chunks(self, chunk_size=_DEFAULT_CHUNK_SIZE): + """Return an iterator to yield chunks of chunk_size bytes from the raw + stream. + """ + while True: + bytes = self.read(chunk_size) + if bytes == b'': + break + yield bytes + + def close(self): + pass + + def readable(self): + return True + + def seekable(self): + return False + + def writable(self): + return False + + +class IODecrypterTestCase(unittest.TestCase): + io_decrypter_class = IODecrypter + + def get_decrypter(self, cypher, io, content_length): + return self.io_decrypter_class(cypher, io) + + def get_io(self, content_length): + return DummyChunksIO(content_length) + + def make_decrypter(self, content_length, valid=True): + io = DummyChunksIO(content_length) + cypher = DummyCipher(valid=valid) + return self.get_decrypter(cypher, io, content_length) + + @params(123, 1024, 1024*3, 1024*3+123, 1, 0) + def test_read(self, content_length): + with self.make_decrypter(content_length) as decrypter: + bytes = list(decrypter) + self.assertEqual(b''.join(bytes), b' ' * content_length) + + @params(123, 1024, 1024*3, 1024*3+123, 1, 0) + def test_invalid(self, content_length): + self.invalid_decryption(content_length) + + def invalid_decryption(self, content_length): + with self.make_decrypter(content_length, valid=False) as decrypter: + list(decrypter) diff --git a/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py new file mode 100644 index 0000000000000..0be487412d5c2 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py @@ -0,0 +1,68 @@ +# @Author: richard +# @Date: 2018-12-06T17:26:08+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T19:36:16+00:00 +# cd aws/vortexa_utils/ +# import aws.s3.client_side_encryption.client as client +import logging +import vortexa_utils.aws.s3.client_side_encryption.client as client +import io +import email.parser +from email import policy +from email.iterators import _structure +import base64 +from nose2.tools.such import helper + +import pandas as pd + +logger = logging.getLogger(__name__) + +Bucket = 'ops-data.incoming-emails' +Key = 'incoming_email/akrk0l8sq4lm7qkgj8hpurfshpnj8frgqpqe9mg1' +Key = 
'incoming_email/8ej2ldqnsmako2tgsbdpqg8tdi6tdnduoscojdo1' + + +def test_get_attachment(): + cl = client.Client() + parser = email.parser.BytesParser(policy=policy.default) + with cl.get_object(Bucket, Key) as io: + parsed = parser.parse(io) + _structure(parsed) + + # with open("/home/richard/an_email", 'wb') as f: + # for b in io: + # f.write(b) + # + # atts = list(parsed.iter_attachments()) + # [a.get_filename() for a in atts] + # [a.get_content_type() for a in atts] + # att = atts[2] + # att + # att.get_content_type() + # pd.read_excel(io.BytesIO(att.get_content())) + + target = parsed['to'] + source = parsed['from'] + helper.assertEqual(target, 'test@opsdata.vortexa.com') + helper.assertEqual(source, 'Richard Mathie ') + + parsed['subject'] + + for part in parsed.walk(): + print(part.get_content_type()) + att = parsed.get_payload() + att[0].get_content_type() + att[0].get_payload()[1].get_payload() + + logger.debug('\nwalking message') + for part in parsed.walk(): + content_type = part.get_content_type() + if content_type.startswith('text'): + logger.debug(content_type) + payload = part.get_payload() + if content_type == 'text/csv': + csv = base64.decodebytes(payload.encode('utf-8')) + for line in csv.splitlines(): + logger.debug(line) + else: + logger.debug('\n%s', payload) diff --git a/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py new file mode 100644 index 0000000000000..a33346502b0a2 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py @@ -0,0 +1,65 @@ +# @Author: richard +# @Date: 2018-12-06T13:27:47+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T17:24:43+00:00 +import logging +import sys +# cd aws/vortexa_utils/ +# import aws.s3.client_side_encryption.client as client +import vortexa_utils.aws.s3.client_side_encryption.client as client +import email.parser +from nose2.tools.such import helper + + +logger = logging.getLogger(__name__) + +Bucket = 'ops-data.incoming-emails' +Key = 'incoming_email/4pnlhtml86pobumjn9d59mbkcq3to1i43sjbd201' + + +def test_get_obj(): + self = client.Client() + location_info = self.s3.get_bucket_location(Bucket=Bucket) + logger.info('location %s', location_info) + + obj = self.s3.get_object(Bucket=Bucket, Key=Key) + handeler = client.DecryptHandeler(obj, self) + envelop = handeler.envelope_v2(handeler.metadata) + cipher = self.cipher_provider.decryptor(envelop) + assert handeler.auth_tag() + io = handeler.decrypt_auth(cipher) + + bytes = [] + while True: + byte = io.read(1024) + if byte == b'': + break + logger.info("Bytes Read %s/%s", io.bytes_read, io.content_length) + logger.debug("Bytes %s", byte) + bytes.append(byte) + io.verify() + io.close() + # logger.info('bytes %s', str(bytes)) + + +def test_get_obj_io(): + cl = client.Client() + with cl.get_object(Bucket, Key) as io: + list(io) + + +def test_get_obj_mime(): + cl = client.Client() + parser = email.parser.BytesParser() + with cl.get_object(Bucket, Key) as io: + parsed = parser.parse(io) + + target = parsed['to'] + source = parsed['from'] + helper.assertEqual(target, 'test@opsdata.vortexa.com') + helper.assertEqual(source, 'Richard Mathie ') + + logger.info('\twalking message') + for part in parsed.walk(): + if part.get_content_type().startswith('text'): + logger.info('\t%s', part.get_payload()) diff --git a/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py b/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py new file mode 100644 index 
0000000000000..7da39f7a34166 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py @@ -0,0 +1,39 @@ +# @Author: richard +# @Date: 2018-12-05T16:23:13+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-05T19:43:28+00:00 +import unittest +from vortexa_utils.aws.s3.client_side_encryption import kms_cipher_provider +import logging + + +logger = logging.getLogger(__name__) + + +def log_bytes(*bytes): + logger.info(f' bytes: {bytes}') + + +class KMSCipherProviderTest(unittest.TestCase): + test_key_id = 'alias/python_utils_test_key' + + def get_cipher(self): + return kms_cipher_provider.KMSCipherProvider(self.test_key_id) + + def test_encrypt(self): + envelope, cipher = self.get_cipher().encryptor() + plaintext = b"The quick brown fox jumped over the lazy dog" + self.plaintext = plaintext + ciphertext, tag = cipher.encrypt_and_digest(plaintext) + log_bytes(ciphertext, tag) + self.assertNotEqual(ciphertext, plaintext) + package = (envelope, ciphertext, tag) + return package + + def test_decrypt(self): + envelope, ciphertext, tag = self.test_encrypt() + cipher = kms_cipher_provider.KMSCipherProvider().decryptor(envelope) + plaintext = cipher.decrypt(ciphertext) + log_bytes(ciphertext, tag, plaintext) + self.assertEqual(plaintext, self.plaintext) + cipher.verify(tag) diff --git a/aws/tests/ses_inbox/__init__.py b/aws/tests/ses_inbox/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_inbox/test_get_attachments.py b/aws/tests/ses_inbox/test_get_attachments.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_inbox/test_list_inbox.py b/aws/tests/ses_inbox/test_list_inbox.py new file mode 100644 index 0000000000000..a8ff2a0bd81ee --- /dev/null +++ b/aws/tests/ses_inbox/test_list_inbox.py @@ -0,0 +1,25 @@ +# cd aws/vortexa_utils +# cd .. 
+from typing import Iterable +from vortexa_utils.aws.ses.inbox import Inbox +from email.message import EmailMessage +from itertools import islice + + +Path = 'incoming_email/' + +inbox = Inbox(default_bucket='ops-data.incoming-emails') + + +def test_list_inbox(): + inbox = Inbox(default_bucket='ops-data.incoming-emails') + emails: Iterable[EmailMessage] = islice( + inbox.list_emails(Path=Path), + 10 + ) + + for email in emails: + # print(email.as_string()) + attachments = list(email.iter_attachments()) + print(list(a.get_filename() for a in attachments)) + print(list(a.get_content_type() for a in attachments)) diff --git a/aws/tests/ses_notification_types/__init__.py b/aws/tests/ses_notification_types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_notification_types/test_action.py b/aws/tests/ses_notification_types/test_action.py new file mode 100644 index 0000000000000..1110fda3de888 --- /dev/null +++ b/aws/tests/ses_notification_types/test_action.py @@ -0,0 +1,16 @@ +from vortexa_utils.aws.ses.notification.types import Action +from json import loads + + +action_json_sns = """ +{ + "type": "SNS", + "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" +} +""" + + +def test_sns_action(): + action = Action(**loads(action_json_sns)) + assert action.type == "SNS" + assert action.topicArn == "arn:aws:sns:us-east-1:012345678912:example-topic" diff --git a/aws/tests/ses_notification_types/test_lambda_record.py b/aws/tests/ses_notification_types/test_lambda_record.py new file mode 100644 index 0000000000000..c489d6cd84e42 --- /dev/null +++ b/aws/tests/ses_notification_types/test_lambda_record.py @@ -0,0 +1,32 @@ +from vortexa_utils.aws.ses.notification.types import Record +from json import loads +from .test_mail import mail_json +from .test_receipt import receipt_json + + +ses = dict( + receipt=receipt_json, + mail=mail_json +) + + +record_json = loads(""" +{ + "eventSource": "aws:ses", + "eventVersion": "1.0", + "ses": { + "receipt": { + }, + "mail": { + } + } +} +""") + +record_json.update(ses=ses) + + +def test_record(): + record = Record(**record_json) + record.ses + assert record.eventSource == "aws:ses" diff --git a/aws/tests/ses_notification_types/test_mail.py b/aws/tests/ses_notification_types/test_mail.py new file mode 100644 index 0000000000000..bb558b3639e48 --- /dev/null +++ b/aws/tests/ses_notification_types/test_mail.py @@ -0,0 +1,85 @@ +from vortexa_utils.aws.ses.notification.types import Mail +from json import loads + +mail_json = loads(""" +{ +"timestamp": "2015-09-11T20:32:33.936Z", +"source": "61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com", +"messageId": "d6iitobk75ur44p8kdnnp7g2n800", +"destination": [ + "recipient@example.com" +], +"headersTruncated": false, +"headers": [ + { + "name": "Return-Path", + "value": "<0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com>" + }, + { + "name": "Received", + "value": "from a9-183.smtp-out.amazonses.com (a9-183.smtp-out.amazonses.com [54.240.9.183]) by inbound-smtp.us-east-1.amazonaws.com with SMTP id d6iitobk75ur44p8kdnnp7g2n800 for recipient@example.com; Fri, 11 Sep 2015 20:32:33 +0000 (UTC)" + }, + { + "name": "DKIM-Signature", + "value": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=ug7nbtf4gccmlpwj322ax3p6ow6yfsug; d=amazonses.com; t=1442003552; h=From:To:Subject:MIME-Version:Content-Type:Content-Transfer-Encoding:Date:Message-ID:Feedback-ID; bh=DWr3IOmYWoXCA9ARqGC/UaODfghffiwFNRIb2Mckyt4=; 
b=p4ukUDSFqhqiub+zPR0DW1kp7oJZakrzupr6LBe6sUuvqpBkig56UzUwc29rFbJF hlX3Ov7DeYVNoN38stqwsF8ivcajXpQsXRC1cW9z8x875J041rClAjV7EGbLmudVpPX 4hHst1XPyX5wmgdHIhmUuh8oZKpVqGi6bHGzzf7g=" + }, + { + "name": "From", + "value": "sender@example.com" + }, + { + "name": "To", + "value": "recipient@example.com" + }, + { + "name": "Subject", + "value": "Example subject" + }, + { + "name": "MIME-Version", + "value": "1.0" + }, + { + "name": "Content-Type", + "value": "text/plain; charset=UTF-8" + }, + { + "name": "Content-Transfer-Encoding", + "value": "7bit" + }, + { + "name": "Date", + "value": "Fri, 11 Sep 2015 20:32:32 +0000" + }, + { + "name": "Message-ID", + "value": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>" + }, + { + "name": "X-SES-Outgoing", + "value": "2015.09.11-54.240.9.183" + }, + { + "name": "Feedback-ID", + "value": "1.us-east-1.Krv2FKpFdWV+KUYw3Qd6wcpPJ4Sv/pOPpEPSHn2u2o4=:AmazonSES" + } +], +"commonHeaders": { + "returnPath": "0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com", + "from": [ + "sender@example.com" + ], + "date": "Fri, 11 Sep 2015 20:32:32 +0000", + "to": [ + "recipient@example.com" + ], + "messageId": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>", + "subject": "Example subject" +} +} +""") + + +def test_init(): + mail = Mail(**mail_json) + mail.headers diff --git a/aws/tests/ses_notification_types/test_notification.py b/aws/tests/ses_notification_types/test_notification.py new file mode 100644 index 0000000000000..56884ad7463dd --- /dev/null +++ b/aws/tests/ses_notification_types/test_notification.py @@ -0,0 +1,23 @@ +from vortexa_utils.aws.ses.notification.types import Notification +from json import loads +from .test_mail import mail_json +from .test_action import action_json_sns +from .test_receipt import receipt_json + + +nodification_json = loads(""" +{ +"notificationType": "Received", +"content": "blarblarblar" +} +""" +) + +nodification_json.update( + mail=mail_json, + receipt=receipt_json +) + + +def test_init(): + Notification(**nodification_json) diff --git a/aws/tests/ses_notification_types/test_receipt.py b/aws/tests/ses_notification_types/test_receipt.py new file mode 100644 index 0000000000000..e41ea7f8ce24d --- /dev/null +++ b/aws/tests/ses_notification_types/test_receipt.py @@ -0,0 +1,34 @@ +from json import loads +from vortexa_utils.aws.ses.notification.types import Receipt + + +receipt_json = loads(""" +{ +"timestamp": "2015-09-11T20:32:33.936Z", +"processingTimeMillis": 222, +"recipients": [ + "recipient@example.com" +], +"spamVerdict": { + "status": "PASS" +}, +"virusVerdict": { + "status": "PASS" +}, +"spfVerdict": { + "status": "PASS" +}, +"dkimVerdict": { + "status": "PASS" +}, +"action": { + "type": "SNS", + "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" +} +} +""") + + +def test_receipt(): + receipt = Receipt(**receipt_json) + receipt.dkimVerdict.status == "PASS" diff --git a/aws/tests/utils/__init__.py b/aws/tests/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/utils/nested_data_classes/__init__.py b/aws/tests/utils/nested_data_classes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/utils/nested_data_classes/test_nested_dataclass.py b/aws/tests/utils/nested_data_classes/test_nested_dataclass.py new file mode 100644 index 0000000000000..e15dffd75cc4d --- /dev/null +++ b/aws/tests/utils/nested_data_classes/test_nested_dataclass.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +# cd vortexa_utils/ +# 
from aws.utils.dataclasses import nested_dataclass +from vortexa_utils.aws.utils.dataclasses import nested_dataclass + + +@dataclass +class Foo: + a: str + b: int + + +@nested_dataclass +class Bar: + foo: Foo + baz: str + + +@nested_dataclass +class Bill: + bar: Bar + + +def test_init_class(): + data = dict( + bar=dict( + foo=dict(a="hello", b=1), + baz="world" + ) + ) + foo = Foo(**data['bar']['foo']) + bar = Bar(**data['bar']) + bill = Bill(**data) + + assert bill.bar == bar + assert bill.bar.foo == foo diff --git a/aws/vortexa_utils/__init__.py b/aws/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a7712f632a766 --- /dev/null +++ b/aws/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T20:12:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:12:57+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/aws/vortexa_utils/aws/__init__.py b/aws/vortexa_utils/aws/__init__.py new file mode 100644 index 0000000000000..dda33076e9246 --- /dev/null +++ b/aws/vortexa_utils/aws/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-12-04T20:13:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/aws/vortexa_utils/aws/lambdr/__init__.py b/aws/vortexa_utils/aws/lambdr/__init__.py new file mode 100644 index 0000000000000..4dcf5531789e7 --- /dev/null +++ b/aws/vortexa_utils/aws/lambdr/__init__.py @@ -0,0 +1,5 @@ +"""Vortexa python utils aws lambda helper functions and types. + +This module is called lambdr as `lambda` is a reserved word in python + +""" diff --git a/aws/vortexa_utils/aws/lambdr/types.py b/aws/vortexa_utils/aws/lambdr/types.py new file mode 100644 index 0000000000000..a1af1904a954b --- /dev/null +++ b/aws/vortexa_utils/aws/lambdr/types.py @@ -0,0 +1,45 @@ +""" Note: this code is used only by the static type checker! 
+ +_see: +_and: + +""" +from typing import Dict, Any + +LambdaDict = Dict[str, Any] + + +class LambdaCognitoIdentity(object): + cognito_identity_id: str + cognito_identity_pool_id: str + + +class LambdaClientContextMobileClient(object): + installation_id: str + app_title: str + app_version_name: str + app_version_code: str + app_package_name: str + + +class LambdaClientContext(object): + client: LambdaClientContextMobileClient + custom: LambdaDict + env: LambdaDict + + +class LambdaContext(object): + function_name: str + function_version: str + invoked_function_arn: str + memory_limit_in_mb: int + aws_request_id: str + log_group_name: str + log_stream_name: str + deadline_ms: int + identity: LambdaCognitoIdentity + client_context: LambdaClientContext + + @staticmethod + def get_remaining_time_in_millis() -> int: + return 0 diff --git a/aws/vortexa_utils/aws/s3/__init__.py b/aws/vortexa_utils/aws/s3/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/vortexa_utils/aws/s3/client.py b/aws/vortexa_utils/aws/s3/client.py new file mode 100644 index 0000000000000..da8e4814d10cd --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client.py @@ -0,0 +1,50 @@ +from tempfile import NamedTemporaryFile + +import boto3 +from logzero import logger +from pandas import DataFrame, read_hdf, read_csv + + +class S3Client: + def __init__(self, s3_bucket: str): + self.s3 = boto3.client("s3") + self.s3_bucket = s3_bucket + + def upload(self, filename: str, s3_key: str, owner_acl: bool = True): + logger.info("[s3] Started uploading: %s", s3_key) + self.s3.upload_file(filename, self.s3_bucket, s3_key) + logger.info("[s3] Finished uploading: %s", s3_key) + if owner_acl: + self.s3.put_object_acl( + ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=s3_key + ) + logger.info("[s3] bucket-owner-full-control ACL set") + + def hdf_pd(self, filename) -> DataFrame: + return self.__s3_pd__(filename, "hdf") + + def csv_pd(self, filename) -> DataFrame: + return self.__s3_pd__(filename, "csv") + + def copy(self, src, dest, owner_acl: bool = True): + copy_source = {"Bucket": self.s3_bucket, "Key": src} + self.s3.copy_object(CopySource=copy_source, Bucket=self.s3_bucket, Key=dest) + if owner_acl: + self.s3.put_object_acl( + ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=dest + ) + logger.info("[s3] bucket-owner-full-control ACL set") + + def __s3_pd__(self, filename, filetype) -> DataFrame: + with NamedTemporaryFile("wb") as f: + logger.info(f"[s3] Started downloading: s3://{self.s3_bucket}/{filename}") + self.s3.download_fileobj(self.s3_bucket, filename, f) + f.flush() + logger.info(f"[s3] Finished downloading: s3://{self.s3_bucket}/{filename}") + logger.info("[pandas] Started loading: %s", filename) + if filetype == "hdf": + df: DataFrame = read_hdf(f.name) + elif filetype == "csv": + df: DataFrame = read_csv(f.name) + logger.info("[pandas] Finished loading: %s", filename) + return df diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py new file mode 100644 index 0000000000000..6e948f7032109 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py @@ -0,0 +1,40 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:36+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:00:31+00:00 +import logging +from .IODecrypter import IODecrypter + +logger = logging.getLogger(__name__) + + +class IOAuthDecrypter(IODecrypter): + def __init__(self, 
cipher, io, content_length, chunk_size=16*1024):
+        super().__init__(cipher, io)
+        self.bytes_read = 0
+        self.content_length = content_length
+
+    def read(self, chunk=None):
+        remaining = self.content_length - self.bytes_read
+        chunk = remaining if chunk is None else min(chunk, remaining)
+        bytes = super().read(chunk)
+        logger.debug("Bytes Read %s/%s", self.bytes_read, self.content_length)
+        self.bytes_read += len(bytes)
+        return bytes
+
+    def verify(self):
+        # the remaining bytes should be the auth tag
+        tag = self.io.read()
+        logger.debug("Verifying Tag %s", tag)
+        self.cipher.verify(tag)
+
+    def iter_chunks(self, chunk_size=None):
+        """Return an iterator to yield chunks of chunk_size bytes from the raw
+        stream.
+        """
+        if chunk_size is None:
+            chunk_size = self._DEFAULT_CHUNK_SIZE
+
+        while self.bytes_read < self.content_length:
+            bytes = self.read(chunk_size)
+            yield bytes
+        self.verify()
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
new file mode 100644
index 0000000000000..c120281198139
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
@@ -0,0 +1,65 @@
+# @Author: richard
+# @Date: 2018-11-28T17:01:36+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T16:00:31+00:00
+import logging
+from .IODecrypter import IODecrypter
+from io import BytesIO, IOBase
+logger = logging.getLogger(__name__)
+
+
+class StreamChunker(IOBase):
+    """StreamChunker: a class that keeps back the last tag bytes of a stream.
+
+    Keeps hold of the last `tag_length` bytes in `self.tag`
+    when reading from a `BytesIO` object.
+    """
+
+    def __init__(self, io: BytesIO, tag_length: int):
+        self.io = io
+        self.tag_length = tag_length
+        # prime the tag buffer with the first `tag_length` bytes
+        self.tag = self.io.read(self.tag_length)
+
+    def read(self, chunk=None):
+        bytes = self.tag + self.io.read(chunk)
+        bytes, self.tag = bytes[:-self.tag_length], bytes[-self.tag_length:]
+        return bytes
+
+    def close(self):
+        """Close the underlying http response stream."""
+        self.io.close()
+
+    def readable(self):
+        return True
+
+    def seekable(self):
+        return False
+
+    def writable(self):
+        return False
+
+
+class IOAuthDecrypterTagLength(IODecrypter):
+    def __init__(self, cipher, io, tag_length, chunk_size=16*1024):
+        super().__init__(cipher, StreamChunker(io, tag_length))
+
+    def verify(self):
+        # the remaining bytes should be the auth tag
+        tag = self.io.tag
+        logger.debug("Verifying Tag %s", tag)
+        self.cipher.verify(tag)
+
+    def iter_chunks(self, chunk_size=None):
+        """Return an iterator to yield chunks of chunk_size bytes from the raw
+        stream.
+ """ + if chunk_size is None: + chunk_size = self._DEFAULT_CHUNK_SIZE + + while True: + bytes = self.read(chunk_size) + if bytes == b'': + break + yield bytes + self.verify() diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py new file mode 100644 index 0000000000000..9346aafcbe053 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py @@ -0,0 +1,61 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:20+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:57:10+00:00 +# from typing import Iterable + +from io import IOBase +from botocore.response import StreamingBody + +import logging + +logger = logging.getLogger(__name__) + + +class IODecrypter(IOBase): + _DEFAULT_CHUNK_SIZE = 1024 + + def __init__(self, cipher, io: StreamingBody): + self.cipher: object = cipher + self.io: StreamingBody = io + + def read(self, chunk=None): + bytes = self.io.read(chunk) + return self.cipher.decrypt(bytes) + + def __iter__(self): + """Return an iterator to yield 1k chunks from the raw stream.""" + return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) + + def iter_chunks(self, chunk_size: int = _DEFAULT_CHUNK_SIZE): + # type: (...) -> Iterable[bytes] + """Return an iterator to yield chunks bytes from the raw `io` stream. + + Parameters + ---------- + chunk_size : int + iterates over no more than Chunk size bytes. If `None` use + `self._DEFAULT_CHUNK_SIZE`. + + Returns + ------- + Iterator[bytes] + + """ + decrypt = self.cipher.decrypt + chunks = self.io.iter_chunks(chunk_size) + + return (decrypt(bytes) for bytes in chunks) + + def close(self): + """Close the underlying http response stream.""" + self.io.close() + + def readable(self): + return True + + def seekable(self): + return False + + def writable(self): + return False diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py new file mode 100644 index 0000000000000..3f613f19550c5 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py @@ -0,0 +1,38 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:20+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:57:10+00:00 +from typing import Iterable +from botocore.response import StreamingBody +from .IODecrypter import IODecrypter + +import logging + +logger = logging.getLogger(__name__) + + +class IONocrypter(IODecrypter): + + def __init__(self, io): + self.io: StreamingBody = io + + def read(self, chunk=None): + return self.io.read(chunk) + + def iter_chunks(self, chunk_size: int = None) -> Iterable[bytes]: + """Return an iterator to yield chunks bytes from the raw `io` stream. + + Parameters + ---------- + chunk_size : int + iterates over no more than Chunk size bytes. If `None` use + `self._DEFAULT_CHUNK_SIZE`. + + Returns + ------- + Iterator[bytes] + + """ + if chunk_size is None: + chunk_size = self._DEFAULT_CHUNK_SIZE + return self.io.iter_chunks(chunk_size) diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py b/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py new file mode 100644 index 0000000000000..628c41928cecc --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py @@ -0,0 +1,183 @@ +# @Author: richard +# @Date: 2018-11-28T15:15:44+00:00 +# @Last modified by: richard +# @Last modified time: 2018-11-28T15:15:44+00:00 + +""" +# From the RUBY Docs. 
+
+Provides an encryption client that encrypts and decrypts data client-side,
+storing the encrypted data in Amazon S3.
+
+This client uses a process called "envelope encryption". Your private
+encryption keys and your data's plain-text are **never** sent to
+Amazon S3. **If you lose your encryption keys, you will not be able to
+decrypt your data.**
+
+## Envelope Encryption Overview
+
+The goal of envelope encryption is to combine the performance of
+fast symmetric encryption with the secure key management that
+asymmetric keys provide.
+
+A one-time-use symmetric key (envelope key) is generated client-side.
+This is used to encrypt the data client-side. This key is then
+encrypted by your master key and stored alongside your data in Amazon
+S3.
+
+When accessing your encrypted data with the encryption client,
+the encrypted envelope key is retrieved and decrypted client-side
+with your master key. The envelope key is then used to decrypt the
+data client-side.
+
+One of the benefits of envelope encryption is that if your master key
+is compromised, you have the option of just re-encrypting the stored
+envelope symmetric keys, instead of re-encrypting all of the
+data in your account.
+
+## Basic Usage
+
+The encryption client requires an {Aws::S3::Client}. If you do not
+provide a `:client`, then a client will be constructed for you.
+
+    require 'openssl'
+    key = OpenSSL::PKey::RSA.new(1024)
+
+    # encryption client
+    s3 = aws.s3.client_side_encryption.Client(encryption_key: key)
+
+    # round-trip an object, encrypted/decrypted locally
+    s3.put_object(bucket:'aws-sdk', key:'secret', body:'handshake')
+    s3.get_object(bucket:'aws-sdk', key:'secret').body.read
+    #=> 'handshake'
+
+    # reading an encrypted object without the encryption client
+    # results in getting the cipher text
+    Aws::S3::Client.new.get_object(bucket:'aws-sdk', key:'secret').body.read
+    #=> "... cipher text ..."
+
+## Keys
+
+For client-side encryption to work, you must provide one of the following:
+
+* An encryption key
+* A {KeyProvider}
+* A KMS encryption key id
+
+### An Encryption Key
+
+You can pass a single encryption key. This is used as a master key
+for encrypting and decrypting all object keys.
+
+    key = OpenSSL::Cipher.new("AES-256-ECB").random_key # symmetric key
+    key = OpenSSL::PKey::RSA.new(1024) # asymmetric key pair
+
+    s3 = Aws::S3::Encryption::Client.new(encryption_key: key)
+
+### Key Provider
+
+Alternatively, you can use a {KeyProvider}. A key provider makes
+it easy to work with multiple keys and simplifies key rotation.
+
+### KMS Encryption Key Id
+
+If you pass the id of an AWS Key Management Service (KMS) key,
+then KMS will be used to generate, encrypt and decrypt object keys.
+
+    # keep track of the kms key id
+    kms = Aws::KMS::Client.new
+    key_id = kms.create_key.key_metadata.key_id
+
+    Aws::S3::Encryption::Client.new(
+      kms_key_id: key_id,
+      kms_client: kms,
+    )
+
+## Custom Key Providers
+
+A {KeyProvider} is any object that responds to:
+
+* `#encryption_materials`
+* `#key_for(materials_description)`
+
+Here is a trivial implementation of an in-memory key provider.
+This is provided as a demonstration of the key provider interface,
+and should not be used in production:
+
+    class KeyProvider
+
+      def initialize(default_key_name, keys)
+        @keys = keys
+        @encryption_materials = Aws::S3::Encryption::Materials.new(
+          key: @keys[default_key_name],
+          description: JSON.dump(key: default_key_name),
+        )
+      end
+
+      attr_reader :encryption_materials
+
+      def key_for(matdesc)
+        key_name = JSON.load(matdesc)['key']
+        if key = @keys[key_name]
+          key
+        else
+          raise "encryption key not found for: #{matdesc.inspect}"
+        end
+      end
+    end
+
+Given the above key provider, you can create an encryption client that
+chooses the key to use based on the materials description stored with
+the encrypted object. This makes it possible to use multiple keys
+and simplifies key rotation.
+
+    # uses "new-key" for encrypting objects, uses either for decrypting
+    keys = KeyProvider.new('new-key', {
+      "old-key" => Base64.decode64("kM5UVbhE/4rtMZJfsadYEdm2vaKFsmV2f5+URSeUCV4="),
+      "new-key" => Base64.decode64("w1WLio3agRWRTSJK/Ouh8NHoqRQ6fn5WbSXDTHjXMSo="),
+    })
+
+    # chooses the key based on the materials description stored
+    # with the encrypted object
+    s3 = Aws::S3::Encryption::Client.new(key_provider: keys)
+
+## Materials Description
+
+A materials description is a JSON document string that is stored
+in the metadata (or instruction file) of an encrypted object.
+The {DefaultKeyProvider} uses the empty JSON document `"{}"`.
+
+When building a key provider, you are free to store whatever
+information you need to identify the master key that was used
+to encrypt the object.
+
+## Envelope Location
+
+By default, the encryption client stores the encryption envelope
+with the object, as metadata. You can choose to have the envelope
+stored in a separate "instruction file". An instruction file
+is an object, with the key of the encrypted object, suffixed with
+`".instruction"`.
+
+Specify the `:envelope_location` option as `:instruction_file` to
+use an instruction file for storing the envelope.
+
+    # default behavior
+    s3 = Aws::S3::Encryption::Client.new(
+      key_provider: ...,
+      envelope_location: :metadata,
+    )
+
+    # store envelope in a separate object
+    s3 = Aws::S3::Encryption::Client.new(
+      key_provider: ...,
+      envelope_location: :instruction_file,
+      instruction_file_suffix: '.instruction' # default
+    )
+
+When using an instruction file, multiple requests are made when
+putting and getting the object. **This may cause issues if you are
+issuing concurrent PUT and GET requests to an encrypted object.**
+"""
+
+from .client import Client
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py b/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
new file mode 100644
index 0000000000000..954b2276986b2
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
@@ -0,0 +1,17 @@
+# @Author: richard
+# @Date:   2018-11-27T18:22:34+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-05T16:25:56+00:00
+from Cryptodome.Cipher import AES  # pycryptodomex
+
+
+class CipherProvider(object):
+    def __init__(self, key):
+        self.key = key
+
+    def decryptor(self, envelope):
+        # subclasses provide a decryption cipher for the given envelope
+        raise NotImplementedError
+
+    def encryptor(self):
+        cipher = AES.new(self.key, AES.MODE_GCM)
+        return cipher
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/client.py b/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
new file mode 100644
index 0000000000000..6ebccdba9b9cd
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
@@ -0,0 +1,103 @@
+# @Author: richard
+# @Date:   2018-11-28T15:15:54+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-06T18:07:33+00:00
+import boto3
+from .kms_cipher_provider import KMSCipherProvider
+from .decrypt_handeler import DecryptHandeler
+
+
+class Client(object):
+    """
+    Client Side Encryption S3 Client.
+
+    Attributes
+    ----------
+    s3 : botocore.client.S3
+    cipher_provider : .cipher_provider.CipherProvider
+
+    Methods
+    -------
+    get_object(Bucket, Key)
+        get and decrypt an object from s3
+
+    """
+
+    def __init__(
+            self,
+            client=None,
+            cipher_provider=None,
+            key_id=None,
+            strict=None,
+            **kwargs):
+        """Initialise the client side encryption s3 client.
+
+        Parameters
+        ----------
+        client : botocore.client.S3
+            Optional S3 client to use for s3 interaction
+            Will create client if not set.
+
+        cipher_provider : CipherProvider
+            Optional `CipherProvider` to provide encryption cipher
+            Will default to `KMSCipherProvider()` if not set.
+
+        key_id : str
+            The kms `key id`, `alias` or `aws::arn`
+            for the `KMSCipherProvider`.
+
+        strict : bool
+            When True, raise on objects that have no encryption envelope.
+
+        region_name : str
+            The region for the kms and s3 client resources.
+
+        """
+        region_name = kwargs.get('region_name')
+        self.s3 = client or boto3.client('s3', **kwargs)
+        self.cipher_provider = (
+            cipher_provider or KMSCipherProvider(
+                key_id=key_id,
+                region_name=region_name
+            )
+        )
+        self.strict = strict
+
+    def get_object(self, Bucket, Key):
+        """Retrieve object from Amazon S3.
+
+        See also:
+        `AWS API Documentation `_
+
+        `AWS Client Side Encryption `_
+
+        Parameters
+        ----------
+        Bucket : str
+            **[REQUIRED]** The Bucket
+        Key : str
+            **[REQUIRED]** The Path Key in the Bucket
+
+        """
+        # location_info = self.s3.get_bucket_location(Bucket=Bucket)
+        # bucket_region = location_info['LocationConstraint']
+
+        obj = self.s3.get_object(Bucket=Bucket, Key=Key)
+        handeler = DecryptHandeler(obj, self, self.strict)
+        return handeler.decrypt()
+
+    def object_encrypted(self, Bucket, Key) -> bool:
+        """Check if object has encryption envelope.
+
+        Parameters
+        ----------
+        Bucket : str
+            **[REQUIRED]** The Bucket
+        Key : str
+            **[REQUIRED]** The Path Key in the Bucket
+
+        Returns
+        -------
+        bool
+
+        """
+        obj = self.s3.head_object(Bucket=Bucket, Key=Key)
+        handeler = DecryptHandeler(obj, self)
+        return handeler.extract_envelop(handeler.metadata) is not None
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py b/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
new file mode 100644
index 0000000000000..464fc3c872642
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
@@ -0,0 +1,121 @@
+# @Author: richard
+# @Date:   2018-11-27T17:24:50+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-06T16:38:19+00:00
+import boto3
+import logging
+
+from .IODecrypter import IODecrypter
+from .IONocrypter import IONocrypter
+from .IOAuthDecrypter import IOAuthDecrypter
+from .IOAuthDecrypterTagLength import IOAuthDecrypterTagLength
+
+logger = logging.getLogger(__name__)
+kms = boto3.client('kms')
+
+
+class DecryptionException(Exception):
+    pass
+
+
+class DecryptHandeler(object):
+
+    V1_ENVELOPE_KEYS = {
+        'x-amz-key',
+        'x-amz-iv',
+        'x-amz-matdesc'
+    }
+
+    V2_ENVELOPE_KEYS = {
+        'x-amz-key-v2',
+        'x-amz-iv',
+        'x-amz-cek-alg',
+        'x-amz-wrap-alg',
+        'x-amz-matdesc'
+    }
+
+    POSSIBLE_ENVELOPE_KEYS = V1_ENVELOPE_KEYS | V2_ENVELOPE_KEYS
+
+    POSSIBLE_ENCRYPTION_FORMATS = {
+        'AES/GCM/NoPadding',
+        'AES/CBC/PKCS5Padding',
+        'AES/CBC/PKCS7Padding'
+    }
+
+    def __init__(self, obj, context, strict=False):
+        self.obj = obj
+        self.context = context
+        self.metadata = obj['Metadata']
+        # `head_object` responses carry no 'Body'; default to None so
+        # the handler can still inspect the envelope.
+        self.body = obj.get('Body')
+        self.strict = strict
+
+    def decrypt(self):
+        cipher = self.decryption_cipher()
+        logger.debug(self.metadata)
+        if cipher:
+            logger.debug(cipher)
+            if self.auth_tag():
+                return self.decrypt_auth(cipher)
+            return IODecrypter(cipher=cipher, io=self.body)
+        # Object not encrypted with an envelope
+        mesg = f"Unencrypted Object at {self.obj['ETag']}"
+        if self.strict:
+            logger.error(mesg)
+            raise ValueError(mesg)
+        else:
+            logger.warning(mesg)
+            return IONocrypter(io=self.body)
+
+    def auth_tag(self):
+        return 'x-amz-tag-len' in self.metadata
+
+    def decryption_cipher(self):
+        envelope = self.extract_envelop(self.metadata)
+        if envelope:
+            return self.context.cipher_provider.decryptor(envelope)
+
+    def extract_envelop(self, meta):
+        if 'x-amz-key' in meta:
+            return self.envelope_v1(meta)
+        elif 'x-amz-key-v2' in meta:
+            return self.envelope_v2(meta)
+
+        key_prefix = 'x-amz-key'
+        key = next((k for k in meta.keys() if k.startswith(key_prefix)), None)
+        if key is not None:
+            key_version = key[len(key_prefix):]
+            mesg = f'Unknown envelope encryption version {key_version}'
+            raise DecryptionException(mesg)
+        # no envelope found
+        return None
+
+    def envelope_v2(self, meta):
+        if meta['x-amz-cek-alg'] not in self.POSSIBLE_ENCRYPTION_FORMATS:
+            alg = meta['x-amz-cek-alg']
+            msg = f'unsupported content encrypting key format: {alg}'
+            raise DecryptionException(msg)
+        if meta['x-amz-wrap-alg'] != 'kms':
+            alg = meta['x-amz-wrap-alg']
+            msg = f'unsupported key wrapping algorithm: {alg}'
+            raise DecryptionException(msg)
+        if not self.V2_ENVELOPE_KEYS <= set(meta.keys()):
+            msg = "incomplete v2 encryption envelope:\n"
+            msg += f"  expected: {', '.join(self.V2_ENVELOPE_KEYS)}\n"
+            msg += f"  got: {', '.join(meta.keys())}"
+            raise DecryptionException(msg)
+        return meta
+
+    def envelope_v1(self, meta):
+        return meta
+
+    def decrypt_auth(self, cipher):
+        meta = self.metadata
+
+        content_length_string = meta.get(
+            'x-amz-unencrypted-content-length',
+            None
+        )
+        if content_length_string is not None:
+            content_length = int(content_length_string)
+            return IOAuthDecrypter(cipher, self.body, content_length)
+        tag_length = int(meta['x-amz-tag-len'])//8
+        return IOAuthDecrypterTagLength(cipher, self.body, tag_length)
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/get.py b/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
new file mode 100644
index 0000000000000..7f961e62c814e
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
@@ -0,0 +1,75 @@
+# @Author: richard
+# @Date:   2018-11-27T14:58:39+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-11-30T12:09:27+00:00
+
+# see https://medium.com/@samnco/reading-aws-ses-encrypted-emails-with-boto3-9c177f8ba130
+# and https://github.com/boto/boto3/issues/38
+
+import base64
+import json
+from Cryptodome.Cipher import AES  # pycryptodomex
+import boto3
+
+
+s3 = boto3.client('s3')
+kms = boto3.client('kms')
+
+
+def chunker(length, chunk_size):
+    index = 0
+    while index < length:
+        chunk = min(chunk_size, length - index)
+        index += chunk
+        yield chunk
+
+
+def content_streamer(bytes_io, content_length, chunk_size=16*1024):
+    for chunk in chunker(content_length, chunk_size):
+        yield bytes_io.read(chunk)
+
+
+def decrypt_object(obj):
+    metadata = obj['Metadata']
+    key_alg = metadata['x-amz-cek-alg']
+
+    envelope_key = base64.b64decode(metadata['x-amz-key-v2'])
+    envelope_iv = base64.b64decode(metadata['x-amz-iv'])
+    encrypt_ctx = json.loads(metadata['x-amz-matdesc'])
+
+    # x-amz-tag-len is in bits so // 8 to get bytes
+    tag_len = int(metadata['x-amz-tag-len']) // 8
+    original_size = int(metadata['x-amz-unencrypted-content-length'])
+
+    decrypted_envelope_key = kms.decrypt(
+        CiphertextBlob=envelope_key,
+        EncryptionContext=encrypt_ctx
+    )
+    key = decrypted_envelope_key['Plaintext']
+
+    if key_alg == 'AES/GCM/NoPadding':
+        cipher = AES.new(key, AES.MODE_GCM, envelope_iv)
+    elif key_alg == 'AES/CBC/PKCS5Padding':
+        cipher = AES.new(key, AES.MODE_CBC, envelope_iv)
+    else:
+        raise Exception('unknown encryption algorithm')
+
+    body = obj['Body']
+
+    body = body.read()
+    body, tag = body[:original_size], body[original_size:]
+    email = cipher.decrypt(body)
+    cipher.verify(tag)
+    return email
+
+
+def get_object(bucket, key):
+    obj = s3.get_object(Bucket=bucket, Key=key)
+    location_info = s3.get_bucket_location(Bucket=bucket)
+    bucket_region = location_info['LocationConstraint']
+    return decrypt_object(obj)
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py b/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
new file mode 100644
index 0000000000000..6700eedb5e0b4
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
@@ -0,0 +1,61 @@
+# @Author: richard
+# @Date:   2018-11-27T18:20:28+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-05T17:20:59+00:00
+import base64
+import boto3
+import json
+
+from Cryptodome.Cipher import AES  # pycryptodomex
+from .cipher_provider import CipherProvider
+
+
+class KMSCipherProvider(CipherProvider):
+    aes_mode_map = {
+        'AES/GCM/NoPadding': AES.MODE_GCM,
+        'AES/CBC/PKCS5Padding': AES.MODE_CBC,
+        'AES/CBC/PKCS7Padding': AES.MODE_CBC
+    }
+
+    def __init__(self, key_id=None, **kwargs):
+        self.kms = boto3.client('kms', **kwargs)
+        self.key_id = key_id
+
+    def decryptor(self, envelope):
+        key_alg = envelope['x-amz-cek-alg']
+        aes_mode = self.aes_mode_map.get(key_alg)
+        if aes_mode is None:
+            raise Exception(f'unknown encryption algorithm {key_alg}')
+
+        envelope_key = base64.b64decode(envelope['x-amz-key-v2'])
+        iv = base64.b64decode(envelope['x-amz-iv'])
+        encryption_context = json.loads(envelope['x-amz-matdesc'])
+
+        decrypted_envelope = self.kms.decrypt(
+            CiphertextBlob=envelope_key,
+            EncryptionContext=encryption_context
+        )
+        key = decrypted_envelope['Plaintext']
+        cipher = AES.new(key, aes_mode, iv)
+        return cipher
+
+    def encryptor(self):
+        encryption_context = {"kms_cmk_id": self.key_id}
+
+        key_data = self.kms.generate_data_key(
+            KeyId=self.key_id,
+            EncryptionContext=encryption_context,
+            KeySpec='AES_256'
+        )
+
+        key = key_data['Plaintext']
+        cipher = AES.new(key, AES.MODE_GCM)
+
+        envelope = {
+            'x-amz-key-v2': base64.encodebytes(key_data['CiphertextBlob']),
+            'x-amz-iv': base64.encodebytes(cipher.nonce),
+            'x-amz-cek-alg': 'AES/GCM/NoPadding',
+            'x-amz-wrap-alg': 'kms',
+            'x-amz-matdesc': json.dumps(encryption_context)
+        }
+        return envelope, cipher
diff --git a/aws/vortexa_utils/aws/ses/__init__.py b/aws/vortexa_utils/aws/ses/__init__.py
new file mode 100644
index 0000000000000..520cb4033d38a
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/__init__.py
@@ -0,0 +1,4 @@
+# @Author: richard
+# @Date:   2018-12-06T18:06:14+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-06T18:06:14+00:00
diff --git a/aws/vortexa_utils/aws/ses/application_mapper.py b/aws/vortexa_utils/aws/ses/application_mapper.py
new file mode 100644
index 0000000000000..1e910af5a7b9c
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/application_mapper.py
@@ -0,0 +1,102 @@
+import io
+from typing import Callable
+from collections.abc import Mapping
+from functools import wraps
+import pandas as pd
+
+
+def read_input_wrapper(read_func=None, **kwargs):
+    """A decorator to make the `pandas.io.parsers.read` functions
+    take `bytes` as input.
+
+    Parameters
+    ----------
+    read_func : `Callable[..., pd.DataFrame]`
+        The `pandas.io.parsers` function to decorate.
+        If not set `read_input_wrapper` will return a decorator.
+    **kwargs : `dict`
+        `kwargs` to pass on to `read_func`.
+
+    Returns
+    -------
+    function : `Callable[[bytes], pd.DataFrame]` |
+               `Callable[[Callable[..., pd.DataFrame]],
+                         Callable[[bytes], pd.DataFrame]]`
+        either a wrapped parser function, or a decorator which will
+        wrap a pandas parser function:
+
+    Examples
+    --------
+    >>> read_csv = read_input_wrapper(pd.read_csv)
+    >>> read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
+
+    or as a decorator
+
+    @read_input_wrapper
+    def read_foo(file, **kwargs) -> pd.DataFrame:
+        # some custom foo
+        return pd.DataFrame()
+
+    or
+
+    @read_input_wrapper(sep='\t')
+    def read_bar(file, **kwargs) -> pd.DataFrame:
+        # some custom bar
+        return pd.DataFrame()
+    """
+
+    def wrapper(func: Callable[..., pd.DataFrame]):
+
+        @wraps(func)
+        def reader(input: bytes) -> pd.DataFrame:
+            return func(io.BytesIO(input), **kwargs)
+        return reader
+
+    if read_func is None:
+        return wrapper
+    return wrapper(read_func)
+
+
+read_csv = read_input_wrapper(pd.read_csv)
+read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
+read_excel = read_input_wrapper(pd.read_excel, sheet_name=None)
+
+
+class ApplicationMapper(Mapping):
+    """A `Mapping` class to map MIME application types to a pandas reader."""
+
+    application_mapping = {
+        "text/plain": read_tsv,
+        "text/csv": read_csv,
+        "application/vnd.ms-excel": read_excel
+    }
+
+    # (MIME-type prefix, reader) pairs
+    aplication_prefixed = (
+        (
+            'application/vnd.ms-excel.sheet',
+            read_excel
+        ),
+        (
+            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            read_excel
+        )
+    )
+
+    def __getitem__(self, key):
+        func = self.application_mapping.get(key)
+        if func is not None:
+            return func
+        for prefix, func in self.aplication_prefixed:
+            if key.startswith(prefix):
+                return func
+        raise KeyError(key)
+
+    def __iter__(self):
+        return iter(self.application_mapping)
+
+    def __len__(self):
+        return len(self.application_mapping)
+
+
+application_mapping = ApplicationMapper()
diff --git a/aws/vortexa_utils/aws/ses/attachments.py b/aws/vortexa_utils/aws/ses/attachments.py
new file mode 100644
index 0000000000000..d5ef58684ee7f
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/attachments.py
@@ -0,0 +1,15 @@
+import email
+from .application_mapper import application_mapping
+
+
+class Attachment(object):
+
+    def __init__(self, attachment: email.message.EmailMessage):
+        self.attachment = attachment
+
+    def to_df(self):
+        content_type = self.attachment.get_content_type()
+        reader = application_mapping.get(content_type)
+        if reader is None:
+            raise TypeError(f"unknown content_type {content_type}")
+        return reader(self.attachment.get_content())
diff --git a/aws/vortexa_utils/aws/ses/inbox.py b/aws/vortexa_utils/aws/ses/inbox.py
new file mode 100644
index 0000000000000..4c3664093d938
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/inbox.py
@@ -0,0 +1,141 @@
+# @Author: richard
+# @Date:   2018-12-06T18:06:25+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-06T19:36:09+00:00
+from typing import Iterable
+import logging
+from datetime import datetime
+from functools import wraps
+import boto3
+# cd aws/vortexa_utils
+# import aws.s3.client_side_encryption.client as client
+import vortexa_utils.aws.s3.client_side_encryption.client as client
+import email
+import email.policy
+import email.parser
+from email.message import EmailMessage
+
+logger = logging.getLogger(__name__)
+
+
+class Inbox(object):
+    """An SES email inbox backed by an S3 bucket.
+
+    Parameters
+    ----------
+    default_bucket : str
+        Default s3 Bucket to associate the inbox with.
+
+    """
+
+    def __init__(self, default_bucket: str = None, **kwargs):
+        """Initialise the inbox.
+
+        Parameters
+        ----------
+        default_bucket : str
+            Default s3 Bucket to associate the inbox with.
+        strict : bool
+            When True will not fetch unencrypted emails. Defaults to False.
+        **kwargs : dict
+            **`kwargs` to pass to `s3.client`.
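+
+        Example (bucket name is illustrative)::
+
+            >>> inbox = Inbox(default_bucket='my-ses-inbox')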
+
+        """
+        self.bucket = default_bucket
+        self.s3crypto = client.Client(**kwargs)
+        self.s3 = self.s3crypto.s3
+        # Specify the default policy for email parsing else Parser defaults to
+        # email.policy.compat32 for python 3 and 2 compatibility
+        self.parser = email.parser.BytesParser(policy=email.policy.default)
+
+    def get_email(self, Key: str, Bucket: str = None) -> EmailMessage:
+        """Get `EmailMessage` Object from `Bucket`.
+
+        Parameters
+        ----------
+        Key : str
+            `Key` name of email in s3.
+        Bucket : str
+            s3 `Bucket` to look for email, will search `self.bucket` if `None`.
+
+        Returns
+        -------
+        email.message.EmailMessage
+            Email object.
+
+        """
+        Bucket = Bucket or self.bucket
+        if Bucket is None:
+            raise ValueError("Bucket not set")
+        with self.s3crypto.get_object(Bucket=Bucket, Key=Key) as io:
+            return self.parser.parse(io)
+
+    def list_objects(
+            self,
+            Bucket: str = None,
+            Path: str = None,
+            Begin: datetime = None,
+            Until: datetime = None):
+        # type: (...) -> Iterable['boto3.resources.factory.s3.ObjectSummary']
+        """List all objects in `Bucket` prefixed by `Path`.
+
+        Parameters
+        ----------
+        Bucket : str
+            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
+        Path : str
+            The `Path` prefix to filter the emails by, no filter if `None`.
+        Begin : datetime
+            Filter objects from this datetime.
+        Until : datetime
+            Filter objects until this datetime.
+
+        Returns
+        -------
+        iterable boto3.resources.factory.s3.ObjectSummary
+            List of matching email objects.
+
+        """
+        bucket = boto3.resource('s3').Bucket(Bucket or self.bucket)
+        objs = bucket.objects.filter(Prefix=Path)
+        if Begin:
+            objs = (obj for obj in objs if obj.last_modified >= Begin)
+        if Until:
+            objs = (obj for obj in objs if obj.last_modified <= Until)
+
+        if Begin is None and Until is None:
+            # if no timestamps, don't bother sorting
+            return objs
+        return sorted(objs, key=lambda o: o.last_modified)
+
+    @wraps(list_objects, assigned=('__annotations__',))
+    def list_emails(self, **kwargs) -> Iterable[EmailMessage]:
+        """List all emails in `Bucket` prefixed by `Path`.
+
+        Parameters
+        ----------
+        Bucket : str
+            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
+        Path : str
+            The `Path` prefix to filter the emails by, no filter if `None`.
+        Begin : datetime
+            Filter objects from this datetime.
+        Until : datetime
+            Filter objects until this datetime.
+
+        Returns
+        -------
+        iterable emails
+            List of matching email objects.
+
+        Examples
+        --------
+        >>> inbox = Inbox()
+        >>> inbox.list_emails(Path='some/sub/folder')
+
+        """
+        objects = self.list_objects(**kwargs)
+        for obj in objects:
+            yield self.get_email(obj.key, obj.bucket_name)
diff --git a/aws/vortexa_utils/aws/ses/notification/__init__.py b/aws/vortexa_utils/aws/ses/notification/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/aws/vortexa_utils/aws/ses/notification/types/__init__.py b/aws/vortexa_utils/aws/ses/notification/types/__init__.py
new file mode 100644
index 0000000000000..7eb901a004212
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/__init__.py
@@ -0,0 +1,5 @@
+from .action import Action
+from .mail import Mail
+from .receipt import Receipt
+from .notification import Notification
+from .lambda_record import Record
diff --git a/aws/vortexa_utils/aws/ses/notification/types/action.py b/aws/vortexa_utils/aws/ses/notification/types/action.py
new file mode 100644
index 0000000000000..d62791f941960
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/action.py
@@ -0,0 +1,56 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Action:
+    """Action Object.
+
+    Attributes
+    ----------
+    type : str
+        action that was executed. [S3, SNS, Bounce, Lambda, Stop, WorkMail].
+    topicArn : str
+        Amazon Resource Name (ARN) of the SNS topic of the notification.
+    bucketName : str
+        S3 bucket to which the message was published.
+        *Present only for the S3 action type.*
+    objectKey : str
+        name that uniquely identifies the email in the Amazon S3 bucket.
+        This is the same as the messageId in the mail Object.
+        *Present only for the S3 action type.*
+    smtpReplyCode : str
+        SMTP reply code, as defined by RFC 5321.
+        *Present only for the bounce action type.*
+    statusCode : str
+        SMTP enhanced status code, as defined by RFC 3463.
+        *Present only for the bounce action type.*
+    message : str
+        human-readable text to include in the bounce message.
+        *Present only for the bounce action type.*
+    sender : str
+        The email address of the sender of the email that bounced.
+        This is the address from which the bounce message was sent.
+        *Present only for the bounce action type.*
+    functionArn : str
+        ARN of the Lambda function that was triggered.
+        *Present only for the Lambda action type.*
+    invocationType : str
+        invocation type of the Lambda function. [RequestResponse, Event]
+        *Present only for the Lambda action type.*
+    organizationArn : str
+        ARN of the Amazon WorkMail organization.
+        *Present only for the WorkMail action type.*
+
+    """
+    type: str
+    topicArn: str = None
+    bucketName: str = None
+    objectKey: str = None
+    smtpReplyCode: str = None
+    statusCode: str = None
+    message: str = None
+    sender: str = None
+    functionArn: str = None
+    invocationType: str = None
+    organizationArn: str = None
diff --git a/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py b/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
new file mode 100644
index 0000000000000..3eecd720fedf8
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
@@ -0,0 +1,18 @@
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from .mail import Mail
+from .receipt import Receipt
+
+
+@nested_dataclass
+class SESRecord:
+    receipt: Receipt
+    mail: Mail
+
+
+@nested_dataclass
+class Record:
+    """A single SES record from a Lambda event."""
+    eventSource: str  # "aws:ses"
+    eventVersion: str  # "1.0"
+    ses: SESRecord
diff --git a/aws/vortexa_utils/aws/ses/notification/types/mail.py b/aws/vortexa_utils/aws/ses/notification/types/mail.py
new file mode 100644
index 0000000000000..49252ed6610f3
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/mail.py
@@ -0,0 +1,44 @@
+from typing import List, Dict, Any
+from dataclasses import dataclass
+
+
+@dataclass
+class Mail:
+    """Mail Object.
+
+    Attributes
+    ----------
+    destination: List[str]
+        A complete list of all recipient addresses (including To: and CC:)
+        from the MIME headers of the incoming email.
+    messageId: str
+        String that contains the unique ID assigned to the email by Amazon SES.
+        If the email was delivered to Amazon S3, the message ID is also the
+        Amazon S3 object key that was used to write the message to your Amazon
+        S3 bucket.
+    source: str
+        String that contains the email address (the envelope MAIL FROM address)
+        that the email was sent from.
+    timestamp: str
+        String that contains the time at which the email was received,
+        in ISO8601 format.
+    headers: List[Dict[str, str]]
+        A list of Amazon SES headers and your custom headers.
+        Each header in the list has a name field and a value field.
+    commonHeaders: Dict[str, Any]
+        A mapping of headers common to all emails,
+        composed of header names and values.
+    headersTruncated: str
+        String that specifies whether the headers were truncated,
+        which will happen if the headers are larger than 10 KB.
+        Possible values are true and false.
+
+    """
+
+    destination: List[str]
+    messageId: str
+    source: str
+    timestamp: str
+    headers: List[Dict[str, str]]
+    commonHeaders: Dict[str, Any]
+    headersTruncated: str
diff --git a/aws/vortexa_utils/aws/ses/notification/types/notification.py b/aws/vortexa_utils/aws/ses/notification/types/notification.py
new file mode 100644
index 0000000000000..19fee6d3060d4
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/notification.py
@@ -0,0 +1,29 @@
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from . import Mail, Receipt
+
+
+@nested_dataclass
+class Notification:
+    """Notification Object.
+
+    Attributes
+    ----------
+    notificationType: str
+        The notification type. For this type of notification,
+        the value is always Received.
+    receipt : Receipt
+        Object that contains information about the email delivery.
+    mail : Mail
+        Object that contains information about the email
+        associated with the notification.
+    content : str
+        String that contains the raw, unmodified email, which is typically
+        in Multipurpose Internet Mail Extensions (MIME) format.
+        *Only if the notification was triggered by an SNS action.*
+
+    """
+
+    notificationType: str
+    receipt: Receipt
+    mail: Mail
+    content: str
diff --git a/aws/vortexa_utils/aws/ses/notification/types/receipt.py b/aws/vortexa_utils/aws/ses/notification/types/receipt.py
new file mode 100644
index 0000000000000..b5d1a3857508d
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/receipt.py
@@ -0,0 +1,65 @@
+from typing import List
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from .action import Action
+from .verdicts import (DKIMVerdict,
+                       DMARCVerdict,
+                       SPFVerdict,
+                       SpamVerdict,
+                       VirusVerdict)
+
+
+@nested_dataclass
+class Receipt:
+    """SNS Receipt object.
+
+    Attributes
+    ----------
+    action : Action
+        Encapsulates information about the action that was executed.
+
+    dkimVerdict : DKIMVerdict
+        Indicates whether the DomainKeys Identified Mail (DKIM) check passed.
+
+    dmarcPolicy : str
+        Domain-based Message Authentication, Reporting & Conformance (DMARC)
+        settings for the sending domain.
+        This field only appears if the message fails DMARC authentication.
+        Possible values for this field are:
+        - none: no specific action be taken on messages that fail DMARC.
+        - quarantine: messages that fail DMARC be treated as suspicious.
+        - reject: messages that fail DMARC authentication be rejected.
+
+    dmarcVerdict : DMARCVerdict
+        Indicates whether the DMARC check passed.
+
+    processingTimeMillis : str
+        `str` specifying the period, in milliseconds, from the time Amazon SES
+        received the message to the time it triggered the action.
+
+    recipients : List[str]
+        list of recipients that were matched by the active receipt rule.
+        The addresses may differ from those listed by the destination field
+        in the mail Object.
+
+    spamVerdict : SpamVerdict
+        Indicates whether the message is spam.
+
+    spfVerdict : SPFVerdict
+        Whether the Sender Policy Framework (SPF) check passed.
+
+    timestamp : str
+        ISO 8601 format string representing when the action was triggered.
+
+    virusVerdict : VirusVerdict
+        Whether the message contains a virus.
+        For a list of possible values, see the VirusVerdict Object.
+    """
+    action: Action
+    processingTimeMillis: str
+    recipients: List[str]
+    timestamp: str
+    dmarcPolicy: str = None
+    dmarcVerdict: DMARCVerdict = None
+    dkimVerdict: DKIMVerdict = None
+    spamVerdict: SpamVerdict = None
+    spfVerdict: SPFVerdict = None
+    virusVerdict: VirusVerdict = None
diff --git a/aws/vortexa_utils/aws/ses/notification/types/verdicts.py b/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
new file mode 100644
index 0000000000000..a4a47e06ce02f
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
@@ -0,0 +1,43 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Verdict(object):
+    """Verdict object.
+
+    Attributes
+    ----------
+    status : str
+        String that contains the verdict. Possible values are:
+        - PASS: The message passed the given test.
+        - FAIL: The message failed the given test.
+        - GRAY: Amazon SES was unable to reach a definitive verdict
+          for the given test.
+        - PROCESSING_FAILED: There is an issue that prevents Amazon SES
+          from providing a verdict to the given test.
+    """
+    status: str
+
+
+@dataclass
+class DKIMVerdict(Verdict):
+    ...
+
+
+@dataclass
+class DMARCVerdict(Verdict):
+    ...
+
+
+@dataclass
+class SpamVerdict(Verdict):
+    ...
+
+
+@dataclass
+class SPFVerdict(Verdict):
+    ...
+
+
+@dataclass
+class VirusVerdict(Verdict):
+    ...
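The types above are plain (nested) dataclasses, so a receipt notification can be constructed directly from a decoded SNS/Lambda payload. A minimal sketch — all field values below are illustrative, and real notifications carry many more fields:

```python
from vortexa_utils.aws.ses.notification.types import Notification

# Trimmed, hypothetical SNS message payload.
payload = {
    "notificationType": "Received",
    "receipt": {
        "action": {"type": "S3", "bucketName": "my-inbox",
                   "objectKey": "abc123"},
        "processingTimeMillis": "231",
        "recipients": ["ops@example.com"],
        "timestamp": "2019-01-01T00:00:00.000Z",
        "spamVerdict": {"status": "PASS"},
    },
    "mail": {
        "destination": ["ops@example.com"],
        "messageId": "abc123",
        "source": "sender@example.com",
        "timestamp": "2019-01-01T00:00:00.000Z",
        "headers": [],
        "commonHeaders": {},
        "headersTruncated": "false",
    },
    "content": "",
}

# nested_dataclass converts the nested dicts into Receipt, Action,
# Mail and verdict instances automatically.
notification = Notification(**payload)
assert notification.receipt.action.bucketName == "my-inbox"
assert notification.receipt.spamVerdict.status == "PASS"
```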
diff --git a/aws/vortexa_utils/aws/utils/__init__.py b/aws/vortexa_utils/aws/utils/__init__.py new file mode 100644 index 0000000000000..dda33076e9246 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-12-04T20:13:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/aws/vortexa_utils/aws/utils/dataclasses/__init__.py b/aws/vortexa_utils/aws/utils/dataclasses/__init__.py new file mode 100644 index 0000000000000..0b443f83003f7 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/dataclasses/__init__.py @@ -0,0 +1 @@ +from .nested import * diff --git a/aws/vortexa_utils/aws/utils/dataclasses/nested.py b/aws/vortexa_utils/aws/utils/dataclasses/nested.py new file mode 100644 index 0000000000000..22e1b071fd8d0 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/dataclasses/nested.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass, is_dataclass +from functools import wraps + + +def nested_dataclass(*args, **kwargs): + def wrapper(cls): + cls = dataclass(cls, **kwargs) + original_init = cls.__init__ + + @wraps(original_init) + def __init__(self, *args, **kwargs): + for name, value in kwargs.items(): + field_type = cls.__annotations__.get(name, None) + if is_dataclass(field_type) and isinstance(value, dict): + new_obj = field_type(**value) + kwargs[name] = new_obj + original_init(self, *args, **kwargs) + cls.__init__ = __init__ + return cls + return wrapper(args[0]) if args else wrapper diff --git a/collections/tests/__init__.py b/collections/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/tests/collections/types/__init__.py b/collections/tests/collections/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/tests/collections/types/test_instance_caching_abc.py b/collections/tests/collections/types/test_instance_caching_abc.py new file mode 100644 index 0000000000000..3b8f0c0e2ec81 --- /dev/null +++ b/collections/tests/collections/types/test_instance_caching_abc.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Nov 19 14:02:03 2018 +@author: richard +""" +import unittest + +from vortexa_utils.collections.types.instance_caching_abc import ( + InstanceCachingABC, + instance_caching) + + +class InstanceCachingABCTests(unittest.TestCase): + + def register_class(self, klass): + setattr(self, klass.__name__, klass) + return klass + + def setUp(self): + @self.register_class + class Foo(object, metaclass=InstanceCachingABC): + pass + + @self.register_class + class Bar(object): + pass + + def test_signiture(self): + self.assertEqual(repr(self.Foo), repr(self.Bar).replace('Bar', 'Foo')) + + def test_instance_cache(self): + # no instances + self.assertFalse(list(self.Foo)) + + # one instance + foo = self.Foo() + foos = list(self.Foo) + self.assertEqual(len(foos), 1) + klass_name, instance = foos[0] + self.assertEqual(instance, foo) + self.assertEqual(klass_name, 'Foo') + + # more instances + foo2 = self.Foo() + foos = list(self.Foo) + self.assertEqual(len(foos), 2) + klass_name, instance = foos[-1] + self.assertEqual(instance, foo2) + self.assertEqual(klass_name, 'Foo') + + +class InstanceCachingDecoratorTests(InstanceCachingABCTests): + + def setUp(self): + register = self.register_class + + @register + class Foo(object): + pass + + self._Foo = Foo + self.Foo = Foo = instance_caching(Foo) + + @register + class Bar(Foo): + pass + + @register + class Baz(Bar): + pass + + 
@register
+        class Bo(Foo):
+            pass
+
+        @register
+        class Bill(Bo):
+            pass
+
+    def test_signiture(self):
+        self.assertEqual(repr(self.Foo), repr(self._Foo))
+
+    def test_list_subclasses(self):
+        self.assertEqual(
+            set(self.Foo._allsubclasses()),
+            set((self.Foo, self.Bar, self.Baz, self.Bo, self.Bill))
+        )
+        self.assertEqual(
+            set(self.Bar._allsubclasses()),
+            set((self.Bar, self.Baz))
+        )
+        self.assertEqual(
+            set(self.Bo._allsubclasses()),
+            set((self.Bill, self.Bo))
+        )
+
+    def test_instance_cache(self):
+        super().test_instance_cache()
+        # no instances in subclasses
+        for klass in self.Bar._allsubclasses():
+            self.assertFalse(list(klass))
+
+        for klass in self.Bo._allsubclasses():
+            self.assertFalse(list(klass))
+
+        self.assertEqual(len(list(self.Foo)), 2)
+        # one instance
+        bar = self.Bar()
+        foos = list(self.Foo)
+        bars = list(self.Bar)
+        self.assertEqual(len(foos), 3)
+        self.assertEqual(len(bars), 1)
+        klass_name, instance = bars[0]
+        self.assertEqual(instance, bar)
+        self.assertEqual(klass_name, 'Bar')
+
+        baz = self.Baz()
+        foos = list(self.Foo)
+        bars = list(self.Bar)
+        bazs = list(self.Baz)
+        self.assertEqual(len(foos), 4)
+        self.assertEqual(len(bars), 2)
+        self.assertEqual(len(bazs), 1)
+        klass_name, instance = bazs[0]
+        self.assertEqual(instance, baz)
+        self.assertEqual(klass_name, 'Baz')
+
+        for klass in self.Bo._allsubclasses():
+            self.assertFalse(list(klass))
diff --git a/collections/vortexa_utils/collections/__inti__.py b/collections/vortexa_utils/collections/__inti__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/collections/vortexa_utils/collections/types/__init__.py b/collections/vortexa_utils/collections/types/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/collections/vortexa_utils/collections/types/instance_caching_abc.py b/collections/vortexa_utils/collections/types/instance_caching_abc.py
new file mode 100644
index 0000000000000..cdc6c556c07be
--- /dev/null
+++ b/collections/vortexa_utils/collections/types/instance_caching_abc.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov 19 09:57:05 2018
+@author: richard
+"""
+from abc import ABCMeta
+
+
+class InstanceCachingABC(ABCMeta):
+    """Metaclass for defining Instance Caching Abstract Base Classes (ICABC).
+
+    Use this metaclass to create an ICABC. An ICABC will remember the
+    instances created from it and can be iterated over to return all
+    instances and subclass instances.
+    """
+
+    def __init__(cls, name, bases, namespace):
+        super().__init__(name, bases, namespace)
+        cls._instances = list()
+
+    def __call__(cls, *args, **kwargs):
+        instance = super().__call__(*args, **kwargs)
+        cls._instances.append(instance)
+        return instance
+
+    def _allsubclasses(cls):
+        yield cls
+        for subclass in cls.__subclasses__():
+            yield from subclass._allsubclasses()
+
+    # Metamethods, called on class objects:
+    def __iter__(cls):
+        return ((klass.__name__, instance)
+                for klass in cls._allsubclasses()
+                for instance in klass._instances)
+
+
+def instance_caching(klass):
+    class Decorated(klass, metaclass=InstanceCachingABC):
+        pass
+
+    Decorated.__name__ = klass.__name__
+    Decorated.__qualname__ = klass.__qualname__
+    Decorated.__module__ = klass.__module__
+    return Decorated
diff --git a/database/README.md b/database/README.md
new file mode 100644
index 0000000000000..4c64ed6286b79
--- /dev/null
+++ b/database/README.md
@@ -0,0 +1,21 @@
+# Vortexa Utils DatabaseFactory
+
+Small factory class to give you a `SqlAlchemy` engine connection to an
+`AWS rds` instance, ensuring SSL and credentials are obtained with the
+secrets manager.
+
+## Usage
+
+```python
+db_factory = DatabaseFactory()
+engine = db_factory.engine(dbname='rolling_backup')
+
+sql = """
+SELECT
+    name
+FROM new_polygons where name is not Null;
+"""
+
+engine.execute(sql)
+```
+
+## TODO Other utility functions
+
+- [ ] create a `~/.dbpass` file
diff --git a/database/README.rst b/database/README.rst
new file mode 100644
index 0000000000000..5f2775e7ba207
--- /dev/null
+++ b/database/README.rst
@@ -0,0 +1,28 @@
+.. contents::
+   :depth: 3
+..
+
+Vortexa Utils DatabaseFactory
+=============================
+
+Small factory class to give you a ``SqlAlchemy`` engine connection to an
+``AWS rds`` instance, ensuring SSL and credentials are obtained with the
+secrets manager.
+
+Usage
+-----
+
+.. code:: python
+
+    db_factory = DatabaseFactory()
+    engine = db_factory.engine(dbname='rolling_backup')
+
+    sql = """
+    SELECT
+        name
+    FROM new_polygons where name is not Null;
+    """
+
+    engine.execute(sql)
+
+TODO Other utility functions
+----------------------------
+
+- [ ] create a ``~/.dbpass`` file
diff --git a/database/setup.py b/database/setup.py
new file mode 100644
index 0000000000000..4ea029d37a074
--- /dev/null
+++ b/database/setup.py
@@ -0,0 +1,40 @@
+# @Author: richard
+# @Date:   2018-12-04T17:54:43+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T19:14:15+00:00
+import io
+import os
+
+from setuptools import find_packages, setup
+
+namespace = "vortexa_utils"
+description = "Vortexa Database Engine Factory"
+
+dependencies = ["boto3", "SqlAlchemy", "psycopg2-binary", "requests"]
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, "README.rst")
+with io.open(readme_filename, encoding="utf-8") as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in find_packages() if package.startswith(namespace)
+]
+
+setup(
+    name="vortexa_utils_database",
+    version="0.0.1",
+    description=description,
+    long_description=readme,
+    author="Richard Mathie",
+    author_email="richard.mathie@vortexa.com",
+    zip_safe=False,
+    tests_require=["nose2"],
+    test_suite="nose2.collector.collector",
+    packages=packages,
+    install_requires=dependencies,
+    extras_require={"query_cache": ["pandas", "pyarrow"]},
+)
diff --git a/database/tests/__init__.py b/database/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/database/tests/test_database_factory.py b/database/tests/test_database_factory.py
new file mode 100644
index 0000000000000..45ad343c6c796
--- /dev/null
+++ b/database/tests/test_database_factory.py
@@ -0,0 +1,16 @@
+import os
+import unittest
+
+from vortexa_utils.database import DatabaseFactory
+
+
+class TestEngineFactory(unittest.TestCase):
+    def test_create_factory(self):
+        db_factory = DatabaseFactory()
+        return db_factory
+
+    def test_get_cert(self):
+        db_factory = self.test_create_factory()
+        cert_file = db_factory.fetch_cert()
+        self.assertEqual(cert_file, db_factory.cert_file)
+        assert os.path.isfile(cert_file)
diff --git a/database/tests/test_querey_cache.py b/database/tests/test_querey_cache.py
new file mode 100644
index 0000000000000..2e441f58cdb2e
--- /dev/null
+++ b/database/tests/test_querey_cache.py
@@ -0,0 +1,21 @@
+# cd database
+import logging
+
+from vortexa_utils.database.default_factories import DevFactory
+from vortexa_utils.database.query_cache import QueryCache
+
+logger = logging.getLogger(__name__)
+
+logging.basicConfig(level=logging.DEBUG)
+
+# factory = DevFactory()
+# engine = factory.engine()
+# qc = QueryCache()
+
+# %time df = qc.read_sql("clarksons", engine)
+
+
+def test_filename():
+    qc = QueryCache()
+    assert qc.filename("some random query") == "qAdzxvMgeSc=.parquet.snappy"
+    assert qc.filename("banned_words") == "LoRkfDuNmuA=.parquet.snappy"
diff --git a/database/vortexa_utils/__init__.py b/database/vortexa_utils/__init__.py
new file mode 100644
index 0000000000000..a2ae790eb1d2c
--- /dev/null
+++ b/database/vortexa_utils/__init__.py
@@ -0,0 +1,5 @@
+# @Author: richard
+# @Date:   2018-12-04T19:01:36+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T19:01:39+00:00
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/database/vortexa_utils/database/__init__.py b/database/vortexa_utils/database/__init__.py
new file mode 100644
index 0000000000000..5c67964aad121
--- /dev/null
+++ b/database/vortexa_utils/database/__init__.py
@@ -0,0 +1,7 @@
+# @Author: richard
+# @Date:   2018-12-04T17:55:58+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T17:55:58+00:00
+
+from .database import DatabaseFactory
+from .default_factories import DevFactory, ProdFactory, RedFactory
diff --git a/database/vortexa_utils/database/database.py b/database/vortexa_utils/database/database.py
new file mode 100644
index 0000000000000..8634168939edd
--- /dev/null
+++ b/database/vortexa_utils/database/database.py
@@ -0,0 +1,118 @@
+# @Author: richard
+# @Date:   2018-12-04T17:58:19+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T17:58:19+00:00
+import json
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Dict
+
+import boto3
+import requests
+from sqlalchemy import create_engine
+
+logger = logging.getLogger(__name__)
+
+secretsmanager = boto3.client("secretsmanager")
+
+DEFAULT_CERT_URL = (
+    "https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem"
+)
+DEFAULT_CERT_PATH = "/tmp/vortexa_utils_py/rds/ca-bundle.pem"
+
+DEFAULT_CREDENTIAL = "rds/dev/default"
+DEFAULT_CREDENTIAL_MAPPING = dict(
+    host="host", username="user", port="port", password="password"
+)
+
+
+@dataclass
+class DatabaseFactory(object):
+    """DatabaseFactory Class.
+
+    Class for creating a database engine factory.
+
+    usage::
+
+        factory = DatabaseFactory()
+        engine = factory.engine()
+
+    Parameters
+    ----------
+    secret_id : str
+        `secret_id` of the database credential.
+        (the default is 'rds/dev/default', which points to the dev database
+        host)
+    cert_file : str
+        The location to store the ssl certificate file
+    cert_url : str
+        The url to fetch the aws rds ssl certificates from
+    credential_mapping : Dict[str, str]
+        A mapping between the `psycopg` connection args and the credential
+        keys
+    """
+
+    secret_id: str = DEFAULT_CREDENTIAL
+    cert_file: str = DEFAULT_CERT_PATH
+    cert_url: str = DEFAULT_CERT_URL
+    credential_mapping: Dict[str, str] = field(
+        default_factory=lambda: dict(DEFAULT_CREDENTIAL_MAPPING)
+    )
+
+    def __post_init__(self):
+        logger.debug(f"Created {self.secret_id} factory object")
+
+    def fetch_cert(self, force: bool = False):
+        if not os.path.isfile(self.cert_file) or force:
+            logger.info("getting cert")
+            os.makedirs(os.path.dirname(self.cert_file), exist_ok=True)
+            cert = requests.get(self.cert_url)
+            with open(self.cert_file, "w") as f:
+                f.write(cert.text)
+        return self.cert_file
+
+    def get_credential(self):
+        secret = secretsmanager.get_secret_value(SecretId=self.secret_id)
+        return json.loads(secret["SecretString"])
+
+    def engine(self, dbname: str = None, echo: bool = False, **kwargs):
+        # type (...) -> sqlalchemy.engine.Engine
+        """`sqlalchemy.engine.Engine` instance factory.
+
+        Parameters
+        ----------
+        dbname : str
+            database name `dbname` to connect to.
+            (the default is `None`, which will use the dbname in the secret
+            credential).
+        echo : bool
+            enable SQLAlchemy statement logging `echo` (the default is False).
+ + Returns + ------- + sqlalchemy.engine.Engine + SQLalchemy connection engine + + Examples + ------- + >>> factory = DatabaseFactory() + >>> engine = factory.engine() + + """ + cert_filename = self.fetch_cert() + credential = self.get_credential() + connect_args = { + v: credential[k] for k, v in self.credential_mapping.items() + } + + dbname = dbname or os.environ.get("DBNAME") or credential["dbname"] + host = connect_args.pop("host") + port = connect_args.pop("port") + + connect_args.update(sslmode="verify-full", sslrootcert=cert_filename) + engine = create_engine( + f"postgresql://{host}:{port}/{dbname}", + echo=echo, + connect_args=connect_args, + **kwargs, + ) + return engine diff --git a/database/vortexa_utils/database/default_factories.py b/database/vortexa_utils/database/default_factories.py new file mode 100644 index 0000000000000..d4f8ae0ca09e1 --- /dev/null +++ b/database/vortexa_utils/database/default_factories.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass + +from .database import DatabaseFactory + + +@dataclass +class DevFactory(DatabaseFactory): + secret_id: str = "rds/dev/default" + + +@dataclass +class ProdFactory(DatabaseFactory): + secret_id: str = "rds/prod/default" + + +@dataclass +class RedFactory(DatabaseFactory): + cert_url: str = "https://s3.amazonaws.com/redshift-downloads/redshift-ca-bundle.crt" + cert_file: str = "/tmp/vortexa_utils_py/rds/redshift-ca-bundle.pem" + secret_id: str = "redshift/prod/default" diff --git a/database/vortexa_utils/database/query_cache.py b/database/vortexa_utils/database/query_cache.py new file mode 100644 index 0000000000000..ea86e9a914cd5 --- /dev/null +++ b/database/vortexa_utils/database/query_cache.py @@ -0,0 +1,77 @@ +import hashlib +import logging +import os +import time +from base64 import urlsafe_b64encode +from dataclasses import dataclass +from functools import wraps +from typing import Union + +import pandas as pd +from sqlalchemy.engine import Connection, Engine + +from pyarrow.lib import ArrowIOError + +logger = logging.getLogger(__name__) + + +@dataclass +class QueryCache(object): + result_extension: str = ".parquet.snappy" + cache_dir: str = os.path.join( + "/tmp", "python_utils", "database", "query_cache", "df_cache" + ) + ttl: int = 3600 + + def __post_init__(self): + os.makedirs(self.cache_dir, exist_ok=True) + + def path(self, url): + return os.path.join( + self.cache_dir, + url.drivername, + f"{url.host}:{url.port}", + url.database, + ) + + def filename(self, query): + query_digest = urlsafe_b64encode( + hashlib.blake2s(str(query).encode(), digest_size=8).digest() + ) + return query_digest.decode("ascii") + self.result_extension + + @wraps(pd.read_sql) + def read_sql( + self, + query: str, + con: Union[Engine, Connection], + ttl: int = None, + invalidate_cache: bool = False, + *args, + **kwargs, + ) -> pd.DataFrame: + + # formulate a path + path = self.path(con.engine.url) + filename = self.filename(query) + filepath = os.path.join(path, filename) + os.makedirs(path, exist_ok=True) + + # check if the cache exists and is valid + ttl = self.ttl if ttl is None else ttl + + if ( + os.path.isfile(filepath) + and time.time() - os.path.getmtime(filepath) < ttl + ): + try: + logger.debug("reading from cache %s", filepath) + df = pd.read_parquet(filepath) + except ArrowIOError as e: + logger.error("Invalid Cache file, error: %s", e) + else: + return df + logger.debug("reading from database") + df = pd.read_sql(query, con, *args, **kwargs) + df.to_parquet(filepath) + return df diff --git 
a/database/vortexa_utils/database/utils.py b/database/vortexa_utils/database/utils.py new file mode 100644 index 0000000000000..811e36443265d --- /dev/null +++ b/database/vortexa_utils/database/utils.py @@ -0,0 +1,62 @@ +from typing import Iterable, List + +import sqlalchemy +from pandas.io.sql import SQLTable +from sqlalchemy.engine import Connectable + + +def upsert( + table: SQLTable, conn: Connectable, keys: List[str], data_iter: Iterable +): + """Upsert method to be used with `pandas.DataFrame.to_sql`. + + In pandas > 0.24.0 you can specify a method to control the insertion clause + used by `pandas.DataFrame.to_sql`. + + Parameters + ---------- + table : pandas.io.sql.SQLTable + Description of parameter `table`. + conn : sqlalchemy.engine.Connectable + Description of parameter `conn`. + keys : List[str] + Description of parameter `keys`. + data_iter : Iterable + Description of parameter `data_iter`. + + Returns + ------- + type + Description of returned object. + + Examples + ------- + Examples should be written in doctest format, and + should illustrate how to use the function/class. + >>> + + """ + cols = ", ".join(f'"{k}"' for k in keys) + if table.schema: + tname = "{}.{}".format(table.schema, table.name) + else: + tname = table.name + + # placeholder = ", ".join(["?"] * len(keys)) + placeholder = ", ".join([f":{k}" for k in keys]) + datas = ({k: d for k, d in zip(keys, data)} for data in data_iter) + if conn.engine.driver.endswith("sqlite"): + # sqlite + sql = f"INSERT or IGNORE INTO {tname} ({cols}) VALUES ({placeholder})" + else: + # postgresql + sql = sqlalchemy.text( + f""" + INSERT INTO {tname} + ({cols}) + VALUES ({placeholder}) + ON CONFLICT DO NOTHING + """ + ) + + conn.execute(sql, *datas) diff --git a/deployment/setup.py b/deployment/setup.py new file mode 100644 index 0000000000000..6432302dac087 --- /dev/null +++ b/deployment/setup.py @@ -0,0 +1,20 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:16:08+00:00 +from setuptools import setup, find_packages + + +setup( + name='vortexa_utils_deploy', + version='0.0.1', + description='', + long_description='', + + author='Richard Mathie', + author_email='richard.mathie@vortexa.com', + + zip_safe=False, + + packages=find_packages(), +) diff --git a/deployment/vortexa_utils/__init__.py b/deployment/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a2ae790eb1d2c --- /dev/null +++ b/deployment/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T19:01:36+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:01:39+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/deployment/vortexa_utils/portainer/Readme.md b/deployment/vortexa_utils/portainer/Readme.md new file mode 100644 index 0000000000000..f1d2dcd78744e --- /dev/null +++ b/deployment/vortexa_utils/portainer/Readme.md @@ -0,0 +1 @@ +# Portainer API Helper Module diff --git a/deployment/vortexa_utils/portainer/__init__.py b/deployment/vortexa_utils/portainer/__init__.py new file mode 100644 index 0000000000000..26e33c55820aa --- /dev/null +++ b/deployment/vortexa_utils/portainer/__init__.py @@ -0,0 +1,8 @@ +# @Author: richard +# @Date: 2018-12-04T17:56:21+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:11:43+00:00 + + +def notNone(x): + return x is not None diff --git a/deployment/vortexa_utils/portainer/api.py b/deployment/vortexa_utils/portainer/api.py new file mode 
100644 index 0000000000000..456ace9496cba --- /dev/null +++ b/deployment/vortexa_utils/portainer/api.py @@ -0,0 +1,56 @@ +# @Author: richard +# @Date: 2018-12-04T18:05:38+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:14:40+00:00 +import os +import requests +from functools import wraps +from urllib.parse import urlparse +from .stacks import Stacks +from . import notNone + + +class RequestHelper(object): + def __init__(self, api, base_url='api'): + self.api = api + self.base_url = base_url + + def wrapper(self, func): + @wraps(func) + def caller(url=None, *args, **kwargs): + parts = filter(notNone, (self.api.host, self.base_url, url)) + parts = map(str, parts) + headers = kwargs.get("headers", {}) + headers.update(self.api.get_header()) + kwargs["headers"] = headers + return func(os.path.join(*parts), + *args, **kwargs).json() + return caller + + def __getattr__(self, name, *args, **kwargs): + method = getattr(requests, name, *args, **kwargs) + return self.wrapper(method) + + +class PortainerAPI(object): + def __init__(self, host, user=None, pw=None): + self.host = urlparse(host, scheme='http').geturl() + self.user = user + self.pw = pw + if any(ting is not None for ting in (host, user, pw)): + self.get_jwt() + self.requests = RequestHelper(self) + self.stacks = Stacks(self) + + def get_jwt(self): + """ + http POST :9000/api/auth Username="admin" Password="adminpassword" + """ + url = f'{self.host}/api/auth' + resp = requests.post(url, json=dict(Username=self.user, + Password=self.pw)) + self.token = resp.json().get('jwt') + return self.token + + def get_header(self): + return {"Authorization": f"Bearer {self.token}"} diff --git a/deployment/vortexa_utils/portainer/stacks.py b/deployment/vortexa_utils/portainer/stacks.py new file mode 100644 index 0000000000000..8eaf2f8d7482d --- /dev/null +++ b/deployment/vortexa_utils/portainer/stacks.py @@ -0,0 +1,61 @@ +# @Author: richard +# @Date: 2018-12-04T18:04:55+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:34:09+00:00 +from .api import RequestHelper + + +class Stacks(object): + def __init__(self, api): + self.api = api + self.requests = RequestHelper(api, 'api/stacks') + + def get(self, stack_id): + return self.requests.get(stack_id) + + def list(self): + return self.get(None) + + def filter(self, **kwargs): + def filter_kwargs(stack): + return all(str(stack[k]) == str(v) for k, v in kwargs.items()) + + return filter(filter_kwargs, self.list()) + + def first(self, **kwargs): + return next(self.filter(**kwargs)) + + def get_file(self, stack_id): + return self.requests.get(f'{stack_id}/file') + + def update(self, stack_id=None, endpointId=None, name=None, + Env=None, StackFileContent=None, Prune=False): + # get the stack by filtering on name or stack_id + if name is not None: + stack = self.first(Name=name) + stack_id = stack['Id'] + elif stack_id is not None: + stack = self.get(stack_id) + + endpointId = stack.get('EndpointId', endpointId) + if endpointId is None: + raise Exception("no entrypointID found or set") + + # update the old Env with the new Env + old_Env = stack.get('Env') + if old_Env is not None: + update_keys = set(e['name'] for e in Env) + old_Env = list(e for e in old_Env if e['name'] not in update_keys) + Env += old_Env + + if StackFileContent is None: + StackFileContent = self.get_file(stack_id)['StackFileContent'] + body = dict(StackFileContent=StackFileContent, + Env=Env, + Prune=Prune) + + return self.requests.put( + stack_id, + params=dict(endpointId=endpointId), + 
diff --git a/deployment/vortexa_utils/portainer/update_stack.py b/deployment/vortexa_utils/portainer/update_stack.py
new file mode 100644
index 0000000000000..275f8e6dd8604
--- /dev/null
+++ b/deployment/vortexa_utils/portainer/update_stack.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# @Author: richard
+# @Date:   2018-12-04T18:10:07+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T18:31:45+00:00
+import argparse
+from pprint import pprint
+from . import notNone
+from .api import PortainerAPI
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Portainer API CLI')
+    parser.add_argument('--host', '-H', type=str,
+                        help='Host name of Portainer API',
+                        default='https://lawgiver.vortexa.com:9000')
+    parser.add_argument('--user', '-u', type=str,
+                        help='User name',
+                        default='kraftwork_updater')
+    parser.add_argument('--pass', '-p', type=str, dest='password',
+                        help='Password')
+
+    parser.add_argument('--name', '-n', type=str,
+                        help='Stack name to filter')
+
+    parser.add_argument('--env', '-e', nargs=2, action='append',
+                        help='key value pairs of config to update')
+
+    parser.add_argument('--filter', '-f', nargs=2, action='append',
+                        help='key value pairs to filter stacks by')
+
+    def add_cmd(flag):
+        def command(func):
+            parser.add_argument(
+                flag,
+                action='store_const',
+                const=func,
+                dest='cmd'
+            )
+            return func
+        return command
+
+    def get_filter():
+        Filter = {}
+        if args.filter is not None:
+            Filter.update(args.filter)
+        if args.name is not None:
+            Filter.update(Name=args.name)
+        return Filter
+
+    @add_cmd('--list')
+    def list_stacks():
+        if any(map(notNone, ((args.name, args.filter)))):
+            Filter = get_filter()
+            return list(api.stacks.filter(**Filter))
+        else:
+            return api.stacks.list()
+
+    @add_cmd('--update')
+    def update_stacks():
+        env = [dict(name=k, value=v) for k, v in args.env]
+        return api.stacks.update(name=args.name, Env=env)
+
+    args = parser.parse_args()
+
+    api = PortainerAPI(host=args.host,
+                       user=args.user,
+                       pw=args.password)
+
+    pprint(args.cmd())
+
+# api.stacks.list()
+# api.stacks.update(
+#     1, 1,
+#     Env=[{
+#         "name": "KFAFTWERK_BUILD_NUM",
+#         "value": '376'
+#     }]
+# )
+#
+#
+# content = Path('docker/scripts/docker-compose.yml').read_text()
+#
+# api.requests.post('stacks?type=1&method=string&endpointId=1',
+#                   json=dict(
+#                       Name="myStack",
+#                       StackFileContent=content,
+#                       Env=[dict(name="Hello",value="world")],
+#                       SwarmID='729a4f2h5kj2sd42x34pl3uu1'
+#                   )
+#                   )
diff --git a/docker/pandas/Dockerfile b/docker/pandas/Dockerfile
new file mode 100644
index 0000000000000..76155dd44eb33
--- /dev/null
+++ b/docker/pandas/Dockerfile
@@ -0,0 +1,25 @@
+FROM python:3.7-alpine
+ARG PANDAS_VERSION=0.24.1
+
+RUN apk add libstdc++ \
+    && apk add --no-cache --virtual .build-deps \
+        g++ \
+        gcc \
+        gfortran \
+        build-base \
+        wget \
+        freetype-dev \
+        libpng-dev \
+        openblas-dev \
+        postgresql-dev \
+        musl-dev \
+    && ln -s /usr/include/locale.h /usr/include/xlocale.h \
+    && pip install wheel \
+    && pip install --no-cache-dir \
+        numpy==1.15.1 \
+        scipy \
+        psycopg2-binary \
+        sqlalchemy \
+    && pip install --no-cache-dir \
+        pandas==${PANDAS_VERSION} \
+    && apk del .build-deps
diff --git a/general/README.rst b/general/README.rst
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/general/setup.py b/general/setup.py
new file mode 100644
index 0000000000000..c4a958adb103c
--- /dev/null
+++ b/general/setup.py
@@ -0,0 +1,40 @@
+import io
+import os
+
+from setuptools import setup, find_packages
+
+namespace = 'vortexa_utils'
+name = 'vortexa_utils_general'
+version = '1.0.0'
+description = 'Vortexa general utils helper library'
+
+dependencies = [
+    'gitpython',
+    'logzero',
+    'tenacity'
+]
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, 'README.rst')
+with io.open(readme_filename, encoding='utf-8') as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in find_packages()
+    if package.startswith(namespace)
+]
+
+setup(
+    name=name,
+    version=version,
+    description=description,
+    long_description=readme,
+    author='Marcin Szymanski',
+    author_email='marcin.szymanski@vortexa.com',
+    zip_safe=False,
+    packages=packages,
+    install_requires=dependencies,
+)
diff --git a/general/vortexa_utils/__init__.py b/general/vortexa_utils/__init__.py
new file mode 100644
index 0000000000000..a7712f632a766
--- /dev/null
+++ b/general/vortexa_utils/__init__.py
@@ -0,0 +1,5 @@
+# @Author: richard
+# @Date:   2018-12-04T20:12:18+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T20:12:57+00:00
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/general/vortexa_utils/general/__init__.py b/general/vortexa_utils/general/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/general/vortexa_utils/git.py b/general/vortexa_utils/git.py
new file mode 100644
index 0000000000000..79bc2365032ed
--- /dev/null
+++ b/general/vortexa_utils/git.py
@@ -0,0 +1,14 @@
+import os
+
+from git import Repo
+from logzero import logger
+from tenacity import retry, wait_fixed, stop_after_attempt
+
+
+@retry(wait=wait_fixed(10), stop=stop_after_attempt(3))
+def clone_repo(repo_url: str, path: str, ssh_key: str):
+    # clone_from picks GIT_SSH_COMMAND up from the process environment
+    os.environ['GIT_SSH_COMMAND'] = f'ssh -i {ssh_key}'
+    logger.info('Cloning git repo %s to %s', repo_url, path)
+    Repo.clone_from(repo_url, path, branch='master')
+    logger.info('Repo cloned successfully')
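A usage sketch for the helper above; the repository URL and key path are placeholders:

```python
from vortexa_utils.git import clone_repo

# Placeholders: point these at a real repository and deploy key
clone_repo(
    repo_url="git@github.com:example/some-repo.git",
    path="/tmp/some-repo",
    ssh_key="/home/deploy/.ssh/id_rsa",
)
# tenacity retries the clone up to 3 times, waiting 10s between attempts
```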
diff --git a/logging/README.md b/logging/README.md
new file mode 100644
index 0000000000000..28b90c8686b22
--- /dev/null
+++ b/logging/README.md
@@ -0,0 +1,55 @@
+# Vortexa Utils Logging Helpers
+
+Small helper scripts to abstract logging-related boilerplate code.
+
+
+## log_unhandled_exceptions
+
+Function decorator, designed to be wrapped around any `main()` (or equivalent) function, to capture errors, prefix them with `ERROR`, and raise them in-line, when executed in AWS Batch.
+
+### Problem:
+
+AWS Batch jobs all output logs onto the CloudWatch Log Group (`/aws/batch/job`). Therefore, to raise specific alarms, Python jobs should use logging, with the logger pattern containing a unique identifier for the job (such as the job/repo name), so that CloudWatch can filter logs and look for specific exceptions.
+
+When errors are raised by a Python program logging to CloudWatch, the logger pattern and the error/stacktrace are output on 2 consecutive lines. CloudWatch alarm triggers can only look for pattern combinations which are in-line; therefore, for a CloudWatch alarm to be raised when a job fails, the logger pattern and some form of identifiable error key must be printed in-line.
+
+
+### Solution:
+
+The `log_unhandled_exceptions` decorator can be wrapped around a main executing function; if any error is raised during run-time, it captures the error and logs it in-line with the logging pattern, using the common prefix `ERROR: `.
+CloudWatch alerts can now be set to look for (1) the unique logging pattern of the project (i.e. name) and (2) the key `ERROR`, to raise targeted alerts. The full stacktrace will still be output to CloudWatch logs.
+
+### Usage:
+
+```python
+from vortexa_utils.logging import log_unhandled_exceptions
+
+# The following is the logger set-up boilerplate code.
+# This can be done as below, or imported from a project-logger dir.
+# The following is only intended as a sample and should not be copied without understanding what is happening.
+import logging
+
+logger = logging.getLogger(__name__)
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter(
+    "PROJECT_NAME:%(name)s:%(message)s"
+))  # Only a sample format, can be designed at will, as long as a unique identifier (e.g. PROJECT_NAME) is included
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+@log_unhandled_exceptions(logger)
+def main():
+    return int(1) + str('two')
+
+if __name__ == "__main__":
+    main()
+```
+
+Code snippet above would return:
+
+```
+PROJECT_NAME:__main__:ERROR: unsupported operand type(s) for +: 'int' and 'str'
+  Traceback (most recent call last):
+    ... ...
+  TypeError: unsupported operand type(s) for +: 'int' and 'str'
+```
+
+As a result, a CloudWatch alarm can now be set on the pattern `PROJECT_NAME ERROR`
diff --git a/logging/setup.py b/logging/setup.py
new file mode 100644
index 0000000000000..7081b7db26c4e
--- /dev/null
+++ b/logging/setup.py
@@ -0,0 +1,38 @@
+import io
+import os
+
+from setuptools import find_packages, setup
+
+namespace = "vortexa_utils"
+description = "Vortexa Error Logging"
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, "README.md")
+with io.open(readme_filename, encoding="utf-8") as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in find_packages() if package.startswith(namespace)
+]
+
+requirements = [
+    "logzero",
+    "psutil"
+]
+
+setup(
+    name="vortexa_utils_logging",
+    version="0.0.1",
+    description=description,
+    long_description=readme,
+    author="Tino von Stegmann",
+    author_email="constantin.vonstegmann@vortexa.com",
+    zip_safe=False,
+    tests_require=["nose2"],
+    install_requires=requirements,
+    test_suite="nose2.collector.collector",
+    packages=packages,
+)
diff --git a/logging/vortexa_utils/__init__.py b/logging/vortexa_utils/__init__.py
new file mode 100644
index 0000000000000..69e3be50dac40
--- /dev/null
+++ b/logging/vortexa_utils/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/logging/vortexa_utils/logging/__init__.py b/logging/vortexa_utils/logging/__init__.py
new file mode 100644
index 0000000000000..14783dcbadd01
--- /dev/null
+++ b/logging/vortexa_utils/logging/__init__.py
@@ -0,0 +1 @@
+from .exception_decorator import log_unhandled_exceptions
diff --git a/logging/vortexa_utils/logging/exception_decorator.py b/logging/vortexa_utils/logging/exception_decorator.py
new file mode 100644
index 0000000000000..52b49bac513e0
--- /dev/null
+++ b/logging/vortexa_utils/logging/exception_decorator.py
@@ -0,0 +1,16 @@
+import functools
+
+
+def log_unhandled_exceptions(logger):
+    def outer_wrapper(main):
+        @functools.wraps(main)
+        def wrapper(*args, **kwargs):
+            try:
+                return main(*args, **kwargs)
+            except Exception as e:
+                logger.exception(f"ERROR: {e}")
+                raise
+
+        return wrapper
+
+    return outer_wrapper
diff --git a/logging/vortexa_utils/logging/resources.py b/logging/vortexa_utils/logging/resources.py
new file mode 100644
index 0000000000000..de2bac29e6c44
--- /dev/null
+++ b/logging/vortexa_utils/logging/resources.py
@@ -0,0 +1,38 @@
+import os
+import resource
+
+import psutil
+from logzero import logger
+
+RESOURCE_LOG = """---RESOURCE---
+User time: {0}
+System time: {1}
+Max resident size: {2}
+Block input operations: {3}
+Block output operations: {4}
+---MEMORY_INFO---
+RSS: {5}
+VMS: {6}
+Data: {7}
+"""
+
+
+def log_resource_usage(step: str):
+    mem = psutil.Process(os.getpid()).memory_info()
+    res = resource.getrusage(resource.RUSAGE_SELF)
+    # memory_info().data is not available on macOS; fall back to 0
+    try:
+        data = mem.data
+    except AttributeError:
+        data = 0
+    res_log = RESOURCE_LOG.format(
+        res.ru_utime,
+        res.ru_stime,
+        res.ru_maxrss,
+        res.ru_inblock,
+        res.ru_oublock,
+        mem.rss,
+        mem.vms,
+        data,
+    )
+    logger.info(f"[resource][{step}] {res_log}")
diff --git a/utils/vortexa_utils/utils/__init__.py b/utils/vortexa_utils/utils/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/utils/vortexa_utils/utils/byte_stream_spliter.py b/utils/vortexa_utils/utils/byte_stream_spliter.py
new file mode 100644
index 0000000000000..64e537577cd9a
--- /dev/null
+++ b/utils/vortexa_utils/utils/byte_stream_spliter.py
@@ -0,0 +1,32 @@
+import io
+
+
+socket_bytes = io.BytesIO(b"So\x01me\r\nbytes\rto\nparsB")
+
+byte_joiner = b''.join
+
+
+def split_on(buffer, *spliters):
+    # yield runs of bytes delimited by any of the given separators
+    if not spliters:
+        spliters = {b'\n', b'\r'}
+    else:
+        spliters = set(spliters)
+    line = []
+    while True:
+        b = buffer.read(1)
+        split = b in spliters
+
+        if split or not b:
+            if line:
+                yield byte_joiner(line)
+            if split:
+                line = []
+            elif not b:
+                return
+        else:
+            line.append(b)
+
+
+gen = split_on(socket_bytes)
+list(gen)
diff --git a/utils/vortexa_utils/utils/sockets/socket_client.py b/utils/vortexa_utils/utils/sockets/socket_client.py
new file mode 100644
index 0000000000000..fdc97e08c2cb1
--- /dev/null
+++ b/utils/vortexa_utils/utils/sockets/socket_client.py
@@ -0,0 +1,19 @@
+import socket
+
+HOST = '127.0.0.1'  # The server's hostname or IP address
+PORT = 65432        # The port used by the server
+
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+    s.connect((HOST, PORT))
+    s.sendall(b'Hello, world')
+    data = s.recv(1024)
+
+print('Received', repr(data))
+
+# The same exchange, reading the socket through a file object
+sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+sock.connect((HOST, PORT))
+sio = sock.makefile('r', encoding='ascii', errors='backslashreplace', newline=None)
+next(sio)
+sock.close()
+sio.close()
diff --git a/utils/vortexa_utils/utils/sockets/socket_server.py b/utils/vortexa_utils/utils/sockets/socket_server.py
new file mode 100644
index 0000000000000..c1d427b6b0882
--- /dev/null
+++ b/utils/vortexa_utils/utils/sockets/socket_server.py
@@ -0,0 +1,15 @@
+import socket
+
+HOST = '127.0.0.1'  # Standard loopback interface address (localhost)
+PORT = 65432        # Port to listen on (non-privileged ports are > 1023)
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+sock.bind((HOST, PORT))
+sock.listen()
+
+while True:
+    conn, addr = sock.accept()
+    with conn:
+        while True:
+            conn.sendall(b'some\rdata\nbyt\1\xffest\r\nadslfkja\n\raslkdj')
diff --git a/versioning/VERSION b/versioning/VERSION
new file mode 100644
index 0000000000000..7bcd0e3612da7
--- /dev/null
+++ b/versioning/VERSION
@@ -0,0 +1 @@
+0.0.2
\ No newline at end of file
diff --git a/versioning/setup.py b/versioning/setup.py
new file mode 100644
index 0000000000000..2a6c50ab207b1
--- /dev/null
+++ b/versioning/setup.py
@@ -0,0 +1,34 @@
+# @Author: richard
+# @Date:   2018-12-04T17:54:43+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T19:14:15+00:00
+import os
+from setuptools import setup, find_packages
+from vortexa_utils.versioning import __version__
+
+namespace = 'vortexa_utils'
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+packages = [
+    package for package in find_packages()
+    if package.startswith(namespace)
+]
+
+setup(
+    name="vortexa_utils_versioning",
+    version=__version__,
+    description="",
+    long_description="",
+
+    author="Richard Mathie",
+    author_email="richard.mathie@vortexa.com",
+
+    zip_safe=False,
+    tests_require=['nose2'],
+    test_suite='nose2.collector.collector',
+
+    packages=packages,
+)
diff --git a/versioning/tests/__init__.py b/versioning/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/versioning/tests/test_versioner.py b/versioning/tests/test_versioner.py
new file mode 100644
index 0000000000000..27be1a07217a9
--- /dev/null
+++ b/versioning/tests/test_versioner.py
@@ -0,0 +1,47 @@
+import unittest
+import os
+import tempfile
+from nose2.tools import params
+from vortexa_utils.versioning.versioner import Versioner
+
+specs = [
+    ((0, 0, 0), (0, 0, 1)),
+    ((0, 0, 1), (0, 0, 2)),
+    ((0, 1, 0), (0, 1, 0)),
+    ((0, 1, 1), (0, 1, 0)),
+    ((1, 0, 0), (1, 0, 0)),
+    ((1, 0, 1), (1, 0, 0)),
+    ((1, 1, 0), (1, 0, 0)),
+    ((1, 1, 1), (1, 0, 0))
+]
+
+
+class TestVersioner(unittest.TestCase):
+    def setUp(self):
+        fh, filename = tempfile.mkstemp()
+        os.fdopen(fh).close()
+        self.version: Versioner = Versioner(filename)
+
+    def tearDown(self):
+        os.remove(self.version.VERSION_FILE)
+
+    def test_version_none(self):
+        self.assertEqual(self.version.__version__, None)
+
+    def test_version_init(self):
+        self.assertEqual(
+            self.version.version,
+            self.version.SemanticVersion(0, 0, 1)
+        )
+        self.assertTrue(os.path.isfile(self.version.VERSION_FILE))
+        with open(self.version.VERSION_FILE, "r") as f:
+            self.assertEqual(f.readline(), "0.0.1")
+
+    @params(*specs)
+    def test_version_increment(self, flags, output):
+        self.test_version_init()
+        self.version.update_version(flags)
+        self.assertEqual(
+            self.version.version,
+            self.version.SemanticVersion(*output)
+        )
diff --git a/versioning/vortexa_utils/__init__.py b/versioning/vortexa_utils/__init__.py
new file mode 100644
index 0000000000000..a7712f632a766
--- /dev/null
+++ b/versioning/vortexa_utils/__init__.py
@@ -0,0 +1,5 @@
+# @Author: richard
+# @Date:   2018-12-04T20:12:18+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T20:12:57+00:00
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/versioning/vortexa_utils/versioning/__init__.py b/versioning/vortexa_utils/versioning/__init__.py
new file mode 100644
index 0000000000000..977291bcc6396
--- /dev/null
+++ b/versioning/vortexa_utils/versioning/__init__.py
@@ -0,0 +1,10 @@
+from .versioner import Versioner
+
+version = Versioner("../../VERSION", __file__)
+__version_numeric__ = version.version
+__version__ = str(version)
+
+
+if __name__ == "__main__":
+    from .cli import VersionCLI
+    VersionCLI(version).parse_args()
diff --git a/versioning/vortexa_utils/versioning/__main__.py b/versioning/vortexa_utils/versioning/__main__.py
new file mode 100644
index 0000000000000..c9ce8d27293a2
--- /dev/null
+++ b/versioning/vortexa_utils/versioning/__main__.py
@@ -0,0 +1,9 @@
+from .
import version +from .cli import VersionCLI + +__version_numeric__ = version.version +__version__ = str(version) + + +if __name__ == "__main__": + VersionCLI(version).parse_args() diff --git a/versioning/vortexa_utils/versioning/cli.py b/versioning/vortexa_utils/versioning/cli.py new file mode 100644 index 0000000000000..8e414bb5e7c08 --- /dev/null +++ b/versioning/vortexa_utils/versioning/cli.py @@ -0,0 +1,46 @@ +from argparse import ArgumentParser +from dataclasses import dataclass, field +from vortexa_utils.versioning.versioner import Versioner + + +@dataclass +class VersionCLI(object): + versioner: Versioner + parser: ArgumentParser = field(default=None, init=False) + + def __post_init__(self): + self.parser = ArgumentParser( + description='Package Version Tool.' + ) + self.specs = self.versioner.VERSION_SPEC.split( + self.versioner.VERSION_SEP + ) + for spec in self.specs: + self.parser.add_argument( + f'--bump-{spec.lower()}', + f'-{spec[0]}', + action='store_true' + ) + + def parse_args(self): + args = self.parser.parse_args() + spec_flags = list( + getattr(args, f'bump_{spec.lower()}') + for spec in self.specs + ) + if any(spec_flags): + print(f"Current Version: {self.versioner}") + if sum(spec_flags) > 1: + print("You can only bump one spec at a time") + self.parser.print_help() + else: + self.versioner.update_version(spec_flags) + print(f"New Version {self.versioner}") + else: + print(f"{self.versioner}") + + +if __name__ == "__main__": + version = Versioner() + cli = VersionCLI(version) + cli.parse_args() diff --git a/versioning/vortexa_utils/versioning/utils.py b/versioning/vortexa_utils/versioning/utils.py new file mode 100644 index 0000000000000..0d3f9b544b13e --- /dev/null +++ b/versioning/vortexa_utils/versioning/utils.py @@ -0,0 +1,22 @@ +# @Author: richard +# @Date: 2018-12-21T16:37:39+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-21T16:37:56+00:00 + + +class cached_property(object): + """ + A property that is only computed once per instance and then replaces itself + with an ordinary attribute. Deleting the attribute resets the property. + """ # noqa + + def __init__(self, func): + self.__doc__ = getattr(func, "__doc__") + self.func = func + + def __get__(self, obj, cls): + if obj is None: + return self + + value = obj.__dict__[self.func.__name__] = self.func(obj) + return value diff --git a/versioning/vortexa_utils/versioning/versioner.py b/versioning/vortexa_utils/versioning/versioner.py new file mode 100644 index 0000000000000..285481c05ad1a --- /dev/null +++ b/versioning/vortexa_utils/versioning/versioner.py @@ -0,0 +1,99 @@ +"""Class to track the version of a package.""" +import os +from dataclasses import dataclass, field, InitVar +from collections import namedtuple +from .utils import cached_property + + +@dataclass +class Versioner(object): + VERSION_FILE: str = "VERSION" + MODULE_FILE: InitVar[str] = None + VERSION_SEP: str = "." 
+ VERSION_SPEC: str = "Major.minor.patch" + __version__: namedtuple = field(default=None, init=False) + __version_file__: namedtuple = field(default=None, init=False) + + def __post_init__(self, MODULE_FILE): + parts = [] + if MODULE_FILE is not None: + dir = os.path.dirname(os.path.abspath(MODULE_FILE)) + parts.append(dir) + parts.append(self.VERSION_FILE) + path = os.path.join(*parts) + self.__version_file__ = os.path.abspath(path) + + @cached_property + def SemanticVersion(self): + version_type = namedtuple( + "SemanticVersion", + self.VERSION_SPEC.lower().split(self.VERSION_SEP) + ) + return version_type + + def init_version(self): + fields = self.SemanticVersion._fields + version = ( + 1 if i == len(fields) - 1 else 0 + for i, field in enumerate(fields) + ) + self.version = self.SemanticVersion(*version) + self.write() + return self.version + + def new_version(self, spec_flags): + bumped = False + for spec, ver in zip(spec_flags, self.version): + if bumped: + yield 0 + elif spec: + bumped = True + yield ver + 1 + else: + yield ver + + def update_version(self, spec_flags): + version = self.SemanticVersion(*self.new_version(spec_flags)) + self.version = version + self.write() + return version + + def read(self): + try: + with open(self.__version_file__, "r") as file: + version_string = file.readline().strip() + except FileNotFoundError: + version = self.init_version() + else: + if version_string == "": + version = self.init_version() + else: + version = self.parse_verion(version_string) + self.version = version + return version + + def write(self): + with open(self.__version_file__, "w") as file: + file.write(str(self)) + + @property + def version(self): + if self.__version__ is None: + self.read() + return self.__version__ + + @version.setter + def version(self, version): + if isinstance(version, str): + version = self.parse_verion(version) + if isinstance(version, self.SemanticVersion): + self.__version__ = version + else: + raise TypeError("Version is not str or self.SemanticVersion") + + def parse_verion(self, version: str): + parts = (int(v) for v in version.split(self.VERSION_SEP)) + return self.SemanticVersion(*parts) + + def __str__(self): + return self.VERSION_SEP.join(str(v) for v in self.version) diff --git a/youve_got_mail/README.md b/youve_got_mail/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/README.rst b/youve_got_mail/README.rst new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/requirements.txt b/youve_got_mail/requirements.txt new file mode 100644 index 0000000000000..dfedbe37089fc --- /dev/null +++ b/youve_got_mail/requirements.txt @@ -0,0 +1,2 @@ +sendgrid < 6.0.0 +boto3 diff --git a/youve_got_mail/setup.py b/youve_got_mail/setup.py new file mode 100644 index 0000000000000..1b998bcd47eba --- /dev/null +++ b/youve_got_mail/setup.py @@ -0,0 +1,48 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:16:54+00:00 +import os +import io +from setuptools import setup, find_packages + +namespace = 'vortexa_utils' +name = 'vortexa_utils_youve_got_mail' +version = '1' +description = 'Vortexa E-mail utils helper library', + +dependencies = [ + 'boto3', + 'sendgrid<6.0.0' +] + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + 
diff --git a/youve_got_mail/README.md b/youve_got_mail/README.md
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/youve_got_mail/README.rst b/youve_got_mail/README.rst
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/youve_got_mail/requirements.txt b/youve_got_mail/requirements.txt
new file mode 100644
index 0000000000000..dfedbe37089fc
--- /dev/null
+++ b/youve_got_mail/requirements.txt
@@ -0,0 +1,2 @@
+sendgrid < 6.0.0
+boto3
diff --git a/youve_got_mail/setup.py b/youve_got_mail/setup.py
new file mode 100644
index 0000000000000..1b998bcd47eba
--- /dev/null
+++ b/youve_got_mail/setup.py
@@ -0,0 +1,48 @@
+# @Author: richard
+# @Date:   2018-12-04T17:54:43+00:00
+# @Last modified by:   richard
+# @Last modified time: 2018-12-04T20:16:54+00:00
+import os
+import io
+from setuptools import setup, find_packages
+
+namespace = 'vortexa_utils'
+name = 'vortexa_utils_youve_got_mail'
+version = '1'
+description = 'Vortexa E-mail utils helper library'
+
+dependencies = [
+    'boto3',
+    'sendgrid<6.0.0'
+]
+
+# Setup boilerplate below
+
+package_root = os.path.abspath(os.path.dirname(__file__))
+
+readme_filename = os.path.join(package_root, 'README.rst')
+with io.open(readme_filename, encoding='utf-8') as readme_file:
+    readme = readme_file.read()
+
+packages = [
+    package for package in find_packages()
+    if package.startswith(namespace)
+]
+
+setup(
+    name=name,
+    version=version,
+    description=description,
+    long_description=readme,
+
+    author='Richard Mathie',
+    author_email='richard.mathie@vortexa.com',
+
+    zip_safe=False,
+    test_suite='nose2.collector.collector',
+    tests_require=['nose2'],
+
+    packages=packages,
+    install_requires=dependencies,
+    extras_require={}
+)
diff --git a/youve_got_mail/tests/__init__.py b/youve_got_mail/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/youve_got_mail/vortexa_utils/__init__.py b/youve_got_mail/vortexa_utils/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/youve_got_mail/vortexa_utils/youve_got_mail.py b/youve_got_mail/vortexa_utils/youve_got_mail.py
new file mode 100644
index 0000000000000..aae86d37cf7e6
--- /dev/null
+++ b/youve_got_mail/vortexa_utils/youve_got_mail.py
@@ -0,0 +1,43 @@
+import base64
+import boto3
+import json
+import mimetypes
+import sendgrid
+from sendgrid.helpers.mail import Attachment, Email, Mail, Personalization
+from typing import List
+
+
+secretsmanager = boto3.client('secretsmanager')
+
+
+def create_sendgrid_client():
+    secret = secretsmanager.get_secret_value(SecretId='prod/sendgrid')
+    api_key = json.loads(secret['SecretString'])['SENDGRID_API_KEY']
+
+    return sendgrid.SendGridAPIClient(apikey=api_key)
+
+
+def build_attachment(buf: bytes, filename: str, disposition: str = "attachment", content_id: str = None):
+    encoded = base64.b64encode(buf).decode()
+
+    mime_type, encoding = mimetypes.guess_type(filename)
+
+    attachment = Attachment()
+    attachment.content = encoded
+    attachment.type = mime_type
+    attachment.filename = filename
+    attachment.disposition = disposition
+    attachment.content_id = content_id
+
+    return attachment
+
+
+def add_recipients(recipients: List[str], mail: Mail):
+    personalization = Personalization()
+
+    for rec in recipients:
+        personalization.add_to(Email(rec))
+
+    mail.add_personalization(personalization)
+
+    return mail
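A sketch of how these helpers compose with the sendgrid 5.x API; it assumes AWS credentials and the `prod/sendgrid` secret are available, and the addresses and file name are placeholders:

```python
import sendgrid
from sendgrid.helpers.mail import Content, Email, Mail

from vortexa_utils.youve_got_mail import (
    add_recipients, build_attachment, create_sendgrid_client)

sg = create_sendgrid_client()  # reads the API key from AWS Secrets Manager

mail = Mail()
mail.from_email = Email("noreply@example.com")    # placeholder sender
mail.subject = "Report attached"
mail.add_content(Content("text/plain", "See attachment."))
add_recipients(["alice@example.com"], mail)       # placeholder recipient

with open("report.pdf", "rb") as f:               # placeholder file
    mail.add_attachment(build_attachment(f.read(), "report.pdf"))

sg.client.mail.send.post(request_body=mail.get())
```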
From c95de1d59dd8f6ef6f818223667fc6bd7a444eb4 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Fri, 1 Nov 2019 17:52:15 +0000
Subject: [PATCH 07/75] update scratch

---
 pandas/io/sql_scratch.py | 42 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py
index 702574af6c8d1..ada9c8bedf323 100644
--- a/pandas/io/sql_scratch.py
+++ b/pandas/io/sql_scratch.py
@@ -12,3 +12,45 @@ def get_pkey_values(table: Table, conn: Connection):
     return [row for row in conn.execute(statement)]
     # for row in conn.execute(statement):
     #     yield row
+
+
+def pkey_generator(table, engine):
+    pkeys = get_pkey(table)
+    statement = select([table.c[name] for name in pkeys])
+    with engine.connect() as conn:
+        for row in conn.execute(statement):
+            yield row
+
+# Leaves connection open
+def pkey_results_proxy(table, engine):
+    pkeys = get_pkey(table)
+    statement = select([table.c[name] for name in pkeys])
+    with engine.connect() as conn:
+        result = conn.execute(statement)
+    return result
+
+
+def pkey_generator2(table, engine):
+    pkeys = get_pkey(table)
+    statement = select([table.c[name] for name in pkeys])
+    with engine.connect() as conn:
+        result = conn.execute(statement)
+        for row in result:
+            yield result.fetchone()
+
+# replace table with self
+def get_pkey_values(table: SQLTable):
+    pkeys = [pkey.name for pkey in table.table.primary_key.columns.values()]
+    statement = select([table.table.c[name] for name in pkeys])
+    table.pd_sql.execute(statement)
+
+
+### REPRODUCIBLE SQLTable Creation:
+import sqlalchemy
+
+engine = sqlalchemy.create_engine('enter string here')
+meta = MetaData(engine)
+table_name = 'charterers' # or wtv
+meta.reflect(only=[table_name], views=True)
+db = SQLDatabase(engine, meta=meta)
+table = SQLTable(table_name, db, index=None, schema=None)
\ No newline at end of file
From 2d85c70c750685b28a2cf933ff9a42a4c432ee29 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Mon, 4 Nov 2019 19:20:23 +0000
Subject: [PATCH 09/75] starting to modify sql.py adding workflow for insert
 method, and scratch function written down for deleting pkeys

---
 pandas/io/sql.py         | 46 ++++++++++++++++++++++++++++++++++++++++
 pandas/io/sql_scratch.py | 33 ++++++++++++++++++++++++++--
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 32c9cc440d0c2..c8ca955136265 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -734,6 +734,29 @@ def insert_data(self):
 
         return column_names, data_list
 
+    @staticmethod
+    def _get_primary_key_values(sql_table, primary_keys):
+        """
+        This static method gets all values for specified columns, returning them via
+        a lazy generator
+        Parameters
+        ----------
+        sql_table: SQLTable
+            Table from which data is to be returned
+        primary_keys: List[str]
+            Names of columns to be returned
+
+        Returns
+        -------
+        generator Object
+        """
+        from sqlalchemy import select
+        statement = select([sql_table.table.c[key] for key in primary_keys])
+        result = sql_table.pd_sql.execute(statement)
+        for row in result:
+            yield row
+
     def insert(self, chunksize=None, method=None):
         # set insert method
@@ -750,6 +773,29 @@ def insert(self, chunksize=None, method=None):
         else:
             raise ValueError("Invalid parameter `method`: {}".format(method))
 
+        if method.startswith('upsert_'):
+            # Upsert operation will require knowledge of what is already in the database
+            # Following will create new meta and SQLDatabase objects so that we have
+            # access to existing table without overriding objects' self.meta attribute
+            from sqlalchemy.schema import MetaData
+            upsert_meta = MetaData(self.pd_sql.connectable, schema=self.schema)
+            upsert_meta.reflect(only=[self.name], views=True)
+            upsert_sql_database = SQLDatabase(
+                engine=self.pd_sql.connectable, schema=self.schema, meta=upsert_meta
+            )
+            # Check if table exists in given database connection
+            if upsert_sql_database.has_table(name=self.name, schema=self.schema):
+                upsert_sql_table = upsert_sql_database.get_table(self.name, self.schema)
+                primary_keys = [
+                    primary_key.name for primary_key in upsert_sql_table.table.primary_keys.columns.values()
+                ]
+                # Create generator object to lazily return rows in primary key columns
+                primary_key_values = self._get_primary_key_values(upsert_sql_table, primary_keys)
+            else:
+                raise ValueError(
+                    f"No table named {self.name} found in database"
+                )
+
         keys, data_list = self.insert_data()
 
         nrows = len(self.frame)
diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py
index ada9c8bedf323..728371ee99ebc 100644
--- a/pandas/io/sql_scratch.py
+++ b/pandas/io/sql_scratch.py
@@ -45,7 +45,7 @@ def get_pkey_values(table: SQLTable):
     table.pd_sql.execute(statement)
 
 
-### REPRODUCIBLE SQLTable Creation:
+### REPRODUCIBLE SQLTable Creation:table
 import sqlalchemy
 
@@ -53,4 +53,33 @@ def get_pkey_values(table: SQLTable):
 table_name = 'charterers' # or wtv
 meta.reflect(only=[table_name], views=True)
 db = SQLDatabase(engine, meta=meta)
-table = SQLTable(table_name, db, index=None, schema=None)
\ No newline at end of file
+table = SQLTable(table_name, db, index=None, schema=None)
+
+
+from vortexa_utils.database import ProdFactory
+from sqlalchemy import create_engine
+import pandas as pd
+from pandas.io.sql import SQLTable, SQLDatabase
+
+engine_v = ProdFactory().engine()
+engine = create_engine('sqlite:///:memory:')
+table_name = 'charterers'
+df = pd.read_sql_table(table_name, engine_v)
+df_test = df.head().copy()
+df_test['name'] = df_test['name'].apply(lambda x: x + '_TEST')
+engine.execute("create table charterers(id varchar primary key, name text, energy integer)")
+df.to_sql(table_name, index=False, if_exists='append', con=engine)
+
+db = SQLDatabase(engine, schema=None, meta=None)
+new_data = SQLTable(table_name, db, frame=df_test, index=False)
+
+from sqlalchemy.sql import tuple_
+
+
+def delete_matching_keys(sql_table, key_columns, value_iter):
+    delete_expression = sql_table.table.delete().where(
+        tuple_(*(table.table.c[col] for col in key_columns)).in_(value_iter)
+    )
+    with sql_table.connectable.connect() as conn:
+        conn.execute(delete_expression)
+
From dc70c408b7d958248202f495e64e6718b8640bb1 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Tue, 5 Nov 2019 19:16:27 +0000
Subject: [PATCH 11/75] sql.py - methods to get pkey columns, pkey iterator,
 and almost finished upsert ignore method

---
 pandas/io/sql.py         | 95 ++++++++++++++++++++++++++--------------
 pandas/io/sql_scratch.py | 26 ++++++++---
 2 files changed, 82 insertions(+), 39 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index c8ca955136265..c8939b61c7cc3 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -681,17 +681,36 @@ def _execute_insert_multi(self, conn, keys, data_iter):
         data = [dict(zip(keys, row)) for row in data_iter]
         conn.execute(self.table.insert(data))
 
-    def _execute_upsert_update(self):
+    def _execute_upsert_update(self, conn, keys, data_iter, primary_keys):
         """Execute an SQL UPSERT, and in cases of key clashes,
         overwrite records in the Database with incoming records.
         """
         pass
 
-    def _execute_upsert_ignore(self):
+    def _execute_upsert_ignore(self, conn, keys, data_iter, primary_keys):
         """Execute an SQL UPSERT, and in cases of key clashes,
         keep records in the Database, and ignore incoming records.
         """
-        pass
+        incoming_pkey_values = list(zip(*[self.frame[key] for key in primary_keys]))
+        existing_pkey_values = self._primary_key_iterator(primary_keys=primary_keys)
+
+
+        for pkey_value in existing_pkey_values:
+            # stop iterating over query results if all incoming values are exhausted
+            if len(incoming_pkey_values) == 0:
+                break  # OR return?
+            elif pkey_value in incoming_pkey_values:
+                incoming_pkey_values.remove(pkey_value)
+
+        temp_frame = self.frame.reset_index()
+        # WHAT IF INDEX IS NOT NONE?
+ ipv = zip(*[temp_frame[col] for col in ['index'] + primary_keys]) + dov = {tuple(val for val in vals): idx for idx, *vals in ipv} + for pkey_value in existing_pkey_values: + if len(dov) == 0: + break + elif dov[pkey_value] is not None: + del dov[pkey_value] def insert_data(self): if self.index is not None: @@ -734,14 +753,14 @@ def insert_data(self): return column_names, data_list - @staticmethod - def _get_primary_key_values(sql_table, primary_keys): + def _primary_key_iterator(self, primary_keys): """ This static method gets all values for specified columns, returning them via a lazy generator + Parameters ---------- - sql_table: SQLTable + self: SQLTable Table from which data is to be returned primary_keys: List[str] Names of columns to be returned @@ -751,14 +770,43 @@ def _get_primary_key_values(sql_table, primary_keys): generator Object """ from sqlalchemy import select - statement = select([sql_table.table.c[key] for key in primary_keys]) - result = sql_table.pd_sql.execute(statement) - for row in result: - yield row + statement = select([self.table.c[key] for key in primary_keys]) + result = self.pd_sql.execute(statement) + while True: + data = result.fetchone() + if not data: + result.close() + break + else: + yield data + def _get_primary_key_columns(self): + """ + Upsert workflows require knowledge of what is already in the database + this method reflects the meta object and gets primary key a list of primary keys - def insert(self, chunksize=None, method=None): + Returns + ------- + List[str] - list of primary key column names + """ + # reflect MetaData object and assign contents of db to self.table attribute + self.pd_sql.meta.reflect(only=[self.name], views=True) + self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) + primary_keys = [ + str(primary_key.name) for primary_key in self.table.primary_key.columns.values() + ] + + # For the time being, this method is defensive and will break if no pkeys are found + # If desired this default behaviour could be changed so that in cases where no pkeys + # are found, it could default to a normal insert + if len(primary_keys) == 0: + raise ValueError( + f"No primary keys found for table {self.name}" + ) + return primary_keys + + def insert(self, chunksize=None, method=None): # set insert method if method is None: exec_insert = self._execute_insert @@ -773,29 +821,8 @@ def insert(self, chunksize=None, method=None): else: raise ValueError("Invalid parameter `method`: {}".format(method)) - if method.startswith('upsert_'): - # Upsert operation will require knowledge of what is already in the database - # Following will create new meta and SQLDatabase objects so that we have - # access to existing table without overriding objects' self.meta attribute - from sqlalchemy.schema import MetaData - upsert_meta = MetaData(self.pd_sql.connectable, schema=self.schema) - upsert_meta.reflect(only=[self.name], views=True) - upsert_sql_database = SQLDatabase( - engine=self.pd_sql.connectable, schema=self.schema, meta=upsert_meta - ) - # Check if table exists in given database connection - if upsert_sql_database.has_table(name=self.name, schema=self.schema): - upsert_sql_table = upsert_sql_database.get_table(self.name, self.schema) - primary_keys = [ - primary_key.name for primary_key in upsert_sql_table.table.primary_keys.columns.values() - ] - # Create generator object to lazily return rows in primary key columns - primary_key_values = self._get_primary_key_values(upsert_sql_table, primary_keys) - else: - raise ValueError( - 
f"No table named {self.name} found in database" - ) - + # Need to pre-process data for upsert here + # for upsert ignore - delete records from self.frame directly keys, data_list = self.insert_data() nrows = len(self.frame) diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index 728371ee99ebc..63c52ebdd432d 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -35,8 +35,12 @@ def pkey_generator2(table, engine): statement = select([table.c[name] for name in pkeys]) with engine.connect() as conn: result = conn.execute(statement) - for row in result: - yield result.fetchone() + try: + for row in result: + yield result.fetchone() + finally: + result.close() + # replace table with self def get_pkey_values(table: SQLTable): @@ -44,6 +48,12 @@ def get_pkey_values(table: SQLTable): statement = select([table.table.c[name] for name in pkeys]) table.pd_sql.execute(statement) +def generate_mask(df, dictionary): + return [df[key] == value for key, value in dictionary.items()] + +def generate_mask_of_masks(list_of_masks): + return pd.concat([mask for mask in list_of_masks], axis=1).all(1) + ### REPRODUCIBLE SQLTable Creation:table import sqlalchemy @@ -67,19 +77,25 @@ def get_pkey_values(table: SQLTable): df = pd.read_sql_table(table_name, engine_v) df_test = df.head().copy() df_test['name'] = df_test['name'].apply(lambda x: x + '_TEST') -engine.execute("create table charterers(id varchar primary key, name text, energy integer)") +engine.execute("create table charterers(id text primary key, name text, energy integer)") df.to_sql(table_name, index=False, if_exists='append', con=engine) db = SQLDatabase(engine, schema=None, meta=None) new_data = SQLTable(table_name, db, frame=df_test, index=False) + + from sqlalchemy.sql import tuple_ def delete_matching_keys(sql_table, key_columns, value_iter): delete_expression = sql_table.table.delete().where( - tuple_(*(table.table.c[col] for col in key_columns)).in_(value_iter) + tuple_( + *(table.table.c[col] for col in key_columns) + ).in_( + list(zip(value_iter)) + ) ) - with sql_table.connectable.connect() as conn: + with sql_table.pd_sql.run_transaction() as conn: conn.execute(delete_expression) From a7b8e8eb47862f0d20b547a1af1f7047cb13ee2e Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Tue, 5 Nov 2019 19:16:27 +0000 Subject: [PATCH 12/75] sql.py - methods to get pkey columns, pkey iterator, and almost finished upsert ignore method --- pandas/io/sql.py | 95 ++++++++++++++++++++++++++-------------- pandas/io/sql_scratch.py | 26 ++++++++--- 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c8ca955136265..c8939b61c7cc3 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -681,17 +681,36 @@ def _execute_insert_multi(self, conn, keys, data_iter): data = [dict(zip(keys, row)) for row in data_iter] conn.execute(self.table.insert(data)) - def _execute_upsert_update(self): + def _execute_upsert_update(self, conn, keys, data_iter, primary_keys): """Execute an SQL UPSERT, and in cases of key clashes, overwrite records in the Database with incoming records. """ pass - def _execute_upsert_ignore(self): + def _execute_upsert_ignore(self, conn, keys, data_iter, primary_keys): """Execute an SQL UPSERT, and in cases of key clashes, keep records in the Database, and ignore incoming records. 
""" - pass + incoming_pkey_values = list(zip(*[self.frame[key] for key in primary_keys])) + existing_pkey_values = self._primary_key_iterator(primary_keys=primary_keys) + + + for pkey_value in existing_pkey_values: + # stop iterating over query results if all incoming values are exhausted + if len(incoming_pkey_values) == 0: + break # OR return? + elif pkey_value in incoming_pkey_values: + incoming_pkey_values.remove(pkey_value) + + temp_frame = self.frame.reset_index() + # WHAT IF INDEX IS NOT NONE? + ipv = zip(*[temp_frame[col] for col in ['index'] + primary_keys]) + dov = {tuple(val for val in vals): idx for idx, *vals in ipv} + for pkey_value in existing_pkey_values: + if len(dov) == 0: + break + elif dov[pkey_value] is not None: + del dov[pkey_value] def insert_data(self): if self.index is not None: @@ -734,14 +753,14 @@ def insert_data(self): return column_names, data_list - @staticmethod - def _get_primary_key_values(sql_table, primary_keys): + def _primary_key_iterator(self, primary_keys): """ This static method gets all values for specified columns, returning them via a lazy generator + Parameters ---------- - sql_table: SQLTable + self: SQLTable Table from which data is to be returned primary_keys: List[str] Names of columns to be returned @@ -751,14 +770,43 @@ def _get_primary_key_values(sql_table, primary_keys): generator Object """ from sqlalchemy import select - statement = select([sql_table.table.c[key] for key in primary_keys]) - result = sql_table.pd_sql.execute(statement) - for row in result: - yield row + statement = select([self.table.c[key] for key in primary_keys]) + result = self.pd_sql.execute(statement) + while True: + data = result.fetchone() + if not data: + result.close() + break + else: + yield data + def _get_primary_key_columns(self): + """ + Upsert workflows require knowledge of what is already in the database + this method reflects the meta object and gets primary key a list of primary keys - def insert(self, chunksize=None, method=None): + Returns + ------- + List[str] - list of primary key column names + """ + # reflect MetaData object and assign contents of db to self.table attribute + self.pd_sql.meta.reflect(only=[self.name], views=True) + self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) + primary_keys = [ + str(primary_key.name) for primary_key in self.table.primary_key.columns.values() + ] + + # For the time being, this method is defensive and will break if no pkeys are found + # If desired this default behaviour could be changed so that in cases where no pkeys + # are found, it could default to a normal insert + if len(primary_keys) == 0: + raise ValueError( + f"No primary keys found for table {self.name}" + ) + return primary_keys + + def insert(self, chunksize=None, method=None): # set insert method if method is None: exec_insert = self._execute_insert @@ -773,29 +821,8 @@ def insert(self, chunksize=None, method=None): else: raise ValueError("Invalid parameter `method`: {}".format(method)) - if method.startswith('upsert_'): - # Upsert operation will require knowledge of what is already in the database - # Following will create new meta and SQLDatabase objects so that we have - # access to existing table without overriding objects' self.meta attribute - from sqlalchemy.schema import MetaData - upsert_meta = MetaData(self.pd_sql.connectable, schema=self.schema) - upsert_meta.reflect(only=[self.name], views=True) - upsert_sql_database = SQLDatabase( - engine=self.pd_sql.connectable, schema=self.schema, meta=upsert_meta - ) 
- # Check if table exists in given database connection - if upsert_sql_database.has_table(name=self.name, schema=self.schema): - upsert_sql_table = upsert_sql_database.get_table(self.name, self.schema) - primary_keys = [ - primary_key.name for primary_key in upsert_sql_table.table.primary_keys.columns.values() - ] - # Create generator object to lazily return rows in primary key columns - primary_key_values = self._get_primary_key_values(upsert_sql_table, primary_keys) - else: - raise ValueError( - f"No table named {self.name} found in database" - ) - + # Need to pre-process data for upsert here + # for upsert ignore - delete records from self.frame directly keys, data_list = self.insert_data() nrows = len(self.frame) diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index 728371ee99ebc..63c52ebdd432d 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -35,8 +35,12 @@ def pkey_generator2(table, engine): statement = select([table.c[name] for name in pkeys]) with engine.connect() as conn: result = conn.execute(statement) - for row in result: - yield result.fetchone() + try: + for row in result: + yield result.fetchone() + finally: + result.close() + # replace table with self def get_pkey_values(table: SQLTable): @@ -44,6 +48,12 @@ def get_pkey_values(table: SQLTable): statement = select([table.table.c[name] for name in pkeys]) table.pd_sql.execute(statement) +def generate_mask(df, dictionary): + return [df[key] == value for key, value in dictionary.items()] + +def generate_mask_of_masks(list_of_masks): + return pd.concat([mask for mask in list_of_masks], axis=1).all(1) + ### REPRODUCIBLE SQLTable Creation:table import sqlalchemy @@ -67,19 +77,25 @@ def get_pkey_values(table: SQLTable): df = pd.read_sql_table(table_name, engine_v) df_test = df.head().copy() df_test['name'] = df_test['name'].apply(lambda x: x + '_TEST') -engine.execute("create table charterers(id varchar primary key, name text, energy integer)") +engine.execute("create table charterers(id text primary key, name text, energy integer)") df.to_sql(table_name, index=False, if_exists='append', con=engine) db = SQLDatabase(engine, schema=None, meta=None) new_data = SQLTable(table_name, db, frame=df_test, index=False) + + from sqlalchemy.sql import tuple_ def delete_matching_keys(sql_table, key_columns, value_iter): delete_expression = sql_table.table.delete().where( - tuple_(*(table.table.c[col] for col in key_columns)).in_(value_iter) + tuple_( + *(table.table.c[col] for col in key_columns) + ).in_( + list(zip(value_iter)) + ) ) - with sql_table.connectable.connect() as conn: + with sql_table.pd_sql.run_transaction() as conn: conn.execute(delete_expression) From 1d936ef7f63ba63846ec064700cd21cef62406df Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 7 Nov 2019 18:52:26 +0000 Subject: [PATCH 13/75] changed upsert to if_exists option, beginning implementation --- pandas/io/sql.py | 170 ++++++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 69 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c8939b61c7cc3..0eab1d823d022 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -496,7 +496,7 @@ def to_sql( .. 
versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append"): + if if_exists not in ("fail", "replace", "append"): #TODO: add upserts raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -650,6 +650,12 @@ def create(self): self._execute_create() elif self.if_exists == "append": pass + elif self.if_exists == "upsert_delete": + # execute delete from db statement, then + pass + elif self.if_exists == "upsert_ignore": + # clear rows out of dataframe + pass else: raise ValueError( "'{0}' is not valid for if_exists".format(self.if_exists) @@ -657,6 +663,22 @@ def create(self): else: self._execute_create() + def _upsert_delete_processing(self): + from sqlalchemy import tuple_ + #get pkeys + primary_keys = self._get_primary_key_columns() + # get pkey values from table + + #generate delete statement + delete_statement = self.table.delete().where( + tuple_( + *(self.table.c[col] for col in primary_keys) + ).in_( + #PKEY VALUES FORM TABLE HERE + ) + ) + + def _execute_insert(self, conn, keys, data_iter): """Execute SQL statement inserting data @@ -691,78 +713,41 @@ def _execute_upsert_ignore(self, conn, keys, data_iter, primary_keys): """Execute an SQL UPSERT, and in cases of key clashes, keep records in the Database, and ignore incoming records. """ - incoming_pkey_values = list(zip(*[self.frame[key] for key in primary_keys])) - existing_pkey_values = self._primary_key_iterator(primary_keys=primary_keys) - + # TODO: DATYPE CHECKING? - for pkey_value in existing_pkey_values: - # stop iterating over query results if all incoming values are exhausted - if len(incoming_pkey_values) == 0: - break # OR return? - elif pkey_value in incoming_pkey_values: - incoming_pkey_values.remove(pkey_value) - - temp_frame = self.frame.reset_index() - # WHAT IF INDEX IS NOT NONE? - ipv = zip(*[temp_frame[col] for col in ['index'] + primary_keys]) - dov = {tuple(val for val in vals): idx for idx, *vals in ipv} - for pkey_value in existing_pkey_values: - if len(dov) == 0: - break - elif dov[pkey_value] is not None: - del dov[pkey_value] - - def insert_data(self): - if self.index is not None: - temp = self.frame.copy() - temp.index.names = self.index - try: - temp.reset_index(inplace=True) - except ValueError as err: - raise ValueError("duplicate name in index/columns: {0}".format(err)) - else: - temp = self.frame + existing_pkey_values = self._database_column_iterator(columns=primary_keys) - # TODO: column_names by list comprehension? 
- column_names = list(map(str, temp.columns)) - ncols = len(column_names) - data_list = [None] * ncols - blocks = temp._data.blocks + # Creating temp frame accounts for cases where self.frame.index is also to be added + # to database + temp_frame = self._generate_temp_dataframe() - for b in blocks: - if b.is_datetime: - # return datetime.datetime objects - if b.is_datetimetz: - # GH 9086: Ensure we return datetimes with timezone info - # Need to return 2-D data; DatetimeIndex is 1D - d = b.values.to_pydatetime() - d = np.atleast_2d(d) - else: - # convert to microsecond resolution for datetime.datetime - d = b.values.astype("M8[us]").astype(object) - else: - d = np.array(b.get_values(), dtype=object) - - # replace NaN with None - if b._can_hold_na: - mask = isna(d) - d[mask] = None + # Only get columns corresponding to primary keys, and index for deletion + incoming_pkey_iterator = zip( + zip(*[temp_frame[col] for col in primary_keys]), + [temp_frame.index] + ) - for col_loc, col in zip(b.mgr_locs, d): - data_list[col_loc] = col + # Turn incoming self.frame data into same format at db read + dict_of_pkey_values = dict(incoming_pkey_iterator) - return column_names, data_list + # Loop to delete values from self.frame if they already exist in database + for pkey_value in existing_pkey_values: + # Break loop if self.frame is empty + if self.frame.empty: + break + elif pkey_value in dict_of_pkey_values: + self.frame.drop(index=dict_of_pkey_values[pkey_value], inplace=True) - def _primary_key_iterator(self, primary_keys): + def _database_column_iterator(self, columns): """ - This static method gets all values for specified columns, returning them via + This method gets all values for specified columns, returning them via a lazy generator Parameters ---------- self: SQLTable Table from which data is to be returned - primary_keys: List[str] + columns: List[str] Names of columns to be returned Returns @@ -770,20 +755,14 @@ def _primary_key_iterator(self, primary_keys): generator Object """ from sqlalchemy import select - statement = select([self.table.c[key] for key in primary_keys]) + statement = select([self.table.c[key] for key in columns]) result = self.pd_sql.execute(statement) - while True: - data = result.fetchone() - if not data: - result.close() - break - else: - yield data + return self.pd_sql._query_iterator(result=result, chunksize=chunksize, columns=columns) def _get_primary_key_columns(self): """ Upsert workflows require knowledge of what is already in the database - this method reflects the meta object and gets primary key a list of primary keys + this method reflects the meta object and gets a list of primary keys Returns ------- @@ -806,6 +785,59 @@ def _get_primary_key_columns(self): ) return primary_keys + def _generate_temp_dataframe(self): + """ + + Returns + ------- + DataFrame object + """ + # Originally from insert_data() method, but needed in more places + # so abstracted, to keep code DRY. + if self.index is not None: + temp = self.frame.copy() + temp.index.names = self.index + try: + temp.reset_index(inplace=True) + except ValueError as err: + raise ValueError("duplicate name in index/columns: {0}".format(err)) + else: + temp = self.frame + + return temp + + def insert_data(self): + temp = self._generate_temp_dataframe() + # TODO: column_names by list comprehension? 
+ column_names = list(map(str, temp.columns)) + ncols = len(column_names) + data_list = [None] * ncols + blocks = temp._data.blocks + + for b in blocks: + if b.is_datetime: + # return datetime.datetime objects + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D + d = b.values.to_pydatetime() + d = np.atleast_2d(d) + else: + # convert to microsecond resolution for datetime.datetime + d = b.values.astype("M8[us]").astype(object) + else: + d = np.array(b.get_values(), dtype=object) + + # replace NaN with None + if b._can_hold_na: + mask = isna(d) + d[mask] = None + + for col_loc, col in zip(b.mgr_locs, d): + data_list[col_loc] = col + + return column_names, data_list + def insert(self, chunksize=None, method=None): # set insert method if method is None: From 9db2aaa80a642052c7d3e24e037869bcf45e9b49 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 7 Nov 2019 18:52:26 +0000 Subject: [PATCH 14/75] changed upsert to if_exists option, beginning implementation --- pandas/io/sql.py | 170 ++++++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 69 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c8939b61c7cc3..0eab1d823d022 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -496,7 +496,7 @@ def to_sql( .. versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append"): + if if_exists not in ("fail", "replace", "append"): #TODO: add upserts raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -650,6 +650,12 @@ def create(self): self._execute_create() elif self.if_exists == "append": pass + elif self.if_exists == "upsert_delete": + # execute delete from db statement, then + pass + elif self.if_exists == "upsert_ignore": + # clear rows out of dataframe + pass else: raise ValueError( "'{0}' is not valid for if_exists".format(self.if_exists) @@ -657,6 +663,22 @@ def create(self): else: self._execute_create() + def _upsert_delete_processing(self): + from sqlalchemy import tuple_ + #get pkeys + primary_keys = self._get_primary_key_columns() + # get pkey values from table + + #generate delete statement + delete_statement = self.table.delete().where( + tuple_( + *(self.table.c[col] for col in primary_keys) + ).in_( + #PKEY VALUES FORM TABLE HERE + ) + ) + + def _execute_insert(self, conn, keys, data_iter): """Execute SQL statement inserting data @@ -691,78 +713,41 @@ def _execute_upsert_ignore(self, conn, keys, data_iter, primary_keys): """Execute an SQL UPSERT, and in cases of key clashes, keep records in the Database, and ignore incoming records. """ - incoming_pkey_values = list(zip(*[self.frame[key] for key in primary_keys])) - existing_pkey_values = self._primary_key_iterator(primary_keys=primary_keys) - + # TODO: DATYPE CHECKING? - for pkey_value in existing_pkey_values: - # stop iterating over query results if all incoming values are exhausted - if len(incoming_pkey_values) == 0: - break # OR return? - elif pkey_value in incoming_pkey_values: - incoming_pkey_values.remove(pkey_value) - - temp_frame = self.frame.reset_index() - # WHAT IF INDEX IS NOT NONE? 
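
For context, a hedged usage sketch of the API surface this patch is working toward. The upsert values only work with this series applied; stock pandas raises ValueError for them:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")
engine.execute("create table charterers (id text primary key, name text)")

df = pd.DataFrame({"id": ["a1"], "name": ["Acme"]})
df.to_sql("charterers", con=engine, index=False, if_exists="append")

# Re-sending overlapping keys later:
# "upsert_delete" deletes the clashing db rows, then inserts everything;
# "upsert_ignore" drops the clashing incoming rows and keeps db values.
df2 = pd.DataFrame({"id": ["a1", "b2"], "name": ["Acme Ltd", "Bmax"]})
df2.to_sql("charterers", con=engine, index=False, if_exists="upsert_delete")
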
- ipv = zip(*[temp_frame[col] for col in ['index'] + primary_keys]) - dov = {tuple(val for val in vals): idx for idx, *vals in ipv} - for pkey_value in existing_pkey_values: - if len(dov) == 0: - break - elif dov[pkey_value] is not None: - del dov[pkey_value] - - def insert_data(self): - if self.index is not None: - temp = self.frame.copy() - temp.index.names = self.index - try: - temp.reset_index(inplace=True) - except ValueError as err: - raise ValueError("duplicate name in index/columns: {0}".format(err)) - else: - temp = self.frame + existing_pkey_values = self._database_column_iterator(columns=primary_keys) - # TODO: column_names by list comprehension? - column_names = list(map(str, temp.columns)) - ncols = len(column_names) - data_list = [None] * ncols - blocks = temp._data.blocks + # Creating temp frame accounts for cases where self.frame.index is also to be added + # to database + temp_frame = self._generate_temp_dataframe() - for b in blocks: - if b.is_datetime: - # return datetime.datetime objects - if b.is_datetimetz: - # GH 9086: Ensure we return datetimes with timezone info - # Need to return 2-D data; DatetimeIndex is 1D - d = b.values.to_pydatetime() - d = np.atleast_2d(d) - else: - # convert to microsecond resolution for datetime.datetime - d = b.values.astype("M8[us]").astype(object) - else: - d = np.array(b.get_values(), dtype=object) - - # replace NaN with None - if b._can_hold_na: - mask = isna(d) - d[mask] = None + # Only get columns corresponding to primary keys, and index for deletion + incoming_pkey_iterator = zip( + zip(*[temp_frame[col] for col in primary_keys]), + [temp_frame.index] + ) - for col_loc, col in zip(b.mgr_locs, d): - data_list[col_loc] = col + # Turn incoming self.frame data into same format at db read + dict_of_pkey_values = dict(incoming_pkey_iterator) - return column_names, data_list + # Loop to delete values from self.frame if they already exist in database + for pkey_value in existing_pkey_values: + # Break loop if self.frame is empty + if self.frame.empty: + break + elif pkey_value in dict_of_pkey_values: + self.frame.drop(index=dict_of_pkey_values[pkey_value], inplace=True) - def _primary_key_iterator(self, primary_keys): + def _database_column_iterator(self, columns): """ - This static method gets all values for specified columns, returning them via + This method gets all values for specified columns, returning them via a lazy generator Parameters ---------- self: SQLTable Table from which data is to be returned - primary_keys: List[str] + columns: List[str] Names of columns to be returned Returns @@ -770,20 +755,14 @@ def _primary_key_iterator(self, primary_keys): generator Object """ from sqlalchemy import select - statement = select([self.table.c[key] for key in primary_keys]) + statement = select([self.table.c[key] for key in columns]) result = self.pd_sql.execute(statement) - while True: - data = result.fetchone() - if not data: - result.close() - break - else: - yield data + return self.pd_sql._query_iterator(result=result, chunksize=chunksize, columns=columns) def _get_primary_key_columns(self): """ Upsert workflows require knowledge of what is already in the database - this method reflects the meta object and gets primary key a list of primary keys + this method reflects the meta object and gets a list of primary keys Returns ------- @@ -806,6 +785,59 @@ def _get_primary_key_columns(self): ) return primary_keys + def _generate_temp_dataframe(self): + """ + + Returns + ------- + DataFrame object + """ + # Originally from insert_data() 
method, but needed in more places + # so abstracted, to keep code DRY. + if self.index is not None: + temp = self.frame.copy() + temp.index.names = self.index + try: + temp.reset_index(inplace=True) + except ValueError as err: + raise ValueError("duplicate name in index/columns: {0}".format(err)) + else: + temp = self.frame + + return temp + + def insert_data(self): + temp = self._generate_temp_dataframe() + # TODO: column_names by list comprehension? + column_names = list(map(str, temp.columns)) + ncols = len(column_names) + data_list = [None] * ncols + blocks = temp._data.blocks + + for b in blocks: + if b.is_datetime: + # return datetime.datetime objects + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D + d = b.values.to_pydatetime() + d = np.atleast_2d(d) + else: + # convert to microsecond resolution for datetime.datetime + d = b.values.astype("M8[us]").astype(object) + else: + d = np.array(b.get_values(), dtype=object) + + # replace NaN with None + if b._can_hold_na: + mask = isna(d) + d[mask] = None + + for col_loc, col in zip(b.mgr_locs, d): + data_list[col_loc] = col + + return column_names, data_list + def insert(self, chunksize=None, method=None): # set insert method if method is None: From 57a246df16be8ac53f6a6504f26c10d6cfe14743 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 8 Nov 2019 18:35:03 +0000 Subject: [PATCH 15/75] all workflow present - now to debugging --- pandas/io/sql.py | 187 ++++++++++++++++++++++------------------------- 1 file changed, 88 insertions(+), 99 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0eab1d823d022..2997ac95b8adb 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -496,7 +496,7 @@ def to_sql( .. versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append"): #TODO: add upserts + if if_exists not in ("fail", "replace", "append", "upsert_ignore", "upsert_delete"): #TODO: add upserts raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -651,7 +651,6 @@ def create(self): elif self.if_exists == "append": pass elif self.if_exists == "upsert_delete": - # execute delete from db statement, then pass elif self.if_exists == "upsert_ignore": # clear rows out of dataframe @@ -665,109 +664,55 @@ def create(self): def _upsert_delete_processing(self): from sqlalchemy import tuple_ - #get pkeys - primary_keys = self._get_primary_key_columns() - # get pkey values from table - - #generate delete statement + # Primary key data + primary_keys, primary_key_values = self._get_primary_key_data() + # Generate delete statement delete_statement = self.table.delete().where( tuple_( *(self.table.c[col] for col in primary_keys) ).in_( - #PKEY VALUES FORM TABLE HERE + primary_key_values ) ) + return delete_statement + def _upsert_ignore_processing(self): + from sqlalchemy import tuple_, select + # Primary key data + primary_keys, primary_key_values = self._get_primary_key_data() - def _execute_insert(self, conn, keys, data_iter): - """Execute SQL statement inserting data - - Parameters - ---------- - conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection - keys : list of str - Column names - data_iter : generator of list - Each item contains a list of values to be inserted - """ - data = [dict(zip(keys, row)) for row in data_iter] - conn.execute(self.table.insert(), data) - - def _execute_insert_multi(self, conn, keys, data_iter): - """Alternative to _execute_insert for DBs 
support multivalue INSERT. - - Note: multi-value insert is usually faster for analytics DBs - and tables containing a few columns - but performance degrades quickly with increase of columns. - """ - data = [dict(zip(keys, row)) for row in data_iter] - conn.execute(self.table.insert(data)) - - def _execute_upsert_update(self, conn, keys, data_iter, primary_keys): - """Execute an SQL UPSERT, and in cases of key clashes, - overwrite records in the Database with incoming records. - """ - pass - - def _execute_upsert_ignore(self, conn, keys, data_iter, primary_keys): - """Execute an SQL UPSERT, and in cases of key clashes, - keep records in the Database, and ignore incoming records. - """ - # TODO: DATYPE CHECKING? - - existing_pkey_values = self._database_column_iterator(columns=primary_keys) - - # Creating temp frame accounts for cases where self.frame.index is also to be added - # to database - temp_frame = self._generate_temp_dataframe() + # Fetch matching pkey values from database + columns_to_fetch = [self.table.c[key] for key in primary_keys] - # Only get columns corresponding to primary keys, and index for deletion - incoming_pkey_iterator = zip( - zip(*[temp_frame[col] for col in primary_keys]), - [temp_frame.index] + select_statement = select(columns_to_fetch).where( + tuple_(*columns_to_fetch).in_(primary_key_values) ) - # Turn incoming self.frame data into same format at db read - dict_of_pkey_values = dict(incoming_pkey_iterator) + result = self.pd_sql.execute(select_statement) - # Loop to delete values from self.frame if they already exist in database - for pkey_value in existing_pkey_values: - # Break loop if self.frame is empty - if self.frame.empty: - break - elif pkey_value in dict_of_pkey_values: - self.frame.drop(index=dict_of_pkey_values[pkey_value], inplace=True) + pkeys_from_database = _wrap_result(data=result, columns=primary_keys) - def _database_column_iterator(self, columns): - """ - This method gets all values for specified columns, returning them via - a lazy generator + # Delete rows from self.frame where primary keys match + self.frame = self._get_index_formatted_dataframe() - Parameters - ---------- - self: SQLTable - Table from which data is to be returned - columns: List[str] - Names of columns to be returned + to_be_deleted_mask = self.frame[primary_keys].isin( + pkeys_from_database[primary_keys] + ).all(1) - Returns - ------- - generator Object - """ - from sqlalchemy import select - statement = select([self.table.c[key] for key in columns]) - result = self.pd_sql.execute(statement) - return self.pd_sql._query_iterator(result=result, chunksize=chunksize, columns=columns) + self.frame.drop(self.frame[to_be_deleted_mask].index, inplace=True) - def _get_primary_key_columns(self): + def _get_primary_key_data(self): """ Upsert workflows require knowledge of what is already in the database this method reflects the meta object and gets a list of primary keys Returns ------- - List[str] - list of primary key column names + primary_keys, primary_key_values : Tuple[List[str], Iterable] + - primary_keys : List of primary key column names + - primary_key_values : Iterable of dataframe rows corresponding to primary_key columns """ + # reflect MetaData object and assign contents of db to self.table attribute self.pd_sql.meta.reflect(only=[self.name], views=True) self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) @@ -783,31 +728,69 @@ def _get_primary_key_columns(self): raise ValueError( f"No primary keys found for table {self.name}" ) - return 
primary_keys - def _generate_temp_dataframe(self): + primary_key_values = zip(*[self.frame[key] for key in primary_keys]) + return primary_keys, primary_key_values + + def _execute_insert(self, conn, keys, data_iter): + """Execute SQL statement inserting data + + Parameters + ---------- + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : generator of list + Each item contains a list of values to be inserted """ + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(self.table.insert(), data) + + def _execute_insert_multi(self, conn, keys, data_iter): + """Alternative to _execute_insert for DBs support multivalue INSERT. + + Note: multi-value insert is usually faster for analytics DBs + and tables containing a few columns + but performance degrades quickly with increase of columns. + """ + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(self.table.insert(data)) + + def _get_index_formatted_dataframe(self): + """ + Method that checks whether the dataframe index is also to be added to the + database table. If it is, it takes care of formatting the incoming dataframe + accordingly Returns ------- DataFrame object """ - # Originally from insert_data() method, but needed in more places - # so abstracted, to keep code DRY. + + # Originally this functionality formed the first step of the insert_data() method, + # however it will be useful to have in other places, so to keep code DRY it has been moved here. + if self.index is not None: - temp = self.frame.copy() - temp.index.names = self.index - try: - temp.reset_index(inplace=True) - except ValueError as err: - raise ValueError("duplicate name in index/columns: {0}".format(err)) + # The following check ensures that the method can be called multiple times, + # without the dataframe getting wrongfully formatted + if all(idx in self.frame.columns for idx in self.index): + temp = self.frame + else: + temp = self.frame.copy() + temp.index.names = self.index + try: + temp.reset_index(inplace=True) + except ValueError as err: + raise ValueError("duplicate name in index/columns: {0}".format(err)) else: temp = self.frame return temp def insert_data(self): - temp = self._generate_temp_dataframe() + + temp = self._get_index_formatted_dataframe() + # TODO: column_names by list comprehension? 
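
A toy illustration (plain pandas, not patch code) of the primary_key_values iterable that _get_primary_key_data builds from the incoming frame:

import pandas as pd

frame = pd.DataFrame({"id": ["a1", "b2"], "region": ["eu", "us"], "x": [1, 2]})
primary_keys = ["id", "region"]
primary_key_values = zip(*[frame[key] for key in primary_keys])
print(list(primary_key_values))  # [('a1', 'eu'), ('b2', 'us')]
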
 column_names = list(map(str, temp.columns))
 ncols = len(column_names)
 data_list = [None] * ncols
 blocks = temp._data.blocks
@@ -839,22 +822,28 @@ def insert_data(self):
 return column_names, data_list
 
 def insert(self, chunksize=None, method=None):
+ if self.if_exists == "upsert_ignore":
+ self._upsert_ignore_processing()
+ self._insert(chunksize=chunksize, method=method)
+ elif self.if_exists == "upsert_delete":
+ delete_statement = self._upsert_delete_processing()
+ with self.pd_sql.run_transaction() as trans:
+ trans.execute(delete_statement)
+ self._insert(chunksize=chunksize, method=method)
+ else:
+ self._insert(chunksize=chunksize, method=method)
+
+ def _insert(self, chunksize=None, method=None):
 # set insert method
 if method is None:
 exec_insert = self._execute_insert
 elif method == "multi":
 exec_insert = self._execute_insert_multi
- elif method == "upsert_update":
- raise NotImplementedError
- elif method == "upsert_ignore":
- raise NotImplementedError
 elif callable(method):
 exec_insert = partial(method, self)
 else:
 raise ValueError("Invalid parameter `method`: {}".format(method))
 
- # Need to pre-process data for upsert here
- # for upsert ignore - delete records from self.frame directly
 keys, data_list = self.insert_data()
 
 nrows = len(self.frame)

From 35e0fc40bbe3e44a1c01f86be9f0d5104a6beafa Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Sat, 9 Nov 2019 16:49:35 +0000
Subject: [PATCH 17/75] tidy up repo

---
 .gitignore | 1 +
 .pre-commit-config.yaml | 1 +
 pandas/io/sql.py | 44 +++--
 pandas/io/sql_scratch.py | 48 +++--
 .../V0RT3X4/python_utils/.circleci/config.yml | 35 ----
 .../V0RT3X4/python_utils/.gitignore | 111 -----------
 .../github.com/V0RT3X4/python_utils/README.md | 108 -----------
 .../V0RT3X4/python_utils/aws/README.md | 1 -
 .../V0RT3X4/python_utils/aws/README.rst | 0
 .../V0RT3X4/python_utils/aws/requirements.txt | 5 -
 .../V0RT3X4/python_utils/aws/setup.py | 50 -----
 .../python_utils/aws/tests/__init__.py | 4 -
 .../aws/tests/lambda_types/__init__.py | 0
 .../aws/tests/lambda_types/message_eg.py | 19 --
 .../aws/tests/lambda_types/repeat_eg.py | 19 --
 .../tests/lambda_types/test_lambda_types.py | 89 ---------
 .../s3_client_encryption_tests/__init__.py | 4 -
 .../test_IOAuthDecrypter.py | 22 ---
 .../test_IOAuthTagLength.py | 42 ----
 .../test_IODecrypter.py | 94 ---------
 .../test_decrypt_s3_mime_with_attachment.py | 68 -------
 .../test_decrypt_s3_object.py | 65 -------
 .../test_kms_cipher_provider.py | 39 ----
 .../aws/tests/ses_inbox/__init__.py | 0
 .../tests/ses_inbox/test_get_attachments.py | 0
 .../aws/tests/ses_inbox/test_list_inbox.py | 25 ---
 .../tests/ses_notification_types/__init__.py | 0
 .../ses_notification_types/test_action.py | 16 --
 .../test_lambda_record.py | 32 ---
 .../tests/ses_notification_types/test_mail.py | 85 --------
 .../test_notification.py | 23 ---
 .../ses_notification_types/test_receipt.py | 34 ----
 .../python_utils/aws/tests/utils/__init__.py | 0
 .../utils/nested_data_classes/__init__.py | 0
 .../test_nested_dataclass.py | 36 ----
 .../aws/vortexa_utils/__init__.py | 5 -
 .../aws/vortexa_utils/aws/__init__.py | 4 -
 .../aws/vortexa_utils/aws/lambdr/__init__.py | 5 -
 .../aws/vortexa_utils/aws/lambdr/types.py | 45 -----
 .../aws/vortexa_utils/aws/s3/__init__.py | 0
 .../aws/vortexa_utils/aws/s3/client.py | 50 -----
 .../client_side_encryption/IOAuthDecrypter.py | 40 ----
 .../IOAuthDecrypterTagLength.py | 65 -------
 .../s3/client_side_encryption/IODecrypter.py | 61 ------
 .../s3/client_side_encryption/IONocrypter.py | 38 ----
 .../aws/s3/client_side_encryption/__init__.py | 183 ------------------
.../client_side_encryption/cipher_provider.py | 17 -- .../aws/s3/client_side_encryption/client.py | 103 ---------- .../decrypt_handeler.py | 121 ------------ .../aws/s3/client_side_encryption/get.py | 75 ------- .../kms_cipher_provider.py | 61 ------ .../aws/vortexa_utils/aws/ses/__init__.py | 4 - .../aws/ses/application_mapper.py | 102 ---------- .../aws/vortexa_utils/aws/ses/attachments.py | 15 -- .../aws/vortexa_utils/aws/ses/inbox.py | 141 -------------- .../aws/ses/notification/__init__.py | 0 .../aws/ses/notification/types/__init__.py | 5 - .../aws/ses/notification/types/action.py | 56 ------ .../ses/notification/types/lambda_record.py | 18 -- .../aws/ses/notification/types/mail.py | 44 ----- .../ses/notification/types/notification.py | 29 --- .../aws/ses/notification/types/receipt.py | 65 ------- .../aws/ses/notification/types/verdicts.py | 43 ---- .../aws/vortexa_utils/aws/utils/__init__.py | 4 - .../aws/utils/dataclasses/__init__.py | 1 - .../aws/utils/dataclasses/nested.py | 20 -- .../collections/tests/__init__.py | 0 .../tests/collections/types/__init__.py | 0 .../types/test_instance_caching_abc.py | 130 ------------- .../vortexa_utils/collections/__inti__.py | 0 .../collections/types/__init__.py | 0 .../collections/types/instance_caching_abc.py | 45 ----- .../V0RT3X4/python_utils/database/README.md | 21 -- .../V0RT3X4/python_utils/database/README.rst | 28 --- .../V0RT3X4/python_utils/database/setup.py | 40 ---- .../python_utils/database/tests/__init__.py | 0 .../database/tests/test_database_factory.py | 16 -- .../database/tests/test_querey_cache.py | 21 -- .../database/vortexa_utils/__init__.py | 5 - .../vortexa_utils/database/__init__.py | 7 - .../vortexa_utils/database/database.py | 118 ----------- .../database/default_factories.py | 20 -- .../vortexa_utils/database/query_cache.py | 77 -------- .../database/vortexa_utils/database/utils.py | 62 ------ .../V0RT3X4/python_utils/deployment/setup.py | 20 -- .../deployment/vortexa_utils/__init__.py | 5 - .../vortexa_utils/portainer/Readme.md | 1 - .../vortexa_utils/portainer/__init__.py | 8 - .../deployment/vortexa_utils/portainer/api.py | 56 ------ .../vortexa_utils/portainer/stacks.py | 61 ------ .../vortexa_utils/portainer/update_stack.py | 90 --------- .../python_utils/docker/pandas/Dockerfile | 25 --- .../V0RT3X4/python_utils/general/README.rst | 0 .../V0RT3X4/python_utils/general/setup.py | 40 ---- .../general/vortexa_utils/__init__.py | 5 - .../general/vortexa_utils/general/__init__.py | 0 .../python_utils/general/vortexa_utils/git.py | 14 -- .../V0RT3X4/python_utils/logging/README.md | 55 ------ .../V0RT3X4/python_utils/logging/setup.py | 38 ---- .../logging/vortexa_utils/__init__.py | 1 - .../logging/vortexa_utils/logging/__init__.py | 1 - .../logging/exception_decorator.py | 12 -- .../vortexa_utils/logging/resources.py | 38 ---- .../utils/vortexa_utils/utils/__init__.py | 0 .../utils/byte_stream_spliter.py | 31 --- .../utils/sockets/socket_client.py | 24 --- .../utils/sockets/socket_server.py | 17 -- .../V0RT3X4/python_utils/versioning/VERSION | 1 - .../V0RT3X4/python_utils/versioning/setup.py | 34 ---- .../python_utils/versioning/tests/__init__.py | 0 .../versioning/tests/test_versioner.py | 47 ----- .../versioning/vortexa_utils/__init__.py | 5 - .../vortexa_utils/versioning/__init__.py | 10 - .../vortexa_utils/versioning/__main__.py | 9 - .../vortexa_utils/versioning/cli.py | 46 ----- .../vortexa_utils/versioning/utils.py | 22 --- .../vortexa_utils/versioning/versioner.py | 99 ---------- 
.../python_utils/youve_got_mail/README.md | 0 .../python_utils/youve_got_mail/README.rst | 0 .../youve_got_mail/requirements.txt | 2 - .../python_utils/youve_got_mail/setup.py | 48 ----- .../youve_got_mail/tests/__init__.py | 0 .../youve_got_mail/vortexa_utils/__init__.py | 0 .../vortexa_utils/youve_got_mail.py | 43 ---- 124 files changed, 48 insertions(+), 3990 deletions(-) delete mode 100644 vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml delete mode 100644 vendor/github.com/V0RT3X4/python_utils/.gitignore delete mode 100644 vendor/github.com/V0RT3X4/python_utils/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py delete mode 100644 
vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/application_mapper.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py delete mode 100644 
vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py delete mode 
100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/VERSION delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py diff --git a/.gitignore b/.gitignore index 919e1b9621a70..d17a87294796b 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,4 @@ doc/source/savefig/ # pyenv files .python-version +vendor/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f98273a336cf..e79383238dc7e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,7 @@ repos: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions] + exclude: ^pandas/io/sql_scratch.py$ - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.20 hooks: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2997ac95b8adb..e1630fbc9a6f4 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -496,7 +496,13 @@ def to_sql( .. 
versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append", "upsert_ignore", "upsert_delete"): #TODO: add upserts + if if_exists not in ( + "fail", + "replace", + "append", + "upsert_ignore", + "upsert_delete", + ): # TODO: add upserts raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -653,7 +659,6 @@ def create(self): elif self.if_exists == "upsert_delete": pass elif self.if_exists == "upsert_ignore": - # clear rows out of dataframe pass else: raise ValueError( @@ -664,20 +669,18 @@ def create(self): def _upsert_delete_processing(self): from sqlalchemy import tuple_ + # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() # Generate delete statement delete_statement = self.table.delete().where( - tuple_( - *(self.table.c[col] for col in primary_keys) - ).in_( - primary_key_values - ) + tuple_(*(self.table.c[col] for col in primary_keys)).in_(primary_key_values) ) return delete_statement def _upsert_ignore_processing(self): from sqlalchemy import tuple_, select + # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() @@ -695,9 +698,9 @@ def _upsert_ignore_processing(self): # Delete rows from self.frame where primary keys match self.frame = self._get_index_formatted_dataframe() - to_be_deleted_mask = self.frame[primary_keys].isin( - pkeys_from_database[primary_keys] - ).all(1) + to_be_deleted_mask = ( + self.frame[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) + ) self.frame.drop(self.frame[to_be_deleted_mask].index, inplace=True) @@ -710,7 +713,8 @@ def _get_primary_key_data(self): ------- primary_keys, primary_key_values : Tuple[List[str], Iterable] - primary_keys : List of primary key column names - - primary_key_values : Iterable of dataframe rows corresponding to primary_key columns + - primary_key_values : Iterable of dataframe rows + corresponding to primary_key columns """ # reflect MetaData object and assign contents of db to self.table attribute @@ -718,16 +722,16 @@ def _get_primary_key_data(self): self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) primary_keys = [ - str(primary_key.name) for primary_key in self.table.primary_key.columns.values() + str(primary_key.name) + for primary_key in self.table.primary_key.columns.values() ] - # For the time being, this method is defensive and will break if no pkeys are found - # If desired this default behaviour could be changed so that in cases where no pkeys - # are found, it could default to a normal insert + # For the time being, this method is defensive and will break if + # no pkeys are found. If desired this default behaviour could be + # changed so that in cases where no pkeys are found, + # it could default to a normal insert if len(primary_keys) == 0: - raise ValueError( - f"No primary keys found for table {self.name}" - ) + raise ValueError(f"No primary keys found for table {self.name}") primary_key_values = zip(*[self.frame[key] for key in primary_keys]) return primary_keys, primary_key_values @@ -767,8 +771,8 @@ def _get_index_formatted_dataframe(self): DataFrame object """ - # Originally this functionality formed the first step of the insert_data() method, - # however it will be useful to have in other places, so to keep code DRY it has been moved here. + # Originally this functionality formed the first step of the insert_data method. + # It will be useful to have in other places, so moved here to keep code DRY. 
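
The membership mask reformatted above can be exercised standalone. A toy illustration (pandas only; note that DataFrame.isin with a DataFrame argument aligns on index as well as columns, which this toy layout satisfies):

import pandas as pd

primary_keys = ["id"]
frame = pd.DataFrame({"id": ["a1", "b2"], "name": ["x", "y"]})
pkeys_from_database = pd.DataFrame({"id": ["a1"]})

# True where the incoming row's primary key already exists in the db
to_be_deleted_mask = (
    frame[primary_keys].isin(pkeys_from_database[primary_keys]).all(1)
)
frame = frame.drop(frame[to_be_deleted_mask].index)
# only the ('b2', 'y') row survives
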
if self.index is not None: # The following check ensures that the method can be called multiple times, diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index 63c52ebdd432d..e0c683813618c 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -1,5 +1,13 @@ -from sqlalchemy import Table, select +### REPRODUCIBLE SQLTable Creation:table +import sqlalchemy +from sqlalchemy import Table, create_engine, select from sqlalchemy.engine.base import Connection +from sqlalchemy.sql import tuple_ + +import pandas as pd +from vortexa_utils.database import ProdFactory + +from pandas.io.sql import SQLDatabase, SQLTable def get_pkey(table: Table): @@ -21,6 +29,7 @@ def pkey_generator(table, engine): for row in conn.execute(statement): yield row + # Leaves connection open def pkey_results_proxy(table, engine): pkeys = get_pkey(table) @@ -48,54 +57,41 @@ def get_pkey_values(table: SQLTable): statement = select([table.table.c[name] for name in pkeys]) table.pd_sql.execute(statement) + def generate_mask(df, dictionary): return [df[key] == value for key, value in dictionary.items()] + def generate_mask_of_masks(list_of_masks): return pd.concat([mask for mask in list_of_masks], axis=1).all(1) -### REPRODUCIBLE SQLTable Creation:table -import sqlalchemy - -engine = sqlalchemy.create_engine('enter string here') +engine = sqlalchemy.create_engine("enter string here") meta = MetaData(engine) -table_name = 'charterers' # or wtv +table_name = "charterers" # or wtv meta.reflect(only=[table_name], views=True) db = SQLDatabase(engine, meta=meta) table = SQLTable(table_name, db, index=None, schema=None) -from vortexa_utils.database import ProdFactory -from sqlalchemy import create_engine -import pandas as pd -from pandas.io.sql import SQLTable, SQLDatabase - engine_v = ProdFactory().engine() -engine = create_engine('sqlite:///:memory:') -table_name = 'charterers' +engine = create_engine("sqlite:///:memory:") +table_name = "charterers" df = pd.read_sql_table(table_name, engine_v) df_test = df.head().copy() -df_test['name'] = df_test['name'].apply(lambda x: x + '_TEST') -engine.execute("create table charterers(id text primary key, name text, energy integer)") -df.to_sql(table_name, index=False, if_exists='append', con=engine) +df_test["name"] = df_test["name"].apply(lambda x: x + "_TEST") +engine.execute( + "create table charterers(id text primary key, name text, energy integer)" +) +df.to_sql(table_name, index=False, if_exists="append", con=engine) db = SQLDatabase(engine, schema=None, meta=None) new_data = SQLTable(table_name, db, frame=df_test, index=False) - -from sqlalchemy.sql import tuple_ - - def delete_matching_keys(sql_table, key_columns, value_iter): delete_expression = sql_table.table.delete().where( - tuple_( - *(table.table.c[col] for col in key_columns) - ).in_( - list(zip(value_iter)) - ) + tuple_(*(table.table.c[col] for col in key_columns)).in_(list(zip(value_iter))) ) with sql_table.pd_sql.run_transaction() as conn: conn.execute(delete_expression) - diff --git a/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml b/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml deleted file mode 100644 index c44edbe3b610c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -jobs: - build: - working_directory: ~/project - docker: - - image: circleci/python:3.7 - steps: - - checkout - - restore_cache: &restore_cache - keys: - - v1-{{ .Branch }}- - # - run: - # name: "Install Python3" - # command: | - 
# apk add --no-cache \ - # python3 \ - # libc-dev \ - # gcc - - run: - name: "Test aws" - working_directory: ~/project/aws - command: | - python3.7 -m venv .venv; - . .venv/bin/activate - pip install -U pip - pip install -U -r requirements.txt - nose2 - python setup.py test - - save_cache: &save_cache - key: v1-{{ .Branch }}-{{ epoch }} - paths: - - ~/project/aws/.venv - - ~/project/database/.venv - - ~/project/deployment/.venv - - ~/.cache/pip diff --git a/vendor/github.com/V0RT3X4/python_utils/.gitignore b/vendor/github.com/V0RT3X4/python_utils/.gitignore deleted file mode 100644 index 2c06c5a32bbdb..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/.gitignore +++ /dev/null @@ -1,111 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ - -# VSCODE -.vscode - -.idea -*.iml -scratch.py diff --git a/vendor/github.com/V0RT3X4/python_utils/README.md b/vendor/github.com/V0RT3X4/python_utils/README.md deleted file mode 100644 index 028c6e96fb015..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# python_utils [![CircleCI](https://circleci.com/gh/V0RT3X4/python_utils.svg?style=svg&circle-token=30fa8fb22fa45a521a5d728e9accde63c242c2b4)](https://circleci.com/gh/V0RT3X4/python_utils) -Python utilities and helper functions/classes/modules - -## Sub Packages - -- [AWS](#aws) -- [Database](#database) -- [Deployment](#deployment) - -## Installation - -Installation is done by using [submodule vendoring](#vendoring). -Vendor the package into your project as [below](#vendoring) then you can install -with -``` -pip install vendor/github.com/V0RT3X4/python_utils/ -``` -or -``` -echo vendor/github.com/V0RT3X4/python_utils/ >> requirements.txt -pip install -r requirements.txt -``` - -## Aws - -Helper modules for `s3` client side encryption. `ses` email processing -(s3 as an inbox). `lambda` function handeler types. - -## Database - -Data base connection helpers to get you a -[`SQLAlchemy`](https://www.sqlalchemy.org/) connection [`Engine`](https://docs.sqlalchemy.org/en/latest/core/engines_connections.html) -to an RDS or RedShift database using -`aws secretsmanager` for managing connection credentials and rotation, and with -SSL encryption. 
- -## Deployment - -Custom Deployment Jazz - -## Installation - Vendoring the subtree -To install the scripts into your project it is recommended to vendor this module as a `git subtree` as opposed to a `git submodule`. You will have a version of this code in your repo, and you can easily update and push changes back upstream. - -To make your life easier install [git-vendor](https://github.com/brettlangdon/git-vendor) - -Then you can vendor the module into your repo and run installation scripts: -``` -git vendor add python_utils git@github.com:V0RT3X4/python_utils.git master -``` - -finally you can install the modules you want -``` -pip install vendor/github.com/V0RT3X4/python_utils/ -``` - -to update the reference -``` -git vendor update python_utils master -``` - -## AS Submodule - -In the project directory -``` -git submodule add \ - --name github.com/V0RT3X4/python_utils \ - git@github.com:V0RT3X4/python_utils.git \ - vendor/github.com/V0RT3X4/python_utils -``` - -Subsequently when you check out the source code (say in -[circleCI](https://circleci.com) or locally). -``` -git clone git@github.com:/V0RT3X4/.git -cd -git submodule init -git submodule update --remote -``` - -finally you can install the modules you want -``` -pip install vendor/github.com/V0RT3X4/python_utils/ -``` - -## Contributing -To contribute and push changes back upstream add this repo as a remote. -``` -git remote add -f python_utils git@github.com:V0RT3X4/python_utils.git -``` -Push changes in the sub tree -``` -git subtree push --prefix=vendor/github.com/V0RT3X4/python_utils python_utils some_branch -``` - -## [git-vendor](https://github.com/brettlangdon/git-vendor) installation - -``` -cd $(mktemp -d) && \ -git clone https://github.com/brettlangdon/git-vendor &> /dev/null && \ -cd git-vendor && \ -sudo make install -``` - -or - -``` -brew install git-vendor -``` diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/README.md b/vendor/github.com/V0RT3X4/python_utils/aws/README.md deleted file mode 100644 index f9e28102b5fbf..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/README.md +++ /dev/null @@ -1 +0,0 @@ -# Vortexa AWS Python Utils diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/README.rst b/vendor/github.com/V0RT3X4/python_utils/aws/README.rst deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt b/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt deleted file mode 100644 index 34a10a130c16c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -boto3 -pycryptodomex -nose2 -pandas -logzero diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/setup.py b/vendor/github.com/V0RT3X4/python_utils/aws/setup.py deleted file mode 100644 index 1e69b1cb89ad6..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:16:54+00:00 -import os -import io -from setuptools import setup, find_packages - -namespace = 'vortexa_utils' -name = 'vortexa_utils_aws' -version = '1' -description = 'Vortexa AWS utils helper library', - -dependencies = [ - 'boto3', - 'pycryptodomex' -] - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, 'README.rst') -with io.open(readme_filename, encoding='utf-8') as readme_file: - 
readme = readme_file.read() - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name=name, - version=version, - description=description, - long_description=readme, - - author='Richard Mathie', - author_email='richard.mathie@vortexa.com', - - zip_safe=False, - test_suite='nose2.collector.collector', - tests_require=['nose2', 'pandas'], - - packages=packages, - install_requires=dependencies, - extras_require={ - 'pandas': ['pandas'] - } -) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py deleted file mode 100644 index b0f42e4b71cc9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T18:10:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-11-28T18:10:18+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py deleted file mode 100644 index 9cf39d5a99d58..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py +++ /dev/null @@ -1,19 +0,0 @@ -""" Example #1 """ -import os -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext - -MSG_TEMPLATE: str = os.environ.get('MSG_TEMPLATE') or 'Hello {} {}!' -STAGE: str = os.environ.get('stage') or 'dev' - - -def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: - print('Received event {} for stage {}'.format(event, STAGE)) - first_name: str = event.get('first_name') # optional - last_name: str = event.get('last_name') # optional - return { - 'message': get_message(first_name, last_name), - } - - -def get_message(first_name: str = 'John', last_name: str = 'Smith'): - return MSG_TEMPLATE.format(first_name, last_name) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py deleted file mode 100644 index 95d5331e8f5f9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py +++ /dev/null @@ -1,19 +0,0 @@ -""" Example #2 """ -import os -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext - -N: int = int(os.environ.get('N') or 10) -STAGE: str = os.environ.get('stage') or 'dev' - - -def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: - print('Received event {} for stage {}'.format(event, STAGE)) - input: str = event['input'] # required - return { - 'output': get_output(input, N), - } - - -def get_output(input: str, num: int): - """ Return the input string repeated N times. 
""" - return input * num diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py deleted file mode 100644 index 0cdad796b76dd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py +++ /dev/null @@ -1,89 +0,0 @@ -import unittest -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext -from .message_eg import handler as handler_message, get_message -from .repeat_eg import handler as handler_repeat, get_output - - -class TestMessageFunction(unittest.TestCase): - - def setUp(self): - self.context = LambdaContext() - - def test_handler(self) -> None: - event: LambdaDict = { - "first_name": "Alex", - "last_name": "Casalboni", - } - result = handler_message(event, self.context) - self.assertIn('message', result) - - def test_handler_empty(self) -> None: - event: LambdaDict = {} - result = handler_message(event, self.context) - self.assertIn('message', result) - - def test_message_default(self) -> None: - msg = get_message() - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('John', msg) - self.assertIn('Smith', msg) - self.assertTrue(msg.endswith('!')) - - def test_message_firstname(self) -> None: - msg = get_message(first_name='Charlie') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('Charlie', msg) - self.assertIn('Smith', msg) - self.assertTrue(msg.endswith('!')) - - def test_message_lastname(self) -> None: - msg = get_message(last_name='Brown') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('John', msg) - self.assertIn('Brown', msg) - self.assertTrue(msg.endswith('!')) - - def test_message(self) -> None: - msg = get_message(first_name='Charlie', last_name='Brown') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('Charlie', msg) - self.assertIn('Brown', msg) - self.assertTrue(msg.endswith('!')) - - -class TestRepeatFunction(unittest.TestCase): - - def setUp(self): - self.context = LambdaContext() - - def test_handler(self) -> None: - event: LambdaDict = { - "input": "NaN", - } - result = handler_repeat(event, self.context) - self.assertIn('output', result) - self.assertEqual(30, len(result['output'])) - - def test_handler_empty(self) -> None: - event: LambdaDict = {} - with self.assertRaises(KeyError): - handler_repeat(event, self.context) - - def test_repeat_empty_string(self) -> None: - output = get_output('', 100) - self.assertIsInstance(output, str) - self.assertEqual(0, len(output)) - - def test_repeat_zero(self) -> None: - output = get_output('hello', 0) - self.assertIsInstance(output, str) - self.assertEqual(0, len(output)) - - def test_repeat(self) -> None: - output = get_output('hello', 10) - self.assertIsInstance(output, str) - self.assertEqual(50, len(output)) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py deleted file mode 100644 index 2e9b828ec304c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T18:10:35+00:00 -# @Last modified by: richard -# @Last modified time: 2018-11-28T18:10:36+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py 
b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py
deleted file mode 100644
index bf64d13548ac0..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T18:11:28+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T13:06:33+00:00
-from vortexa_utils.aws.s3.client_side_encryption import IOAuthDecrypter
-from nose2.tools import params
-from .test_IODecrypter import DummyChunksIO, IODecrypterTestCase
-
-
-class IOAuthDecrypter(IODecrypterTestCase):
-    io_decrypter_class = IOAuthDecrypter.IOAuthDecrypter
-
-    def get_decrypter(self, cypher, io, content_length):
-        return self.io_decrypter_class(cypher, io, content_length)
-
-    def get_io(self, content_length):
-        tag_length = 128
-        return DummyChunksIO(content_length + tag_length)
-
-    def invalid_decryption(self, content_length):
-        with self.assertRaises(ValueError):
-            super().invalid_decryption(content_length)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py
deleted file mode 100644
index 51685c22d13bd..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import unittest
-import io
-from vortexa_utils.aws.s3.client_side_encryption.IOAuthDecrypterTagLength \
-    import StreamChunker
-from nose2.tools import params
-
-
-class StreamChunkerTestCase(unittest.TestCase):
-
-    def get_chunker(self, io, tag_length):
-        return StreamChunker(io, tag_length)
-
-    def test_tagged(self):
-        fixture = io.BytesIO(b'1234567890')
-        chunker = StreamChunker(fixture, 3)
-        bytes = chunker.read()
-        self.assertEqual(chunker.tag, b'890')
-        self.assertEqual(bytes, b'1234567')
-
-    @params(*range(1, 11))
-    def test_read_in_chunks(self, chunk):
-        bytes = b'1234567890'
-        fixture = io.BytesIO(bytes)
-        tag_length = 3
-        chunker = StreamChunker(fixture, tag_length)
-        result = []
-        index = 0
-        while True:
-            byte = chunker.read(chunk)
-            if byte == b'':
-                break
-            result.append(byte)
-            self.assertEqual(bytes[index:index + len(byte)], byte)
-            index += len(byte)
-        print(result)
-        self.assertEqual(bytes[-tag_length:], chunker.tag)
-        self.assertEqual(b''.join(result), bytes[:-tag_length])
-        # check that subsequent reads return nothing and the tag is correct
-        for i in range(10):
-            byte = chunker.read(chunk)
-            self.assertEqual(b'', byte)
-            self.assertEqual(bytes[-tag_length:], chunker.tag)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py
deleted file mode 100644
index cadab6acdaeae..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T18:11:28+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T13:07:14+00:00
-from io import IOBase
-
-from vortexa_utils.aws.s3.client_side_encryption.IODecrypter import IODecrypter
-import unittest
-from nose2.tools import params
-
-
-class DummyCipher(object):
-    def __init__(self, valid: bool = True):
-        self.valid = valid
-
-    def decrypt(self, bytes):
-        return bytes
-
-    def verify(self, tag):
-        if
not self.valid: - raise ValueError("MAC check failed") - pass - - -class DummyChunksIO(IOBase): - _DEFAULT_CHUNK_SIZE = 1024 - - def __init__(self, size): - self.bytes_read = 0 - self.size = size - - def read(self, chunk=-1): - if chunk < 0: - chunk = self.size - self.bytes_read - else: - chunk = min(chunk, abs(self.size - self.bytes_read)) - self.bytes_read += chunk - return b' ' * chunk - - def __iter__(self): - """Return an iterator to yield 1k chunks from the raw stream. - """ - return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) - - def iter_chunks(self, chunk_size=_DEFAULT_CHUNK_SIZE): - """Return an iterator to yield chunks of chunk_size bytes from the raw - stream. - """ - while True: - bytes = self.read(chunk_size) - if bytes == b'': - break - yield bytes - - def close(self): - pass - - def readable(self): - return True - - def seekable(self): - return False - - def writable(self): - return False - - -class IODecrypterTestCase(unittest.TestCase): - io_decrypter_class = IODecrypter - - def get_decrypter(self, cypher, io, content_length): - return self.io_decrypter_class(cypher, io) - - def get_io(self, content_length): - return DummyChunksIO(content_length) - - def make_decrypter(self, content_length, valid=True): - io = DummyChunksIO(content_length) - cypher = DummyCipher(valid=valid) - return self.get_decrypter(cypher, io, content_length) - - @params(123, 1024, 1024*3, 1024*3+123, 1, 0) - def test_read(self, content_length): - with self.make_decrypter(content_length) as decrypter: - bytes = list(decrypter) - self.assertEqual(b''.join(bytes), b' ' * content_length) - - @params(123, 1024, 1024*3, 1024*3+123, 1, 0) - def test_invalid(self, content_length): - self.invalid_decryption(content_length) - - def invalid_decryption(self, content_length): - with self.make_decrypter(content_length, valid=False) as decrypter: - list(decrypter) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py deleted file mode 100644 index 0be487412d5c2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py +++ /dev/null @@ -1,68 +0,0 @@ -# @Author: richard -# @Date: 2018-12-06T17:26:08+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T19:36:16+00:00 -# cd aws/vortexa_utils/ -# import aws.s3.client_side_encryption.client as client -import logging -import vortexa_utils.aws.s3.client_side_encryption.client as client -import io -import email.parser -from email import policy -from email.iterators import _structure -import base64 -from nose2.tools.such import helper - -import pandas as pd - -logger = logging.getLogger(__name__) - -Bucket = 'ops-data.incoming-emails' -Key = 'incoming_email/akrk0l8sq4lm7qkgj8hpurfshpnj8frgqpqe9mg1' -Key = 'incoming_email/8ej2ldqnsmako2tgsbdpqg8tdi6tdnduoscojdo1' - - -def test_get_attachment(): - cl = client.Client() - parser = email.parser.BytesParser(policy=policy.default) - with cl.get_object(Bucket, Key) as io: - parsed = parser.parse(io) - _structure(parsed) - - # with open("/home/richard/an_email", 'wb') as f: - # for b in io: - # f.write(b) - # - # atts = list(parsed.iter_attachments()) - # [a.get_filename() for a in atts] - # [a.get_content_type() for a in atts] - # att = atts[2] - # att - # att.get_content_type() - # pd.read_excel(io.BytesIO(att.get_content())) - - target = 
parsed['to'] - source = parsed['from'] - helper.assertEqual(target, 'test@opsdata.vortexa.com') - helper.assertEqual(source, 'Richard Mathie ') - - parsed['subject'] - - for part in parsed.walk(): - print(part.get_content_type()) - att = parsed.get_payload() - att[0].get_content_type() - att[0].get_payload()[1].get_payload() - - logger.debug('\nwalking message') - for part in parsed.walk(): - content_type = part.get_content_type() - if content_type.startswith('text'): - logger.debug(content_type) - payload = part.get_payload() - if content_type == 'text/csv': - csv = base64.decodebytes(payload.encode('utf-8')) - for line in csv.splitlines(): - logger.debug(line) - else: - logger.debug('\n%s', payload) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py deleted file mode 100644 index a33346502b0a2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py +++ /dev/null @@ -1,65 +0,0 @@ -# @Author: richard -# @Date: 2018-12-06T13:27:47+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T17:24:43+00:00 -import logging -import sys -# cd aws/vortexa_utils/ -# import aws.s3.client_side_encryption.client as client -import vortexa_utils.aws.s3.client_side_encryption.client as client -import email.parser -from nose2.tools.such import helper - - -logger = logging.getLogger(__name__) - -Bucket = 'ops-data.incoming-emails' -Key = 'incoming_email/4pnlhtml86pobumjn9d59mbkcq3to1i43sjbd201' - - -def test_get_obj(): - self = client.Client() - location_info = self.s3.get_bucket_location(Bucket=Bucket) - logger.info('location %s', location_info) - - obj = self.s3.get_object(Bucket=Bucket, Key=Key) - handeler = client.DecryptHandeler(obj, self) - envelop = handeler.envelope_v2(handeler.metadata) - cipher = self.cipher_provider.decryptor(envelop) - assert handeler.auth_tag() - io = handeler.decrypt_auth(cipher) - - bytes = [] - while True: - byte = io.read(1024) - if byte == b'': - break - logger.info("Bytes Read %s/%s", io.bytes_read, io.content_length) - logger.debug("Bytes %s", byte) - bytes.append(byte) - io.verify() - io.close() - # logger.info('bytes %s', str(bytes)) - - -def test_get_obj_io(): - cl = client.Client() - with cl.get_object(Bucket, Key) as io: - list(io) - - -def test_get_obj_mime(): - cl = client.Client() - parser = email.parser.BytesParser() - with cl.get_object(Bucket, Key) as io: - parsed = parser.parse(io) - - target = parsed['to'] - source = parsed['from'] - helper.assertEqual(target, 'test@opsdata.vortexa.com') - helper.assertEqual(source, 'Richard Mathie ') - - logger.info('\twalking message') - for part in parsed.walk(): - if part.get_content_type().startswith('text'): - logger.info('\t%s', part.get_payload()) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py deleted file mode 100644 index 7da39f7a34166..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py +++ /dev/null @@ -1,39 +0,0 @@ -# @Author: richard -# @Date: 2018-12-05T16:23:13+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-05T19:43:28+00:00 -import unittest -from vortexa_utils.aws.s3.client_side_encryption import 
kms_cipher_provider -import logging - - -logger = logging.getLogger(__name__) - - -def log_bytes(*bytes): - logger.info(f' bytes: {bytes}') - - -class KMSCipherProviderTest(unittest.TestCase): - test_key_id = 'alias/python_utils_test_key' - - def get_cipher(self): - return kms_cipher_provider.KMSCipherProvider(self.test_key_id) - - def test_encrypt(self): - envelope, cipher = self.get_cipher().encryptor() - plaintext = b"The quick brown fox jumped over the lazy dog" - self.plaintext = plaintext - ciphertext, tag = cipher.encrypt_and_digest(plaintext) - log_bytes(ciphertext, tag) - self.assertNotEqual(ciphertext, plaintext) - package = (envelope, ciphertext, tag) - return package - - def test_decrypt(self): - envelope, ciphertext, tag = self.test_encrypt() - cipher = kms_cipher_provider.KMSCipherProvider().decryptor(envelope) - plaintext = cipher.decrypt(ciphertext) - log_bytes(ciphertext, tag, plaintext) - self.assertEqual(plaintext, self.plaintext) - cipher.verify(tag) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py deleted file mode 100644 index a8ff2a0bd81ee..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py +++ /dev/null @@ -1,25 +0,0 @@ -# cd aws/vortexa_utils -# cd .. 
-from typing import Iterable -from vortexa_utils.aws.ses.inbox import Inbox -from email.message import EmailMessage -from itertools import islice - - -Path = 'incoming_email/' - -inbox = Inbox(default_bucket='ops-data.incoming-emails') - - -def test_list_inbox(): - inbox = Inbox(default_bucket='ops-data.incoming-emails') - emails: Iterable[EmailMessage] = islice( - inbox.list_emails(Path=Path), - 10 - ) - - for email in emails: - # print(email.as_string()) - attachments = list(email.iter_attachments()) - print(list(a.get_filename() for a in attachments)) - print(list(a.get_content_type() for a in attachments)) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py deleted file mode 100644 index 1110fda3de888..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py +++ /dev/null @@ -1,16 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Action -from json import loads - - -action_json_sns = """ -{ - "type": "SNS", - "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" -} -""" - - -def test_sns_action(): - action = Action(**loads(action_json_sns)) - assert action.type == "SNS" - assert action.topicArn == "arn:aws:sns:us-east-1:012345678912:example-topic" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py deleted file mode 100644 index c489d6cd84e42..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py +++ /dev/null @@ -1,32 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Record -from json import loads -from .test_mail import mail_json -from .test_receipt import receipt_json - - -ses = dict( - receipt=receipt_json, - mail=mail_json -) - - -record_json = loads(""" -{ - "eventSource": "aws:ses", - "eventVersion": "1.0", - "ses": { - "receipt": { - }, - "mail": { - } - } -} -""") - -record_json.update(ses=ses) - - -def test_record(): - record = Record(**record_json) - record.ses - assert record.eventSource == "aws:ses" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py deleted file mode 100644 index bb558b3639e48..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py +++ /dev/null @@ -1,85 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Mail -from json import loads - -mail_json = loads(""" -{ -"timestamp": "2015-09-11T20:32:33.936Z", -"source": "61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com", -"messageId": "d6iitobk75ur44p8kdnnp7g2n800", -"destination": [ - "recipient@example.com" -], -"headersTruncated": false, -"headers": [ - { - "name": "Return-Path", - "value": "<0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com>" - }, - { - "name": "Received", - "value": "from a9-183.smtp-out.amazonses.com (a9-183.smtp-out.amazonses.com [54.240.9.183]) by inbound-smtp.us-east-1.amazonaws.com with SMTP id 
d6iitobk75ur44p8kdnnp7g2n800 for recipient@example.com; Fri, 11 Sep 2015 20:32:33 +0000 (UTC)" - }, - { - "name": "DKIM-Signature", - "value": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=ug7nbtf4gccmlpwj322ax3p6ow6yfsug; d=amazonses.com; t=1442003552; h=From:To:Subject:MIME-Version:Content-Type:Content-Transfer-Encoding:Date:Message-ID:Feedback-ID; bh=DWr3IOmYWoXCA9ARqGC/UaODfghffiwFNRIb2Mckyt4=; b=p4ukUDSFqhqiub+zPR0DW1kp7oJZakrzupr6LBe6sUuvqpBkig56UzUwc29rFbJF hlX3Ov7DeYVNoN38stqwsF8ivcajXpQsXRC1cW9z8x875J041rClAjV7EGbLmudVpPX 4hHst1XPyX5wmgdHIhmUuh8oZKpVqGi6bHGzzf7g=" - }, - { - "name": "From", - "value": "sender@example.com" - }, - { - "name": "To", - "value": "recipient@example.com" - }, - { - "name": "Subject", - "value": "Example subject" - }, - { - "name": "MIME-Version", - "value": "1.0" - }, - { - "name": "Content-Type", - "value": "text/plain; charset=UTF-8" - }, - { - "name": "Content-Transfer-Encoding", - "value": "7bit" - }, - { - "name": "Date", - "value": "Fri, 11 Sep 2015 20:32:32 +0000" - }, - { - "name": "Message-ID", - "value": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>" - }, - { - "name": "X-SES-Outgoing", - "value": "2015.09.11-54.240.9.183" - }, - { - "name": "Feedback-ID", - "value": "1.us-east-1.Krv2FKpFdWV+KUYw3Qd6wcpPJ4Sv/pOPpEPSHn2u2o4=:AmazonSES" - } -], -"commonHeaders": { - "returnPath": "0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com", - "from": [ - "sender@example.com" - ], - "date": "Fri, 11 Sep 2015 20:32:32 +0000", - "to": [ - "recipient@example.com" - ], - "messageId": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>", - "subject": "Example subject" -} -} -""") - - -def test_init(): - mail = Mail(**mail_json) - mail.headers diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py deleted file mode 100644 index 56884ad7463dd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py +++ /dev/null @@ -1,23 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Notification -from json import loads -from .test_mail import mail_json -from .test_action import action_json_sns -from .test_receipt import receipt_json - - -nodification_json = loads(""" -{ -"notificationType": "Received", -"content": "blarblarblar" -} -""" -) - -nodification_json.update( - mail=mail_json, - receipt=receipt_json -) - - -def test_init(): - Notification(**nodification_json) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py deleted file mode 100644 index e41ea7f8ce24d..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py +++ /dev/null @@ -1,34 +0,0 @@ -from json import loads -from vortexa_utils.aws.ses.notification.types import Receipt - - -receipt_json = loads(""" -{ -"timestamp": "2015-09-11T20:32:33.936Z", -"processingTimeMillis": 222, -"recipients": [ - "recipient@example.com" -], -"spamVerdict": { - "status": "PASS" -}, -"virusVerdict": { - "status": "PASS" -}, -"spfVerdict": { - "status": "PASS" -}, -"dkimVerdict": { - "status": "PASS" -}, -"action": { - "type": "SNS", - "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" -} -} -""") - - -def test_receipt(): - receipt = Receipt(**receipt_json) - receipt.dkimVerdict.status == 
"PASS" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py deleted file mode 100644 index e15dffd75cc4d..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py +++ /dev/null @@ -1,36 +0,0 @@ -from dataclasses import dataclass -# cd vortexa_utils/ -# from aws.utils.dataclasses import nested_dataclass -from vortexa_utils.aws.utils.dataclasses import nested_dataclass - - -@dataclass -class Foo: - a: str - b: int - - -@nested_dataclass -class Bar: - foo: Foo - baz: str - - -@nested_dataclass -class Bill: - bar: Bar - - -def test_init_class(): - data = dict( - bar=dict( - foo=dict(a="hello", b=1), - baz="world" - ) - ) - foo = Foo(**data['bar']['foo']) - bar = Bar(**data['bar']) - bill = Bill(**data) - - assert bill.bar == bar - assert bill.bar.foo == foo diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py deleted file mode 100644 index a7712f632a766..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:12:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:12:57+00:00 -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py deleted file mode 100644 index dda33076e9246..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:13:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py deleted file mode 100644 index 4dcf5531789e7..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Vortexa python utils aws lambda helper functions and types. - -This module is called lambdr as `lambda` is a reserved word in python - -""" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py deleted file mode 100644 index a1af1904a954b..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Note: this code is used only by the static type checker! 
- -_see: -_and: - -""" -from typing import Dict, Any - -LambdaDict = Dict[str, Any] - - -class LambdaCognitoIdentity(object): - cognito_identity_id: str - cognito_identity_pool_id: str - - -class LambdaClientContextMobileClient(object): - installation_id: str - app_title: str - app_version_name: str - app_version_code: str - app_package_name: str - - -class LambdaClientContext(object): - client: LambdaClientContextMobileClient - custom: LambdaDict - env: LambdaDict - - -class LambdaContext(object): - function_name: str - function_version: str - invoked_function_arn: str - memory_limit_in_mb: int - aws_request_id: str - log_group_name: str - log_stream_name: str - deadline_ms: int - identity: LambdaCognitoIdentity - client_context: LambdaClientContext - - @staticmethod - def get_remaining_time_in_millis() -> int: - return 0 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py deleted file mode 100644 index da8e4814d10cd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py +++ /dev/null @@ -1,50 +0,0 @@ -from tempfile import NamedTemporaryFile - -import boto3 -from logzero import logger -from pandas import DataFrame, read_hdf, read_csv - - -class S3Client: - def __init__(self, s3_bucket: str): - self.s3 = boto3.client("s3") - self.s3_bucket = s3_bucket - - def upload(self, filename: str, s3_key: str, owner_acl: bool = True): - logger.info("[s3] Started uploading: %s", s3_key) - self.s3.upload_file(filename, self.s3_bucket, s3_key) - logger.info("[s3] Finished uploading: %s", s3_key) - if owner_acl: - self.s3.put_object_acl( - ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=s3_key - ) - logger.info("[s3] bucket-owner-full-control ACL set") - - def hdf_pd(self, filename) -> DataFrame: - return self.__s3_pd__(filename, "hdf") - - def csv_pd(self, filename) -> DataFrame: - return self.__s3_pd__(filename, "csv") - - def copy(self, src, dest, owner_acl: bool = True): - copy_source = {"Bucket": self.s3_bucket, "Key": src} - self.s3.copy_object(CopySource=copy_source, Bucket=self.s3_bucket, Key=dest) - if owner_acl: - self.s3.put_object_acl( - ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=dest - ) - logger.info("[s3] bucket-owner-full-control ACL set") - - def __s3_pd__(self, filename, filetype) -> DataFrame: - with NamedTemporaryFile("wb") as f: - logger.info(f"[s3] Started downloading: s3://{self.s3_bucket}/{filename}") - self.s3.download_fileobj(self.s3_bucket, filename, f) - f.flush() - logger.info(f"[s3] Finished downloading: s3://{self.s3_bucket}/{filename}") - logger.info("[pandas] Started loading: %s", filename) - if filetype == "hdf": - df: DataFrame = read_hdf(f.name) - elif filetype == "csv": - df: DataFrame = read_csv(f.name) - logger.info("[pandas] Finished loading: %s", filename) - return df diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py deleted file mode 100644 index 6e948f7032109..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py +++ 
/dev/null
@@ -1,40 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:36+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:00:31+00:00
-import logging
-from .IODecrypter import IODecrypter
-
-logger = logging.getLogger(__name__)
-
-
-class IOAuthDecrypter(IODecrypter):
-    def __init__(self, cipher, io, content_length, chunk_size=16*1024):
-        super().__init__(cipher, io)
-        self.bytes_read = 0
-        self.content_length = content_length
-
-    def read(self, chunk=None):
-        # Cap reads at the unencrypted content length; `None` means
-        # "read everything that remains" (passing `None` straight to
-        # `min()` would raise a TypeError).
-        if chunk is None:
-            chunk = self.content_length - self.bytes_read
-        else:
-            chunk = min(chunk, self.content_length - self.bytes_read)
-        bytes = super().read(chunk)
-        logger.debug("Bytes Read %s/%s", self.bytes_read, self.content_length)
-        self.bytes_read += len(bytes)
-        return bytes
-
-    def verify(self):
-        # the remaining bytes should be the auth tag
-        tag = self.io.read()
-        logger.debug("Verifying Tag %s", tag)
-        self.cipher.verify(tag)
-
-    def iter_chunks(self, chunk_size=None):
-        """Return an iterator to yield chunks of chunk_size bytes from the raw
-        stream.
-        """
-        if chunk_size is None:
-            chunk_size = self._DEFAULT_CHUNK_SIZE
-
-        while self.bytes_read < self.content_length:
-            bytes = self.read(chunk_size)
-            yield bytes
-        self.verify()
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
deleted file mode 100644
index c120281198139..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:36+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:00:31+00:00
-import logging
-from .IODecrypter import IODecrypter
-from io import BytesIO, IOBase
-logger = logging.getLogger(__name__)
-
-
-class StreamChunker(IOBase):
-    """StreamChunker: a class to hold back the last `tag_length` bytes of a file.
-
-    Keeps hold of the last `tag_length` bytes in `self.tag`
-    when reading from a `BytesIO` object.
-    """
-
-    def __init__(self, io: BytesIO, tag_length: int):
-        self.io = io
-        self.tag_length = tag_length
-        # prime the tag buffer with the first `tag_length` bytes
-        self.tag = self.io.read(self.tag_length)
-
-    def read(self, chunk=None):
-        bytes = self.tag + self.io.read(chunk)
-        bytes, self.tag = bytes[:-self.tag_length], bytes[-self.tag_length:]
-        return bytes
-
-    def close(self):
-        """Close the underlying http response stream."""
-        self.io.close()
-
-    def readable(self):
-        return True
-
-    def seekable(self):
-        return False
-
-    def writable(self):
-        return False
-
-
-class IOAuthDecrypterTagLength(IODecrypter):
-    def __init__(self, cipher, io, tag_length, chunk_size=16*1024):
-        super().__init__(cipher, StreamChunker(io, tag_length))
-
-    def verify(self):
-        # the remaining bytes should be the auth tag
-        tag = self.io.tag
-        logger.debug("Verifying Tag %s", tag)
-        self.cipher.verify(tag)
-
-    def iter_chunks(self, chunk_size=None):
-        """Return an iterator to yield chunks of chunk_size bytes from the raw
-        stream.
- """ - if chunk_size is None: - chunk_size = self._DEFAULT_CHUNK_SIZE - - while True: - bytes = self.read(chunk_size) - if bytes == b'': - break - yield bytes - self.verify() diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py deleted file mode 100644 index 9346aafcbe053..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py +++ /dev/null @@ -1,61 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T17:01:20+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T16:57:10+00:00 -# from typing import Iterable - -from io import IOBase -from botocore.response import StreamingBody - -import logging - -logger = logging.getLogger(__name__) - - -class IODecrypter(IOBase): - _DEFAULT_CHUNK_SIZE = 1024 - - def __init__(self, cipher, io: StreamingBody): - self.cipher: object = cipher - self.io: StreamingBody = io - - def read(self, chunk=None): - bytes = self.io.read(chunk) - return self.cipher.decrypt(bytes) - - def __iter__(self): - """Return an iterator to yield 1k chunks from the raw stream.""" - return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) - - def iter_chunks(self, chunk_size: int = _DEFAULT_CHUNK_SIZE): - # type: (...) -> Iterable[bytes] - """Return an iterator to yield chunks bytes from the raw `io` stream. - - Parameters - ---------- - chunk_size : int - iterates over no more than Chunk size bytes. If `None` use - `self._DEFAULT_CHUNK_SIZE`. - - Returns - ------- - Iterator[bytes] - - """ - decrypt = self.cipher.decrypt - chunks = self.io.iter_chunks(chunk_size) - - return (decrypt(bytes) for bytes in chunks) - - def close(self): - """Close the underlying http response stream.""" - self.io.close() - - def readable(self): - return True - - def seekable(self): - return False - - def writable(self): - return False diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py deleted file mode 100644 index 3f613f19550c5..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py +++ /dev/null @@ -1,38 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T17:01:20+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T16:57:10+00:00 -from typing import Iterable -from botocore.response import StreamingBody -from .IODecrypter import IODecrypter - -import logging - -logger = logging.getLogger(__name__) - - -class IONocrypter(IODecrypter): - - def __init__(self, io): - self.io: StreamingBody = io - - def read(self, chunk=None): - return self.io.read(chunk) - - def iter_chunks(self, chunk_size: int = None) -> Iterable[bytes]: - """Return an iterator to yield chunks bytes from the raw `io` stream. - - Parameters - ---------- - chunk_size : int - iterates over no more than Chunk size bytes. If `None` use - `self._DEFAULT_CHUNK_SIZE`. 
- - Returns - ------- - Iterator[bytes] - - """ - if chunk_size is None: - chunk_size = self._DEFAULT_CHUNK_SIZE - return self.io.iter_chunks(chunk_size) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py deleted file mode 100644 index 628c41928cecc..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py +++ /dev/null @@ -1,183 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T15:15:44+00:00 -# @Last modified by: richard -# @Last modified time: 2018-11-28T15:15:44+00:00 - -""" -# From the RUBY Docs. - -Provides an encryption client that encrypts and decrypts data client-side, -storing the encrypted data in Amazon S3. - -This client uses a process called "envelope encryption". Your private -encryption keys and your data's plain-text are **never** sent to -Amazon S3. **If you lose you encryption keys, you will not be able to -decrypt your data.** - -## Envelope Encryption Overview - -The goal of envelope encryption is to combine the performance of -fast symmetric encryption while maintaining the secure key management -that asymmetric keys provide. - -A one-time-use symmetric key (envelope key) is generated client-side. -This is used to encrypt the data client-side. This key is then -encrypted by your master key and stored alongside your data in Amazon -S3. - -When accessing your encrypted data with the encryption client, -the encrypted envelope key is retrieved and decrypted client-side -with your master key. The envelope key is then used to decrypt the -data client-side. - -One of the benefits of envelope encryption is that if your master key -is compromised, you have the option of just re-encrypting the stored -envelope symmetric keys, instead of re-encrypting all of the -data in your account. - -## Basic Usage - -The encryption client requires an {Aws::S3::Client}. If you do not -provide a `:client`, then a client will be constructed for you. - - require 'openssl' - key = OpenSSL::PKey::RSA.new(1024) - - # encryption client - s3 = aws.s3.client_side_encryption.Client(encryption_key: key) - - # round-trip an object, encrypted/decrypted locally - s3.put_object(bucket:'aws-sdk', key:'secret', body:'handshake') - s3.get_object(bucket:'aws-sdk', key:'secret').body.read - #=> 'handshake' - - # reading encrypted object without the encryption client - # results in the getting the cipher text - Aws::S3::Client.new.get_object(bucket:'aws-sdk', key:'secret').body.read - #=> "... cipher text ..." - -## Keys - -For client-side encryption to work, you must provide one of the following: - -* An encryption key -* A {KeyProvider} -* A KMS encryption key id - -### An Encryption Key - -You can pass a single encryption key. This is used as a master key -encrypting and decrypting all object keys. - - key = OpenSSL::Cipher.new("AES-256-ECB").random_key # symmetric key - key = OpenSSL::PKey::RSA.new(1024) # asymmetric key pair - - s3 = Aws::S3::Encryption::Client.new(encryption_key: key) - -### Key Provider - -Alternatively, you can use a {KeyProvider}. A key provider makes -it easy to work with multiple keys and simplifies key rotation. - -### KMS Encryption Key Id - -If you pass the id to an AWS Key Management Service (KMS) key, -then KMS will be used to generate, encrypt and decrypt object keys. 
- - # keep track of the kms key id - kms = Aws::KMS::Client.new - key_id = kms.create_key.key_metadata.key_id - - Aws::S3::Encryption::Client.new( - kms_key_id: key_id, - kms_client: kms, - ) - -## Custom Key Providers - -A {KeyProvider} is any object that responds to: - -* `#encryption_materials` -* `#key_for(materials_description)` - -Here is a trivial implementation of an in-memory key provider. -This is provided as a demonstration of the key provider interface, -and should not be used in production: - - class KeyProvider - - def initialize(default_key_name, keys) - @keys = keys - @encryption_materials = Aws::S3::Encryption::Materials.new( - key: @keys[default_key_name], - description: JSON.dump(key: default_key_name), - ) - end - - attr_reader :encryption_materials - - def key_for(matdesc) - key_name = JSON.load(matdesc)['key'] - if key = @keys[key_name] - key - else - raise "encryption key not found for: #{matdesc.inspect}" - end - end - end - -Given the above key provider, you can create an encryption client that -chooses the key to use based on the materials description stored with -the encrypted object. This makes it possible to use multiple keys -and simplifies key rotation. - - # uses "new-key" for encrypting objects, uses either for decrypting - keys = KeyProvider.new('new-key', { - "old-key" => Base64.decode64("kM5UVbhE/4rtMZJfsadYEdm2vaKFsmV2f5+URSeUCV4="), - "new-key" => Base64.decode64("w1WLio3agRWRTSJK/Ouh8NHoqRQ6fn5WbSXDTHjXMSo="), - }), - - # chooses the key based on the materials description stored - # with the encrypted object - s3 = Aws::S3::Encryption::Client.new(key_provider: keys) - -## Materials Description - -A materials description is JSON document string that is stored -in the metadata (or instruction file) of an encrypted object. -The {DefaultKeyProvider} uses the empty JSON document `"{}"`. - -When building a key provider, you are free to store whatever -information you need to identify the master key that was used -to encrypt the object. - -## Envelope Location - -By default, the encryption client store the encryption envelope -with the object, as metadata. You can choose to have the envelope -stored in a separate "instruction file". An instruction file -is an object, with the key of the encrypted object, suffixed with -`".instruction"`. - -Specify the `:envelope_location` option as `:instruction_file` to -use an instruction file for storing the envelope. - - # default behavior - s3 = Aws::S3::Encryption::Client.new( - key_provider: ..., - envelope_location: :metadata, - ) - - # store envelope in a separate object - s3 = Aws::S3::Encryption::Client.new( - key_provider: ..., - envelope_location: :instruction_file, - instruction_file_suffix: '.instruction' # default - ) - -When using an instruction file, multiple requests are made when -putting and getting the object. 
**This may cause issues if you are
-issuing concurrent PUT and GET requests to an encrypted object.**
-"""
-
-from .client import Client
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
deleted file mode 100644
index 954b2276986b2..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-27T18:22:34+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-05T16:25:56+00:00
-from Cryptodome.Cipher import AES  # pycryptodomex
-
-
-class CipherProvider(object):
-    def __init__(self, key):
-        self.key = key
-
-    def decryptor(self, envelope):
-        # base class hook: concrete providers return a decryption cipher here
-        pass
-
-    def encryptor(self):
-        cipher = AES.new(self.key, AES.MODE_GCM)
-        return cipher
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
deleted file mode 100644
index 6ebccdba9b9cd..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T15:15:54+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T18:07:33+00:00
-import boto3
-from .kms_cipher_provider import KMSCipherProvider
-from .decrypt_handeler import DecryptHandeler
-
-
-class Client(object):
-    """
-    Client Side Encryption S3 Client.
-
-    Attributes
-    ----------
-    s3 : botocore.client.S3
-    cipher_provider : .cipher_provider.CipherProvider
-
-    Methods
-    -------
-    get_object(Bucket, Key)
-        get and decrypt an object from s3
-
-    """
-
-    def __init__(
-            self,
-            client=None,
-            cipher_provider=None,
-            key_id=None,
-            strict=None,
-            **kwargs):
-        """Initialise the client side encryption s3 client.
-
-        Parameters
-        ----------
-        client : botocore.client.S3
-            Optional S3 client to use for s3 interaction.
-            Will create a client if not set.
-
-        cipher_provider : CipherProvider
-            Optional `CipherProvider` to provide the encryption cipher.
-            Will default to `KMSCipherProvider()` if not set.
-
-        key_id : str
-            The kms `key id`, `alias` or `aws::arn`
-            for the `KMSCipherProvider`.
-
-        strict : bool
-            When True, raise instead of falling back to plaintext reads
-            for objects without an encryption envelope.
-
-        region_name : str
-            The region for the kms and s3 client resources.
-
-        """
-        # read `region_name`, the key documented above and understood by boto3
-        region_name = kwargs.get('region_name')
-        self.s3 = client or boto3.client('s3', **kwargs)
-        self.cipher_provider = (
-            cipher_provider or KMSCipherProvider(
-                key_id=key_id,
-                region_name=region_name
-            )
-        )
-        self.strict = strict
-
-    def get_object(self, Bucket, Key):
-        """Retrieve object from Amazon S3.
-
-        See also:
-        `AWS API Documentation `_
-
-        `AWS Client Side Encryption `_
-
-        Parameters
-        ----------
-        Bucket : str
-            **[REQUIRED]** The Bucket
-        Key : str
-            **[REQUIRED]** The Path Key in the Bucket
-
-        """
-        # location_info = self.s3.get_bucket_location(Bucket=Bucket)
-        # bucket_region = location_info['LocationConstraint']
-
-        obj = self.s3.get_object(Bucket=Bucket, Key=Key)
-        handeler = DecryptHandeler(obj, self, self.strict)
-        return handeler.decrypt()
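
For reference while reading `get_object`, a minimal usage sketch of this client. The bucket, key, and KMS alias are hypothetical, and the object is assumed to carry a KMS encryption envelope in its metadata:

```
# Hypothetical usage of the client-side-encryption Client above.
from vortexa_utils.aws.s3.client_side_encryption.client import Client

cl = Client(key_id='alias/my-key', region_name='eu-west-1')
# get_object returns an IODecrypter; it is an IOBase, so it works
# as a context manager and yields decrypted chunks on iteration.
with cl.get_object(Bucket='my-bucket', Key='path/to/object') as stream:
    plaintext = b''.join(stream.iter_chunks())
```
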
-
-    def object_encrypted(self, Bucket, Key) -> bool:
-        """Check if object has encryption envelope.
-
-        Parameters
-        ----------
-        Bucket : str
-            **[REQUIRED]** The Bucket
-        Key : str
-            **[REQUIRED]** The Path Key in the Bucket
-
-        Returns
-        -------
-        bool
-
-        """
-        obj = self.s3.head_object(Bucket=Bucket, Key=Key)
-        handeler = DecryptHandeler(obj, self)
-        return handeler.extract_envelop() is not None
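
The handler below dispatches on the encryption envelope stored in the object's `x-amz-*` metadata. For orientation, a sketch of an illustrative v2 (KMS-wrapped) envelope; the keys are the ones the handler checks, while every value here is a made-up placeholder:

```
# Illustrative v2 envelope metadata; values are placeholders, not real
# ciphertext. This is the shape envelope_v2() validates below.
envelope_v2_example = {
    "x-amz-key-v2": "AQIDAHh...base64-wrapped-data-key...",
    "x-amz-iv": "gh5brn...base64-iv...",
    "x-amz-cek-alg": "AES/GCM/NoPadding",
    "x-amz-wrap-alg": "kms",
    "x-amz-matdesc": '{"kms_cmk_id": "alias/my-key"}',
    "x-amz-tag-len": "128",  # GCM auth tag length, in *bits*
}
```
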
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
deleted file mode 100644
index 464fc3c872642..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-27T17:24:50+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:38:19+00:00
-import boto3
-import logging
-
-from .IODecrypter import IODecrypter
-from .IONocrypter import IONocrypter
-from .IOAuthDecrypter import IOAuthDecrypter
-from .IOAuthDecrypterTagLength import IOAuthDecrypterTagLength
-
-logger = logging.getLogger(__name__)
-kms = boto3.client('kms')
-
-
-class DecryptionException(Exception):
-    pass
-
-
-class DecryptHandeler(object):
-
-    V1_ENVELOPE_KEYS = {
-        'x-amz-key',
-        'x-amz-iv',
-        'x-amz-matdesc'
-    }
-
-    V2_ENVELOPE_KEYS = {
-        'x-amz-key-v2',
-        'x-amz-iv',
-        'x-amz-cek-alg',
-        'x-amz-wrap-alg',
-        'x-amz-matdesc'
-    }
-
-    POSSIBLE_ENVELOPE_KEYS = V1_ENVELOPE_KEYS | V2_ENVELOPE_KEYS
-
-    POSSIBLE_ENCRYPTION_FORMATS = {
-        'AES/GCM/NoPadding',
-        'AES/CBC/PKCS5Padding',
-        'AES/CBC/PKCS7Padding'
-    }
-
-    def __init__(self, obj, context, strict=False):
-        self.obj = obj
-        self.context = context
-        self.metadata = obj['Metadata']
-        self.body = obj['Body']
-        self.strict = strict
-
-    def decrypt(self):
-        cipher = self.decryption_cipher()
-        logger.debug(self.metadata)
-        if cipher:
-            logger.debug(cipher)
-            if self.auth_tag():
-                return self.decrypt_auth(cipher)
-            return IODecrypter(cipher=cipher, io=self.body)
-        # Object not encrypted with an envelope
-        mesg = f"Unencrypted Object at {self.obj['ETag']}"
-        if self.strict:
-            logger.error(mesg)
-            raise ValueError(mesg)
-        else:
-            logger.warning(mesg)
-            return IONocrypter(io=self.body)
-
-    def auth_tag(self):
-        return 'x-amz-tag-len' in self.metadata
-
-    def decryption_cipher(self):
-        envelope = self.extract_envelop(self.metadata)
-        if envelope:
-            return self.context.cipher_provider.decryptor(envelope)
-
-    def extract_envelop(self, meta):
-        if 'x-amz-key' in meta:
-            return self.envelope_v1(meta)
-        elif 'x-amz-key-v2' in meta:
-            return self.envelope_v2(meta)
-
-        key_prefix = 'x-amz-key'
-        key = next((k for k in meta.keys() if k.startswith(key_prefix)), None)
-        if key is not None:
-            key_version = key[len(key_prefix):]
-            mesg = f'Unknown envelope encryption version {key_version}'
-            raise DecryptionException(mesg)
-        # no envelope found
-        return None
-
-    def envelope_v2(self, meta):
-        if meta['x-amz-cek-alg'] not in self.POSSIBLE_ENCRYPTION_FORMATS:
-            alg = meta['x-amz-cek-alg']
-            msg = f'unsupported content encrypting key format: {alg}'
-            raise DecryptionException(msg)
-        if meta['x-amz-wrap-alg'] != 'kms':
-            alg = meta['x-amz-wrap-alg']
-            msg = f'unsupported key wrapping algorithm: {alg}'
-            raise DecryptionException(msg)
-        if not self.V2_ENVELOPE_KEYS <= set(meta.keys()):
-            # an incomplete envelope is an error
-            msg = "incomplete v2 encryption envelope:\n"
-            msg += f"  expected: {', '.join(self.V2_ENVELOPE_KEYS)}\n"
-            msg += f"  got: {', '.join(meta.keys())}"
-            raise DecryptionException(msg)
-        return meta
-
-    def envelope_v1(self, meta):
-        return meta
-
-    def decrypt_auth(self, cipher):
-        meta = self.metadata
-
-        content_length_string = meta.get(
-            'x-amz-unencrypted-content-length',
-            None
-        )
-        if content_length_string is not None:
-            content_length = int(content_length_string)
-            return IOAuthDecrypter(cipher, self.body, content_length)
-        tag_length = int(meta['x-amz-tag-len'])//8
-        return IOAuthDecrypterTagLength(cipher, self.body, tag_length)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
deleted file mode 100644
index 7f961e62c814e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-27T14:58:39+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-11-30T12:09:27+00:00
-
-# see https://medium.com/@samnco/reading-aws-ses-encrypted-emails-with-boto3-9c177f8ba130
-# and https://github.com/boto/boto3/issues/38
-
-import base64
-import json
-from Cryptodome.Cipher import AES  # pycryptodomex
-import boto3
-
-
-s3 = boto3.client('s3')
-kms = boto3.client('kms')
-
-
-def chunker(length, chunk_size):
-    index = 0
-    while index < length:
-        chunk = min(chunk_size, length - index)
-        index += chunk
-        yield chunk
-
-
-def content_streamer(bytes_io, content_length, chunk_size=16*1024):
-    for chunk in chunker(content_length, chunk_size):
-        yield bytes_io.read(chunk)
-
-
-def decrypt_object(obj):
-    metadata = obj['Metadata']
-    key_alg = metadata['x-amz-cek-alg']
-
-    envelope_key = base64.b64decode(metadata['x-amz-key-v2'])
-    envelope_iv = base64.b64decode(metadata['x-amz-iv'])
-    encrypt_ctx = json.loads(metadata['x-amz-matdesc'])
-
-    # x-amz-tag-len is in bits, so integer-divide by 8 to get bytes
-    tag_len = int(metadata['x-amz-tag-len']) // 8
-    original_size = int(metadata['x-amz-unencrypted-content-length'])
-
-    decrypted_envelope_key = kms.decrypt(
-        CiphertextBlob=envelope_key,
-        EncryptionContext=encrypt_ctx
-    )
-    key = decrypted_envelope_key['Plaintext']
-
-    if key_alg == 'AES/GCM/NoPadding':
-        cipher = AES.new(key, AES.MODE_GCM, envelope_iv)
-    elif key_alg == 'AES/CBC/PKCS5Padding':
-        cipher = AES.new(key, AES.MODE_CBC, envelope_iv)
-    else:
-        raise Exception('unknown encryption algorithm')
-
-    body = obj['Body']
-
-    body = body.read()
-    body, tag = body[:original_size], body[original_size:]
-    email = cipher.decrypt(body)
-    cipher.verify(tag)
-    return email
-
-
-def get_object(bucket, key):
-    obj = s3.get_object(Bucket=bucket, Key=key)
-    location_info = s3.get_bucket_location(Bucket=bucket)
-    bucket_region = location_info['LocationConstraint']
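
The `x-amz-tag-len` arithmetic above is easy to get wrong: the metadata reports the GCM auth tag length in bits, while the slicing is done in bytes, hence the integer division by 8. A self-contained sketch of the bookkeeping, using stand-in byte values:

```
# GCM auth tag bookkeeping: metadata reports bits, slicing needs bytes.
meta = {"x-amz-tag-len": "128", "x-amz-unencrypted-content-length": "1024"}

tag_len = int(meta["x-amz-tag-len"]) // 8   # 128 bits -> 16 bytes
body_len = int(meta["x-amz-unencrypted-content-length"])

stream = b"\x00" * (body_len + tag_len)     # stand-in for the S3 body
ciphertext, tag = stream[:body_len], stream[body_len:]
assert len(tag) == 16
```
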
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
deleted file mode 100644
index 6700eedb5e0b4..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-27T18:20:28+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-05T17:20:59+00:00
-import base64
-import boto3
-import json
-
-from Cryptodome.Cipher import AES  # pycryptodomex
-from .cipher_provider import CipherProvider
-
-
-class KMSCipherProvider(CipherProvider):
-    aes_mode_map = {
-        'AES/GCM/NoPadding': AES.MODE_GCM,
-        'AES/CBC/PKCS5Padding': AES.MODE_CBC,
-        'AES/CBC/PKCS7Padding': AES.MODE_CBC
-    }
-
-    def __init__(self, key_id=None, **kwargs):
-        self.kms = boto3.client('kms', **kwargs)
-        self.key_id = key_id
-
-    def decryptor(self, envelope):
-        key_alg = envelope['x-amz-cek-alg']
-        aes_mode = self.aes_mode_map.get(key_alg)
-        if aes_mode is None:
-            raise Exception(f'unknown encryption algorithm {key_alg}')
-
-        envelope_key = base64.b64decode(envelope['x-amz-key-v2'])
-        iv = base64.b64decode(envelope['x-amz-iv'])
-        encryption_context = json.loads(envelope['x-amz-matdesc'])
-
-        decrypted_envelope = self.kms.decrypt(
-            CiphertextBlob=envelope_key,
-            EncryptionContext=encryption_context
-        )
-        key = decrypted_envelope['Plaintext']
-        cipher = AES.new(key, aes_mode, iv)
-        return cipher
-
-    def encryptor(self):
-        encryption_context = {"kms_cmk_id": self.key_id}
-
-        key_data = self.kms.generate_data_key(
-            KeyId=self.key_id,
-            EncryptionContext=encryption_context,
-            KeySpec='AES_256'
-        )
-
-        key = key_data['Plaintext']
-        cipher = AES.new(key, AES.MODE_GCM)
-
-        envelope = {
-            'x-amz-key-v2': base64.encodebytes(key_data['CiphertextBlob']),
-            'x-amz-iv': base64.encodebytes(cipher.nonce),
-            'x-amz-cek-alg': 'AES/GCM/NoPadding',
-            'x-amz-wrap-alg': 'kms',
-            'x-amz-matdesc': json.dumps(encryption_context)
-        }
-        return envelope, cipher
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py
deleted file mode 100644
index 520cb4033d38a..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# @Author: richard
-# @Date: 2018-12-06T18:06:14+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T18:06:14+00:00
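
A round-trip sketch of `KMSCipherProvider`: wrap a data key with KMS, encrypt locally with AES-GCM, then decrypt from the envelope alone. It assumes AWS credentials and a KMS key reachable as `alias/python_utils_test_key`, mirroring the key used in `test_kms_cipher_provider.py`:

```
# Round-trip sketch; the KMS alias must exist in your account.
from vortexa_utils.aws.s3.client_side_encryption.kms_cipher_provider import (
    KMSCipherProvider,
)

provider = KMSCipherProvider('alias/python_utils_test_key')
envelope, cipher = provider.encryptor()
ciphertext, tag = cipher.encrypt_and_digest(b'attack at dawn')

# A fresh provider can reconstruct the cipher from the envelope alone.
decipher = KMSCipherProvider().decryptor(envelope)
plaintext = decipher.decrypt(ciphertext)
decipher.verify(tag)  # raises ValueError if the MAC check fails
```
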
-    >>> read_csv = read_input_wrapper(pd.read_csv)
-    >>> read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
-
-    or as a decorator
-
-    @read_input_wrapper
-    def read_foo(file, **kwargs) -> pd.DataFrame:
-        # some custom foo
-        return pd.DataFrame()
-
-    or
-
-    @read_input_wrapper(sep='\t')
-    def read_bar(file, **kwargs) -> pd.DataFrame:
-        # some custom bar
-        return pd.DataFrame()
-    """
-
-    def wrapper(func: Callable[..., pd.DataFrame]):
-
-        @wraps(func)
-        def reader(input: bytes) -> pd.DataFrame:
-            return func(io.BytesIO(input), **kwargs)
-        return reader
-
-    if read_func is None:
-        return wrapper
-    return wrapper(read_func)
-
-
-read_csv = read_input_wrapper(pd.read_csv)
-read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
-read_excel = read_input_wrapper(pd.read_excel, sheet_name=None)
-
-
-class ApplicationMapper(Mapping):
-    """A `Mapping` class to map MIME application types to a pandas reader."""
-
-    application_mapping = {
-        "text/plain": read_tsv,
-        "text/csv": read_csv,
-        "application/vnd.ms-excel": read_excel
-    }
-
-    application_prefixed = (
-        (
-            'application/vnd.ms-excel.sheet',
-            read_excel
-        ),
-        (
-            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-            read_excel
-        )
-    )
-
-    def __getitem__(self, key):
-        func = self.application_mapping.get(key)
-        if func is not None:
-            return func
-        for prefix, func in self.application_prefixed:
-            if key.startswith(prefix):
-                return func
-
-    def __iter__(self):
-        return iter(self.application_mapping)
-
-    def __len__(self):
-        return len(self.application_mapping)
-
-
-application_mapping = ApplicationMapper()
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py
deleted file mode 100644
index d5ef58684ee7f..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import email
-from .application_mapper import application_mapping
-
-
-class Attachment(object):
-
-    def __init__(self, attachment: email.message.EmailMessage):
-        self.attachment = attachment
-
-    def to_df(self):
-        content_type = self.attachment.get_content_type()
-        reader = application_mapping.get(content_type)
-        if reader is None:
-            raise TypeError(f"unknown content_type {content_type}")
-        return reader(self.attachment.get_content())
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py
deleted file mode 100644
index 4c3664093d938..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-06T18:06:25+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-06T19:36:09+00:00
-from typing import Iterable
-import logging
-from datetime import datetime
-from functools import wraps
-import boto3
-# cd aws/vortexa_utils
-# import aws.s3.client_side_encryption.client as client
-import vortexa_utils.aws.s3.client_side_encryption.client as client
-import email
-import email.policy
-import email.parser
-from email.message import EmailMessage
-
-logger = logging.getLogger(__name__)
-
-
-class Inbox(object):
-    """An inbox of emails delivered by SES to an S3 bucket.
-
-    Parameters
-    ----------
-    default_bucket : str
-        Default s3 Bucket to associate the inbox with.
-
-    """
-
-    def __init__(self, default_bucket: str = None, **kwargs):
-        """Create an `Inbox`.
-
-        Parameters
-        ----------
-        default_bucket : str
-            Default s3 Bucket to associate the inbox with.
-        strict : bool
-            When True will not fetch unencrypted emails. Defaults to False.
-        **kwargs : dict
-            **`kwargs` to pass to `s3.client`.
-
-        """
-        self.bucket = default_bucket
-        self.s3crypto = client.Client(**kwargs)
-        self.s3 = self.s3crypto.s3
-        # Specify the default policy for email parsing else Parser defaults to
-        # email.policy.compat32 for python 3 and 2 compatibility
-        self.parser = email.parser.BytesParser(policy=email.policy.default)
-
-    def get_email(self, Key: str, Bucket: str = None) -> EmailMessage:
-        """Get `EmailMessage` Object from `Bucket`.
-
-        Parameters
-        ----------
-        Key : str
-            `Key` name of email in s3.
-        Bucket : str
-            s3 `Bucket` to look for email; will search `self.bucket` if `None`.
-
-        Returns
-        -------
-        email.message.EmailMessage
-            Email object.
-
-        """
-        Bucket = Bucket or self.bucket
-        if Bucket is None:
-            raise ValueError("Bucket not set")
-        with self.s3crypto.get_object(Bucket=Bucket, Key=Key) as io:
-            return self.parser.parse(io)
-
-    def list_objects(
-            self,
-            Bucket: str = None,
-            Path: str = None,
-            Begin: datetime = None,
-            Until: datetime = None):
-        # type: (...) -> Iterable['boto3.resources.factory.s3.ObjectSummary']
-        """List all objects in `Bucket` prefixed by `Path`.
-
-        Parameters
-        ----------
-        Bucket : str
-            S3 `Bucket` to look for emails; will search `self.bucket` if `None`.
-        Path : str
-            The `Path` prefix to filter the emails by, no filter if `None`.
-        Begin : datetime
-            Filter objects from this datetime.
-        Until : datetime
-            Filter objects until this datetime.
-
-        Returns
-        -------
-        iterable boto3.resources.factory.s3.ObjectSummary
-            List of matching email objects.
-
-        """
-        bucket = boto3.resource('s3').Bucket(Bucket or self.bucket)
-        objs = bucket.objects.filter(Prefix=Path)
-        if Begin:
-            objs = (obj for obj in objs if obj.last_modified >= Begin)
-        if Until:
-            objs = (obj for obj in objs if obj.last_modified <= Until)
-
-        if Begin is None and Until is None:
-            # if no timestamps, don't bother sorting
-            return objs
-        return sorted(objs, key=lambda o: o.last_modified)
-
-    @wraps(list_objects, assigned=('__annotations__',))
-    def list_emails(self, **kwargs) -> Iterable[EmailMessage]:
-        """List all emails in `Bucket` prefixed by `Path`.
-
-        Parameters
-        ----------
-        Bucket : str
-            S3 `Bucket` to look for emails; will search `self.bucket` if `None`.
-        Path : str
-            The `Path` prefix to filter the emails by, no filter if `None`.
-        Begin : datetime
-            Filter objects from this datetime.
-        Until : datetime
-            Filter objects until this datetime.
-
-        Returns
-        -------
-        iterable emails
-            List of matching email objects.
-
-        Examples
-        --------
-        >>> inbox = Inbox('my-email-bucket')
-        >>> emails = list(inbox.list_emails(Path='some/sub/folder'))
-
-        """
-        objects = self.list_objects(**kwargs)
-        for obj in objects:
-            yield self.get_email(obj.key, obj.bucket_name)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py
deleted file mode 100644
index 7eb901a004212..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .action import Action
-from .mail import Mail
-from .receipt import Receipt
-from .notification import Notification
-from .lambda_record import Record
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py
deleted file mode 100644
index d62791f941960..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Action:
-    """Action Object.
-
-    Attributes
-    ----------
-    type : str
-        action that was executed. [S3, SNS, Bounce, Lambda, Stop, WorkMail].
-    topicArn : str
-        Amazon Resource Name (ARN) of the SNS topic of the notification.
-    bucketName : str
-        S3 bucket to which the message was published.
-        *Present only for the S3 action type.*
-    objectKey : str
-        name that uniquely identifies the email in the Amazon S3 bucket.
-        This is the same as the messageId in the mail Object.
-        *Present only for the S3 action type.*
-    smtpReplyCode : str
-        SMTP reply code, as defined by RFC 5321.
-        *Present only for the bounce action type.*
-    statusCode : str
-        SMTP enhanced status code, as defined by RFC 3463.
-        *Present only for the bounce action type.*
-    message : str
-        human-readable text to include in the bounce message.
-        *Present only for the bounce action type.*
-    sender : str
-        The email address of the sender of the email that bounced.
-        This is the address from which the bounce message was sent.
-        *Present only for the bounce action type.*
-    functionArn : str
-        ARN of the Lambda function that was triggered.
-        *Present only for the Lambda action type.*
-    invocationType : str
-        invocation type of the Lambda function. [RequestResponse, Event]
-        *Present only for the Lambda action type.*
-    organizationArn : str
-        ARN of the Amazon WorkMail organization.
*Present only for the WorkMail action type.*
-    """
-    type: str
-    topicArn: str = None
-    bucketName: str = None
-    objectKey: str = None
-    smtpReplyCode: str = None
-    statusCode: str = None
-    message: str = None
-    sender: str = None
-    functionArn: str = None
-    invocationType: str = None
-    organizationArn: str = None
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
deleted file mode 100644
index 3eecd720fedf8..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from vortexa_utils.aws.utils.dataclasses import nested_dataclass
-from .mail import Mail
-from .receipt import Receipt
-
-
-@nested_dataclass
-class SESRecord:
-    receipt: Receipt
-    mail: Mail
-
-
-@nested_dataclass
-class Record:
-    """A single SES record from a Lambda invocation event."""
-    eventSource: str  # "aws:ses",
-    eventVersion: str  # "1.0",
-    ses: SESRecord
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py
deleted file mode 100644
index 49252ed6610f3..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from typing import List, Dict, Any
-from dataclasses import dataclass
-
-
-@dataclass
-class Mail:
-    """Mail Object.
-
-    Attributes
-    ----------
-    destination: List[str]
-        A complete list of all recipient addresses (including To: and CC:)
-        from the MIME headers of the incoming email.
-    messageId: str
-        String that contains the unique ID assigned to the email by Amazon SES.
-        If the email was delivered to Amazon S3, the message ID is also the
-        Amazon S3 object key that was used to write the message to your Amazon
-        S3 bucket.
-    source: str
-        String that contains the email address (the envelope MAIL FROM address)
-        that the email was sent from.
-    timestamp: str
-        String that contains the time at which the email was received,
-        in ISO8601 format.
-    headers: List[Dict[str, str]]
-        A list of Amazon SES headers and your custom headers.
-        Each header in the list has a name field and a value field.
-    commonHeaders: Dict[str, Any]
-        A mapping of headers common to all emails,
-        each composed of a name and a value.
-    headersTruncated: str
-        String that specifies whether the headers were truncated,
-        which will happen if the headers are larger than 10 KB.
-        Possible values are true and false.
-
-    """
-
-    destination: List[str]
-    messageId: str
-    source: str
-    timestamp: str
-    headers: List[Dict[str, str]]
-    commonHeaders: Dict[str, Any]
-    headersTruncated: str
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py
deleted file mode 100644
index 19fee6d3060d4..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from vortexa_utils.aws.utils.dataclasses import nested_dataclass
-from . import Mail, Receipt
-
-
-@nested_dataclass
-class Notification:
-    """Notification Object.
-
-    Attributes
-    ----------
-    notificationType: str
-        The notification type. For this type of notification,
-        the value is always Received.
receipt : Receipt
-        Object that contains information about the email delivery.
-    mail : Mail
-        Object that contains information about the email
-        associated with the notification.
-    content : str
-        String that contains the raw, unmodified email, which is typically
-        in Multipurpose Internet Mail Extensions (MIME) format.
-        *Only if the notification was triggered by an SNS action.*
-
-    """
-
-    notificationType: str
-    receipt: Receipt
-    mail: Mail
-    content: str
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py
deleted file mode 100644
index b5d1a3857508d..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from typing import List
-from vortexa_utils.aws.utils.dataclasses import nested_dataclass
-from .action import Action
-from .verdicts import (DKIMVerdict,
-                       DMARCVerdict,
-                       SPFVerdict,
-                       SpamVerdict,
-                       VirusVerdict)
-
-
-@nested_dataclass
-class Receipt:
-    """SNS Receipt object.
-
-    Attributes
-    ----------
-    action : Action
-        Encapsulates information about the action that was executed.
-
-    dkimVerdict : DKIMVerdict
-        Indicates whether the DomainKeys Identified Mail (DKIM) check passed.
-
-    dmarcPolicy : str
-        Domain-based Message Authentication, Reporting & Conformance (DMARC)
-        settings for the sending domain.
-        This field only appears if the message fails DMARC authentication.
-        Possible values for this field are:
-        - none: take no specific action on messages that fail DMARC.
-        - quarantine: treat messages that fail DMARC as suspicious.
-        - reject: reject messages that fail DMARC authentication.
-
-    dmarcVerdict : DMARCVerdict
-        Indicates whether the DMARC check passed.
-
-    processingTimeMillis : str
-        `str` specifies the period, in milliseconds, from the time Amazon SES
-        received the message to the time it triggered the action.
-
-    recipients : List[str]
-        list of recipients that were matched by the active receipt rule.
-        The addresses may differ from those listed by the destination field
-        in the mail Object.
-
-    spamVerdict : SpamVerdict
-        Indicates whether the message is spam.
-
-    spfVerdict : SPFVerdict
-        Whether the Sender Policy Framework (SPF) check passed.
-
-    timestamp : str
-        ISO 8601 format string representing when the action was triggered.
-
-    virusVerdict : VirusVerdict
-        Whether the message contains a virus.
-        For a list of possible values, see virusVerdict Object.
-    """
-    action: Action
-    processingTimeMillis: str
-    recipients: List[str]
-    timestamp: str
-    dmarcPolicy: str = None
-    dmarcVerdict: DMARCVerdict = None
-    dkimVerdict: DKIMVerdict = None
-    spamVerdict: SpamVerdict = None
-    spfVerdict: SPFVerdict = None
-    virusVerdict: VirusVerdict = None
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
deleted file mode 100644
index a4a47e06ce02f..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Verdict(object):
-    """Verdict object.
-
-    Attributes
-    ----------
-    status : str
-        String that contains the verdict. Possible values are:
-        - PASS: The message passed the given test.
-        - FAIL: The message failed the given test.
-        - GRAY: Amazon SES scanned the message but could not determine
-          with confidence whether it passed the given test.
-        - PROCESSING_FAILED: There is an issue that prevents Amazon SES
-          from providing a verdict to the given test.
-    """
-    status: str
-
-
-@dataclass
-class DKIMVerdict(Verdict):
-    ...
-
-
-@dataclass
-class DMARCVerdict(Verdict):
-    ...
-
-
-@dataclass
-class SpamVerdict(Verdict):
-    ...
-
-
-@dataclass
-class SPFVerdict(Verdict):
-    ...
-
-
-@dataclass
-class VirusVerdict(Verdict):
-    ...
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py
deleted file mode 100644
index dda33076e9246..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T20:13:18+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T20:13:18+00:00
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py
deleted file mode 100644
index 0b443f83003f7..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .nested import *
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py
deleted file mode 100644
index 22e1b071fd8d0..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from dataclasses import dataclass, is_dataclass
-from functools import wraps
-
-
-def nested_dataclass(*args, **kwargs):
-    def wrapper(cls):
-        cls = dataclass(cls, **kwargs)
-        original_init = cls.__init__
-
-        @wraps(original_init)
-        def __init__(self, *args, **kwargs):
-            for name, value in kwargs.items():
-                field_type = cls.__annotations__.get(name, None)
-                if is_dataclass(field_type) and isinstance(value, dict):
-                    new_obj = field_type(**value)
-                    kwargs[name] = new_obj
-            original_init(self, *args, **kwargs)
-        cls.__init__ = __init__
-        return cls
-    return wrapper(args[0]) if args else wrapper
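
The `nested_dataclass` decorator above is what allows the SES notification types in this patch to be built directly from the parsed JSON event; a minimal sketch of the behaviour (the class names here are illustrative only):

```python
from vortexa_utils.aws.utils.dataclasses import nested_dataclass

@nested_dataclass
class Inner:
    x: int

@nested_dataclass
class Outer:
    inner: Inner
    name: str

# The plain dict passed for `inner` is promoted to Inner(x=1)
# by the patched __init__ before the original __init__ runs.
outer = Outer(inner={"x": 1}, name="demo")
assert outer.inner.x == 1
```
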
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py
deleted file mode 100644
index 3b8f0c0e2ec81..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Nov 19 14:02:03 2018
-@author: richard
-"""
-import unittest
-
-from vortexa_utils.collections.types.instance_caching_abc import (
-    InstanceCachingABC,
-    instance_caching)
-
-
-class InstanceCachingABCTests(unittest.TestCase):
-
-    def register_class(self, klass):
-        setattr(self, klass.__name__, klass)
-        return klass
-
-    def setUp(self):
-        @self.register_class
-        class Foo(object, metaclass=InstanceCachingABC):
-            pass
-
-        @self.register_class
-        class Bar(object):
-            pass
-
-    def test_signature(self):
-        self.assertEqual(repr(self.Foo), repr(self.Bar).replace('Bar', 'Foo'))
-
-    def test_instance_cache(self):
-        # no instances
-        self.assertFalse(list(self.Foo))
-
-        # one instance
-        foo = self.Foo()
-        foos = list(self.Foo)
-        self.assertEqual(len(foos), 1)
-        klass_name, instance = foos[0]
-        self.assertEqual(instance, foo)
-        self.assertEqual(klass_name, 'Foo')
-
-        # more instances
-        foo2 = self.Foo()
-        foos = list(self.Foo)
-        self.assertEqual(len(foos), 2)
-        klass_name, instance = foos[-1]
-        self.assertEqual(instance, foo2)
-        self.assertEqual(klass_name, 'Foo')
-
-
-class InstanceCachingDecoratorTests(InstanceCachingABCTests):
-
-    def setUp(self):
-        register = self.register_class
-
-        @register
-        class Foo(object):
-            pass
-
-        self._Foo = Foo
-        self.Foo = Foo = instance_caching(Foo)
-
-        @register
-        class Bar(Foo):
-            pass
-
-        @register
-        class Baz(Bar):
-            pass
-
-        @register
-        class Bo(Foo):
-            pass
-
-        @register
-        class Bill(Bo):
-            pass
-
-    def test_signature(self):
-        self.assertEqual(repr(self.Foo), repr(self._Foo))
-
-    def test_list_subclasses(self):
-        self.assertEqual(
-            set(self.Foo._allsubclasses()),
-            set((self.Foo, self.Bar, self.Baz, self.Bo, self.Bill))
-        )
-        self.assertEqual(
-            set(self.Bar._allsubclasses()),
-            set((self.Bar, self.Baz))
-        )
-        self.assertEqual(
-            set(self.Bo._allsubclasses()),
-            set((self.Bill, self.Bo))
-        )
-
-    def test_instance_cache(self):
-        super().test_instance_cache()
-        # no instances in subclasses
-        for klass in self.Bar._allsubclasses():
-            self.assertFalse(list(klass))
-
-        for klass in self.Bo._allsubclasses():
-            self.assertFalse(list(klass))
-
-        self.assertEqual(len(list(self.Foo)), 2)
-        # one instance
-        bar = self.Bar()
-        foos = list(self.Foo)
-        bars = list(self.Bar)
-        self.assertEqual(len(foos), 3)
-        self.assertEqual(len(bars), 1)
-        klass_name, instance = bars[0]
-        self.assertEqual(instance, bar)
-        self.assertEqual(klass_name, 'Bar')
-
-        baz = self.Baz()
-        foos = list(self.Foo)
-        bars = list(self.Bar)
-        bazs = list(self.Baz)
-        self.assertEqual(len(foos), 4)
-        self.assertEqual(len(bars), 2)
-        self.assertEqual(len(bazs), 1)
-        klass_name, instance = bazs[0]
-        self.assertEqual(instance, baz)
-        self.assertEqual(klass_name, 'Baz')
-
-        for klass in self.Bo._allsubclasses():
-            self.assertFalse(list(klass))
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py
deleted file mode 100644
index cdc6c556c07be..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Nov 19 09:57:05 2018
-@author: richard
-"""
-from abc import ABCMeta
-
-
-class InstanceCachingABC(ABCMeta):
-    """Metaclass for defining Instance Caching Abstract Base Classes (ICABC).
-    Use this metaclass to create an ICABC. An ICABC will remember the instances
-    created from it and can be iterated over to return all instances and
-    subclass instances.
-    """
-
-    def __init__(cls, name, bases, namespace):
-        super().__init__(name, bases, namespace)
-        cls._instances = list()
-
-    def __call__(cls, *args, **kwargs):
-        instance = super().__call__(*args, **kwargs)
-        cls._instances.append(instance)
-        return instance
-
-    def _allsubclasses(cls):
-        yield cls
-        for subclass in cls.__subclasses__():
-            yield from subclass._allsubclasses()
-
-    # Metamethods, called on class objects:
-    def __iter__(cls):
-        return ((klass.__name__, instance)
-                for klass in cls._allsubclasses()
-                for instance in klass._instances)
-
-
-def instance_caching(klass):
-    class Decorated(klass, metaclass=InstanceCachingABC):
-        pass
-
-    Decorated.__name__ = klass.__name__
-    Decorated.__qualname__ = klass.__qualname__
-    Decorated.__module__ = klass.__module__
-    return Decorated
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/README.md b/vendor/github.com/V0RT3X4/python_utils/database/README.md
deleted file mode 100644
index 4c64ed6286b79..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Vortexa Utils DatabaseFactory
-
-Small factory class to give you a `SqlAlchemy` engine connection to an
-`AWS rds` instance, ensuring SSL, with credentials obtained from the secrets manager.
-## Usage
-
-```python
-db_factory = DatabaseFactory()
-engine = db_factory.engine(dbname='rolling_backup')
-
-sql = """
-SELECT
-    name
-FROM new_polygons where name is not Null;
-"""
-
-engine.execute(sql)
-```
-## TODO Other utility functions
-
-- [ ] create a `~/.dbpass` file
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/README.rst b/vendor/github.com/V0RT3X4/python_utils/database/README.rst
deleted file mode 100644
index 5f2775e7ba207..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/README.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-.. contents::
-   :depth: 3
-..
-
-Vortexa Utils DatabaseFactory
-=============================
-
-Small factory class to give you a ``SqlAlchemy`` engine connection to an
-``AWS rds`` instance, ensuring SSL, with credentials obtained from the
-secrets manager.
-
-Usage
------
-
-.. code:: python
-
-   db_factory = DatabaseFactory()
-   engine = db_factory.engine(dbname='rolling_backup')
-
-   sql = """
-   SELECT
-       name
-   FROM new_polygons where name is not Null;
-   """
-
-   engine.execute(sql)
-
-TODO Other utility functions
-----------------------------
-
-- [ ] create a ``~/.dbpass`` file
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/setup.py b/vendor/github.com/V0RT3X4/python_utils/database/setup.py
deleted file mode 100644
index 4ea029d37a074..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/setup.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:54:43+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:14:15+00:00
-import io
-import os
-
-from setuptools import find_packages, setup
-
-namespace = "vortexa_utils"
-description = "Vortexa Database Engine Factory"
-
-dependencies = ["boto3", "SqlAlchemy", "psycopg2-binary", "requests"]
-
-# Setup boilerplate below
-
-package_root = os.path.abspath(os.path.dirname(__file__))
-
-readme_filename = os.path.join(package_root, "README.rst")
-with io.open(readme_filename, encoding="utf-8") as readme_file:
-    readme = readme_file.read()
-
-packages = [
-    package for package in find_packages() if package.startswith(namespace)
-]
-
-setup(
-    name="vortexa_utils_database",
-    version="0.0.1",
-    description=description,
-    long_description=readme,
-    author="Richard Mathie",
-    author_email="richard.mathie@vortexa.com",
-    zip_safe=False,
-    tests_require=["nose2"],
-    test_suite="nose2.collector.collector",
-    packages=packages,
-    install_requires=dependencies,
-    extras_require={"query_cache": ["pandas", "pyarrow"]},
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py
deleted file mode 100644
index 45ad343c6c796..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import os
-import unittest
-
-from vortexa_utils.database import DatabaseFactory
-
-
-class TestEngineFactory(unittest.TestCase):
-    def test_create_factory(self):
-        db_factory = DatabaseFactory()
-        return db_factory
-
-    def test_get_cert(self):
-        db_factory = self.test_create_factory()
-        cert_file = db_factory.fetch_cert()
-        self.assertEqual(cert_file, db_factory.cert_file)
-        assert os.path.isfile(cert_file)
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py
deleted file mode 100644
index 2e441f58cdb2e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# cd database
-import logging
-
-from vortexa_utils.database.default_factories import DevFactory
-from vortexa_utils.database.query_cache import QueryCache
-
-logger = logging.getLogger(__name__)
-
-logging.basicConfig(level=logging.DEBUG)
-
-# factory = DevFactory()
-# engine = factory.engine()
-# qc = QueryCache()
-
-# %time df = qc.read_sql("clarksons", engine)
-
-
-def test_filename():
-    qc = QueryCache()
-    assert qc.filename("some random query") == "qAdzxvMgeSc=.parquet.snappy"
-    assert qc.filename("banned_words") == "LoRkfDuNmuA=.parquet.snappy"
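
The opaque expected values in `test_filename` come straight from `QueryCache.filename` (an 8-byte blake2s digest of the query, urlsafe-base64 encoded, plus the result extension); a stand-alone sketch of that mapping:

```python
import hashlib
from base64 import urlsafe_b64encode

def cache_filename(query: str) -> str:
    # Mirrors QueryCache.filename below: 8-byte blake2s digest, urlsafe base64.
    digest = hashlib.blake2s(str(query).encode(), digest_size=8).digest()
    return urlsafe_b64encode(digest).decode("ascii") + ".parquet.snappy"

assert cache_filename("some random query") == "qAdzxvMgeSc=.parquet.snappy"
assert cache_filename("banned_words") == "LoRkfDuNmuA=.parquet.snappy"
```
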
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py
deleted file mode 100644
index a2ae790eb1d2c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T19:01:36+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:01:39+00:00
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py
deleted file mode 100644
index 5c67964aad121..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:55:58+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T17:55:58+00:00
-
-from .database import DatabaseFactory
-from .default_factories import DevFactory, ProdFactory, RedFactory
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py
deleted file mode 100644
index 8634168939edd..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:58:19+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T17:58:19+00:00
-import json
-import logging
-import os
-from dataclasses import dataclass, field
-from typing import Dict
-
-import boto3
-import requests
-from sqlalchemy import create_engine
-
-logger = logging.getLogger(__name__)
-
-secretsmanager = boto3.client("secretsmanager")
-
-DEFAULT_CERT_URL = (
-    "https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem"
-)
-DEFAULT_CERT_PATH = "/tmp/vortexa_utils_py/rds/ca-bundle.pem"
-
-DEFAULT_CREDENTIAL = "rds/dev/default"
-DEFAULT_CREDENTIAL_MAPPING = dict(
-    host="host", username="user", port="port", password="password"
-)
-
-
-@dataclass
-class DatabaseFactory(object):
-    """DatabaseFactory Class.
-
-    Class for creating a database engine factory.
-
-    usage::
-
-        factory = DatabaseFactory()
-        engine = factory.engine()
-
-    Parameters
-    ----------
-    secret_id : str
-        `secret_id` of the database credential.
-        (the default is 'rds/dev/default' which points to the dev database host)
-    cert_file : str
-        The location to store the ssl certificate file
-    cert_url : str
-        The url to fetch the aws rds ssl certificates from
-    credential_mapping : Dict[str, str]
-        A mapping between the `psycopg` connection args and the credential keys
-    """
-
-    secret_id: str = DEFAULT_CREDENTIAL
-    cert_file: str = DEFAULT_CERT_PATH
-    cert_url: str = DEFAULT_CERT_URL
-    credential_mapping: Dict[str, str] = field(
-        default_factory=lambda: dict(DEFAULT_CREDENTIAL_MAPPING)
-    )
-
-    def __post_init__(self):
-        logger.debug(f"Created {self.secret_id} factory object")
-
-    def fetch_cert(self, force: bool = False):
-        if not os.path.isfile(self.cert_file) or force:
-            logger.info("getting cert")
-            os.makedirs(os.path.dirname(self.cert_file), exist_ok=True)
-            cert = requests.get(self.cert_url)
-            with open(self.cert_file, "w") as f:
-                f.write(cert.text)
-        return self.cert_file
-
-    def get_credential(self):
-        secret = secretsmanager.get_secret_value(SecretId=self.secret_id)
-        return json.loads(secret["SecretString"])
-
-    def engine(self, dbname: str = None, echo: bool = False, **kwargs):
-        # type: (...) -> sqlalchemy.engine.Engine
-        """`sqlalchemy.engine.Engine` instance factory.
-
-        Parameters
-        ----------
-        dbname : str
-            database name `dbname` to connect to.
-            (the default is `None`, which will use the dbname in the secret
-            credential).
-        echo : bool
-            `echo` (the default is False).
-
-        Returns
-        -------
-        sqlalchemy.engine.Engine
-            SQLalchemy connection engine
-
-        Examples
-        --------
-        >>> factory = DatabaseFactory()
-        >>> engine = factory.engine()
-
-        """
-        cert_filename = self.fetch_cert()
-        credential = self.get_credential()
-        connect_args = {
-            v: credential[k] for k, v in self.credential_mapping.items()
-        }
-
-        dbname = dbname or os.environ.get("DBNAME") or credential["dbname"]
-        host = connect_args.pop("host")
-        port = connect_args.pop("port")
-
-        connect_args.update(sslmode="verify-full", sslrootcert=cert_filename)
-        engine = create_engine(
-            f"postgresql://{host}:{port}/{dbname}",
-            echo=echo,
-            connect_args=connect_args,
-            **kwargs,
-        )
-        return engine
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py
deleted file mode 100644
index d4f8ae0ca09e1..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from dataclasses import dataclass
-
-from .database import DatabaseFactory
-
-
-@dataclass
-class DevFactory(DatabaseFactory):
-    secret_id: str = "rds/dev/default"
-
-
-@dataclass
-class ProdFactory(DatabaseFactory):
-    secret_id: str = "rds/prod/default"
-
-
-@dataclass
-class RedFactory(DatabaseFactory):
-    cert_url: str = "https://s3.amazonaws.com/redshift-downloads/redshift-ca-bundle.crt"
-    cert_file: str = "/tmp/vortexa_utils_py/rds/redshift-ca-bundle.pem"
-    secret_id: str = "redshift/prod/default"
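
A short sketch of how these preset factories are meant to be used (assumes AWS credentials with access to the corresponding secrets; the database name is illustrative):

```python
from vortexa_utils.database import DevFactory, RedFactory

# Each preset only overrides secret_id (and, for Redshift, the CA bundle),
# so engine() behaves exactly as on the base DatabaseFactory.
engine = DevFactory().engine(dbname="rolling_backup", echo=True)
redshift_engine = RedFactory().engine()
```
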
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py
deleted file mode 100644
index ea86e9a914cd5..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import hashlib
-import logging
-import os
-import time
-from base64 import urlsafe_b64encode
-from dataclasses import dataclass
-from functools import wraps
-from typing import Union
-
-import pandas as pd
-from sqlalchemy.engine import Connection, Engine
-
-from pyarrow.lib import ArrowIOError
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class QueryCache(object):
-    result_extension: str = ".parquet.snappy"
-    cache_dir: str = os.path.join(
-        "/tmp", "python_utils", "database", "query_cache", "df_cache"
-    )
-    ttl: int = 3600
-
-    def __post_init__(self):
-        os.makedirs(self.cache_dir, exist_ok=True)
-
-    def path(self, url):
-        return os.path.join(
-            self.cache_dir,
-            url.drivername,
-            f"{url.host}:{url.port}",
-            url.database,
-        )
-
-    def filename(self, query):
-        query_digest = urlsafe_b64encode(
-            hashlib.blake2s(str(query).encode(), digest_size=8).digest()
-        )
-        return query_digest.decode("ascii") + self.result_extension
-
-    @wraps(pd.read_sql)
-    def read_sql(
-        self,
-        query: str,
-        con: Union[Engine, Connection],
-        ttl: int = None,
-        invalidate_cache: bool = False,
-        *args,
-        **kwargs,
-    ) -> pd.DataFrame:
-
-        # formulate a path
-        path = self.path(con.engine.url)
-        filename = self.filename(query)
-        filepath = os.path.join(path, filename)
-        os.makedirs(path, exist_ok=True)
-
-        # check if the cache exists and is valid
-        ttl = self.ttl if ttl is None else ttl
-
-        if not invalidate_cache and (
-            os.path.isfile(filepath)
-            and time.time() - os.path.getmtime(filepath) < ttl
-        ):
-            try:
-                logger.debug("reading from cache %s", filepath)
-                df = pd.read_parquet(filepath)
-            except ArrowIOError as e:
-                logger.error("Invalid Cache file, error: %s", e)
-            else:
-                return df
-        logger.debug("reading from database")
-        df = pd.read_sql(query, con, *args, **kwargs)
-        df.to_parquet(filepath)
-        return df
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py
deleted file mode 100644
index 811e36443265d..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from typing import Iterable, List
-
-import sqlalchemy
-from pandas.io.sql import SQLTable
-from sqlalchemy.engine import Connectable
-
-
-def upsert(
-    table: SQLTable, conn: Connectable, keys: List[str], data_iter: Iterable
-):
-    """Upsert method to be used with `pandas.DataFrame.to_sql`.
-
-    In pandas > 0.24.0 you can specify a method to control the insertion clause
-    used by `pandas.DataFrame.to_sql`.
-
-    Parameters
-    ----------
-    table : pandas.io.sql.SQLTable
-        Target table wrapper, as passed in by `DataFrame.to_sql`.
-    conn : sqlalchemy.engine.Connectable
-        Database connection or engine to execute the insert with.
-    keys : List[str]
-        Names of the columns to insert.
-    data_iter : Iterable
-        Iterable of row value tuples to insert.
-
-    Returns
-    -------
-    None
-
-    Examples
-    --------
-    >>> df.to_sql("my_table", engine, method=upsert, if_exists="append")
-
-    """
-    cols = ", ".join(f'"{k}"' for k in keys)
-    if table.schema:
-        tname = "{}.{}".format(table.schema, table.name)
-    else:
-        tname = table.name
-
-    # placeholder = ", ".join(["?"] * len(keys))
-    placeholder = ", ".join([f":{k}" for k in keys])
-    datas = ({k: d for k, d in zip(keys, data)} for data in data_iter)
-    if conn.engine.driver.endswith("sqlite"):
-        # sqlite
-        sql = f"INSERT or IGNORE INTO {tname} ({cols}) VALUES ({placeholder})"
-    else:
-        # postgresql
-        sql = sqlalchemy.text(
-            f"""
-            INSERT INTO {tname}
-            ({cols})
-            VALUES ({placeholder})
-            ON CONFLICT DO NOTHING
-            """
-        )
-
-    conn.execute(sql, *datas)
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py b/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py
deleted file mode 100644
index 6432302dac087..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:54:43+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:16:08+00:00
-from setuptools import setup, find_packages
-
-
-setup(
-    name='vortexa_utils_deploy',
-    version='0.0.1',
-    description='',
-    long_description='',
-
-    author='Richard Mathie',
-    author_email='richard.mathie@vortexa.com',
-
-    zip_safe=False,
-
-    packages=find_packages(),
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py
deleted file mode 100644
index a2ae790eb1d2c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T19:01:36+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:01:39+00:00
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md
deleted file mode 100644
index f1d2dcd78744e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md
+++ /dev/null
@@ -1 +0,0 @@
-# Portainer API Helper Module
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py
deleted file mode 100644
index 26e33c55820aa..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:56:21+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:11:43+00:00
-
-
-def notNone(x):
-    return x is not None
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py
deleted file mode 100644
index 456ace9496cba..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T18:05:38+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:14:40+00:00
-import os
-import requests
-from functools import wraps
-from urllib.parse import urlparse
-from .stacks import Stacks
-from . import notNone
-
-
-class RequestHelper(object):
-    def __init__(self, api, base_url='api'):
-        self.api = api
-        self.base_url = base_url
-
-    def wrapper(self, func):
-        @wraps(func)
-        def caller(url=None, *args, **kwargs):
-            parts = filter(notNone, (self.api.host, self.base_url, url))
-            parts = map(str, parts)
-            headers = kwargs.get("headers", {})
-            headers.update(self.api.get_header())
-            kwargs["headers"] = headers
-            return func(os.path.join(*parts),
-                        *args, **kwargs).json()
-        return caller
-
-    def __getattr__(self, name, *args, **kwargs):
-        method = getattr(requests, name, *args, **kwargs)
-        return self.wrapper(method)
-
-
-class PortainerAPI(object):
-    def __init__(self, host, user=None, pw=None):
-        self.host = urlparse(host, scheme='http').geturl()
-        self.user = user
-        self.pw = pw
-        if all(ting is not None for ting in (host, user, pw)):
-            self.get_jwt()
-        self.requests = RequestHelper(self)
-        self.stacks = Stacks(self)
-
-    def get_jwt(self):
-        """
-        http POST :9000/api/auth Username="admin" Password="adminpassword"
-        """
-        url = f'{self.host}/api/auth'
-        resp = requests.post(url, json=dict(Username=self.user,
-                                            Password=self.pw))
-        self.token = resp.json().get('jwt')
-        return self.token
-
-    def get_header(self):
-        return {"Authorization": f"Bearer {self.token}"}
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py
deleted file mode 100644
index 8eaf2f8d7482d..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T18:04:55+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:34:09+00:00
-
-
-class Stacks(object):
-    def __init__(self, api):
-        # imported here to avoid a circular import (api.py imports Stacks)
-        from .api import RequestHelper
-        self.api = api
-        self.requests = RequestHelper(api, 'api/stacks')
-
-    def get(self, stack_id):
-        return self.requests.get(stack_id)
-
-    def list(self):
-        return self.get(None)
-
-    def filter(self, **kwargs):
-        def filter_kwargs(stack):
-            return all(str(stack[k]) == str(v) for k, v in kwargs.items())
-
-        return filter(filter_kwargs, self.list())
-
-    def first(self, **kwargs):
-        return next(self.filter(**kwargs))
-
-    def get_file(self, stack_id):
-        return self.requests.get(f'{stack_id}/file')
-
-    def update(self, stack_id=None, endpointId=None, name=None,
-               Env=None, StackFileContent=None, Prune=False):
-        # get the stack by filtering on name or stack_id
-        if name is not None:
-            stack = self.first(Name=name)
-            stack_id = stack['Id']
-        elif stack_id is not None:
-            stack = self.get(stack_id)
-
-        endpointId = stack.get('EndpointId', endpointId)
-        if endpointId is None:
-            raise Exception("no endpointId found or set")
-
-        # update the old Env with the new Env
-        old_Env = stack.get('Env')
-        if old_Env is not None:
-            update_keys = set(e['name'] for e in Env)
-            old_Env = list(e for e in old_Env if e['name'] not in update_keys)
-            Env += old_Env
-
-        if StackFileContent is None:
-            StackFileContent = self.get_file(stack_id)['StackFileContent']
-        body = dict(StackFileContent=StackFileContent,
-                    Env=Env,
-                    Prune=Prune)
-
-        return self.requests.put(
-            stack_id,
-            params=dict(endpointId=endpointId),
-            json=body
-        )
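
A sketch of the intended call pattern for `Stacks.update` via the API client defined above (the host and credentials here are made up):

```python
from vortexa_utils.portainer.api import PortainerAPI

api = PortainerAPI(host="https://portainer.example.com:9000",
                   user="deployer", pw="s3cret")

# Look the stack up by name and merge a new value into its Env.
api.stacks.update(name="my-stack",
                  Env=[{"name": "BUILD_NUM", "value": "42"}])
```
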
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py
deleted file mode 100644
index 275f8e6dd8604..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env python3
-# @Author: richard
-# @Date:   2018-12-04T18:10:07+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:31:45+00:00
-import argparse
-from pprint import pprint
-from . import notNone
-from .api import PortainerAPI
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Portainer API CLI')
-    parser.add_argument('--host', '-H', type=str,
-                        help='Host name of Portainer API',
-                        default='https://lawgiver.vortexa.com:9000')
-    parser.add_argument('--user', '-u', type=str,
-                        help='User name',
-                        default='kraftwork_updater')
-    parser.add_argument('--pass', '-p', type=str, dest='password',
-                        help='Password name')
-
-    parser.add_argument('--name', '-n', type=str,
-                        help='Stack name to filter')
-
-    parser.add_argument('--env', '-e', nargs=2, action='append',
-                        help='key value pairs of config to update')
-
-    parser.add_argument('--filter', '-f', nargs=2, action='append',
-                        help='key value pairs of config to filter by')
-
-    def add_cmd(flag):
-        def command(func):
-            parser.add_argument(
-                flag,
-                action='store_const',
-                const=func,
-                dest='cmd'
-            )
-            return func
-        return command
-
-    def get_filter():
-        Filter = {}
-        if args.filter is not None:
-            Filter.update(args.filter)
-        if args.name is not None:
-            Filter.update(Name=args.name)
-        return Filter
-
-    @add_cmd('--list')
-    def list_stacks():
-        if any(map(notNone, (args.name, args.filter))):
-            Filter = get_filter()
-            return list(api.stacks.filter(**Filter))
-        else:
-            return api.stacks.list()
-
-    @add_cmd('--update')
-    def update_stacks():
-        env = [dict(name=k, value=v) for k, v in args.env]
-        return api.stacks.update(name=args.name, Env=env)
-
-    args = parser.parse_args()
-
-    api = PortainerAPI(host=args.host,
-                       user=args.user,
-                       pw=args.password)
-
-    pprint(args.cmd())
-
-# api.stacks.list()
-# api.stacks.update(
-#     1, 1,
-#     Env=[{
-#         "name": "KFAFTWERK_BUILD_NUM",
-#         "value": '376'
-#     }]
-# )
-#
-#
-# content = Path('docker/scripts/docker-compose.yml').read_text()
-#
-# api.requests.post('stacks?type=1&method=string&endpointId=1',
-#                   json=dict(
-#                       Name="myStack",
-#                       StackFileContent=content,
-#                       Env=[dict(name="Hello",value="world")],
-#                       SwarmID='729a4f2h5kj2sd42x34pl3uu1'
-#                   )
-#                 )
diff --git a/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile b/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile
deleted file mode 100644
index 76155dd44eb33..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-FROM python:3.7-alpine
-ARG PANDAS_VERSION=0.24.1
-
-RUN apk add libstdc++ \
-    && apk add --no-cache --virtual .build-deps \
-        g++ \
-        gcc \
-        gfortran \
-        build-base \
-        wget \
-        freetype-dev \
-        libpng-dev \
-        openblas-dev \
-        postgresql-dev \
-        musl-dev \
-    && ln -s /usr/include/locale.h /usr/include/xlocale.h \
-    && pip install wheel \
-    && pip install --no-cache \
-        numpy==1.15.1 \
-        scipy \
-        psycopg2-binary \
-        sqlalchemy \
-    && pip install --no-cache \
-        pandas==${PANDAS_VERSION} \
-    && apk del .build-deps
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/README.rst b/vendor/github.com/V0RT3X4/python_utils/general/README.rst
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/setup.py b/vendor/github.com/V0RT3X4/python_utils/general/setup.py
deleted file mode 100644
index c4a958adb103c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/setup.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import io
-import os
-
-from setuptools import setup, find_packages
-
-namespace = 'vortexa_utils'
-name = 'vortexa_utils_general'
-version = '1.0.0'
-description = 'Vortexa general utils helper library'
-
-dependencies = [
-    'gitpython',
-    'logzero',
-    'tenacity'
-]
-
-# Setup boilerplate below
-
-package_root = os.path.abspath(os.path.dirname(__file__))
-
-readme_filename = os.path.join(package_root, 'README.rst')
-with io.open(readme_filename, encoding='utf-8') as readme_file:
-    readme = readme_file.read()
-
-packages = [
-    package for package in find_packages()
-    if package.startswith(namespace)
-]
-
-setup(
-    name=name,
-    version=version,
-    description=description,
-    long_description=readme,
-    author='Marcin Szymanski',
-    author_email='marcin.szymanski@vortexa.com',
-    zip_safe=False,
-    packages=packages,
-    install_requires=dependencies,
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py
deleted file mode 100644
index a7712f632a766..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T20:12:18+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T20:12:57+00:00
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py
deleted file mode 100644
index 79bc2365032ed..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import os
-
-from git import Repo
-from logzero import logger
-from tenacity import retry, wait_fixed, stop_after_attempt
-
-
-@retry(wait=wait_fixed(10), stop=stop_after_attempt(3))
-def clone_repo(repo_url: str, path: str, ssh_key: str):
-    os.environ['GIT_SSH_COMMAND'] = f'ssh -i {ssh_key}'
-    logger.info('Cloning git repo %s to %s', repo_url, path)
-    Repo.clone_from(repo_url, path, branch='master')
-    logger.info('Repo cloned successfully')
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/README.md b/vendor/github.com/V0RT3X4/python_utils/logging/README.md
deleted file mode 100644
index 28b90c8686b22..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Vortexa Utils Logging Helpers
-
-Small helper scripts to abstract logging-related boilerplate code.
-
-
-## log_unhandled_exceptions
-
-Function decorator, designed to be wrapped around any `main()` (or equivalent) function, to capture errors, prefix them with `ERROR`, and raise them in-line, when executed in AWS Batch.
-
-### Problem:
-
-AWS Batch jobs all output logs onto the CloudWatch Log Group (`/aws/batch/job`). Therefore, to raise specific alarms, python jobs should use logging, with the logger pattern containing a unique identifier for the job (such as the job/repo name), so that CloudWatch can filter logs and look for specific exceptions.
-
-When errors are raised by a python program logging to CloudWatch, the logger pattern and the Error/stacktrace are output on 2 consecutive lines. CloudWatch Alarm triggers can only look for pattern combinations which are in-line, therefore, for a CloudWatch Alarm to be raised when a job fails, the logger pattern and some form of identifiable error key must be printed in-line.
-
-
-### Solution:
-
-The `log_unhandled_exceptions` decorator can be wrapped around main executing functions; if any errors are raised during run-time, it will capture these errors and raise them in-line with the logging pattern, using the common prefix `ERROR: `. CloudWatch alerts can now be set to look for (1) the unique logging pattern of the project (i.e. name) and (2) the key `ERROR`, to raise targeted alerts. The full stacktrace will still be output to CloudWatch logs.
-
-### Usage:
-
-```python
-from vortexa_utils.logging import log_unhandled_exceptions
-
-# The following is the logger set-up boilerplate code.
-# This can be done as below, or imported from a project-logger dir.
-# The following is only intended as a sample and should not be copied without understanding what is happening.
-import logging
-
-logger = logging.getLogger(__name__)
-log_format = logging.Formatter(
-    "PROJECT_NAME:%(name)s:%(message)s"
-)  # Only a sample format, can be designed at will, as long as a unique identifier (e.g. PROJECT_NAME) is included
-handler = logging.StreamHandler()
-handler.setFormatter(log_format)  # formatters attach to handlers, not to the logger itself
-logger.addHandler(handler)
-logger.setLevel(logging.INFO)
-
-@log_unhandled_exceptions(logger)
-def main():
-    return int(1) + str('two')
-
-if __name__ == "__main__":
-    main()
-```
-
-Code snippet above would return:
-
-```
-PROJECT_NAME:__main__:ERROR: unsupported operand type(s) for +: 'int' and 'str'
-    Traceback (most recent call last):
-    ... ...
-    TypeError: unsupported operand type(s) for +: 'int' and 'str'
-```
-
-As a result, a CloudWatch alarm can now be set on the pattern `PROJECT_NAME ERROR`
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/setup.py b/vendor/github.com/V0RT3X4/python_utils/logging/setup.py
deleted file mode 100644
index 7081b7db26c4e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/setup.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import io
-import os
-
-from setuptools import find_packages, setup
-
-namespace = "vortexa_utils"
-description = "Vortexa Error Logging"
-
-# Setup boilerplate below
-
-package_root = os.path.abspath(os.path.dirname(__file__))
-
-readme_filename = os.path.join(package_root, "README.md")
-with io.open(readme_filename, encoding="utf-8") as readme_file:
-    readme = readme_file.read()
-
-packages = [
-    package for package in find_packages() if package.startswith(namespace)
-]
-
-requirements = [
-    "logzero",
-    "psutil"
-]
-
-setup(
-    name="vortexa_utils_logging",
-    version="0.0.1",
-    description=description,
-    long_description=readme,
-    author="Tino von Stegmann",
-    author_email="constantin.vonstegmann@vortexa.com",
-    zip_safe=False,
-    tests_require=["nose2"],
-    install_requires=requirements,
-    test_suite="nose2.collector.collector",
-    packages=packages,
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py
deleted file mode 100644
index 69e3be50dac40..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py
deleted file mode 100644
index 14783dcbadd01..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .exception_decorator import log_unhandled_exceptions
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py
deleted file mode 100644
index 52b49bac513e0..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py
+++ /dev/null
@@ -1,12 +0,0 @@
-def log_unhandled_exceptions(logger):
-    def outer_wrapper(main):
-        def wrapper(*args, **kwargs):
-            try:
-                return main(*args, **kwargs)
-            except Exception as e:
-                logger.exception(f"ERROR: {e}")
-                raise e
-
-        return wrapper
-
-    return outer_wrapper
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py
deleted file mode 100644
index de2bac29e6c44..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import os
-import resource
-
-import psutil
-from logzero import logger
-
-RESOURCE_LOG = """---RESOURCE---
-User time: {0}
-System time: {1}
-Max resident size: {2}
-Block input operations: {3}
-Block output operations: {4}
----MEMORY_INFO---
-RSS: {5}
-VMS: {6}
-Data: {7}
-"""
-
-
-def log_resource_usage(step: str):
-    mem = psutil.Process(os.getpid()).memory_info()
-    res = resource.getrusage(resource.RUSAGE_SELF)
-    # memory_info().data is not available on every platform (e.g. macOS)
-    try:
-        data = mem.data
mem.data - except AttributeError: - data = 0 - res_log = RESOURCE_LOG.format( - res.ru_utime, - res.ru_stime, - res.ru_maxrss, - res.ru_inblock, - res.ru_oublock, - mem.rss, - mem.vms, - data, - ) - logger.info(f"[resource][{step}] {res_log}") diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py deleted file mode 100644 index 64e537577cd9a..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py +++ /dev/null @@ -1,31 +0,0 @@ -import io - - -socket_bytes = io.BytesIO(b"So\x01me\r\nbytes\rto\nparsB") - -byte_joiner = b''.join - -list(socket_bytes) - -def split_on(buffer, *spliters): - if not spliters: - spliters = {b'\n', b'\r'} - else: - spliters = set(spliters) - line = [] - while True: - b = buffer.read(1) - split = b in {b'\n', b'\r'} - - if split or not b: - if line: - yield byte_joiner(line) - if split: - line = [] - elif not b: - return - else: - line.append(b) - -gen = split_on(socket_bytes) -list(gen) diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py deleted file mode 100644 index fdc97e08c2cb1..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py +++ /dev/null @@ -1,24 +0,0 @@ -import socket - - -s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -s.connect(("www.python.org", 80)) - -import socket - -HOST = '127.0.0.1' # The server's hostname or IP address -PORT = 65432 # The port used by the server - -with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.connect((HOST, PORT)) - s.sendall(b'Hello, world') - data = s.recv(1024) - -print('Received', repr(data)) - -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -sock.connect((HOST, PORT)) -sio = sock.makefile('r', encoding='ascii', errors='backslashreplace', newline=None) -next(sio) -sock.close() -sio.close() diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py deleted file mode 100644 index c1d427b6b0882..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py +++ /dev/null @@ -1,17 +0,0 @@ -import socket - -HOST = '127.0.0.1' # Standard loopback interface address (localhost) -PORT = 65432 # Port to listen on (non-privileged ports are > 1023) - -with - -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - -sock.bind((HOST, PORT)) -sock.listen() - -while True: - conn, addr = sock.accept() - with conn: - while True: - conn.sendall(b'some\rdata\nbyt\1\xffest\r\nadslfkja\n\raslkdj') diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION b/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION deleted file mode 100644 index 7bcd0e3612da7..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.2 \ No newline at end of file diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py 
b/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py deleted file mode 100644 index 2a6c50ab207b1..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T19:14:15+00:00 -import os -from setuptools import setup, find_packages -from vortexa_utils.versioning import __version__ - -namespace = 'vortexa_utils' - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name="vortexa_utils_versioning", - version=__version__, - description="", - long_description="", - - author="Richard Mathie", - author_email="richard.mathie@vortexa.com", - - zip_safe=False, - tests_require=['nose2'], - test_suite='nose2.collector.collector', - - packages=packages, -) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py b/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py deleted file mode 100644 index 27be1a07217a9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py +++ /dev/null @@ -1,47 +0,0 @@ -import unittest -import os -import tempfile -from nose2.tools import params -from vortexa_utils.versioning.versioner import Versioner - -specs = [ - ((0, 0, 0), (0, 0, 1)), - ((0, 0, 1), (0, 0, 2)), - ((0, 1, 0), (0, 1, 0)), - ((0, 1, 1), (0, 1, 0)), - ((1, 0, 0), (1, 0, 0)), - ((1, 0, 1), (1, 0, 0)), - ((1, 1, 0), (1, 0, 0)), - ((1, 1, 1), (1, 0, 0)) -] - - -class TestVersioner(unittest.TestCase): - def setUp(self): - fh, filename = tempfile.mkstemp() - os.fdopen(fh).close() - self.version: Versioner = Versioner(filename) - - def tearDown(self): - os.remove(self.version.VERSION_FILE) - - def test_version_none(self): - self.assertEqual(self.version.__version__, None) - - def test_version_init(self): - self.assertEqual( - self.version.version, - self.version.SemanticVersion(0, 0, 1) - ) - self.assertTrue(os.path.isfile(self.version.VERSION_FILE)) - with open(self.version.VERSION_FILE, "r") as f: - self.assertEqual(f.readline(), "0.0.1") - - @params(*specs) - def test_version_incriment(self, flags, output): - self.test_version_init() - self.version.update_version(flags) - self.assertEqual( - self.version.version, - self.version.SemanticVersion(*output) - ) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py deleted file mode 100644 index a7712f632a766..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:12:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:12:57+00:00 -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py deleted file mode 100644 index 977291bcc6396..0000000000000 --- 
a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .versioner import Versioner - -version = Versioner("../../VERSION", __file__) -__version_numeric__ = version.version -__version__ = str(version) - - -if __name__ == "__main__": - from .cli import VersionCLI - VersionCLI(version).parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py deleted file mode 100644 index c9ce8d27293a2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py +++ /dev/null @@ -1,9 +0,0 @@ -from . import version -from .cli import VersionCLI - -__version_numeric__ = version.version -__version__ = str(version) - - -if __name__ == "__main__": - VersionCLI(version).parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py deleted file mode 100644 index 8e414bb5e7c08..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py +++ /dev/null @@ -1,46 +0,0 @@ -from argparse import ArgumentParser -from dataclasses import dataclass, field -from vortexa_utils.versioning.versioner import Versioner - - -@dataclass -class VersionCLI(object): - versioner: Versioner - parser: ArgumentParser = field(default=None, init=False) - - def __post_init__(self): - self.parser = ArgumentParser( - description='Package Version Tool.' - ) - self.specs = self.versioner.VERSION_SPEC.split( - self.versioner.VERSION_SEP - ) - for spec in self.specs: - self.parser.add_argument( - f'--bump-{spec.lower()}', - f'-{spec[0]}', - action='store_true' - ) - - def parse_args(self): - args = self.parser.parse_args() - spec_flags = list( - getattr(args, f'bump_{spec.lower()}') - for spec in self.specs - ) - if any(spec_flags): - print(f"Current Version: {self.versioner}") - if sum(spec_flags) > 1: - print("You can only bump one spec at a time") - self.parser.print_help() - else: - self.versioner.update_version(spec_flags) - print(f"New Version {self.versioner}") - else: - print(f"{self.versioner}") - - -if __name__ == "__main__": - version = Versioner() - cli = VersionCLI(version) - cli.parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py deleted file mode 100644 index 0d3f9b544b13e..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -# @Author: richard -# @Date: 2018-12-21T16:37:39+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-21T16:37:56+00:00 - - -class cached_property(object): - """ - A property that is only computed once per instance and then replaces itself - with an ordinary attribute. Deleting the attribute resets the property. 
- """ # noqa - - def __init__(self, func): - self.__doc__ = getattr(func, "__doc__") - self.func = func - - def __get__(self, obj, cls): - if obj is None: - return self - - value = obj.__dict__[self.func.__name__] = self.func(obj) - return value diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py deleted file mode 100644 index 285481c05ad1a..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Class to track the version of a package.""" -import os -from dataclasses import dataclass, field, InitVar -from collections import namedtuple -from .utils import cached_property - - -@dataclass -class Versioner(object): - VERSION_FILE: str = "VERSION" - MODULE_FILE: InitVar[str] = None - VERSION_SEP: str = "." - VERSION_SPEC: str = "Major.minor.patch" - __version__: namedtuple = field(default=None, init=False) - __version_file__: namedtuple = field(default=None, init=False) - - def __post_init__(self, MODULE_FILE): - parts = [] - if MODULE_FILE is not None: - dir = os.path.dirname(os.path.abspath(MODULE_FILE)) - parts.append(dir) - parts.append(self.VERSION_FILE) - path = os.path.join(*parts) - self.__version_file__ = os.path.abspath(path) - - @cached_property - def SemanticVersion(self): - version_type = namedtuple( - "SemanticVersion", - self.VERSION_SPEC.lower().split(self.VERSION_SEP) - ) - return version_type - - def init_version(self): - fields = self.SemanticVersion._fields - version = ( - 1 if i == len(fields) - 1 else 0 - for i, field in enumerate(fields) - ) - self.version = self.SemanticVersion(*version) - self.write() - return self.version - - def new_version(self, spec_flags): - bumped = False - for spec, ver in zip(spec_flags, self.version): - if bumped: - yield 0 - elif spec: - bumped = True - yield ver + 1 - else: - yield ver - - def update_version(self, spec_flags): - version = self.SemanticVersion(*self.new_version(spec_flags)) - self.version = version - self.write() - return version - - def read(self): - try: - with open(self.__version_file__, "r") as file: - version_string = file.readline().strip() - except FileNotFoundError: - version = self.init_version() - else: - if version_string == "": - version = self.init_version() - else: - version = self.parse_verion(version_string) - self.version = version - return version - - def write(self): - with open(self.__version_file__, "w") as file: - file.write(str(self)) - - @property - def version(self): - if self.__version__ is None: - self.read() - return self.__version__ - - @version.setter - def version(self, version): - if isinstance(version, str): - version = self.parse_verion(version) - if isinstance(version, self.SemanticVersion): - self.__version__ = version - else: - raise TypeError("Version is not str or self.SemanticVersion") - - def parse_verion(self, version: str): - parts = (int(v) for v in version.split(self.VERSION_SEP)) - return self.SemanticVersion(*parts) - - def __str__(self): - return self.VERSION_SEP.join(str(v) for v in self.version) diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst deleted file mode 
100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt deleted file mode 100644 index dfedbe37089fc..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sendgrid < 6.0.0 -boto3 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py deleted file mode 100644 index 1b998bcd47eba..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py +++ /dev/null @@ -1,48 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:16:54+00:00 -import os -import io -from setuptools import setup, find_packages - -namespace = 'vortexa_utils' -name = 'vortexa_utils_youve_got_mail' -version = '1' -description = 'Vortexa E-mail utils helper library', - -dependencies = [ - 'boto3', - 'sendgrid<6.0.0' -] - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, 'README.rst') -with io.open(readme_filename, encoding='utf-8') as readme_file: - readme = readme_file.read() - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name=name, - version=version, - description=description, - long_description=readme, - - author='Richard Mathie', - author_email='richard.mathie@vortexa.com', - - zip_safe=False, - test_suite='nose2.collector.collector', - tests_require=['nose2'], - - packages=packages, - install_requires=dependencies, - extras_require={} -) diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py deleted file mode 100644 index aae86d37cf7e6..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py +++ /dev/null @@ -1,43 +0,0 @@ -import base64 -import boto3 -import json -import mimetypes -import sendgrid -from sendgrid.helpers.mail import * -from typing import List - - -secretsmanager = boto3.client('secretsmanager') - - -def create_sendgrid_client(): - secret = secretsmanager.get_secret_value(SecretId='prod/sendgrid') - api_key = json.loads(secret['SecretString'])['SENDGRID_API_KEY'] - - return sendgrid.SendGridAPIClient(apikey=api_key) - - -def build_attachment(buf: bytes, filename: str, disposition: str = "attachment", content_id: str = None): - encoded = base64.b64encode(buf).decode() - - mime_type, encoding = mimetypes.guess_type(filename) - - attachment = Attachment() - attachment.content = encoded - attachment.type = mime_type - attachment.filename = filename - attachment.disposition = disposition - attachment.content_id = content_id - - return attachment - - -def add_recipients(recipients: List[str], mail: Mail): - personalization = Personalization() - - for rec in 
recipients: - personalization.add_to(Email(rec)) - - mail.add_personalization(personalization) - - return mail From 6bcd6c2e23cab3821735967a74f3a3a1c5de8b03 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 9 Nov 2019 16:49:35 +0000 Subject: [PATCH 18/75] tidy up repo --- .gitignore | 1 + .pre-commit-config.yaml | 1 + pandas/io/sql.py | 44 +++++++++++++++++++----------------- pandas/io/sql_scratch.py | 48 ++++++++++++++++++---------------------- 4 files changed, 48 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index 919e1b9621a70..d17a87294796b 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,4 @@ doc/source/savefig/ # pyenv files .python-version +vendor/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f98273a336cf..e79383238dc7e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,7 @@ repos: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions] + exclude: ^pandas/io/sql_scratch.py$ - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.20 hooks: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2997ac95b8adb..e1630fbc9a6f4 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -496,7 +496,13 @@ def to_sql( .. versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append", "upsert_ignore", "upsert_delete"): #TODO: add upserts + if if_exists not in ( + "fail", + "replace", + "append", + "upsert_ignore", + "upsert_delete", + ): # TODO: add upserts raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -653,7 +659,6 @@ def create(self): elif self.if_exists == "upsert_delete": pass elif self.if_exists == "upsert_ignore": - # clear rows out of dataframe pass else: raise ValueError( @@ -664,20 +669,18 @@ def create(self): def _upsert_delete_processing(self): from sqlalchemy import tuple_ + # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() # Generate delete statement delete_statement = self.table.delete().where( - tuple_( - *(self.table.c[col] for col in primary_keys) - ).in_( - primary_key_values - ) + tuple_(*(self.table.c[col] for col in primary_keys)).in_(primary_key_values) ) return delete_statement def _upsert_ignore_processing(self): from sqlalchemy import tuple_, select + # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() @@ -695,9 +698,9 @@ def _upsert_ignore_processing(self): # Delete rows from self.frame where primary keys match self.frame = self._get_index_formatted_dataframe() - to_be_deleted_mask = self.frame[primary_keys].isin( - pkeys_from_database[primary_keys] - ).all(1) + to_be_deleted_mask = ( + self.frame[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) + ) self.frame.drop(self.frame[to_be_deleted_mask].index, inplace=True) @@ -710,7 +713,8 @@ def _get_primary_key_data(self): ------- primary_keys, primary_key_values : Tuple[List[str], Iterable] - primary_keys : List of primary key column names - - primary_key_values : Iterable of dataframe rows corresponding to primary_key columns + - primary_key_values : Iterable of dataframe rows + corresponding to primary_key columns """ # reflect MetaData object and assign contents of db to self.table attribute @@ -718,16 +722,16 @@ def _get_primary_key_data(self): self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) primary_keys = [ - str(primary_key.name) for primary_key in self.table.primary_key.columns.values() + 
str(primary_key.name) + for primary_key in self.table.primary_key.columns.values() ] - # For the time being, this method is defensive and will break if no pkeys are found - # If desired this default behaviour could be changed so that in cases where no pkeys - # are found, it could default to a normal insert + # For the time being, this method is defensive and will break if + # no pkeys are found. If desired this default behaviour could be + # changed so that in cases where no pkeys are found, + # it could default to a normal insert if len(primary_keys) == 0: - raise ValueError( - f"No primary keys found for table {self.name}" - ) + raise ValueError(f"No primary keys found for table {self.name}") primary_key_values = zip(*[self.frame[key] for key in primary_keys]) return primary_keys, primary_key_values @@ -767,8 +771,8 @@ def _get_index_formatted_dataframe(self): DataFrame object """ - # Originally this functionality formed the first step of the insert_data() method, - # however it will be useful to have in other places, so to keep code DRY it has been moved here. + # Originally this functionality formed the first step of the insert_data method. + # It will be useful to have in other places, so moved here to keep code DRY. if self.index is not None: # The following check ensures that the method can be called multiple times, diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index 63c52ebdd432d..e0c683813618c 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -1,5 +1,13 @@ -from sqlalchemy import Table, select +### REPRODUCIBLE SQLTable Creation:table +import sqlalchemy +from sqlalchemy import Table, create_engine, select from sqlalchemy.engine.base import Connection +from sqlalchemy.sql import tuple_ + +import pandas as pd +from vortexa_utils.database import ProdFactory + +from pandas.io.sql import SQLDatabase, SQLTable def get_pkey(table: Table): @@ -21,6 +29,7 @@ def pkey_generator(table, engine): for row in conn.execute(statement): yield row + # Leaves connection open def pkey_results_proxy(table, engine): pkeys = get_pkey(table) @@ -48,54 +57,41 @@ def get_pkey_values(table: SQLTable): statement = select([table.table.c[name] for name in pkeys]) table.pd_sql.execute(statement) + def generate_mask(df, dictionary): return [df[key] == value for key, value in dictionary.items()] + def generate_mask_of_masks(list_of_masks): return pd.concat([mask for mask in list_of_masks], axis=1).all(1) -### REPRODUCIBLE SQLTable Creation:table -import sqlalchemy - -engine = sqlalchemy.create_engine('enter string here') +engine = sqlalchemy.create_engine("enter string here") meta = MetaData(engine) -table_name = 'charterers' # or wtv +table_name = "charterers" # or wtv meta.reflect(only=[table_name], views=True) db = SQLDatabase(engine, meta=meta) table = SQLTable(table_name, db, index=None, schema=None) -from vortexa_utils.database import ProdFactory -from sqlalchemy import create_engine -import pandas as pd -from pandas.io.sql import SQLTable, SQLDatabase - engine_v = ProdFactory().engine() -engine = create_engine('sqlite:///:memory:') -table_name = 'charterers' +engine = create_engine("sqlite:///:memory:") +table_name = "charterers" df = pd.read_sql_table(table_name, engine_v) df_test = df.head().copy() -df_test['name'] = df_test['name'].apply(lambda x: x + '_TEST') -engine.execute("create table charterers(id text primary key, name text, energy integer)") -df.to_sql(table_name, index=False, if_exists='append', con=engine) +df_test["name"] = df_test["name"].apply(lambda 
x: x + "_TEST") +engine.execute( + "create table charterers(id text primary key, name text, energy integer)" +) +df.to_sql(table_name, index=False, if_exists="append", con=engine) db = SQLDatabase(engine, schema=None, meta=None) new_data = SQLTable(table_name, db, frame=df_test, index=False) - -from sqlalchemy.sql import tuple_ - - def delete_matching_keys(sql_table, key_columns, value_iter): delete_expression = sql_table.table.delete().where( - tuple_( - *(table.table.c[col] for col in key_columns) - ).in_( - list(zip(value_iter)) - ) + tuple_(*(table.table.c[col] for col in key_columns)).in_(list(zip(value_iter))) ) with sql_table.pd_sql.run_transaction() as conn: conn.execute(delete_expression) - From 00e63190ecf82c821c02db85eaf0d8a8b1469535 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 9 Nov 2019 18:39:20 +0000 Subject: [PATCH 19/75] Squashed 'vendor/github.com/V0RT3X4/python_utils/' content from commit 88a5a481b git-subtree-dir: vendor/github.com/V0RT3X4/python_utils git-subtree-split: 88a5a481b5dbec610e762df862fd69918c1b77d4 --- .circleci/config.yml | 35 ++++ .gitignore | 111 +++++++++++ README.md | 108 +++++++++++ aws/README.md | 1 + aws/README.rst | 0 aws/requirements.txt | 5 + aws/setup.py | 50 +++++ aws/tests/__init__.py | 4 + aws/tests/lambda_types/__init__.py | 0 aws/tests/lambda_types/message_eg.py | 19 ++ aws/tests/lambda_types/repeat_eg.py | 19 ++ aws/tests/lambda_types/test_lambda_types.py | 89 +++++++++ .../s3_client_encryption_tests/__init__.py | 4 + .../test_IOAuthDecrypter.py | 22 +++ .../test_IOAuthTagLength.py | 42 ++++ .../test_IODecrypter.py | 94 +++++++++ .../test_decrypt_s3_mime_with_attachment.py | 68 +++++++ .../test_decrypt_s3_object.py | 65 +++++++ .../test_kms_cipher_provider.py | 39 ++++ aws/tests/ses_inbox/__init__.py | 0 aws/tests/ses_inbox/test_get_attachments.py | 0 aws/tests/ses_inbox/test_list_inbox.py | 25 +++ aws/tests/ses_notification_types/__init__.py | 0 .../ses_notification_types/test_action.py | 16 ++ .../test_lambda_record.py | 32 +++ aws/tests/ses_notification_types/test_mail.py | 85 ++++++++ .../test_notification.py | 23 +++ .../ses_notification_types/test_receipt.py | 34 ++++ aws/tests/utils/__init__.py | 0 .../utils/nested_data_classes/__init__.py | 0 .../test_nested_dataclass.py | 36 ++++ aws/vortexa_utils/__init__.py | 5 + aws/vortexa_utils/aws/__init__.py | 4 + aws/vortexa_utils/aws/lambdr/__init__.py | 5 + aws/vortexa_utils/aws/lambdr/types.py | 45 +++++ aws/vortexa_utils/aws/s3/__init__.py | 0 aws/vortexa_utils/aws/s3/client.py | 50 +++++ .../client_side_encryption/IOAuthDecrypter.py | 40 ++++ .../IOAuthDecrypterTagLength.py | 65 +++++++ .../s3/client_side_encryption/IODecrypter.py | 61 ++++++ .../s3/client_side_encryption/IONocrypter.py | 38 ++++ .../aws/s3/client_side_encryption/__init__.py | 183 ++++++++++++++++++ .../client_side_encryption/cipher_provider.py | 17 ++ .../aws/s3/client_side_encryption/client.py | 103 ++++++++++ .../decrypt_handeler.py | 121 ++++++++++++ .../aws/s3/client_side_encryption/get.py | 75 +++++++ .../kms_cipher_provider.py | 61 ++++++ aws/vortexa_utils/aws/ses/__init__.py | 4 + .../aws/ses/application_mapper.py | 102 ++++++++++ aws/vortexa_utils/aws/ses/attachments.py | 15 ++ aws/vortexa_utils/aws/ses/inbox.py | 141 ++++++++++++++ .../aws/ses/notification/__init__.py | 0 .../aws/ses/notification/types/__init__.py | 5 + .../aws/ses/notification/types/action.py | 56 ++++++ .../ses/notification/types/lambda_record.py | 18 ++ .../aws/ses/notification/types/mail.py | 44 +++++ 
.../ses/notification/types/notification.py | 29 +++ .../aws/ses/notification/types/receipt.py | 65 +++++++ .../aws/ses/notification/types/verdicts.py | 43 ++++ aws/vortexa_utils/aws/utils/__init__.py | 4 + .../aws/utils/dataclasses/__init__.py | 1 + .../aws/utils/dataclasses/nested.py | 20 ++ collections/tests/__init__.py | 0 .../tests/collections/types/__init__.py | 0 .../types/test_instance_caching_abc.py | 130 +++++++++++++ .../vortexa_utils/collections/__inti__.py | 0 .../collections/types/__init__.py | 0 .../collections/types/instance_caching_abc.py | 45 +++++ database/README.md | 21 ++ database/README.rst | 28 +++ database/setup.py | 40 ++++ database/tests/__init__.py | 0 database/tests/test_database_factory.py | 16 ++ database/tests/test_querey_cache.py | 21 ++ database/vortexa_utils/__init__.py | 5 + database/vortexa_utils/database/__init__.py | 7 + database/vortexa_utils/database/database.py | 118 +++++++++++ .../database/default_factories.py | 20 ++ .../vortexa_utils/database/query_cache.py | 77 ++++++++ database/vortexa_utils/database/utils.py | 62 ++++++ deployment/setup.py | 20 ++ deployment/vortexa_utils/__init__.py | 5 + deployment/vortexa_utils/portainer/Readme.md | 1 + .../vortexa_utils/portainer/__init__.py | 8 + deployment/vortexa_utils/portainer/api.py | 56 ++++++ deployment/vortexa_utils/portainer/stacks.py | 61 ++++++ .../vortexa_utils/portainer/update_stack.py | 90 +++++++++ docker/pandas/Dockerfile | 25 +++ general/README.rst | 0 general/setup.py | 40 ++++ general/vortexa_utils/__init__.py | 5 + general/vortexa_utils/general/__init__.py | 0 general/vortexa_utils/git.py | 14 ++ logging/README.md | 55 ++++++ logging/setup.py | 38 ++++ logging/vortexa_utils/__init__.py | 1 + logging/vortexa_utils/logging/__init__.py | 1 + .../logging/exception_decorator.py | 12 ++ logging/vortexa_utils/logging/resources.py | 38 ++++ utils/vortexa_utils/utils/__init__.py | 0 .../utils/byte_stream_spliter.py | 31 +++ .../utils/sockets/socket_client.py | 24 +++ .../utils/sockets/socket_server.py | 17 ++ versioning/VERSION | 1 + versioning/setup.py | 34 ++++ versioning/tests/__init__.py | 0 versioning/tests/test_versioner.py | 47 +++++ versioning/vortexa_utils/__init__.py | 5 + .../vortexa_utils/versioning/__init__.py | 10 + .../vortexa_utils/versioning/__main__.py | 9 + versioning/vortexa_utils/versioning/cli.py | 46 +++++ versioning/vortexa_utils/versioning/utils.py | 22 +++ .../vortexa_utils/versioning/versioner.py | 99 ++++++++++ youve_got_mail/README.md | 0 youve_got_mail/README.rst | 0 youve_got_mail/requirements.txt | 2 + youve_got_mail/setup.py | 48 +++++ youve_got_mail/tests/__init__.py | 0 youve_got_mail/vortexa_utils/__init__.py | 0 .../vortexa_utils/youve_got_mail.py | 43 ++++ 120 files changed, 3944 insertions(+) create mode 100644 .circleci/config.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 aws/README.md create mode 100644 aws/README.rst create mode 100644 aws/requirements.txt create mode 100644 aws/setup.py create mode 100644 aws/tests/__init__.py create mode 100644 aws/tests/lambda_types/__init__.py create mode 100644 aws/tests/lambda_types/message_eg.py create mode 100644 aws/tests/lambda_types/repeat_eg.py create mode 100644 aws/tests/lambda_types/test_lambda_types.py create mode 100644 aws/tests/s3_client_encryption_tests/__init__.py create mode 100644 aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py create mode 100644 aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py create mode 100644 
aws/tests/s3_client_encryption_tests/test_IODecrypter.py create mode 100644 aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py create mode 100644 aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py create mode 100644 aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py create mode 100644 aws/tests/ses_inbox/__init__.py create mode 100644 aws/tests/ses_inbox/test_get_attachments.py create mode 100644 aws/tests/ses_inbox/test_list_inbox.py create mode 100644 aws/tests/ses_notification_types/__init__.py create mode 100644 aws/tests/ses_notification_types/test_action.py create mode 100644 aws/tests/ses_notification_types/test_lambda_record.py create mode 100644 aws/tests/ses_notification_types/test_mail.py create mode 100644 aws/tests/ses_notification_types/test_notification.py create mode 100644 aws/tests/ses_notification_types/test_receipt.py create mode 100644 aws/tests/utils/__init__.py create mode 100644 aws/tests/utils/nested_data_classes/__init__.py create mode 100644 aws/tests/utils/nested_data_classes/test_nested_dataclass.py create mode 100644 aws/vortexa_utils/__init__.py create mode 100644 aws/vortexa_utils/aws/__init__.py create mode 100644 aws/vortexa_utils/aws/lambdr/__init__.py create mode 100644 aws/vortexa_utils/aws/lambdr/types.py create mode 100644 aws/vortexa_utils/aws/s3/__init__.py create mode 100644 aws/vortexa_utils/aws/s3/client.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/client.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/get.py create mode 100644 aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py create mode 100644 aws/vortexa_utils/aws/ses/__init__.py create mode 100644 aws/vortexa_utils/aws/ses/application_mapper.py create mode 100644 aws/vortexa_utils/aws/ses/attachments.py create mode 100644 aws/vortexa_utils/aws/ses/inbox.py create mode 100644 aws/vortexa_utils/aws/ses/notification/__init__.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/__init__.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/action.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/lambda_record.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/mail.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/notification.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/receipt.py create mode 100644 aws/vortexa_utils/aws/ses/notification/types/verdicts.py create mode 100644 aws/vortexa_utils/aws/utils/__init__.py create mode 100644 aws/vortexa_utils/aws/utils/dataclasses/__init__.py create mode 100644 aws/vortexa_utils/aws/utils/dataclasses/nested.py create mode 100644 collections/tests/__init__.py create mode 100644 collections/tests/collections/types/__init__.py create mode 100644 collections/tests/collections/types/test_instance_caching_abc.py create mode 100644 
collections/vortexa_utils/collections/__inti__.py create mode 100644 collections/vortexa_utils/collections/types/__init__.py create mode 100644 collections/vortexa_utils/collections/types/instance_caching_abc.py create mode 100644 database/README.md create mode 100644 database/README.rst create mode 100644 database/setup.py create mode 100644 database/tests/__init__.py create mode 100644 database/tests/test_database_factory.py create mode 100644 database/tests/test_querey_cache.py create mode 100644 database/vortexa_utils/__init__.py create mode 100644 database/vortexa_utils/database/__init__.py create mode 100644 database/vortexa_utils/database/database.py create mode 100644 database/vortexa_utils/database/default_factories.py create mode 100644 database/vortexa_utils/database/query_cache.py create mode 100644 database/vortexa_utils/database/utils.py create mode 100644 deployment/setup.py create mode 100644 deployment/vortexa_utils/__init__.py create mode 100644 deployment/vortexa_utils/portainer/Readme.md create mode 100644 deployment/vortexa_utils/portainer/__init__.py create mode 100644 deployment/vortexa_utils/portainer/api.py create mode 100644 deployment/vortexa_utils/portainer/stacks.py create mode 100644 deployment/vortexa_utils/portainer/update_stack.py create mode 100644 docker/pandas/Dockerfile create mode 100644 general/README.rst create mode 100644 general/setup.py create mode 100644 general/vortexa_utils/__init__.py create mode 100644 general/vortexa_utils/general/__init__.py create mode 100644 general/vortexa_utils/git.py create mode 100644 logging/README.md create mode 100644 logging/setup.py create mode 100644 logging/vortexa_utils/__init__.py create mode 100644 logging/vortexa_utils/logging/__init__.py create mode 100644 logging/vortexa_utils/logging/exception_decorator.py create mode 100644 logging/vortexa_utils/logging/resources.py create mode 100644 utils/vortexa_utils/utils/__init__.py create mode 100644 utils/vortexa_utils/utils/byte_stream_spliter.py create mode 100644 utils/vortexa_utils/utils/sockets/socket_client.py create mode 100644 utils/vortexa_utils/utils/sockets/socket_server.py create mode 100644 versioning/VERSION create mode 100644 versioning/setup.py create mode 100644 versioning/tests/__init__.py create mode 100644 versioning/tests/test_versioner.py create mode 100644 versioning/vortexa_utils/__init__.py create mode 100644 versioning/vortexa_utils/versioning/__init__.py create mode 100644 versioning/vortexa_utils/versioning/__main__.py create mode 100644 versioning/vortexa_utils/versioning/cli.py create mode 100644 versioning/vortexa_utils/versioning/utils.py create mode 100644 versioning/vortexa_utils/versioning/versioner.py create mode 100644 youve_got_mail/README.md create mode 100644 youve_got_mail/README.rst create mode 100644 youve_got_mail/requirements.txt create mode 100644 youve_got_mail/setup.py create mode 100644 youve_got_mail/tests/__init__.py create mode 100644 youve_got_mail/vortexa_utils/__init__.py create mode 100644 youve_got_mail/vortexa_utils/youve_got_mail.py diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000..c44edbe3b610c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,35 @@ +version: 2 +jobs: + build: + working_directory: ~/project + docker: + - image: circleci/python:3.7 + steps: + - checkout + - restore_cache: &restore_cache + keys: + - v1-{{ .Branch }}- + # - run: + # name: "Install Python3" + # command: | + # apk add --no-cache \ + # python3 \ + # libc-dev \ + # gcc + - 
run: + name: "Test aws" + working_directory: ~/project/aws + command: | + python3.7 -m venv .venv; + . .venv/bin/activate + pip install -U pip + pip install -U -r requirements.txt + nose2 + python setup.py test + - save_cache: &save_cache + key: v1-{{ .Branch }}-{{ epoch }} + paths: + - ~/project/aws/.venv + - ~/project/database/.venv + - ~/project/deployment/.venv + - ~/.cache/pip diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000..2c06c5a32bbdb --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# VSCODE +.vscode + +.idea +*.iml +scratch.py diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..028c6e96fb015 --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# python_utils [![CircleCI](https://circleci.com/gh/V0RT3X4/python_utils.svg?style=svg&circle-token=30fa8fb22fa45a521a5d728e9accde63c242c2b4)](https://circleci.com/gh/V0RT3X4/python_utils) +Python utilities and helper functions/classes/modules + +## Sub Packages + +- [AWS](#aws) +- [Database](#database) +- [Deployment](#deployment) + +## Installation + +Installation is done by using [submodule vendoring](#vendoring). +Vendor the package into your project as [below](#vendoring) then you can install +with +``` +pip install vendor/github.com/V0RT3X4/python_utils/ +``` +or +``` +echo vendor/github.com/V0RT3X4/python_utils/ >> requirements.txt +pip install -r requirements.txt +``` + +## Aws + +Helper modules for `s3` client side encryption. `ses` email processing +(s3 as an inbox). `lambda` function handeler types. + +## Database + +Data base connection helpers to get you a +[`SQLAlchemy`](https://www.sqlalchemy.org/) connection [`Engine`](https://docs.sqlalchemy.org/en/latest/core/engines_connections.html) +to an RDS or RedShift database using +`aws secretsmanager` for managing connection credentials and rotation, and with +SSL encryption. + +## Deployment + +Custom Deployment Jazz + +## Installation - Vendoring the subtree +To install the scripts into your project it is recommended to vendor this module as a `git subtree` as opposed to a `git submodule`. You will have a version of this code in your repo, and you can easily update and push changes back upstream. 
+ +To make your life easier install [git-vendor](https://github.com/brettlangdon/git-vendor) + +Then you can vendor the module into your repo and run installation scripts: +``` +git vendor add python_utils git@github.com:V0RT3X4/python_utils.git master +``` + +finally you can install the modules you want +``` +pip install vendor/github.com/V0RT3X4/python_utils/ +``` + +to update the reference +``` +git vendor update python_utils master +``` + +## AS Submodule + +In the project directory +``` +git submodule add \ + --name github.com/V0RT3X4/python_utils \ + git@github.com:V0RT3X4/python_utils.git \ + vendor/github.com/V0RT3X4/python_utils +``` + +Subsequently when you check out the source code (say in +[circleCI](https://circleci.com) or locally). +``` +git clone git@github.com:/V0RT3X4/.git +cd +git submodule init +git submodule update --remote +``` + +finally you can install the modules you want +``` +pip install vendor/github.com/V0RT3X4/python_utils/ +``` + +## Contributing +To contribute and push changes back upstream add this repo as a remote. +``` +git remote add -f python_utils git@github.com:V0RT3X4/python_utils.git +``` +Push changes in the sub tree +``` +git subtree push --prefix=vendor/github.com/V0RT3X4/python_utils python_utils some_branch +``` + +## [git-vendor](https://github.com/brettlangdon/git-vendor) installation + +``` +cd $(mktemp -d) && \ +git clone https://github.com/brettlangdon/git-vendor &> /dev/null && \ +cd git-vendor && \ +sudo make install +``` + +or + +``` +brew install git-vendor +``` diff --git a/aws/README.md b/aws/README.md new file mode 100644 index 0000000000000..f9e28102b5fbf --- /dev/null +++ b/aws/README.md @@ -0,0 +1 @@ +# Vortexa AWS Python Utils diff --git a/aws/README.rst b/aws/README.rst new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/requirements.txt b/aws/requirements.txt new file mode 100644 index 0000000000000..34a10a130c16c --- /dev/null +++ b/aws/requirements.txt @@ -0,0 +1,5 @@ +boto3 +pycryptodomex +nose2 +pandas +logzero diff --git a/aws/setup.py b/aws/setup.py new file mode 100644 index 0000000000000..1e69b1cb89ad6 --- /dev/null +++ b/aws/setup.py @@ -0,0 +1,50 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:16:54+00:00 +import os +import io +from setuptools import setup, find_packages + +namespace = 'vortexa_utils' +name = 'vortexa_utils_aws' +version = '1' +description = 'Vortexa AWS utils helper library', + +dependencies = [ + 'boto3', + 'pycryptodomex' +] + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + +packages = [ + package for package in find_packages() + if package.startswith(namespace) +] + +setup( + name=name, + version=version, + description=description, + long_description=readme, + + author='Richard Mathie', + author_email='richard.mathie@vortexa.com', + + zip_safe=False, + test_suite='nose2.collector.collector', + tests_require=['nose2', 'pandas'], + + packages=packages, + install_requires=dependencies, + extras_require={ + 'pandas': ['pandas'] + } +) diff --git a/aws/tests/__init__.py b/aws/tests/__init__.py new file mode 100644 index 0000000000000..b0f42e4b71cc9 --- /dev/null +++ b/aws/tests/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-11-28T18:10:18+00:00 +# @Last modified by: richard +# @Last modified 
time: 2018-11-28T18:10:18+00:00 diff --git a/aws/tests/lambda_types/__init__.py b/aws/tests/lambda_types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/lambda_types/message_eg.py b/aws/tests/lambda_types/message_eg.py new file mode 100644 index 0000000000000..9cf39d5a99d58 --- /dev/null +++ b/aws/tests/lambda_types/message_eg.py @@ -0,0 +1,19 @@ +""" Example #1 """ +import os +from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext + +MSG_TEMPLATE: str = os.environ.get('MSG_TEMPLATE') or 'Hello {} {}!' +STAGE: str = os.environ.get('stage') or 'dev' + + +def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: + print('Received event {} for stage {}'.format(event, STAGE)) + first_name: str = event.get('first_name') # optional + last_name: str = event.get('last_name') # optional + return { + 'message': get_message(first_name, last_name), + } + + +def get_message(first_name: str = 'John', last_name: str = 'Smith'): + return MSG_TEMPLATE.format(first_name, last_name) diff --git a/aws/tests/lambda_types/repeat_eg.py b/aws/tests/lambda_types/repeat_eg.py new file mode 100644 index 0000000000000..95d5331e8f5f9 --- /dev/null +++ b/aws/tests/lambda_types/repeat_eg.py @@ -0,0 +1,19 @@ +""" Example #2 """ +import os +from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext + +N: int = int(os.environ.get('N') or 10) +STAGE: str = os.environ.get('stage') or 'dev' + + +def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: + print('Received event {} for stage {}'.format(event, STAGE)) + input: str = event['input'] # required + return { + 'output': get_output(input, N), + } + + +def get_output(input: str, num: int): + """ Return the input string repeated N times. """ + return input * num diff --git a/aws/tests/lambda_types/test_lambda_types.py b/aws/tests/lambda_types/test_lambda_types.py new file mode 100644 index 0000000000000..0cdad796b76dd --- /dev/null +++ b/aws/tests/lambda_types/test_lambda_types.py @@ -0,0 +1,89 @@ +import unittest +from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext +from .message_eg import handler as handler_message, get_message +from .repeat_eg import handler as handler_repeat, get_output + + +class TestMessageFunction(unittest.TestCase): + + def setUp(self): + self.context = LambdaContext() + + def test_handler(self) -> None: + event: LambdaDict = { + "first_name": "Alex", + "last_name": "Casalboni", + } + result = handler_message(event, self.context) + self.assertIn('message', result) + + def test_handler_empty(self) -> None: + event: LambdaDict = {} + result = handler_message(event, self.context) + self.assertIn('message', result) + + def test_message_default(self) -> None: + msg = get_message() + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('John', msg) + self.assertIn('Smith', msg) + self.assertTrue(msg.endswith('!')) + + def test_message_firstname(self) -> None: + msg = get_message(first_name='Charlie') + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('Charlie', msg) + self.assertIn('Smith', msg) + self.assertTrue(msg.endswith('!')) + + def test_message_lastname(self) -> None: + msg = get_message(last_name='Brown') + self.assertIsInstance(msg, str) + self.assertIn('Hello', msg) + self.assertIn('John', msg) + self.assertIn('Brown', msg) + self.assertTrue(msg.endswith('!')) + + def test_message(self) -> None: + msg = get_message(first_name='Charlie', last_name='Brown') + self.assertIsInstance(msg, 
str) + self.assertIn('Hello', msg) + self.assertIn('Charlie', msg) + self.assertIn('Brown', msg) + self.assertTrue(msg.endswith('!')) + + +class TestRepeatFunction(unittest.TestCase): + + def setUp(self): + self.context = LambdaContext() + + def test_handler(self) -> None: + event: LambdaDict = { + "input": "NaN", + } + result = handler_repeat(event, self.context) + self.assertIn('output', result) + self.assertEqual(30, len(result['output'])) + + def test_handler_empty(self) -> None: + event: LambdaDict = {} + with self.assertRaises(KeyError): + handler_repeat(event, self.context) + + def test_repeat_empty_string(self) -> None: + output = get_output('', 100) + self.assertIsInstance(output, str) + self.assertEqual(0, len(output)) + + def test_repeat_zero(self) -> None: + output = get_output('hello', 0) + self.assertIsInstance(output, str) + self.assertEqual(0, len(output)) + + def test_repeat(self) -> None: + output = get_output('hello', 10) + self.assertIsInstance(output, str) + self.assertEqual(50, len(output)) diff --git a/aws/tests/s3_client_encryption_tests/__init__.py b/aws/tests/s3_client_encryption_tests/__init__.py new file mode 100644 index 0000000000000..2e9b828ec304c --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-11-28T18:10:35+00:00 +# @Last modified by: richard +# @Last modified time: 2018-11-28T18:10:36+00:00 diff --git a/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py b/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py new file mode 100644 index 0000000000000..bf64d13548ac0 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py @@ -0,0 +1,22 @@ +# @Author: richard +# @Date: 2018-11-28T18:11:28+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T13:06:33+00:00 +from vortexa_utils.aws.s3.client_side_encryption import IOAuthDecrypter +from nose2.tools import params +from .test_IODecrypter import DummyChunksIO, IODecrypterTestCase + + +class IOAuthDecrypter(IODecrypterTestCase): + io_decrypter_class = IOAuthDecrypter.IOAuthDecrypter + + def get_decrypter(self, cypher, io, content_length): + return self.io_decrypter_class(cypher, io, content_length) + + def get_io(self, content_length): + tag_length = 128 + return DummyChunksIO(content_length + tag_length) + + def invalid_decryption(self, content_length): + with self.assertRaises(ValueError): + super().invalid_decryption(content_length) diff --git a/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py b/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py new file mode 100644 index 0000000000000..51685c22d13bd --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py @@ -0,0 +1,42 @@ +import unittest +import io +from vortexa_utils.aws.s3.client_side_encryption.IOAuthDecrypterTagLength \ + import StreamChunker +from nose2.tools import params + + +class StreamChunkerTestCase(unittest.TestCase): + + def get_chunker(self, io, tag_length): + return StreamChunker(io, tag_length) + + def test_tagged(self): + fixture = io.BytesIO(b'1234567890') + chunker = StreamChunker(fixture, 3) + bytes = chunker.read() + self.assertEqual(chunker.tag, b'890') + self.assertEqual(bytes, b'1234567') + + @params(*range(1, 11)) + def test_read_in_chunks(self, chunk): + bytes = b'1234567890' + fixture = io.BytesIO(bytes) + tag_length = 3 + chunker = StreamChunker(fixture, tag_length) + result = [] + index = 0 + while True: + byte = chunker.read(chunk) + if byte == b'': + break + 
result.append(byte) + self.assertEqual(bytes[index:index + len(byte)], byte) + index += len(byte) + print(result) + self.assertEqual(bytes[-tag_length:], chunker.tag) + self.assertEqual(b''.join(result), bytes[:-tag_length]) + # check that subsuquent reads return nothing and tag is correct + for i in range(10): + byte = chunker.read(chunk) + self.assertEqual(b'', byte) + self.assertEqual(bytes[-tag_length:], chunker.tag) diff --git a/aws/tests/s3_client_encryption_tests/test_IODecrypter.py b/aws/tests/s3_client_encryption_tests/test_IODecrypter.py new file mode 100644 index 0000000000000..cadab6acdaeae --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_IODecrypter.py @@ -0,0 +1,94 @@ +# @Author: richard +# @Date: 2018-11-28T18:11:28+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T13:07:14+00:00 +from io import IOBase + +from vortexa_utils.aws.s3.client_side_encryption.IODecrypter import IODecrypter +import unittest +from nose2.tools import params + + +class DummyCipher(object): + def __init__(self, valid: bool = True): + self.valid = valid + + def decrypt(self, bytes): + return bytes + + def verify(self, tag): + if not self.valid: + raise ValueError("MAC check failed") + pass + + +class DummyChunksIO(IOBase): + _DEFAULT_CHUNK_SIZE = 1024 + + def __init__(self, size): + self.bytes_read = 0 + self.size = size + + def read(self, chunk=-1): + if chunk < 0: + chunk = self.size - self.bytes_read + else: + chunk = min(chunk, abs(self.size - self.bytes_read)) + self.bytes_read += chunk + return b' ' * chunk + + def __iter__(self): + """Return an iterator to yield 1k chunks from the raw stream. + """ + return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) + + def iter_chunks(self, chunk_size=_DEFAULT_CHUNK_SIZE): + """Return an iterator to yield chunks of chunk_size bytes from the raw + stream. 
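The `StreamChunker` tests above pin down the behaviour being relied on: however the stream is read, the reader always withholds the final `tag_length` bytes (the AES-GCM authentication tag that the S3 encryption client appends to the ciphertext). A minimal independent sketch of that hold-back logic, assuming the same semantics and `tag_length > 0`:

```python
import io


class TagTrailer:
    """Sketch: stream wrapper that always holds back the last
    `tag_length` bytes of the stream, exposing them as `.tag`."""

    def __init__(self, raw, tag_length):
        self.raw = raw
        self.tag = raw.read(tag_length)  # prime the hold-back buffer

    def read(self, size=-1):
        # Prepend the held-back bytes, then withhold the new trailing bytes.
        data = self.tag + (self.raw.read() if size < 0 else self.raw.read(size))
        self.tag = data[-len(self.tag):]
        return data[:-len(self.tag)]


# Whole-stream read: matches test_tagged above.
chunker = TagTrailer(io.BytesIO(b"1234567890"), 3)
assert chunker.read() == b"1234567" and chunker.tag == b"890"

# Chunked reads: matches test_read_in_chunks above.
chunker = TagTrailer(io.BytesIO(b"1234567890"), 3)
out = b"".join(iter(lambda: chunker.read(4), b""))
assert out == b"1234567" and chunker.tag == b"890"
```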
+ """ + while True: + bytes = self.read(chunk_size) + if bytes == b'': + break + yield bytes + + def close(self): + pass + + def readable(self): + return True + + def seekable(self): + return False + + def writable(self): + return False + + +class IODecrypterTestCase(unittest.TestCase): + io_decrypter_class = IODecrypter + + def get_decrypter(self, cypher, io, content_length): + return self.io_decrypter_class(cypher, io) + + def get_io(self, content_length): + return DummyChunksIO(content_length) + + def make_decrypter(self, content_length, valid=True): + io = DummyChunksIO(content_length) + cypher = DummyCipher(valid=valid) + return self.get_decrypter(cypher, io, content_length) + + @params(123, 1024, 1024*3, 1024*3+123, 1, 0) + def test_read(self, content_length): + with self.make_decrypter(content_length) as decrypter: + bytes = list(decrypter) + self.assertEqual(b''.join(bytes), b' ' * content_length) + + @params(123, 1024, 1024*3, 1024*3+123, 1, 0) + def test_invalid(self, content_length): + self.invalid_decryption(content_length) + + def invalid_decryption(self, content_length): + with self.make_decrypter(content_length, valid=False) as decrypter: + list(decrypter) diff --git a/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py new file mode 100644 index 0000000000000..0be487412d5c2 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py @@ -0,0 +1,68 @@ +# @Author: richard +# @Date: 2018-12-06T17:26:08+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T19:36:16+00:00 +# cd aws/vortexa_utils/ +# import aws.s3.client_side_encryption.client as client +import logging +import vortexa_utils.aws.s3.client_side_encryption.client as client +import io +import email.parser +from email import policy +from email.iterators import _structure +import base64 +from nose2.tools.such import helper + +import pandas as pd + +logger = logging.getLogger(__name__) + +Bucket = 'ops-data.incoming-emails' +Key = 'incoming_email/akrk0l8sq4lm7qkgj8hpurfshpnj8frgqpqe9mg1' +Key = 'incoming_email/8ej2ldqnsmako2tgsbdpqg8tdi6tdnduoscojdo1' + + +def test_get_attachment(): + cl = client.Client() + parser = email.parser.BytesParser(policy=policy.default) + with cl.get_object(Bucket, Key) as io: + parsed = parser.parse(io) + _structure(parsed) + + # with open("/home/richard/an_email", 'wb') as f: + # for b in io: + # f.write(b) + # + # atts = list(parsed.iter_attachments()) + # [a.get_filename() for a in atts] + # [a.get_content_type() for a in atts] + # att = atts[2] + # att + # att.get_content_type() + # pd.read_excel(io.BytesIO(att.get_content())) + + target = parsed['to'] + source = parsed['from'] + helper.assertEqual(target, 'test@opsdata.vortexa.com') + helper.assertEqual(source, 'Richard Mathie ') + + parsed['subject'] + + for part in parsed.walk(): + print(part.get_content_type()) + att = parsed.get_payload() + att[0].get_content_type() + att[0].get_payload()[1].get_payload() + + logger.debug('\nwalking message') + for part in parsed.walk(): + content_type = part.get_content_type() + if content_type.startswith('text'): + logger.debug(content_type) + payload = part.get_payload() + if content_type == 'text/csv': + csv = base64.decodebytes(payload.encode('utf-8')) + for line in csv.splitlines(): + logger.debug(line) + else: + logger.debug('\n%s', payload) diff --git a/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py 
b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py new file mode 100644 index 0000000000000..a33346502b0a2 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py @@ -0,0 +1,65 @@ +# @Author: richard +# @Date: 2018-12-06T13:27:47+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T17:24:43+00:00 +import logging +import sys +# cd aws/vortexa_utils/ +# import aws.s3.client_side_encryption.client as client +import vortexa_utils.aws.s3.client_side_encryption.client as client +import email.parser +from nose2.tools.such import helper + + +logger = logging.getLogger(__name__) + +Bucket = 'ops-data.incoming-emails' +Key = 'incoming_email/4pnlhtml86pobumjn9d59mbkcq3to1i43sjbd201' + + +def test_get_obj(): + self = client.Client() + location_info = self.s3.get_bucket_location(Bucket=Bucket) + logger.info('location %s', location_info) + + obj = self.s3.get_object(Bucket=Bucket, Key=Key) + handeler = client.DecryptHandeler(obj, self) + envelop = handeler.envelope_v2(handeler.metadata) + cipher = self.cipher_provider.decryptor(envelop) + assert handeler.auth_tag() + io = handeler.decrypt_auth(cipher) + + bytes = [] + while True: + byte = io.read(1024) + if byte == b'': + break + logger.info("Bytes Read %s/%s", io.bytes_read, io.content_length) + logger.debug("Bytes %s", byte) + bytes.append(byte) + io.verify() + io.close() + # logger.info('bytes %s', str(bytes)) + + +def test_get_obj_io(): + cl = client.Client() + with cl.get_object(Bucket, Key) as io: + list(io) + + +def test_get_obj_mime(): + cl = client.Client() + parser = email.parser.BytesParser() + with cl.get_object(Bucket, Key) as io: + parsed = parser.parse(io) + + target = parsed['to'] + source = parsed['from'] + helper.assertEqual(target, 'test@opsdata.vortexa.com') + helper.assertEqual(source, 'Richard Mathie ') + + logger.info('\twalking message') + for part in parsed.walk(): + if part.get_content_type().startswith('text'): + logger.info('\t%s', part.get_payload()) diff --git a/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py b/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py new file mode 100644 index 0000000000000..7da39f7a34166 --- /dev/null +++ b/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py @@ -0,0 +1,39 @@ +# @Author: richard +# @Date: 2018-12-05T16:23:13+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-05T19:43:28+00:00 +import unittest +from vortexa_utils.aws.s3.client_side_encryption import kms_cipher_provider +import logging + + +logger = logging.getLogger(__name__) + + +def log_bytes(*bytes): + logger.info(f' bytes: {bytes}') + + +class KMSCipherProviderTest(unittest.TestCase): + test_key_id = 'alias/python_utils_test_key' + + def get_cipher(self): + return kms_cipher_provider.KMSCipherProvider(self.test_key_id) + + def test_encrypt(self): + envelope, cipher = self.get_cipher().encryptor() + plaintext = b"The quick brown fox jumped over the lazy dog" + self.plaintext = plaintext + ciphertext, tag = cipher.encrypt_and_digest(plaintext) + log_bytes(ciphertext, tag) + self.assertNotEqual(ciphertext, plaintext) + package = (envelope, ciphertext, tag) + return package + + def test_decrypt(self): + envelope, ciphertext, tag = self.test_encrypt() + cipher = kms_cipher_provider.KMSCipherProvider().decryptor(envelope) + plaintext = cipher.decrypt(ciphertext) + log_bytes(ciphertext, tag, plaintext) + self.assertEqual(plaintext, self.plaintext) + cipher.verify(tag) diff --git a/aws/tests/ses_inbox/__init__.py 
b/aws/tests/ses_inbox/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_inbox/test_get_attachments.py b/aws/tests/ses_inbox/test_get_attachments.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_inbox/test_list_inbox.py b/aws/tests/ses_inbox/test_list_inbox.py new file mode 100644 index 0000000000000..a8ff2a0bd81ee --- /dev/null +++ b/aws/tests/ses_inbox/test_list_inbox.py @@ -0,0 +1,25 @@ +# cd aws/vortexa_utils +# cd .. +from typing import Iterable +from vortexa_utils.aws.ses.inbox import Inbox +from email.message import EmailMessage +from itertools import islice + + +Path = 'incoming_email/' + +inbox = Inbox(default_bucket='ops-data.incoming-emails') + + +def test_list_inbox(): + inbox = Inbox(default_bucket='ops-data.incoming-emails') + emails: Iterable[EmailMessage] = islice( + inbox.list_emails(Path=Path), + 10 + ) + + for email in emails: + # print(email.as_string()) + attachments = list(email.iter_attachments()) + print(list(a.get_filename() for a in attachments)) + print(list(a.get_content_type() for a in attachments)) diff --git a/aws/tests/ses_notification_types/__init__.py b/aws/tests/ses_notification_types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/ses_notification_types/test_action.py b/aws/tests/ses_notification_types/test_action.py new file mode 100644 index 0000000000000..1110fda3de888 --- /dev/null +++ b/aws/tests/ses_notification_types/test_action.py @@ -0,0 +1,16 @@ +from vortexa_utils.aws.ses.notification.types import Action +from json import loads + + +action_json_sns = """ +{ + "type": "SNS", + "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" +} +""" + + +def test_sns_action(): + action = Action(**loads(action_json_sns)) + assert action.type == "SNS" + assert action.topicArn == "arn:aws:sns:us-east-1:012345678912:example-topic" diff --git a/aws/tests/ses_notification_types/test_lambda_record.py b/aws/tests/ses_notification_types/test_lambda_record.py new file mode 100644 index 0000000000000..c489d6cd84e42 --- /dev/null +++ b/aws/tests/ses_notification_types/test_lambda_record.py @@ -0,0 +1,32 @@ +from vortexa_utils.aws.ses.notification.types import Record +from json import loads +from .test_mail import mail_json +from .test_receipt import receipt_json + + +ses = dict( + receipt=receipt_json, + mail=mail_json +) + + +record_json = loads(""" +{ + "eventSource": "aws:ses", + "eventVersion": "1.0", + "ses": { + "receipt": { + }, + "mail": { + } + } +} +""") + +record_json.update(ses=ses) + + +def test_record(): + record = Record(**record_json) + record.ses + assert record.eventSource == "aws:ses" diff --git a/aws/tests/ses_notification_types/test_mail.py b/aws/tests/ses_notification_types/test_mail.py new file mode 100644 index 0000000000000..bb558b3639e48 --- /dev/null +++ b/aws/tests/ses_notification_types/test_mail.py @@ -0,0 +1,85 @@ +from vortexa_utils.aws.ses.notification.types import Mail +from json import loads + +mail_json = loads(""" +{ +"timestamp": "2015-09-11T20:32:33.936Z", +"source": "61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com", +"messageId": "d6iitobk75ur44p8kdnnp7g2n800", +"destination": [ + "recipient@example.com" +], +"headersTruncated": false, +"headers": [ + { + "name": "Return-Path", + "value": "<0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com>" + }, + { + "name": "Received", + "value": "from a9-183.smtp-out.amazonses.com (a9-183.smtp-out.amazonses.com [54.240.9.183]) by 
inbound-smtp.us-east-1.amazonaws.com with SMTP id d6iitobk75ur44p8kdnnp7g2n800 for recipient@example.com; Fri, 11 Sep 2015 20:32:33 +0000 (UTC)"
+  },
+  {
+    "name": "DKIM-Signature",
+    "value": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=ug7nbtf4gccmlpwj322ax3p6ow6yfsug; d=amazonses.com; t=1442003552; h=From:To:Subject:MIME-Version:Content-Type:Content-Transfer-Encoding:Date:Message-ID:Feedback-ID; bh=DWr3IOmYWoXCA9ARqGC/UaODfghffiwFNRIb2Mckyt4=; b=p4ukUDSFqhqiub+zPR0DW1kp7oJZakrzupr6LBe6sUuvqpBkig56UzUwc29rFbJF hlX3Ov7DeYVNoN38stqwsF8ivcajXpQsXRC1cW9z8x875J041rClAjV7EGbLmudVpPX 4hHst1XPyX5wmgdHIhmUuh8oZKpVqGi6bHGzzf7g="
+  },
+  {
+    "name": "From",
+    "value": "sender@example.com"
+  },
+  {
+    "name": "To",
+    "value": "recipient@example.com"
+  },
+  {
+    "name": "Subject",
+    "value": "Example subject"
+  },
+  {
+    "name": "MIME-Version",
+    "value": "1.0"
+  },
+  {
+    "name": "Content-Type",
+    "value": "text/plain; charset=UTF-8"
+  },
+  {
+    "name": "Content-Transfer-Encoding",
+    "value": "7bit"
+  },
+  {
+    "name": "Date",
+    "value": "Fri, 11 Sep 2015 20:32:32 +0000"
+  },
+  {
+    "name": "Message-ID",
+    "value": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>"
+  },
+  {
+    "name": "X-SES-Outgoing",
+    "value": "2015.09.11-54.240.9.183"
+  },
+  {
+    "name": "Feedback-ID",
+    "value": "1.us-east-1.Krv2FKpFdWV+KUYw3Qd6wcpPJ4Sv/pOPpEPSHn2u2o4=:AmazonSES"
+  }
+],
+"commonHeaders": {
+  "returnPath": "0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com",
+  "from": [
+    "sender@example.com"
+  ],
+  "date": "Fri, 11 Sep 2015 20:32:32 +0000",
+  "to": [
+    "recipient@example.com"
+  ],
+  "messageId": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>",
+  "subject": "Example subject"
+}
+}
+""")
+
+
+def test_init():
+    mail = Mail(**mail_json)
+    mail.headers
diff --git a/aws/tests/ses_notification_types/test_notification.py b/aws/tests/ses_notification_types/test_notification.py
new file mode 100644
index 0000000000000..56884ad7463dd
--- /dev/null
+++ b/aws/tests/ses_notification_types/test_notification.py
@@ -0,0 +1,23 @@
+from vortexa_utils.aws.ses.notification.types import Notification
+from json import loads
+from .test_mail import mail_json
+from .test_action import action_json_sns
+from .test_receipt import receipt_json
+
+
+notification_json = loads("""
+{
+"notificationType": "Received",
+"content": "blarblarblar"
+}
+"""
+)
+
+notification_json.update(
+    mail=mail_json,
+    receipt=receipt_json
+)
+
+
+def test_init():
+    Notification(**notification_json)
diff --git a/aws/tests/ses_notification_types/test_receipt.py b/aws/tests/ses_notification_types/test_receipt.py
new file mode 100644
index 0000000000000..e41ea7f8ce24d
--- /dev/null
+++ b/aws/tests/ses_notification_types/test_receipt.py
@@ -0,0 +1,34 @@
+from json import loads
+from vortexa_utils.aws.ses.notification.types import Receipt
+
+
+receipt_json = loads("""
+{
+"timestamp": "2015-09-11T20:32:33.936Z",
+"processingTimeMillis": 222,
+"recipients": [
+  "recipient@example.com"
+],
+"spamVerdict": {
+  "status": "PASS"
+},
+"virusVerdict": {
+  "status": "PASS"
+},
+"spfVerdict": {
+  "status": "PASS"
+},
+"dkimVerdict": {
+  "status": "PASS"
+},
+"action": {
+  "type": "SNS",
+  "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic"
+}
+}
+""")
+
+
+def test_receipt():
+    receipt = Receipt(**receipt_json)
+    assert receipt.dkimVerdict.status == "PASS"
diff --git a/aws/tests/utils/__init__.py b/aws/tests/utils/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/aws/tests/utils/nested_data_classes/__init__.py
b/aws/tests/utils/nested_data_classes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/tests/utils/nested_data_classes/test_nested_dataclass.py b/aws/tests/utils/nested_data_classes/test_nested_dataclass.py new file mode 100644 index 0000000000000..e15dffd75cc4d --- /dev/null +++ b/aws/tests/utils/nested_data_classes/test_nested_dataclass.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +# cd vortexa_utils/ +# from aws.utils.dataclasses import nested_dataclass +from vortexa_utils.aws.utils.dataclasses import nested_dataclass + + +@dataclass +class Foo: + a: str + b: int + + +@nested_dataclass +class Bar: + foo: Foo + baz: str + + +@nested_dataclass +class Bill: + bar: Bar + + +def test_init_class(): + data = dict( + bar=dict( + foo=dict(a="hello", b=1), + baz="world" + ) + ) + foo = Foo(**data['bar']['foo']) + bar = Bar(**data['bar']) + bill = Bill(**data) + + assert bill.bar == bar + assert bill.bar.foo == foo diff --git a/aws/vortexa_utils/__init__.py b/aws/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a7712f632a766 --- /dev/null +++ b/aws/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T20:12:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:12:57+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/aws/vortexa_utils/aws/__init__.py b/aws/vortexa_utils/aws/__init__.py new file mode 100644 index 0000000000000..dda33076e9246 --- /dev/null +++ b/aws/vortexa_utils/aws/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-12-04T20:13:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/aws/vortexa_utils/aws/lambdr/__init__.py b/aws/vortexa_utils/aws/lambdr/__init__.py new file mode 100644 index 0000000000000..4dcf5531789e7 --- /dev/null +++ b/aws/vortexa_utils/aws/lambdr/__init__.py @@ -0,0 +1,5 @@ +"""Vortexa python utils aws lambda helper functions and types. + +This module is called lambdr as `lambda` is a reserved word in python + +""" diff --git a/aws/vortexa_utils/aws/lambdr/types.py b/aws/vortexa_utils/aws/lambdr/types.py new file mode 100644 index 0000000000000..a1af1904a954b --- /dev/null +++ b/aws/vortexa_utils/aws/lambdr/types.py @@ -0,0 +1,45 @@ +""" Note: this code is used only by the static type checker! 
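+
+A hypothetical handler annotated with these types (illustrative only):
+
+    def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict:
+        return {"remaining_ms": context.get_remaining_time_in_millis()}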
+ +_see: +_and: + +""" +from typing import Dict, Any + +LambdaDict = Dict[str, Any] + + +class LambdaCognitoIdentity(object): + cognito_identity_id: str + cognito_identity_pool_id: str + + +class LambdaClientContextMobileClient(object): + installation_id: str + app_title: str + app_version_name: str + app_version_code: str + app_package_name: str + + +class LambdaClientContext(object): + client: LambdaClientContextMobileClient + custom: LambdaDict + env: LambdaDict + + +class LambdaContext(object): + function_name: str + function_version: str + invoked_function_arn: str + memory_limit_in_mb: int + aws_request_id: str + log_group_name: str + log_stream_name: str + deadline_ms: int + identity: LambdaCognitoIdentity + client_context: LambdaClientContext + + @staticmethod + def get_remaining_time_in_millis() -> int: + return 0 diff --git a/aws/vortexa_utils/aws/s3/__init__.py b/aws/vortexa_utils/aws/s3/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/aws/vortexa_utils/aws/s3/client.py b/aws/vortexa_utils/aws/s3/client.py new file mode 100644 index 0000000000000..da8e4814d10cd --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client.py @@ -0,0 +1,50 @@ +from tempfile import NamedTemporaryFile + +import boto3 +from logzero import logger +from pandas import DataFrame, read_hdf, read_csv + + +class S3Client: + def __init__(self, s3_bucket: str): + self.s3 = boto3.client("s3") + self.s3_bucket = s3_bucket + + def upload(self, filename: str, s3_key: str, owner_acl: bool = True): + logger.info("[s3] Started uploading: %s", s3_key) + self.s3.upload_file(filename, self.s3_bucket, s3_key) + logger.info("[s3] Finished uploading: %s", s3_key) + if owner_acl: + self.s3.put_object_acl( + ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=s3_key + ) + logger.info("[s3] bucket-owner-full-control ACL set") + + def hdf_pd(self, filename) -> DataFrame: + return self.__s3_pd__(filename, "hdf") + + def csv_pd(self, filename) -> DataFrame: + return self.__s3_pd__(filename, "csv") + + def copy(self, src, dest, owner_acl: bool = True): + copy_source = {"Bucket": self.s3_bucket, "Key": src} + self.s3.copy_object(CopySource=copy_source, Bucket=self.s3_bucket, Key=dest) + if owner_acl: + self.s3.put_object_acl( + ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=dest + ) + logger.info("[s3] bucket-owner-full-control ACL set") + + def __s3_pd__(self, filename, filetype) -> DataFrame: + with NamedTemporaryFile("wb") as f: + logger.info(f"[s3] Started downloading: s3://{self.s3_bucket}/{filename}") + self.s3.download_fileobj(self.s3_bucket, filename, f) + f.flush() + logger.info(f"[s3] Finished downloading: s3://{self.s3_bucket}/{filename}") + logger.info("[pandas] Started loading: %s", filename) + if filetype == "hdf": + df: DataFrame = read_hdf(f.name) + elif filetype == "csv": + df: DataFrame = read_csv(f.name) + logger.info("[pandas] Finished loading: %s", filename) + return df diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py new file mode 100644 index 0000000000000..6e948f7032109 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py @@ -0,0 +1,40 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:36+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:00:31+00:00 +import logging +from .IODecrypter import IODecrypter + +logger = logging.getLogger(__name__) + + +class IOAuthDecrypter(IODecrypter): + def __init__(self, 
cipher, io, content_length, chunk_size=16*1024):
+        super().__init__(cipher, io)
+        self.bytes_read = 0
+        self.content_length = content_length
+
+    def read(self, chunk=None):
+        remaining = self.content_length - self.bytes_read
+        # a chunk of None means "read the rest of the payload"; never read
+        # past the payload into the trailing auth tag
+        chunk = remaining if chunk is None else min(chunk, remaining)
+        bytes = super().read(chunk)
+        logger.debug("Bytes Read %s/%s", self.bytes_read, self.content_length)
+        self.bytes_read += len(bytes)
+        return bytes
+
+    def verify(self):
+        # the remaining bytes should be the auth tag
+        tag = self.io.read()
+        logger.debug("Verifying Tag %s", tag)
+        self.cipher.verify(tag)
+
+    def iter_chunks(self, chunk_size=None):
+        """Return an iterator to yield chunks of chunk_size bytes from the raw
+        stream.
+        """
+        if chunk_size is None:
+            chunk_size = self._DEFAULT_CHUNK_SIZE
+
+        while self.bytes_read < self.content_length:
+            bytes = self.read(chunk_size)
+            yield bytes
+        self.verify()
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
new file mode 100644
index 0000000000000..c120281198139
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
@@ -0,0 +1,65 @@
+# @Author: richard
+# @Date: 2018-11-28T17:01:36+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T16:00:31+00:00
+import logging
+from .IODecrypter import IODecrypter
+from io import BytesIO, IOBase
+logger = logging.getLogger(__name__)
+
+
+class StreamChunker(IOBase):
+    """StreamChunker, a class to keep back the last tag bytes of a file.
+
+    Keeps hold of the last `tag_length` bytes in `self.tag`
+    when reading from a `BytesIO` object.
+    """
+
+    def __init__(self, io: BytesIO, tag_length: int):
+        self.io = io
+        self.tag_length = tag_length
+        # prime the tag buffer with the first tag_length bytes
+        self.tag = self.io.read(self.tag_length)
+
+    def read(self, chunk=None):
+        bytes = self.tag + self.io.read(chunk)
+        bytes, self.tag = bytes[:-self.tag_length], bytes[-self.tag_length:]
+        return bytes
+
+    def close(self):
+        """Close the underlying http response stream."""
+        self.io.close()
+
+    def readable(self):
+        return True
+
+    def seekable(self):
+        return False
+
+    def writable(self):
+        return False
+
+
+class IOAuthDecrypterTagLength(IODecrypter):
+    def __init__(self, cipher, io, tag_length, chunk_size=16*1024):
+        super().__init__(cipher, StreamChunker(io, tag_length))
+
+    def verify(self):
+        # the remaining bytes should be the auth tag
+        tag = self.io.tag
+        logger.debug("Verifying Tag %s", tag)
+        self.cipher.verify(tag)
+
+    def iter_chunks(self, chunk_size=None):
+        """Return an iterator to yield chunks of chunk_size bytes from the raw
+        stream.
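+
+        The wrapped `StreamChunker` always withholds the trailing
+        `tag_length` bytes, so reads return `b''` once only the auth tag
+        remains, and `verify` can then check the retained tag.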
+ """ + if chunk_size is None: + chunk_size = self._DEFAULT_CHUNK_SIZE + + while True: + bytes = self.read(chunk_size) + if bytes == b'': + break + yield bytes + self.verify() diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py new file mode 100644 index 0000000000000..9346aafcbe053 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py @@ -0,0 +1,61 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:20+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:57:10+00:00 +# from typing import Iterable + +from io import IOBase +from botocore.response import StreamingBody + +import logging + +logger = logging.getLogger(__name__) + + +class IODecrypter(IOBase): + _DEFAULT_CHUNK_SIZE = 1024 + + def __init__(self, cipher, io: StreamingBody): + self.cipher: object = cipher + self.io: StreamingBody = io + + def read(self, chunk=None): + bytes = self.io.read(chunk) + return self.cipher.decrypt(bytes) + + def __iter__(self): + """Return an iterator to yield 1k chunks from the raw stream.""" + return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) + + def iter_chunks(self, chunk_size: int = _DEFAULT_CHUNK_SIZE): + # type: (...) -> Iterable[bytes] + """Return an iterator to yield chunks bytes from the raw `io` stream. + + Parameters + ---------- + chunk_size : int + iterates over no more than Chunk size bytes. If `None` use + `self._DEFAULT_CHUNK_SIZE`. + + Returns + ------- + Iterator[bytes] + + """ + decrypt = self.cipher.decrypt + chunks = self.io.iter_chunks(chunk_size) + + return (decrypt(bytes) for bytes in chunks) + + def close(self): + """Close the underlying http response stream.""" + self.io.close() + + def readable(self): + return True + + def seekable(self): + return False + + def writable(self): + return False diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py b/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py new file mode 100644 index 0000000000000..3f613f19550c5 --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py @@ -0,0 +1,38 @@ +# @Author: richard +# @Date: 2018-11-28T17:01:20+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-06T16:57:10+00:00 +from typing import Iterable +from botocore.response import StreamingBody +from .IODecrypter import IODecrypter + +import logging + +logger = logging.getLogger(__name__) + + +class IONocrypter(IODecrypter): + + def __init__(self, io): + self.io: StreamingBody = io + + def read(self, chunk=None): + return self.io.read(chunk) + + def iter_chunks(self, chunk_size: int = None) -> Iterable[bytes]: + """Return an iterator to yield chunks bytes from the raw `io` stream. + + Parameters + ---------- + chunk_size : int + iterates over no more than Chunk size bytes. If `None` use + `self._DEFAULT_CHUNK_SIZE`. + + Returns + ------- + Iterator[bytes] + + """ + if chunk_size is None: + chunk_size = self._DEFAULT_CHUNK_SIZE + return self.io.iter_chunks(chunk_size) diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py b/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py new file mode 100644 index 0000000000000..628c41928cecc --- /dev/null +++ b/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py @@ -0,0 +1,183 @@ +# @Author: richard +# @Date: 2018-11-28T15:15:44+00:00 +# @Last modified by: richard +# @Last modified time: 2018-11-28T15:15:44+00:00 + +""" +# From the RUBY Docs. 
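+
+A rough Python equivalent of the Ruby usage below, using this package
+(bucket and key names are placeholders):
+
+    from vortexa_utils.aws.s3.client_side_encryption.client import Client
+
+    with Client().get_object('my-bucket', 'my-key') as io:
+        plaintext = io.read()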
+
+Provides an encryption client that encrypts and decrypts data client-side,
+storing the encrypted data in Amazon S3.
+
+This client uses a process called "envelope encryption". Your private
+encryption keys and your data's plain-text are **never** sent to
+Amazon S3. **If you lose your encryption keys, you will not be able to
+decrypt your data.**
+
+## Envelope Encryption Overview
+
+The goal of envelope encryption is to combine the performance of
+fast symmetric encryption while maintaining the secure key management
+that asymmetric keys provide.
+
+A one-time-use symmetric key (envelope key) is generated client-side.
+This is used to encrypt the data client-side. This key is then
+encrypted by your master key and stored alongside your data in Amazon
+S3.
+
+When accessing your encrypted data with the encryption client,
+the encrypted envelope key is retrieved and decrypted client-side
+with your master key. The envelope key is then used to decrypt the
+data client-side.
+
+One of the benefits of envelope encryption is that if your master key
+is compromised, you have the option of just re-encrypting the stored
+envelope symmetric keys, instead of re-encrypting all of the
+data in your account.
+
+## Basic Usage
+
+The encryption client requires an {Aws::S3::Client}. If you do not
+provide a `:client`, then a client will be constructed for you.
+
+    require 'openssl'
+    key = OpenSSL::PKey::RSA.new(1024)
+
+    # encryption client
+    s3 = aws.s3.client_side_encryption.Client(encryption_key: key)
+
+    # round-trip an object, encrypted/decrypted locally
+    s3.put_object(bucket:'aws-sdk', key:'secret', body:'handshake')
+    s3.get_object(bucket:'aws-sdk', key:'secret').body.read
+    #=> 'handshake'
+
+    # reading an encrypted object without the encryption client
+    # results in getting the cipher text
+    Aws::S3::Client.new.get_object(bucket:'aws-sdk', key:'secret').body.read
+    #=> "... cipher text ..."
+
+## Keys
+
+For client-side encryption to work, you must provide one of the following:
+
+* An encryption key
+* A {KeyProvider}
+* A KMS encryption key id
+
+### An Encryption Key
+
+You can pass a single encryption key. This is used as a master key
+encrypting and decrypting all object keys.
+
+    key = OpenSSL::Cipher.new("AES-256-ECB").random_key # symmetric key
+    key = OpenSSL::PKey::RSA.new(1024) # asymmetric key pair
+
+    s3 = Aws::S3::Encryption::Client.new(encryption_key: key)
+
+### Key Provider
+
+Alternatively, you can use a {KeyProvider}. A key provider makes
+it easy to work with multiple keys and simplifies key rotation.
+
+### KMS Encryption Key Id
+
+If you pass the id to an AWS Key Management Service (KMS) key,
+then KMS will be used to generate, encrypt and decrypt object keys.
+
+    # keep track of the kms key id
+    kms = Aws::KMS::Client.new
+    key_id = kms.create_key.key_metadata.key_id
+
+    Aws::S3::Encryption::Client.new(
+      kms_key_id: key_id,
+      kms_client: kms,
+    )
+
+## Custom Key Providers
+
+A {KeyProvider} is any object that responds to:
+
+* `#encryption_materials`
+* `#key_for(materials_description)`
+
+Here is a trivial implementation of an in-memory key provider.
+This is provided as a demonstration of the key provider interface,
+and should not be used in production:
+
+    class KeyProvider
+
+      def initialize(default_key_name, keys)
+        @keys = keys
+        @encryption_materials = Aws::S3::Encryption::Materials.new(
+          key: @keys[default_key_name],
+          description: JSON.dump(key: default_key_name),
+        )
+      end
+
+      attr_reader :encryption_materials
+
+      def key_for(matdesc)
+        key_name = JSON.load(matdesc)['key']
+        if key = @keys[key_name]
+          key
+        else
+          raise "encryption key not found for: #{matdesc.inspect}"
+        end
+      end
+    end
+
+Given the above key provider, you can create an encryption client that
+chooses the key to use based on the materials description stored with
+the encrypted object. This makes it possible to use multiple keys
+and simplifies key rotation.
+
+    # uses "new-key" for encrypting objects, uses either for decrypting
+    keys = KeyProvider.new('new-key', {
+      "old-key" => Base64.decode64("kM5UVbhE/4rtMZJfsadYEdm2vaKFsmV2f5+URSeUCV4="),
+      "new-key" => Base64.decode64("w1WLio3agRWRTSJK/Ouh8NHoqRQ6fn5WbSXDTHjXMSo="),
+    }),
+
+    # chooses the key based on the materials description stored
+    # with the encrypted object
+    s3 = Aws::S3::Encryption::Client.new(key_provider: keys)
+
+## Materials Description
+
+A materials description is a JSON document string that is stored
+in the metadata (or instruction file) of an encrypted object.
+The {DefaultKeyProvider} uses the empty JSON document `"{}"`.
+
+When building a key provider, you are free to store whatever
+information you need to identify the master key that was used
+to encrypt the object.
+
+## Envelope Location
+
+By default, the encryption client stores the encryption envelope
+with the object, as metadata. You can choose to have the envelope
+stored in a separate "instruction file". An instruction file
+is an object, with the key of the encrypted object, suffixed with
+`".instruction"`.
+
+Specify the `:envelope_location` option as `:instruction_file` to
+use an instruction file for storing the envelope.
+
+    # default behavior
+    s3 = Aws::S3::Encryption::Client.new(
+      key_provider: ...,
+      envelope_location: :metadata,
+    )
+
+    # store envelope in a separate object
+    s3 = Aws::S3::Encryption::Client.new(
+      key_provider: ...,
+      envelope_location: :instruction_file,
+      instruction_file_suffix: '.instruction' # default
+    )
+
+When using an instruction file, multiple requests are made when
+putting and getting the object. **This may cause issues if you are
+issuing concurrent PUT and GET requests to an encrypted object.**
+"""
+
+from .client import Client
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py b/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
new file mode 100644
index 0000000000000..954b2276986b2
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
@@ -0,0 +1,17 @@
+# @Author: richard
+# @Date: 2018-11-27T18:22:34+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-05T16:25:56+00:00
+from Cryptodome.Cipher import AES  # pycryptodomex
+
+
+class CipherProvider(object):
+    def __init__(self, key):
+        self.key = key
+
+    def decryptor(self, envelope):
+        pass
+
+    def encryptor(self):
+        cipher = AES.new(self.key, AES.MODE_GCM)
+        return cipher
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/client.py b/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
new file mode 100644
index 0000000000000..6ebccdba9b9cd
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
@@ -0,0 +1,103 @@
+# @Author: richard
+# @Date: 2018-11-28T15:15:54+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T18:07:33+00:00
+import boto3
+from .kms_cipher_provider import KMSCipherProvider
+from .decrypt_handeler import DecryptHandeler
+
+
+class Client(object):
+    """
+    Client Side Encryption S3 Client.
+
+    Attributes
+    ----------
+    s3 : botocore.client.S3
+    cipher_provider : .cipher_provider.CipherProvider
+
+    Methods
+    -------
+    get_object(Bucket, Key)
+        get and decrypt an object from s3
+
+    """
+
+    def __init__(
+            self,
+            client=None,
+            cipher_provider=None,
+            key_id=None,
+            strict=None,
+            **kwargs):
+        """Initialises the client side encryption s3 client.
+
+        Parameters
+        ----------
+        client : botocore.client.S3
+            Optional S3 client to use for s3 interaction
+            Will create client if not set.
+
+        cipher_provider : CipherProvider
+            Optional `CipherProvider` to provide encryption cipher
+            Will default to `KMSCipherProvider()` if not set.
+
+        key_id : str
+            The kms `key id`, `alias` or `aws::arn`
+            for the `KMSCipherProvider`.
+
+        region_name : str
+            The region for the kms and s3 client resources.
+
+        """
+        region_name = kwargs.get('region_name')
+        self.s3 = client or boto3.client('s3', **kwargs)
+        self.cipher_provider = (
+            cipher_provider or KMSCipherProvider(
+                key_id=key_id,
+                region_name=region_name
+            )
+        )
+        self.strict = strict
+
+    def get_object(self, Bucket, Key):
+        """Retrieve object from Amazon S3.
+
+        See also:
+        `AWS API Documentation `_
+
+        `AWS Client Side Encryption `_
+
+        Parameters
+        ----------
+        Bucket : str
+            **[REQUIRED]** The Bucket
+        Key : str
+            **[REQUIRED]** The Path Key in the Bucket
+
+        """
+        # location_info = self.s3.get_bucket_location(Bucket=Bucket)
+        # bucket_region = location_info['LocationConstraint']
+
+        obj = self.s3.get_object(Bucket=Bucket, Key=Key)
+        handeler = DecryptHandeler(obj, self, self.strict)
+        return handeler.decrypt()
+
+    def object_encrypted(self, Bucket, Key) -> bool:
+        """Check if object has encryption envelope.
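+
+        Only the object's metadata (fetched with a HEAD request) is
+        inspected; the object body is never downloaded.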
+
+        Parameters
+        ----------
+        Bucket : str
+            **[REQUIRED]** The Bucket
+        Key : str
+            **[REQUIRED]** The Path Key in the Bucket
+
+        Returns
+        -------
+        bool
+
+        """
+        obj = self.s3.head_object(Bucket=Bucket, Key=Key)
+        handeler = DecryptHandeler(obj, self)
+        return handeler.extract_envelop(handeler.metadata) is not None
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py b/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
new file mode 100644
index 0000000000000..464fc3c872642
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py
@@ -0,0 +1,121 @@
+# @Author: richard
+# @Date: 2018-11-27T17:24:50+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T16:38:19+00:00
+import boto3
+import logging
+
+from .IODecrypter import IODecrypter
+from .IONocrypter import IONocrypter
+from .IOAuthDecrypter import IOAuthDecrypter
+from .IOAuthDecrypterTagLength import IOAuthDecrypterTagLength
+
+logger = logging.getLogger(__name__)
+kms = boto3.client('kms')
+
+
+class DecryptionException(Exception):
+    pass
+
+
+class DecryptHandeler(object):
+
+    V1_ENVELOPE_KEYS = {
+        'x-amz-key',
+        'x-amz-iv',
+        'x-amz-matdesc'
+    }
+
+    V2_ENVELOPE_KEYS = {
+        'x-amz-key-v2',
+        'x-amz-iv',
+        'x-amz-cek-alg',
+        'x-amz-wrap-alg',
+        'x-amz-matdesc'
+    }
+
+    POSSIBLE_ENVELOPE_KEYS = V1_ENVELOPE_KEYS | V2_ENVELOPE_KEYS
+
+    POSSIBLE_ENCRYPTION_FORMATS = {
+        'AES/GCM/NoPadding',
+        'AES/CBC/PKCS5Padding',
+        'AES/CBC/PKCS7Padding'
+    }
+
+    def __init__(self, obj, context, strict=False):
+        self.obj = obj
+        self.context = context
+        self.metadata = obj['Metadata']
+        # head_object responses have no Body, so fetch it defensively
+        self.body = obj.get('Body')
+        self.strict = strict
+
+    def decrypt(self):
+        cipher = self.decryption_cipher()
+        logger.debug(self.metadata)
+        if cipher:
+            logger.debug(cipher)
+            if self.auth_tag():
+                return self.decrypt_auth(cipher)
+            return IODecrypter(cipher=cipher, io=self.body)
+        # Object not encrypted with an envelope
+        mesg = f"Unencrypted Object at {self.obj['ETag']}"
+        if self.strict:
+            logger.error(mesg)
+            raise ValueError(mesg)
+        else:
+            logger.warning(mesg)
+            return IONocrypter(io=self.body)
+
+    def auth_tag(self):
+        return 'x-amz-tag-len' in self.metadata
+
+    def decryption_cipher(self):
+        envelope = self.extract_envelop(self.metadata)
+        if envelope:
+            return self.context.cipher_provider.decryptor(envelope)
+
+    def extract_envelop(self, meta):
+        if 'x-amz-key' in meta:
+            return self.envelope_v1(meta)
+        elif 'x-amz-key-v2' in meta:
+            return self.envelope_v2(meta)
+
+        key_prefix = 'x-amz-key'
+        key = next((k for k in meta.keys() if k.startswith(key_prefix)), None)
+        if key is not None:
+            key_version = key[len(key_prefix):]
+            mesg = f'Unknown envelope encryption version {key_version}'
+            raise DecryptionException(mesg)
+        # no envelope found
+        return None
+
+    def envelope_v2(self, meta):
+        if meta['x-amz-cek-alg'] not in self.POSSIBLE_ENCRYPTION_FORMATS:
+            alg = meta['x-amz-cek-alg']
+            msg = f'unsupported content encrypting key format: {alg}'
+            raise DecryptionException(msg)
+        if meta['x-amz-wrap-alg'] != 'kms':
+            alg = meta['x-amz-wrap-alg']
+            msg = f'unsupported key wrapping algorithm: {alg}'
+            raise DecryptionException(msg)
+        if not self.V2_ENVELOPE_KEYS <= set(meta.keys()):
+            msg = "incomplete v2 encryption envelope:\n"
+            msg += f"  expected: {', '.join(self.V2_ENVELOPE_KEYS)}\n"
+            msg += f"  got: {', '.join(meta.keys())}"
+            raise DecryptionException(msg)
+        return meta
+
+    def envelope_v1(self, meta):
+        return meta
+
+    def decrypt_auth(self, cipher):
+        meta = self.metadata
+
+        content_length_string = meta.get(
'x-amz-unencrypted-content-length',
+            None
+        )
+        if content_length_string is not None:
+            content_length = int(content_length_string)
+            return IOAuthDecrypter(cipher, self.body, content_length)
+        tag_length = int(meta['x-amz-tag-len']) // 8
+        return IOAuthDecrypterTagLength(cipher, self.body, tag_length)
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/get.py b/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
new file mode 100644
index 0000000000000..7f961e62c814e
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
@@ -0,0 +1,75 @@
+# @Author: richard
+# @Date: 2018-11-27T14:58:39+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-11-30T12:09:27+00:00
+
+# see https://medium.com/@samnco/reading-aws-ses-encrypted-emails-with-boto3-9c177f8ba130
+# and https://github.com/boto/boto3/issues/38
+
+import base64
+import json
+from Cryptodome.Cipher import AES  # pycryptodomex
+import boto3
+
+
+s3 = boto3.client('s3')
+kms = boto3.client('kms')
+
+
+def chunker(length, chunk_size):
+    index = 0
+    while index < length:
+        chunk = min(chunk_size, length - index)
+        index += chunk
+        yield chunk
+
+
+def content_streamer(bytes_io, content_length, chunk_size=16*1024):
+    for chunk in chunker(content_length, chunk_size):
+        yield bytes_io.read(chunk)
+
+
+def decrypt_object(obj):
+    metadata = obj['Metadata']
+    key_alg = metadata['x-amz-cek-alg']
+
+    envelope_key = base64.b64decode(metadata['x-amz-key-v2'])
+    envelope_iv = base64.b64decode(metadata['x-amz-iv'])
+    encrypt_ctx = json.loads(metadata['x-amz-matdesc'])
+
+    # x-amz-tag-len is in bits so // 8 to get bytes
+    tag_len = int(metadata['x-amz-tag-len']) // 8
+    original_size = int(metadata['x-amz-unencrypted-content-length'])
+
+    decrypted_envelope_key = kms.decrypt(
+        CiphertextBlob=envelope_key,
+        EncryptionContext=encrypt_ctx
+    )
+    key = decrypted_envelope_key['Plaintext']
+
+    if key_alg == 'AES/GCM/NoPadding':
+        cipher = AES.new(key, AES.MODE_GCM, envelope_iv)
+    elif key_alg == 'AES/CBC/PKCS5Padding':
+        cipher = AES.new(key, AES.MODE_CBC, envelope_iv)
+    else:
+        raise Exception('unknown encryption algorithm')
+
+    body = obj['Body']
+
+    body = body.read()
+    body, tag = body[:original_size], body[original_size:]
+    email = cipher.decrypt(body)
+    cipher.verify(tag)
+    return email
+
+
+def get_object(bucket, key):
+    obj = s3.get_object(Bucket=bucket, Key=key)
+    location_info = s3.get_bucket_location(Bucket=bucket)
+    bucket_region = location_info['LocationConstraint']
diff --git a/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py b/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
new file mode 100644
index 0000000000000..6700eedb5e0b4
--- /dev/null
+++ b/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
@@ -0,0 +1,61 @@
+# @Author: richard
+# @Date: 2018-11-27T18:20:28+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-05T17:20:59+00:00
+import base64
+import boto3
+import json
+
+from Cryptodome.Cipher import AES  # pycryptodomex
+from .cipher_provider import CipherProvider
+
+
+class KMSCipherProvider(CipherProvider):
+    aes_mode_map = {
+        'AES/GCM/NoPadding': AES.MODE_GCM,
+        'AES/CBC/PKCS5Padding': AES.MODE_CBC,
+        'AES/CBC/PKCS7Padding': AES.MODE_CBC
+    }
+
+    def __init__(self, key_id=None, **kwargs):
+        self.kms = boto3.client('kms', **kwargs)
+        self.key_id = key_id
+
+    def decryptor(self, envelope):
+        key_alg = envelope['x-amz-cek-alg']
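+        # map the declared content-encryption algorithm onto a
+        # pycryptodome AES mode; unrecognised algorithms are rejected below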
aes_mode = self.aes_mode_map.get(key_alg)
+        if aes_mode is None:
+            raise Exception(f'unknown encryption algorithm {key_alg}')
+
+        envelope_key = base64.b64decode(envelope['x-amz-key-v2'])
+        iv = base64.b64decode(envelope['x-amz-iv'])
+        encryption_context = json.loads(envelope['x-amz-matdesc'])
+
+        decrypted_envelope = self.kms.decrypt(
+            CiphertextBlob=envelope_key,
+            EncryptionContext=encryption_context
+        )
+        key = decrypted_envelope['Plaintext']
+        cipher = AES.new(key, aes_mode, iv)
+        return cipher
+
+    def encryptor(self):
+        encryption_context = {"kms_cmk_id": self.key_id}
+
+        key_data = self.kms.generate_data_key(
+            KeyId=self.key_id,
+            EncryptionContext=encryption_context,
+            KeySpec='AES_256'
+        )
+
+        key = key_data['Plaintext']
+        cipher = AES.new(key, AES.MODE_GCM)
+
+        envelope = {
+            'x-amz-key-v2': base64.encodebytes(key_data['CiphertextBlob']),
+            'x-amz-iv': base64.encodebytes(cipher.nonce),
+            'x-amz-cek-alg': 'AES/GCM/NoPadding',
+            'x-amz-wrap-alg': 'kms',
+            'x-amz-matdesc': json.dumps(encryption_context)
+        }
+        return envelope, cipher
diff --git a/aws/vortexa_utils/aws/ses/__init__.py b/aws/vortexa_utils/aws/ses/__init__.py
new file mode 100644
index 0000000000000..520cb4033d38a
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/__init__.py
@@ -0,0 +1,4 @@
+# @Author: richard
+# @Date: 2018-12-06T18:06:14+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T18:06:14+00:00
diff --git a/aws/vortexa_utils/aws/ses/application_mapper.py b/aws/vortexa_utils/aws/ses/application_mapper.py
new file mode 100644
index 0000000000000..1e910af5a7b9c
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/application_mapper.py
@@ -0,0 +1,102 @@
+import io
+from typing import Callable
+from collections.abc import Mapping
+from functools import wraps
+import pandas as pd
+
+
+def read_input_wrapper(read_func=None, **kwargs):
+    """A decorator to make the `pandas.io.parser.read` functions
+    take `bytes` as input.
+
+    Parameters
+    ----------
+    `read_func` : `Callable[..., pd.DataFrame]`
+        The `pandas.io.parsers` function to decorate.
+        If not set `read_input_wrapper` will return a decorator.
+    **`kwargs` : `dict`
+        `kwargs` to pass on to `read_func`.
+
+    Returns
+    -------
+    function : `Callable[input: bytes, pd.DataFrame]` |
+               `Callable[[Callable[..., pd.DataFrame]],
+                         Callable[input: bytes, pd.DataFrame]]`
+        Either a decorator which will wrap a pandas parser function,
+        or a wrapped parser function:
+
+    Examples
+    -------
>>> read_csv = read_input_wrapper(pd.read_csv)
+    >>> read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
+
+    or as a decorator
+
+    @read_input_wrapper
+    def read_foo(file, **kwargs) -> pd.DataFrame:
+        # some custom foo
+        return pd.DataFrame()
+
+    or
+
+    @read_input_wrapper(sep='\t')
+    def read_bar(file, **kwargs) -> pd.DataFrame:
+        # some custom bar
+        return pd.DataFrame()
+    """
+
+    def wrapper(func: Callable[..., pd.DataFrame]):
+
+        @wraps(func)
+        def reader(input: bytes) -> pd.DataFrame:
+            return func(io.BytesIO(input), **kwargs)
+        return reader
+
+    if read_func is None:
+        return wrapper
+    return wrapper(read_func)
+
+
+read_csv = read_input_wrapper(pd.read_csv)
+read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
+read_excel = read_input_wrapper(pd.read_excel, sheet_name=None)
+
+
+class ApplicationMapper(Mapping):
+    """A `Mapping` class to map MIME application types to a pandas reader."""
+
+    application_mapping = {
+        "text/plain": read_tsv,
+        "text/csv": read_csv,
+        "application/vnd.ms-excel": read_excel
+    }
+
+    application_prefixed = (
+        (
+            'application/vnd.ms-excel.sheet',
+            read_excel
+        ),
+        (
+            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            read_excel
+        )
+    )
+
+    def __getitem__(self, key):
+        func = self.application_mapping.get(key)
+        if func is not None:
+            return func
+        for prefix, func in self.application_prefixed:
+            if key.startswith(prefix):
+                return func
+
+    def __iter__(self):
+        return iter(self.application_mapping)
+
+    def __len__(self):
+        return len(self.application_mapping)
+
+
+application_mapping = ApplicationMapper()
diff --git a/aws/vortexa_utils/aws/ses/attachments.py b/aws/vortexa_utils/aws/ses/attachments.py
new file mode 100644
index 0000000000000..d5ef58684ee7f
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/attachments.py
@@ -0,0 +1,15 @@
+import email
+from .application_mapper import application_mapping
+
+
+class Attachment(object):
+
+    def __init__(self, attachment: email.message.EmailMessage):
+        self.attachment = attachment
+
+    def to_df(self):
+        content_type = self.attachment.get_content_type()
+        reader = application_mapping.get(content_type)
+        if reader is None:
+            raise TypeError(f"unknown content_type {content_type}")
+        return reader(self.attachment.get_content())
diff --git a/aws/vortexa_utils/aws/ses/inbox.py b/aws/vortexa_utils/aws/ses/inbox.py
new file mode 100644
index 0000000000000..4c3664093d938
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/inbox.py
@@ -0,0 +1,141 @@
+# @Author: richard
+# @Date: 2018-12-06T18:06:25+00:00
+# @Last modified by: richard
+# @Last modified time: 2018-12-06T19:36:09+00:00
+from typing import Iterable
+import logging
+from datetime import datetime
+from functools import wraps
+import boto3
+# cd aws/vortexa_utils
+# import aws.s3.client_side_encryption.client as client
+import vortexa_utils.aws.s3.client_side_encryption.client as client
+import email
+import email.policy
+import email.parser
+from email.message import EmailMessage

+logger = logging.getLogger(__name__)
+
+
+class Inbox(object):
+    """An inbox of emails stored in an s3 bucket.
+
+    Parameters
+    ----------
+    default_bucket : str
+        Default s3 Bucket to associate the inbox with.
+
+    """
+
+    def __init__(self, default_bucket: str = None, **kwargs):
+        """Initialise the inbox.
+
+        Parameters
+        ----------
+        default_bucket : str
+            Default s3 Bucket to associate the inbox with.
+        strict : bool
+            When True will not fetch unencrypted emails. Defaults to False.
+        **kwargs : dict
+            **`kwargs` to pass to `s3.client`.
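+
+        Example (with a hypothetical bucket name):
+
+        >>> inbox = Inbox(default_bucket='my-incoming-emails')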
+
+        """
+        self.bucket = default_bucket
+        self.s3crypto = client.Client(**kwargs)
+        self.s3 = self.s3crypto.s3
+        # Specify the default policy for email parsing else Parser defaults to
+        # email.policy.compat32 for python 3 and 2 compatibility
+        self.parser = email.parser.BytesParser(policy=email.policy.default)
+
+    def get_email(self, Key: str, Bucket: str = None) -> EmailMessage:
+        """Get `EmailMessage` Object from `Bucket`.
+
+        Parameters
+        ----------
+        Key : str
+            `Key` name of email in s3.
+        Bucket : str
+            s3 `Bucket` to look for email, will search `self.bucket` if `None`.
+
+        Returns
+        -------
+        email.message.EmailMessage
+            Email object.
+
+        """
+        Bucket = Bucket or self.bucket
+        if Bucket is None:
+            raise ValueError("Bucket not set")
+        with self.s3crypto.get_object(Bucket=Bucket, Key=Key) as io:
+            return self.parser.parse(io)
+
+    def list_objects(
+            self,
+            Bucket: str = None,
+            Path: str = None,
+            Begin: datetime = None,
+            Until: datetime = None):
+        # type: (...) -> Iterable['boto3.resources.factory.s3.ObjectSummary']
+        """List all objects in `Bucket` prefixed by `Path`.
+
+        Parameters
+        ----------
+        Bucket : str
+            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
+        Path : str
+            The `Path` prefix to filter the emails by, no filter if `None`.
+        Begin : datetime
+            Filter objects from this datetime.
+        Until : datetime
+            Filter objects until this datetime.
+
+        Returns
+        -------
+        iterable boto3.resources.factory.s3.ObjectSummary
+            List of matching email objects.
+
+        """
+        bucket = boto3.resource('s3').Bucket(Bucket or self.bucket)
+        objs = bucket.objects.filter(Prefix=Path)
+        if Begin:
+            objs = (obj for obj in objs if obj.last_modified >= Begin)
+        if Until:
+            objs = (obj for obj in objs if obj.last_modified <= Until)
+
+        if Begin is None and Until is None:
+            # if there are no timestamps don't bother sorting
+            return objs
+        return sorted(objs, key=lambda o: o.last_modified)
+
+    @wraps(list_objects, assigned=('__annotations__',))
+    def list_emails(self, **kwargs) -> Iterable[EmailMessage]:
+        """List all emails in `Bucket` prefixed by `Path`.
+
+        Parameters
+        ----------
+        Bucket : str
+            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
+        Path : str
+            The `Path` prefix to filter the emails by, no filter if `None`.
+        Begin : datetime
+            Filter objects from this datetime.
+        Until : datetime
+            Filter objects until this datetime.
+
+        Returns
+        -------
+        iterable emails
+            List of matching email objects.
+
+        Examples
+        -------
>>> inbox = Inbox()
+        >>> inbox.list_emails(Path='some/sub/folder')
+
+        """
+        objects = self.list_objects(**kwargs)
+        for obj in objects:
+            yield self.get_email(obj.key, obj.bucket_name)
diff --git a/aws/vortexa_utils/aws/ses/notification/__init__.py b/aws/vortexa_utils/aws/ses/notification/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/aws/vortexa_utils/aws/ses/notification/types/__init__.py b/aws/vortexa_utils/aws/ses/notification/types/__init__.py
new file mode 100644
index 0000000000000..7eb901a004212
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/__init__.py
@@ -0,0 +1,5 @@
+from .action import Action
+from .mail import Mail
+from .receipt import Receipt
+from .notification import Notification
+from .lambda_record import Record
diff --git a/aws/vortexa_utils/aws/ses/notification/types/action.py b/aws/vortexa_utils/aws/ses/notification/types/action.py
new file mode 100644
index 0000000000000..d62791f941960
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/action.py
@@ -0,0 +1,56 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Action:
+    """Action Object.
+
+    Attributes
+    ----------
+    type : str
+        action that was executed. [S3, SNS, Bounce, Lambda, Stop, WorkMail].
+    topicArn : str
+        Amazon Resource Name (ARN) of the SNS topic of the notification.
+    bucketName : str
+        S3 bucket to which the message was published.
+        *Present only for the S3 action type.*
+    objectKey : str
+        name that uniquely identifies the email in the Amazon S3 bucket.
+        This is the same as the messageId in the mail Object.
+        *Present only for the S3 action type.*
+    smtpReplyCode : str
+        SMTP reply code, as defined by RFC 5321.
+        *Present only for the bounce action type.*
+    statusCode : str
+        SMTP enhanced status code, as defined by RFC 3463.
+        *Present only for the bounce action type.*
+    message : str
+        human-readable text to include in the bounce message.
+        *Present only for the bounce action type.*
+    sender : str
+        The email address of the sender of the email that bounced.
+        This is the address from which the bounce message was sent.
+        *Present only for the bounce action type.*
+    functionArn : str
+        ARN of the Lambda function that was triggered.
+        *Present only for the Lambda action type.*
+    invocationType : str
+        invocation type of the Lambda function. [RequestResponse, Event]
+        *Present only for the Lambda action type.*
+    organizationArn : str
+        ARN of the Amazon WorkMail organization.
+        *Present only for the WorkMail action type.*
+
+    _see 
+    """
+    type: str
+    topicArn: str = None
+    bucketName: str = None
+    objectKey: str = None
+    smtpReplyCode: str = None
+    statusCode: str = None
+    message: str = None
+    sender: str = None
+    functionArn: str = None
+    invocationType: str = None
+    organizationArn: str = None
diff --git a/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py b/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
new file mode 100644
index 0000000000000..3eecd720fedf8
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py
@@ -0,0 +1,18 @@
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from .mail import Mail
+from .receipt import Receipt
+
+
+@nested_dataclass
+class SESRecord:
+    receipt: Receipt
+    mail: Mail
+
+
+@nested_dataclass
+class Record:
+    """A single SES record from a Lambda event.
+    """
+    eventSource: str  # "aws:ses",
+    eventVersion: str  # "1.0",
+    ses: SESRecord
diff --git a/aws/vortexa_utils/aws/ses/notification/types/mail.py b/aws/vortexa_utils/aws/ses/notification/types/mail.py
new file mode 100644
index 0000000000000..49252ed6610f3
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/mail.py
@@ -0,0 +1,44 @@
+from typing import List, Dict, Any
+from dataclasses import dataclass
+
+
+@dataclass
+class Mail:
+    """Mail Object.
+
+    Attributes
+    ----------
+    destination: List[str]
+        A complete list of all recipient addresses (including To: and CC:)
+        from the MIME headers of the incoming email.
+    messageId: str
+        String that contains the unique ID assigned to the email by Amazon SES.
+        If the email was delivered to Amazon S3, the message ID is also the
+        Amazon S3 object key that was used to write the message to your Amazon
+        S3 bucket.
+    source: str
+        String that contains the email address (the envelope MAIL FROM address)
+        that the email was sent from.
+    timestamp: str
+        String that contains the time at which the email was received,
+        in ISO8601 format.
+    headers: List[Dict[str, str]]
+        A list of Amazon SES headers and your custom headers.
+        Each header in the list has a name field and a value field.
+    commonHeaders: Dict[str, Any]
+        A mapping of headers common to all emails (From, To, Subject, etc.)
+        to their values.
+    headersTruncated: str
+        String that specifies whether the headers were truncated,
+        which will happen if the headers are larger than 10 KB.
+        Possible values are true and false.
+
+    """
+
+    destination: List[str]
+    messageId: str
+    source: str
+    timestamp: str
+    headers: List[Dict[str, str]]
+    commonHeaders: Dict[str, Any]
+    headersTruncated: str
diff --git a/aws/vortexa_utils/aws/ses/notification/types/notification.py b/aws/vortexa_utils/aws/ses/notification/types/notification.py
new file mode 100644
index 0000000000000..19fee6d3060d4
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/notification.py
@@ -0,0 +1,29 @@
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from . import Mail, Receipt
+
+
+@nested_dataclass
+class Notification:
+    """Notification Object.
+
+    Attributes
+    ----------
+    notificationType: str
+        The notification type. For this type of notification,
+        the value is always Received.
+    receipt : Receipt
+        Object that contains information about the email delivery.
+    mail : Mail
+        Object that contains information about the email
+        associated with the notification.
+    content : str
+        String that contains the raw, unmodified email, which is typically
+        in Multipurpose Internet Mail Extensions (MIME) format.
+        *Only if the notification was triggered by an SNS action.*
+
+    """
+
+    notificationType: str
+    receipt: Receipt
+    mail: Mail
+    content: str
diff --git a/aws/vortexa_utils/aws/ses/notification/types/receipt.py b/aws/vortexa_utils/aws/ses/notification/types/receipt.py
new file mode 100644
index 0000000000000..b5d1a3857508d
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/receipt.py
@@ -0,0 +1,65 @@
+from typing import List
+from vortexa_utils.aws.utils.dataclasses import nested_dataclass
+from .action import Action
+from .verdicts import (DKIMVerdict,
+                       DMARCVerdict,
+                       SPFVerdict,
+                       SpamVerdict,
+                       VirusVerdict)
+
+
+@nested_dataclass
+class Receipt:
+    """SNS Receipt object.
+
+    Attributes
+    ----------
+    action : Action
+        Encapsulates information about the action that was executed.
+
+    dkimVerdict : DKIMVerdict
+        Indicates whether the DomainKeys Identified Mail (DKIM) check passed.
+
+    dmarcPolicy : str
+        Domain-based Message Authentication, Reporting & Conformance (DMARC)
+        settings for the sending domain.
+        This field only appears if the message fails DMARC authentication.
+        Possible values for this field are:
+        - none: take no specific action on messages that fail DMARC.
+        - quarantine: treat messages that fail DMARC as suspicious.
+        - reject: reject messages that fail DMARC authentication.
+
+    dmarcVerdict : DMARCVerdict
+        Indicates whether the DMARC check passed.
+
+    processingTimeMillis : str
+        `str` specifies the period, in milliseconds, from the time Amazon SES
+        received the message to the time it triggered the action.
+
+    recipients : List[str]
+        list of recipients that were matched by the active receipt rule.
+        The addresses may differ from those listed by the destination field
+        in the mail Object.
+
+    spamVerdict : SpamVerdict
+        Indicates whether the message is spam.
+
+    spfVerdict : SPFVerdict
+        Whether the Sender Policy Framework (SPF) check passed.
+
+    timestamp : str
+        ISO 8601 format string representing when the action was triggered.
+
+    virusVerdict : VirusVerdict
+        Whether the message contains a virus.
+        For a list of possible values, see virusVerdict Object.
+    """
+    action: Action
+    processingTimeMillis: str
+    recipients: List[str]
+    timestamp: str
+    dmarcPolicy: str = None
+    dmarcVerdict: DMARCVerdict = None
+    dkimVerdict: DKIMVerdict = None
+    spamVerdict: SpamVerdict = None
+    spfVerdict: SPFVerdict = None
+    virusVerdict: VirusVerdict = None
diff --git a/aws/vortexa_utils/aws/ses/notification/types/verdicts.py b/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
new file mode 100644
index 0000000000000..a4a47e06ce02f
--- /dev/null
+++ b/aws/vortexa_utils/aws/ses/notification/types/verdicts.py
@@ -0,0 +1,43 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Verdict(object):
+    """Verdict object.
+
+    Attributes
+    ----------
+    status : str
+        String that contains the verdict. Possible values are:
+        - PASS: The message passed the given test.
+        - FAIL: The message failed the given test.
+        - GRAY: The message failed the given test,
+        - PROCESSING_FAILED: There is an issue that prevents Amazon SES
+          from providing a verdict to the given test.
+    """
+    status: str
+
+
+@dataclass
+class DKIMVerdict(Verdict):
+    ...
+
+
+@dataclass
+class DMARCVerdict(Verdict):
+    ...
+
+
+@dataclass
+class SpamVerdict(Verdict):
+    ...
+
+
+@dataclass
+class SPFVerdict(Verdict):
+    ...
+
+
+@dataclass
+class VirusVerdict(Verdict):
+    ...
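Taken together, these dataclasses turn a raw SES receipt dict into typed
objects in a single call. A minimal sketch of that intended use (the payload
values here are illustrative, not taken from the source):

```python
from vortexa_utils.aws.ses.notification.types import Receipt

receipt = Receipt(**{
    "action": {
        "type": "SNS",
        "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic",
    },
    "processingTimeMillis": "222",
    "recipients": ["recipient@example.com"],
    "timestamp": "2015-09-11T20:32:33.936Z",
    "dkimVerdict": {"status": "PASS"},
})

# nested_dataclass recursively builds the Action and DKIMVerdict instances
assert receipt.action.topicArn.endswith("example-topic")
assert receipt.dkimVerdict.status == "PASS"
assert receipt.spamVerdict is None  # optional verdicts default to None
```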
diff --git a/aws/vortexa_utils/aws/utils/__init__.py b/aws/vortexa_utils/aws/utils/__init__.py new file mode 100644 index 0000000000000..dda33076e9246 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/__init__.py @@ -0,0 +1,4 @@ +# @Author: richard +# @Date: 2018-12-04T20:13:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/aws/vortexa_utils/aws/utils/dataclasses/__init__.py b/aws/vortexa_utils/aws/utils/dataclasses/__init__.py new file mode 100644 index 0000000000000..0b443f83003f7 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/dataclasses/__init__.py @@ -0,0 +1 @@ +from .nested import * diff --git a/aws/vortexa_utils/aws/utils/dataclasses/nested.py b/aws/vortexa_utils/aws/utils/dataclasses/nested.py new file mode 100644 index 0000000000000..22e1b071fd8d0 --- /dev/null +++ b/aws/vortexa_utils/aws/utils/dataclasses/nested.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass, is_dataclass +from functools import wraps + + +def nested_dataclass(*args, **kwargs): + def wrapper(cls): + cls = dataclass(cls, **kwargs) + original_init = cls.__init__ + + @wraps(original_init) + def __init__(self, *args, **kwargs): + for name, value in kwargs.items(): + field_type = cls.__annotations__.get(name, None) + if is_dataclass(field_type) and isinstance(value, dict): + new_obj = field_type(**value) + kwargs[name] = new_obj + original_init(self, *args, **kwargs) + cls.__init__ = __init__ + return cls + return wrapper(args[0]) if args else wrapper diff --git a/collections/tests/__init__.py b/collections/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/tests/collections/types/__init__.py b/collections/tests/collections/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/tests/collections/types/test_instance_caching_abc.py b/collections/tests/collections/types/test_instance_caching_abc.py new file mode 100644 index 0000000000000..3b8f0c0e2ec81 --- /dev/null +++ b/collections/tests/collections/types/test_instance_caching_abc.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Nov 19 14:02:03 2018 +@author: richard +""" +import unittest + +from vortexa_utils.collections.types.instance_caching_abc import ( + InstanceCachingABC, + instance_caching) + + +class InstanceCachingABCTests(unittest.TestCase): + + def register_class(self, klass): + setattr(self, klass.__name__, klass) + return klass + + def setUp(self): + @self.register_class + class Foo(object, metaclass=InstanceCachingABC): + pass + + @self.register_class + class Bar(object): + pass + + def test_signiture(self): + self.assertEqual(repr(self.Foo), repr(self.Bar).replace('Bar', 'Foo')) + + def test_instance_cache(self): + # no instances + self.assertFalse(list(self.Foo)) + + # one instance + foo = self.Foo() + foos = list(self.Foo) + self.assertEqual(len(foos), 1) + klass_name, instance = foos[0] + self.assertEqual(instance, foo) + self.assertEqual(klass_name, 'Foo') + + # more instances + foo2 = self.Foo() + foos = list(self.Foo) + self.assertEqual(len(foos), 2) + klass_name, instance = foos[-1] + self.assertEqual(instance, foo2) + self.assertEqual(klass_name, 'Foo') + + +class InstanceCachingDecoratorTests(InstanceCachingABCTests): + + def setUp(self): + register = self.register_class + + @register + class Foo(object): + pass + + self._Foo = Foo + self.Foo = Foo = instance_caching(Foo) + + @register + class Bar(Foo): + pass + + @register + class Baz(Bar): + pass + + 
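+        # a second, independent branch of the hierarchy (Bo -> Bill) lets
+        # the tests check that instances are cached per subtree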
@register + class Bo(Foo): + pass + + @register + class Bill(Bo): + pass + + def test_signiture(self): + self.assertEqual(repr(self.Foo), repr(self._Foo)) + + def test_list_subclasses(self): + self.assertEqual( + set(self.Foo._allsubclasses()), + set((self.Foo, self.Bar, self.Baz, self.Bo, self.Bill)) + ) + self.assertEqual( + set(self.Bar._allsubclasses()), + set((self.Bar, self.Baz)) + ) + self.assertEqual( + set(self.Bo._allsubclasses()), + set((self.Bill, self.Bo)) + ) + + def test_instance_cache(self): + super().test_instance_cache() + # no instances in subclasses + for klass in self.Bar._allsubclasses(): + self.assertFalse(list(klass)) + + for klass in self.Bo._allsubclasses(): + self.assertFalse(list(klass)) + + self.assertEqual(len(list(self.Foo)), 2) + # one instance + bar = self.Bar() + foos = list(self.Foo) + bars = list(self.Bar) + self.assertEqual(len(foos), 3) + self.assertEqual(len(bars), 1) + klass_name, instance = bars[0] + self.assertEqual(instance, bar) + self.assertEqual(klass_name, 'Bar') + + baz = self.Baz() + foos = list(self.Foo) + bars = list(self.Bar) + bazs = list(self.Baz) + self.assertEqual(len(foos), 4) + self.assertEqual(len(bars), 2) + self.assertEqual(len(bazs), 1) + klass_name, instance = bazs[0] + self.assertEqual(instance, baz) + self.assertEqual(klass_name, 'Baz') + + for klass in self.Bo._allsubclasses(): + self.assertFalse(list(klass)) diff --git a/collections/vortexa_utils/collections/__inti__.py b/collections/vortexa_utils/collections/__inti__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/vortexa_utils/collections/types/__init__.py b/collections/vortexa_utils/collections/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/collections/vortexa_utils/collections/types/instance_caching_abc.py b/collections/vortexa_utils/collections/types/instance_caching_abc.py new file mode 100644 index 0000000000000..cdc6c556c07be --- /dev/null +++ b/collections/vortexa_utils/collections/types/instance_caching_abc.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Nov 19 09:57:05 2018 +@author: richard +""" +from abc import ABCMeta + + +class InstanceCachingABC(ABCMeta): + """Metaclass for defining Instance Caching Abstract Base Classes (ICABC) + Use this metaclass to create an ICABC. 
An ICABC will remember the instances + created from it, and can be iterated over to return all instances and + subclass instances. + """ + + def __init__(cls, name, bases, namespace): + super().__init__(name, bases, namespace) + cls._instances = list() + + def __call__(cls, *args, **kwargs): + instance = super().__call__(*args, **kwargs) + cls._instances.append(instance) + return instance + + def _allsubclasses(cls): + yield cls + for subclass in cls.__subclasses__(): + yield from subclass._allsubclasses() + + # Metamethods, called on class objects: + def __iter__(cls): + return ((klass.__name__, instance) + for klass in cls._allsubclasses() + for instance in klass._instances) + + +def instance_caching(klass): + class Decorated(klass, metaclass=InstanceCachingABC): + pass + + Decorated.__name__ = klass.__name__ + Decorated.__qualname__ = klass.__qualname__ + Decorated.__module__ = klass.__module__ + return Decorated diff --git a/database/README.md b/database/README.md new file mode 100644 index 0000000000000..4c64ed6286b79 --- /dev/null +++ b/database/README.md @@ -0,0 +1,21 @@ +# Vortexa Utils DatabaseFactory + +Small factory class to give you a `SqlAlchemy` engine connection to an +`AWS rds` instance, ensuring SSL, with credentials obtained from the secrets manager. +## Usage + +```python +db_factory = DatabaseFactory() +engine = db_factory.engine(dbname='rolling_backup') + +sql = """ +SELECT + name +FROM new_polygons where name is not Null; +""" + +engine.execute(sql) +``` +## TODO Other utility functions + +- [ ] create a `~/.dbpass` file diff --git a/database/README.rst b/database/README.rst new file mode 100644 index 0000000000000..5f2775e7ba207 --- /dev/null +++ b/database/README.rst @@ -0,0 +1,31 @@ +.. contents:: + :depth: 3 +.. + +Vortexa Utils DatabaseFactory +============================= + +Small factory class to give you a ``SqlAlchemy`` engine connection to an +``AWS rds`` instance, ensuring SSL, with credentials obtained from the +secrets manager. + +Usage +----- + +.. 
code:: python + + db_factory = DatabaseFactory() + engine = db_factory.engine(dbname='rolling_backup') + + sql = """ + SELECT + name + FROM new_polygons where name is not Null; + """ + + engine.execute(sql) + +TODO Other utility functions +---------------------------- + +- [ ] create a ``~/.dbpass`` file diff --git a/database/setup.py b/database/setup.py new file mode 100644 index 0000000000000..4ea029d37a074 --- /dev/null +++ b/database/setup.py @@ -0,0 +1,40 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:14:15+00:00 +import io +import os + +from setuptools import find_packages, setup + +namespace = "vortexa_utils" +description = "Vortexa Database Engine Factory" + +dependencies = ["boto3", "SqlAlchemy", "psycopg2-binary", "requests"] + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, "README.rst") +with io.open(readme_filename, encoding="utf-8") as readme_file: + readme = readme_file.read() + +packages = [ + package for package in find_packages() if package.startswith(namespace) +] + +setup( + name="vortexa_utils_database", + version="0.0.1", + description=description, + long_description=readme, + author="Richard Mathie", + author_email="richard.mathie@vortexa.com", + zip_safe=False, + tests_require=["nose2"], + test_suite="nose2.collector.collector", + packages=packages, + install_requires=dependencies, + extras_require={"query_cache": ["pandas", "pyarrow"]}, +) diff --git a/database/tests/__init__.py b/database/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/database/tests/test_database_factory.py b/database/tests/test_database_factory.py new file mode 100644 index 0000000000000..45ad343c6c796 --- /dev/null +++ b/database/tests/test_database_factory.py @@ -0,0 +1,16 @@ +import os +import unittest + +from vortexa_utils.database import DatabaseFactory + + +class TestEngineFactory(unittest.TestCase): + def test_create_factory(self): + db_factory = DatabaseFactory() + return db_factory + + def test_get_cert(self): + db_factory = self.test_create_factory() + cert_file = db_factory.fetch_cert() + self.assertEqual(cert_file, db_factory.cert_file) + assert os.path.isfile(cert_file) diff --git a/database/tests/test_querey_cache.py b/database/tests/test_querey_cache.py new file mode 100644 index 0000000000000..2e441f58cdb2e --- /dev/null +++ b/database/tests/test_querey_cache.py @@ -0,0 +1,21 @@ +# cd database +import logging + +from vortexa_utils.database.default_factories import DevFactory +from vortexa_utils.database.query_cache import QueryCache + +logger = logging.getLogger(__name__) + +logging.basicConfig(level=logging.DEBUG) + +# factory = DevFactory() +# engine = factory.engine() +# qc = QueryCache() + +# %time df = qc.read_sql("clarksons", engine) + + +def test_filename(): + qc = QueryCache() + assert qc.filename("some random query") == "qAdzxvMgeSc=.parquet.snappy" + assert qc.filename("banned_words") == "LoRkfDuNmuA=.parquet.snappy" diff --git a/database/vortexa_utils/__init__.py b/database/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a2ae790eb1d2c --- /dev/null +++ b/database/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T19:01:36+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:01:39+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git 
a/database/vortexa_utils/database/__init__.py b/database/vortexa_utils/database/__init__.py new file mode 100644 index 0000000000000..5c67964aad121 --- /dev/null +++ b/database/vortexa_utils/database/__init__.py @@ -0,0 +1,7 @@ +# @Author: richard +# @Date: 2018-12-04T17:55:58+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T17:55:58+00:00 + +from .database import DatabaseFactory +from .default_factories import DevFactory, ProdFactory, RedFactory diff --git a/database/vortexa_utils/database/database.py b/database/vortexa_utils/database/database.py new file mode 100644 index 0000000000000..8634168939edd --- /dev/null +++ b/database/vortexa_utils/database/database.py @@ -0,0 +1,118 @@ +# @Author: richard +# @Date: 2018-12-04T17:58:19+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T17:58:19+00:00 +import json +import logging +import os +from dataclasses import dataclass, field +from typing import Dict + +import boto3 +import requests +from sqlalchemy import create_engine + +logger = logging.getLogger(__name__) + +secretsmanager = boto3.client("secretsmanager") + +DEFAULT_CERT_URL = ( + "https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem" +) +DEFAULT_CERT_PATH = "/tmp/vortexa_utils_py/rds/ca-bundle.pem" + +DEFAULT_CREDENTIAL = "rds/dev/default" +DEFAULT_CREDENTIAL_MAPPING = dict( + host="host", username="user", port="port", password="password" +) + + +@dataclass +class DatabaseFactory(object): + """DatabaseFactory Class. + + Class for creating a database engine factory. + + usage:: + + factory = DatabaseFactory() + engine = factory.engine() + + Parameters + ---------- + secret_id : str + `secret_id` of the database credential. + (the default is 'rds/dev/default', which points to the dev database host) + cert_file : str + The location to store the SSL certificate file. + cert_url : str + The URL to fetch the AWS RDS SSL certificates from. + credential_mapping : Dict[str, str] + A mapping between the `psycopg` connection args and the credential keys + """ + + secret_id: str = DEFAULT_CREDENTIAL + cert_file: str = DEFAULT_CERT_PATH + cert_url: str = DEFAULT_CERT_URL + credential_mapping: Dict[str, str] = field( + default_factory=lambda: dict(DEFAULT_CREDENTIAL_MAPPING) + ) + + def __post_init__(self): + logger.debug(f"Created {self.secret_id} factory object") + + def fetch_cert(self, force: bool = False): + if not os.path.isfile(self.cert_file) or force: + logger.info("getting cert") + os.makedirs(os.path.dirname(self.cert_file), exist_ok=True) + cert = requests.get(self.cert_url) + with open(self.cert_file, "w") as f: + f.write(cert.text) + return self.cert_file + + def get_credential(self): + secret = secretsmanager.get_secret_value(SecretId=self.secret_id) + return json.loads(secret["SecretString"]) + + def engine(self, dbname: str = None, echo: bool = False, **kwargs): + # type: (...) -> sqlalchemy.engine.Engine + """`sqlalchemy.engine.Engine` instance factory. + + Parameters + ---------- + dbname : str + database name `dbname` to connect to. + (the default is `None`, which will use the dbname in the secret + credential). + echo : bool + `echo` (the default is False). 
+ + Returns + ------- + sqlalchemy.engine.Engine + SQLalchemy connection engine + + Examples + ------- + >>> factory = DatabaseFactory() + >>> engine = factory.engine() + + """ + cert_filename = self.fetch_cert() + credential = self.get_credential() + connect_args = { + v: credential[k] for k, v in self.credential_mapping.items() + } + + dbname = dbname or os.environ.get("DBNAME") or credential["dbname"] + host = connect_args.pop("host") + port = connect_args.pop("port") + + connect_args.update(sslmode="verify-full", sslrootcert=cert_filename) + engine = create_engine( + f"postgresql://{host}:{port}/{dbname}", + echo=echo, + connect_args=connect_args, + **kwargs, + ) + return engine diff --git a/database/vortexa_utils/database/default_factories.py b/database/vortexa_utils/database/default_factories.py new file mode 100644 index 0000000000000..d4f8ae0ca09e1 --- /dev/null +++ b/database/vortexa_utils/database/default_factories.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass + +from .database import DatabaseFactory + + +@dataclass +class DevFactory(DatabaseFactory): + secret_id: str = "rds/dev/default" + + +@dataclass +class ProdFactory(DatabaseFactory): + secret_id: str = "rds/prod/default" + + +@dataclass +class RedFactory(DatabaseFactory): + cert_url: str = "https://s3.amazonaws.com/redshift-downloads/redshift-ca-bundle.crt" + cert_file: str = "/tmp/vortexa_utils_py/rds/redshift-ca-bundle.pem" + secret_id: str = "redshift/prod/default" diff --git a/database/vortexa_utils/database/query_cache.py b/database/vortexa_utils/database/query_cache.py new file mode 100644 index 0000000000000..ea86e9a914cd5 --- /dev/null +++ b/database/vortexa_utils/database/query_cache.py @@ -0,0 +1,77 @@ +import hashlib +import logging +import os +import time +from base64 import urlsafe_b64encode +from dataclasses import dataclass +from functools import wraps +from typing import Union + +import pandas as pd +from sqlalchemy.engine import Connection, Engine + +from pyarrow.lib import ArrowIOError + +logger = logging.getLogger(__name__) + + +@dataclass +class QueryCache(object): + result_extension: str = ".parquet.snappy" + cache_dir: str = os.path.join( + "/tmp", "python_utils", "database", "query_cache", "df_cache" + ) + ttl: int = 3600 + + def __post_init__(self): + os.makedirs(self.cache_dir, exist_ok=True) + + def path(self, url): + return os.path.join( + self.cache_dir, + url.drivername, + f"{url.host}:{url.port}", + url.database, + ) + + def filename(self, query): + query_digest = urlsafe_b64encode( + hashlib.blake2s(str(query).encode(), digest_size=8).digest() + ) + return query_digest.decode("ascii") + self.result_extension + + @wraps(pd.read_sql) + def read_sql( + self, + query: str, + con: Union[Engine, Connection], + ttl: int = None, + invalidate_cache: bool = False, + *args, + **kwargs, + ) -> pd.DataFrame: + + # formulate a path + path = self.path(con.engine.url) + filename = self.filename(query) + filepath = os.path.join(path, filename) + os.makedirs(path, exist_ok=True) + + # check if the cache exists and is valid + ttl = self.ttl if ttl is None else ttl + + if ( + os.path.isfile(filepath) + and time.time() - os.path.getmtime(filepath) < ttl + ): + try: + logger.debug("reading from cache %s", filepath) + df = pd.read_parquet(filepath) + except ArrowIOError as e: + logger.error("Invalid Cache file, error: %s", e) + else: + return df + logger.debug("reading from database") + df = pd.read_sql(query, con, *args, **kwargs) + df.to_parquet(filepath) + return df diff --git 
a/database/vortexa_utils/database/utils.py b/database/vortexa_utils/database/utils.py new file mode 100644 index 0000000000000..811e36443265d --- /dev/null +++ b/database/vortexa_utils/database/utils.py @@ -0,0 +1,62 @@ +from typing import Iterable, List + +import sqlalchemy +from pandas.io.sql import SQLTable +from sqlalchemy.engine import Connectable + + +def upsert( + table: SQLTable, conn: Connectable, keys: List[str], data_iter: Iterable +): + """Upsert method to be used with `pandas.DataFrame.to_sql`. + + In pandas > 0.24.0 you can specify a method to control the insertion clause + used by `pandas.DataFrame.to_sql`. + + Parameters + ---------- + table : pandas.io.sql.SQLTable + Description of parameter `table`. + conn : sqlalchemy.engine.Connectable + Description of parameter `conn`. + keys : List[str] + Description of parameter `keys`. + data_iter : Iterable + Description of parameter `data_iter`. + + Returns + ------- + type + Description of returned object. + + Examples + ------- + Examples should be written in doctest format, and + should illustrate how to use the function/class. + >>> + + """ + cols = ", ".join(f'"{k}"' for k in keys) + if table.schema: + tname = "{}.{}".format(table.schema, table.name) + else: + tname = table.name + + # placeholder = ", ".join(["?"] * len(keys)) + placeholder = ", ".join([f":{k}" for k in keys]) + datas = ({k: d for k, d in zip(keys, data)} for data in data_iter) + if conn.engine.driver.endswith("sqlite"): + # sqlite + sql = f"INSERT or IGNORE INTO {tname} ({cols}) VALUES ({placeholder})" + else: + # postgresql + sql = sqlalchemy.text( + f""" + INSERT INTO {tname} + ({cols}) + VALUES ({placeholder}) + ON CONFLICT DO NOTHING + """ + ) + + conn.execute(sql, *datas) diff --git a/deployment/setup.py b/deployment/setup.py new file mode 100644 index 0000000000000..6432302dac087 --- /dev/null +++ b/deployment/setup.py @@ -0,0 +1,20 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:16:08+00:00 +from setuptools import setup, find_packages + + +setup( + name='vortexa_utils_deploy', + version='0.0.1', + description='', + long_description='', + + author='Richard Mathie', + author_email='richard.mathie@vortexa.com', + + zip_safe=False, + + packages=find_packages(), +) diff --git a/deployment/vortexa_utils/__init__.py b/deployment/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a2ae790eb1d2c --- /dev/null +++ b/deployment/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T19:01:36+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:01:39+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/deployment/vortexa_utils/portainer/Readme.md b/deployment/vortexa_utils/portainer/Readme.md new file mode 100644 index 0000000000000..f1d2dcd78744e --- /dev/null +++ b/deployment/vortexa_utils/portainer/Readme.md @@ -0,0 +1 @@ +# Portainer API Helper Module diff --git a/deployment/vortexa_utils/portainer/__init__.py b/deployment/vortexa_utils/portainer/__init__.py new file mode 100644 index 0000000000000..26e33c55820aa --- /dev/null +++ b/deployment/vortexa_utils/portainer/__init__.py @@ -0,0 +1,8 @@ +# @Author: richard +# @Date: 2018-12-04T17:56:21+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:11:43+00:00 + + +def notNone(x): + return x is not None diff --git a/deployment/vortexa_utils/portainer/api.py b/deployment/vortexa_utils/portainer/api.py new file mode 
100644 index 0000000000000..456ace9496cba --- /dev/null +++ b/deployment/vortexa_utils/portainer/api.py @@ -0,0 +1,56 @@ +# @Author: richard +# @Date: 2018-12-04T18:05:38+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:14:40+00:00 +import os +import requests +from functools import wraps +from urllib.parse import urlparse +from .stacks import Stacks +from . import notNone + + +class RequestHelper(object): + def __init__(self, api, base_url='api'): + self.api = api + self.base_url = base_url + + def wrapper(self, func): + @wraps(func) + def caller(url=None, *args, **kwargs): + parts = filter(notNone, (self.api.host, self.base_url, url)) + parts = map(str, parts) + headers = kwargs.get("headers", {}) + headers.update(self.api.get_header()) + kwargs["headers"] = headers + return func(os.path.join(*parts), + *args, **kwargs).json() + return caller + + def __getattr__(self, name, *args, **kwargs): + method = getattr(requests, name, *args, **kwargs) + return self.wrapper(method) + + +class PortainerAPI(object): + def __init__(self, host, user=None, pw=None): + self.host = urlparse(host, scheme='http').geturl() + self.user = user + self.pw = pw + if any(ting is not None for ting in (host, user, pw)): + self.get_jwt() + self.requests = RequestHelper(self) + self.stacks = Stacks(self) + + def get_jwt(self): + """ + http POST :9000/api/auth Username="admin" Password="adminpassword" + """ + url = f'{self.host}/api/auth' + resp = requests.post(url, json=dict(Username=self.user, + Password=self.pw)) + self.token = resp.json().get('jwt') + return self.token + + def get_header(self): + return {"Authorization": f"Bearer {self.token}"} diff --git a/deployment/vortexa_utils/portainer/stacks.py b/deployment/vortexa_utils/portainer/stacks.py new file mode 100644 index 0000000000000..8eaf2f8d7482d --- /dev/null +++ b/deployment/vortexa_utils/portainer/stacks.py @@ -0,0 +1,61 @@ +# @Author: richard +# @Date: 2018-12-04T18:04:55+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:34:09+00:00 +from .api import RequestHelper + + +class Stacks(object): + def __init__(self, api): + self.api = api + self.requests = RequestHelper(api, 'api/stacks') + + def get(self, stack_id): + return self.requests.get(stack_id) + + def list(self): + return self.get(None) + + def filter(self, **kwargs): + def filter_kwargs(stack): + return all(str(stack[k]) == str(v) for k, v in kwargs.items()) + + return filter(filter_kwargs, self.list()) + + def first(self, **kwargs): + return next(self.filter(**kwargs)) + + def get_file(self, stack_id): + return self.requests.get(f'{stack_id}/file') + + def update(self, stack_id=None, endpointId=None, name=None, + Env=None, StackFileContent=None, Prune=False): + # get the stack by filtering on name or stack_id + if name is not None: + stack = self.first(Name=name) + stack_id = stack['Id'] + elif stack_id is not None: + stack = self.get(stack_id) + + endpointId = stack.get('EndpointId', endpointId) + if endpointId is None: + raise Exception("no entrypointID found or set") + + # update the old Env with the new Env + old_Env = stack.get('Env') + if old_Env is not None: + update_keys = set(e['name'] for e in Env) + old_Env = list(e for e in old_Env if e['name'] not in update_keys) + Env += old_Env + + if StackFileContent is None: + StackFileContent = self.get_file(stack_id)['StackFileContent'] + body = dict(StackFileContent=StackFileContent, + Env=Env, + Prune=Prune) + + return self.requests.put( + stack_id, + params=dict(endpointId=endpointId), + 
json=body + ) diff --git a/deployment/vortexa_utils/portainer/update_stack.py b/deployment/vortexa_utils/portainer/update_stack.py new file mode 100644 index 0000000000000..275f8e6dd8604 --- /dev/null +++ b/deployment/vortexa_utils/portainer/update_stack.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# @Author: richard +# @Date: 2018-12-04T18:10:07+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T18:31:45+00:00 +import argparse +from pprint import pprint +from . import notNone +from .api import PortainerAPI + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Portainer API CLI') + parser.add_argument('--host', '-H', type=str, + help='Host name of Portainer API', + default='https://lawgiver.vortexa.com:9000') + parser.add_argument('--user', '-u', type=str, + help='User name', + default='kraftwork_updater') + parser.add_argument('--pass', '-p', type=str, dest='password', + help='Password') + + parser.add_argument('--name', '-n', type=str, + help='Stack name to filter') + + parser.add_argument('--env', '-e', nargs=2, action='append', + help='key value pairs of config to update') + + parser.add_argument('--filter', '-f', nargs=2, action='append', + help='key value pairs to filter stacks by') + + def add_cmd(flag): + def command(func): + parser.add_argument( + flag, + action='store_const', + const=func, + dest='cmd' + ) + return func + return command + + def get_filter(): + Filter = {} + if args.filter is not None: + Filter.update(args.filter) + if args.name is not None: + Filter.update(Name=args.name) + return Filter + + @add_cmd('--list') + def list_stacks(): + if any(map(notNone, ((args.name, args.filter)))): + Filter = get_filter() + return list(api.stacks.filter(**Filter)) + else: + return api.stacks.list() + + @add_cmd('--update') + def update_stacks(): + env = [dict(name=k, value=v) for k, v in args.env] + return api.stacks.update(name=args.name, Env=env) + + args = parser.parse_args() + + api = PortainerAPI(host=args.host, + user=args.user, + pw=args.password) + + pprint(args.cmd()) + +# api.stacks.list() +# api.stacks.update( +# 1, 1, +# Env=[{ +# "name": "KFAFTWERK_BUILD_NUM", +# "value": '376' +# }] +# ) +# +# +# content = Path('docker/scripts/docker-compose.yml').read_text() +# +# api.requests.post('stacks?type=1&method=string&endpointId=1', +# json=dict( +# Name="myStack", +# StackFileContent=content, +# Env=[dict(name="Hello",value="world")], +# SwarmID='729a4f2h5kj2sd42x34pl3uu1' +# ) +# ) diff --git a/docker/pandas/Dockerfile b/docker/pandas/Dockerfile new file mode 100644 index 0000000000000..76155dd44eb33 --- /dev/null +++ b/docker/pandas/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.7-alpine +ARG PANDAS_VERSION=0.24.1 + +RUN apk add libstdc++ \ + && apk add --no-cache --virtual .build-deps \ + g++ \ + gcc \ + gfortran \ + build-base \ + wget \ + freetype-dev \ + libpng-dev \ + openblas-dev \ + postgresql-dev \ + musl-dev \ + && ln -s /usr/include/locale.h /usr/include/xlocale.h \ + && pip install wheel \ + && pip install --no-cache-dir \ + numpy==1.15.1 \ + scipy \ + psycopg2-binary \ + sqlalchemy \ + && pip install --no-cache-dir \ + pandas==${PANDAS_VERSION} \ + && apk del .build-deps diff --git a/general/README.rst b/general/README.rst new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/general/setup.py b/general/setup.py new file mode 100644 index 0000000000000..c4a958adb103c --- /dev/null +++ b/general/setup.py @@ -0,0 +1,40 @@ +import io +import os + +from setuptools import setup, find_packages + +namespace = 'vortexa_utils' 
+name = 'vortexa_utils_general' +version = '1.0.0' +description = 'Vortexa general utils helper library' + +dependencies = [ + 'gitpython', + 'logzero', + 'tenacity' +] + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + +packages = [ + package for package in find_packages() + if package.startswith(namespace) +] + +setup( + name=name, + version=version, + description=description, + long_description=readme, + author='Marcin Szymanski', + author_email='marcin.szymanski@vortexa.com', + zip_safe=False, + packages=packages, + install_requires=dependencies, +) diff --git a/general/vortexa_utils/__init__.py b/general/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a7712f632a766 --- /dev/null +++ b/general/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T20:12:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:12:57+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/general/vortexa_utils/general/__init__.py b/general/vortexa_utils/general/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/general/vortexa_utils/git.py b/general/vortexa_utils/git.py new file mode 100644 index 0000000000000..79bc2365032ed --- /dev/null +++ b/general/vortexa_utils/git.py @@ -0,0 +1,13 @@ +import os + +from git import Repo +from logzero import logger +from tenacity import retry, wait_fixed, stop_after_attempt + + +@retry(wait=wait_fixed(10), stop=stop_after_attempt(3)) +def clone_repo(repo_url: str, path: str, ssh_key: str): + os.environ['GIT_SSH_COMMAND'] = f'ssh -i {ssh_key}' + logger.info('Cloning git repo %s to %s', repo_url, path) + Repo.clone_from(repo_url, path, branch='master') + logger.info('Repo cloned successfully') diff --git a/logging/README.md b/logging/README.md new file mode 100644 index 0000000000000..28b90c8686b22 --- /dev/null +++ b/logging/README.md @@ -0,0 +1,57 @@ +# Vortexa Utils Logging Helpers + +Small helper scripts to abstract logging-related boilerplate code. + + +## log_unhandled_exceptions + +Function decorator designed to be wrapped around any `main()` (or equivalent) function to capture errors, prefix them with `ERROR`, and log them in-line when executed in AWS Batch. + +### Problem: + +AWS Batch jobs all output logs onto a CloudWatch Log Group (`/aws/batch/job`). Therefore, to raise specific alarms, Python jobs should use logging, with the logger pattern containing a unique identifier for the job (such as the job/repo name), so that CloudWatch can filter logs and look for specific exceptions. + +When errors are raised by a Python program logging to CloudWatch, the logger pattern and the error/stacktrace are output on two consecutive lines. CloudWatch alarm triggers can only match pattern combinations which are in-line; therefore, for a CloudWatch alarm to be raised when a job fails, the logger pattern and some form of identifiable error key must be printed in-line. + + +### Solution: + +The `log_unhandled_exceptions` decorator can be wrapped around main executing functions; if any errors are raised at run-time, it will capture these errors and log them in-line with the logging pattern, using the common pattern `ERROR: `. 
CloudWatch alerts can now be set to look for (1) the unique logging pattern of the project (i.e. name) and (2) the key `ERROR`, to raise targeted alerts. The full stacktrace will still be output to CloudWatch logs. + +### Usage: + +```python +from vortexa_utils.logging import log_unhandled_exceptions + +# The following is the logger set-up boilerplate code. +# This can be done as below, or imported from a project-logger dir. +# The following is only intended as a sample and should not be copied without understanding what is happening. +import logging + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +log_format = logging.Formatter( + "PROJECT_NAME:%(name)s:%(message)s" +)  # Only a sample format, can be designed at will, as long as unique identifier (e.g. PROJECT_NAME) is included +handler.setFormatter(log_format) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +@log_unhandled_exceptions(logger) +def main(): + return int(1) + str('two') + +if __name__ == "__main__": + main() +``` + +Code snippet above would output: + +``` +PROJECT_NAME:__main__:ERROR: unsupported operand type(s) for +: 'int' and 'str' + Traceback (most recent call last): + ... ... + TypeError: unsupported operand type(s) for +: 'int' and 'str' +``` + +As a result, a CloudWatch alarm can now be set on the pattern `PROJECT_NAME ERROR` diff --git a/logging/setup.py b/logging/setup.py new file mode 100644 index 0000000000000..7081b7db26c4e --- /dev/null +++ b/logging/setup.py @@ -0,0 +1,38 @@ +import io +import os + +from setuptools import find_packages, setup + +namespace = "vortexa_utils" +description = "Vortexa Error Logging" + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, "README.md") +with io.open(readme_filename, encoding="utf-8") as readme_file: + readme = readme_file.read() + +packages = [ + package for package in find_packages() if package.startswith(namespace) +] + +requirements = [ + "logzero", + "psutil" +] + +setup( + name="vortexa_utils_logging", + version="0.0.1", + description=description, + long_description=readme, + author="Tino von Stegmann", + author_email="constantin.vonstegmann@vortexa.com", + zip_safe=False, + tests_require=["nose2"], + install_requires=requirements, + test_suite="nose2.collector.collector", + packages=packages, +) diff --git a/logging/vortexa_utils/__init__.py b/logging/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..69e3be50dac40 --- /dev/null +++ b/logging/vortexa_utils/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/logging/vortexa_utils/logging/__init__.py b/logging/vortexa_utils/logging/__init__.py new file mode 100644 index 0000000000000..14783dcbadd01 --- /dev/null +++ b/logging/vortexa_utils/logging/__init__.py @@ -0,0 +1 @@ +from .exception_decorator import log_unhandled_exceptions diff --git a/logging/vortexa_utils/logging/exception_decorator.py b/logging/vortexa_utils/logging/exception_decorator.py new file mode 100644 index 0000000000000..52b49bac513e0 --- /dev/null +++ b/logging/vortexa_utils/logging/exception_decorator.py @@ -0,0 +1,12 @@ +def log_unhandled_exceptions(logger): + def outer_wrapper(main): + def wrapper(*args, **kwargs): + try: + return main(*args, **kwargs) + except Exception as e: + logger.exception(f"ERROR: {e}") + raise e + + return wrapper + + return outer_wrapper diff --git a/logging/vortexa_utils/logging/resources.py b/logging/vortexa_utils/logging/resources.py new file mode 100644 index 0000000000000..de2bac29e6c44 
--- /dev/null +++ b/logging/vortexa_utils/logging/resources.py @@ -0,0 +1,38 @@ +import os +import resource + +import psutil +from logzero import logger + +RESOURCE_LOG = """---RESOURCE--- +User time: {0} +System time: {1} +Max resident size: {2} +Block input operations: {3} +Block output operations: {4} +---MEMORY_INFO--- +RSS: {5} +VMS: {6} +Data: {7} +""" + + +def log_resource_usage(step: str): + mem = psutil.Process(os.getpid()).memory_info() + res = resource.getrusage(resource.RUSAGE_SELF) + # mem.data is not available on every platform (e.g. macOS) + try: + data = mem.data + except AttributeError: + data = 0 + res_log = RESOURCE_LOG.format( + res.ru_utime, + res.ru_stime, + res.ru_maxrss, + res.ru_inblock, + res.ru_oublock, + mem.rss, + mem.vms, + data, + ) + logger.info(f"[resource][{step}] {res_log}") diff --git a/utils/vortexa_utils/utils/__init__.py b/utils/vortexa_utils/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/utils/vortexa_utils/utils/byte_stream_spliter.py b/utils/vortexa_utils/utils/byte_stream_spliter.py new file mode 100644 index 0000000000000..64e537577cd9a --- /dev/null +++ b/utils/vortexa_utils/utils/byte_stream_spliter.py @@ -0,0 +1,32 @@ +import io + + +socket_bytes = io.BytesIO(b"So\x01me\r\nbytes\rto\nparsB") + +byte_joiner = b''.join + +list(socket_bytes) + +def split_on(buffer, *spliters): + if not spliters: + spliters = {b'\n', b'\r'} + else: + spliters = set(spliters) + line = [] + while True: + b = buffer.read(1) + split = b in spliters + + if split or not b: + if line: + yield byte_joiner(line) + if split: + line = [] + elif not b: + return + else: + line.append(b) + +socket_bytes.seek(0) +gen = split_on(socket_bytes) +list(gen) diff --git a/utils/vortexa_utils/utils/sockets/socket_client.py b/utils/vortexa_utils/utils/sockets/socket_client.py new file mode 100644 index 0000000000000..fdc97e08c2cb1 --- /dev/null +++ b/utils/vortexa_utils/utils/sockets/socket_client.py @@ -0,0 +1,24 @@ +import socket + + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.connect(("www.python.org", 80)) + +import socket + +HOST = '127.0.0.1' # The server's hostname or IP address +PORT = 65432 # The port used by the server + +with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.connect((HOST, PORT)) + s.sendall(b'Hello, world') + data = s.recv(1024) + +print('Received', repr(data)) + +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.connect((HOST, PORT)) +sio = sock.makefile('r', encoding='ascii', errors='backslashreplace', newline=None) +next(sio) +sock.close() +sio.close() diff --git a/utils/vortexa_utils/utils/sockets/socket_server.py b/utils/vortexa_utils/utils/sockets/socket_server.py new file mode 100644 index 0000000000000..c1d427b6b0882 --- /dev/null +++ b/utils/vortexa_utils/utils/sockets/socket_server.py @@ -0,0 +1,15 @@ +import socket + +HOST = '127.0.0.1' # Standard loopback interface address (localhost) +PORT = 65432 # Port to listen on (non-privileged ports are > 1023) + +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +sock.bind((HOST, PORT)) +sock.listen() + +while True: + conn, addr = sock.accept() + with conn: + while True: + conn.sendall(b'some\rdata\nbyt\1\xffest\r\nadslfkja\n\raslkdj') diff --git a/versioning/VERSION b/versioning/VERSION new file mode 100644 index 0000000000000..7bcd0e3612da7 --- /dev/null +++ b/versioning/VERSION @@ -0,0 +1 @@ +0.0.2 \ No newline at end of file diff --git a/versioning/setup.py b/versioning/setup.py new file mode 100644 index 0000000000000..2a6c50ab207b1 --- /dev/null +++ 
b/versioning/setup.py @@ -0,0 +1,34 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T19:14:15+00:00 +import os +from setuptools import setup, find_packages +from vortexa_utils.versioning import __version__ + +namespace = 'vortexa_utils' + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +packages = [ + package for package in find_packages() + if package.startswith(namespace) +] + +setup( + name="vortexa_utils_versioning", + version=__version__, + description="", + long_description="", + + author="Richard Mathie", + author_email="richard.mathie@vortexa.com", + + zip_safe=False, + tests_require=['nose2'], + test_suite='nose2.collector.collector', + + packages=packages, +) diff --git a/versioning/tests/__init__.py b/versioning/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/versioning/tests/test_versioner.py b/versioning/tests/test_versioner.py new file mode 100644 index 0000000000000..27be1a07217a9 --- /dev/null +++ b/versioning/tests/test_versioner.py @@ -0,0 +1,47 @@ +import unittest +import os +import tempfile +from nose2.tools import params +from vortexa_utils.versioning.versioner import Versioner + +specs = [ + ((0, 0, 0), (0, 0, 1)), + ((0, 0, 1), (0, 0, 2)), + ((0, 1, 0), (0, 1, 0)), + ((0, 1, 1), (0, 1, 0)), + ((1, 0, 0), (1, 0, 0)), + ((1, 0, 1), (1, 0, 0)), + ((1, 1, 0), (1, 0, 0)), + ((1, 1, 1), (1, 0, 0)) +] + + +class TestVersioner(unittest.TestCase): + def setUp(self): + fh, filename = tempfile.mkstemp() + os.fdopen(fh).close() + self.version: Versioner = Versioner(filename) + + def tearDown(self): + os.remove(self.version.VERSION_FILE) + + def test_version_none(self): + self.assertEqual(self.version.__version__, None) + + def test_version_init(self): + self.assertEqual( + self.version.version, + self.version.SemanticVersion(0, 0, 1) + ) + self.assertTrue(os.path.isfile(self.version.VERSION_FILE)) + with open(self.version.VERSION_FILE, "r") as f: + self.assertEqual(f.readline(), "0.0.1") + + @params(*specs) + def test_version_incriment(self, flags, output): + self.test_version_init() + self.version.update_version(flags) + self.assertEqual( + self.version.version, + self.version.SemanticVersion(*output) + ) diff --git a/versioning/vortexa_utils/__init__.py b/versioning/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..a7712f632a766 --- /dev/null +++ b/versioning/vortexa_utils/__init__.py @@ -0,0 +1,5 @@ +# @Author: richard +# @Date: 2018-12-04T20:12:18+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:12:57+00:00 +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/versioning/vortexa_utils/versioning/__init__.py b/versioning/vortexa_utils/versioning/__init__.py new file mode 100644 index 0000000000000..977291bcc6396 --- /dev/null +++ b/versioning/vortexa_utils/versioning/__init__.py @@ -0,0 +1,10 @@ +from .versioner import Versioner + +version = Versioner("../../VERSION", __file__) +__version_numeric__ = version.version +__version__ = str(version) + + +if __name__ == "__main__": + from .cli import VersionCLI + VersionCLI(version).parse_args() diff --git a/versioning/vortexa_utils/versioning/__main__.py b/versioning/vortexa_utils/versioning/__main__.py new file mode 100644 index 0000000000000..c9ce8d27293a2 --- /dev/null +++ b/versioning/vortexa_utils/versioning/__main__.py @@ -0,0 +1,9 @@ +from . 
import version +from .cli import VersionCLI + +__version_numeric__ = version.version +__version__ = str(version) + + +if __name__ == "__main__": + VersionCLI(version).parse_args() diff --git a/versioning/vortexa_utils/versioning/cli.py b/versioning/vortexa_utils/versioning/cli.py new file mode 100644 index 0000000000000..8e414bb5e7c08 --- /dev/null +++ b/versioning/vortexa_utils/versioning/cli.py @@ -0,0 +1,46 @@ +from argparse import ArgumentParser +from dataclasses import dataclass, field +from vortexa_utils.versioning.versioner import Versioner + + +@dataclass +class VersionCLI(object): + versioner: Versioner + parser: ArgumentParser = field(default=None, init=False) + + def __post_init__(self): + self.parser = ArgumentParser( + description='Package Version Tool.' + ) + self.specs = self.versioner.VERSION_SPEC.split( + self.versioner.VERSION_SEP + ) + for spec in self.specs: + self.parser.add_argument( + f'--bump-{spec.lower()}', + f'-{spec[0]}', + action='store_true' + ) + + def parse_args(self): + args = self.parser.parse_args() + spec_flags = list( + getattr(args, f'bump_{spec.lower()}') + for spec in self.specs + ) + if any(spec_flags): + print(f"Current Version: {self.versioner}") + if sum(spec_flags) > 1: + print("You can only bump one spec at a time") + self.parser.print_help() + else: + self.versioner.update_version(spec_flags) + print(f"New Version {self.versioner}") + else: + print(f"{self.versioner}") + + +if __name__ == "__main__": + version = Versioner() + cli = VersionCLI(version) + cli.parse_args() diff --git a/versioning/vortexa_utils/versioning/utils.py b/versioning/vortexa_utils/versioning/utils.py new file mode 100644 index 0000000000000..0d3f9b544b13e --- /dev/null +++ b/versioning/vortexa_utils/versioning/utils.py @@ -0,0 +1,22 @@ +# @Author: richard +# @Date: 2018-12-21T16:37:39+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-21T16:37:56+00:00 + + +class cached_property(object): + """ + A property that is only computed once per instance and then replaces itself + with an ordinary attribute. Deleting the attribute resets the property. + """ # noqa + + def __init__(self, func): + self.__doc__ = getattr(func, "__doc__") + self.func = func + + def __get__(self, obj, cls): + if obj is None: + return self + + value = obj.__dict__[self.func.__name__] = self.func(obj) + return value diff --git a/versioning/vortexa_utils/versioning/versioner.py b/versioning/vortexa_utils/versioning/versioner.py new file mode 100644 index 0000000000000..285481c05ad1a --- /dev/null +++ b/versioning/vortexa_utils/versioning/versioner.py @@ -0,0 +1,99 @@ +"""Class to track the version of a package.""" +import os +from dataclasses import dataclass, field, InitVar +from collections import namedtuple +from .utils import cached_property + + +@dataclass +class Versioner(object): + VERSION_FILE: str = "VERSION" + MODULE_FILE: InitVar[str] = None + VERSION_SEP: str = "." 
+ VERSION_SPEC: str = "Major.minor.patch" + __version__: namedtuple = field(default=None, init=False) + __version_file__: namedtuple = field(default=None, init=False) + + def __post_init__(self, MODULE_FILE): + parts = [] + if MODULE_FILE is not None: + dir = os.path.dirname(os.path.abspath(MODULE_FILE)) + parts.append(dir) + parts.append(self.VERSION_FILE) + path = os.path.join(*parts) + self.__version_file__ = os.path.abspath(path) + + @cached_property + def SemanticVersion(self): + version_type = namedtuple( + "SemanticVersion", + self.VERSION_SPEC.lower().split(self.VERSION_SEP) + ) + return version_type + + def init_version(self): + fields = self.SemanticVersion._fields + version = ( + 1 if i == len(fields) - 1 else 0 + for i, field in enumerate(fields) + ) + self.version = self.SemanticVersion(*version) + self.write() + return self.version + + def new_version(self, spec_flags): + bumped = False + for spec, ver in zip(spec_flags, self.version): + if bumped: + yield 0 + elif spec: + bumped = True + yield ver + 1 + else: + yield ver + + def update_version(self, spec_flags): + version = self.SemanticVersion(*self.new_version(spec_flags)) + self.version = version + self.write() + return version + + def read(self): + try: + with open(self.__version_file__, "r") as file: + version_string = file.readline().strip() + except FileNotFoundError: + version = self.init_version() + else: + if version_string == "": + version = self.init_version() + else: + version = self.parse_version(version_string) + self.version = version + return version + + def write(self): + with open(self.__version_file__, "w") as file: + file.write(str(self)) + + @property + def version(self): + if self.__version__ is None: + self.read() + return self.__version__ + + @version.setter + def version(self, version): + if isinstance(version, str): + version = self.parse_version(version) + if isinstance(version, self.SemanticVersion): + self.__version__ = version + else: + raise TypeError("Version is not str or self.SemanticVersion") + + def parse_version(self, version: str): + parts = (int(v) for v in version.split(self.VERSION_SEP)) + return self.SemanticVersion(*parts) + + def __str__(self): + return self.VERSION_SEP.join(str(v) for v in self.version) diff --git a/youve_got_mail/README.md b/youve_got_mail/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/README.rst b/youve_got_mail/README.rst new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/requirements.txt b/youve_got_mail/requirements.txt new file mode 100644 index 0000000000000..dfedbe37089fc --- /dev/null +++ b/youve_got_mail/requirements.txt @@ -0,0 +1,2 @@ +sendgrid < 6.0.0 +boto3 diff --git a/youve_got_mail/setup.py b/youve_got_mail/setup.py new file mode 100644 index 0000000000000..1b998bcd47eba --- /dev/null +++ b/youve_got_mail/setup.py @@ -0,0 +1,48 @@ +# @Author: richard +# @Date: 2018-12-04T17:54:43+00:00 +# @Last modified by: richard +# @Last modified time: 2018-12-04T20:16:54+00:00 +import os +import io +from setuptools import setup, find_packages + +namespace = 'vortexa_utils' +name = 'vortexa_utils_youve_got_mail' +version = '1' +description = 'Vortexa E-mail utils helper library' + +dependencies = [ + 'boto3', + 'sendgrid<6.0.0' +] + +# Setup boilerplate below + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, 'README.rst') +with io.open(readme_filename, encoding='utf-8') as readme_file: + readme = readme_file.read() + 
+packages = [ + package for package in find_packages() + if package.startswith(namespace) +] + +setup( + name=name, + version=version, + description=description, + long_description=readme, + + author='Richard Mathie', + author_email='richard.mathie@vortexa.com', + + zip_safe=False, + test_suite='nose2.collector.collector', + tests_require=['nose2'], + + packages=packages, + install_requires=dependencies, + extras_require={} +) diff --git a/youve_got_mail/tests/__init__.py b/youve_got_mail/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/vortexa_utils/__init__.py b/youve_got_mail/vortexa_utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/youve_got_mail/vortexa_utils/youve_got_mail.py b/youve_got_mail/vortexa_utils/youve_got_mail.py new file mode 100644 index 0000000000000..aae86d37cf7e6 --- /dev/null +++ b/youve_got_mail/vortexa_utils/youve_got_mail.py @@ -0,0 +1,43 @@ +import base64 +import boto3 +import json +import mimetypes +import sendgrid +from sendgrid.helpers.mail import * +from typing import List + + +secretsmanager = boto3.client('secretsmanager') + + +def create_sendgrid_client(): + secret = secretsmanager.get_secret_value(SecretId='prod/sendgrid') + api_key = json.loads(secret['SecretString'])['SENDGRID_API_KEY'] + + return sendgrid.SendGridAPIClient(apikey=api_key) + + +def build_attachment(buf: bytes, filename: str, disposition: str = "attachment", content_id: str = None): + encoded = base64.b64encode(buf).decode() + + mime_type, encoding = mimetypes.guess_type(filename) + + attachment = Attachment() + attachment.content = encoded + attachment.type = mime_type + attachment.filename = filename + attachment.disposition = disposition + attachment.content_id = content_id + + return attachment + + +def add_recipients(recipients: List[str], mail: Mail): + personalization = Personalization() + + for rec in recipients: + personalization.add_to(Email(rec)) + + mail.add_personalization(personalization) + + return mail From b214a68f1a4450feff8aa79b7fad34512bee3a9f Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 9 Nov 2019 20:19:29 +0000 Subject: [PATCH 20/75] added df argument to stop original df from getting deleted by upsert_ignore --- .pre-commit-config.yaml | 3 ++- pandas/io/sql.py | 39 ++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e79383238dc7e..fd297bc9b4cbf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,13 +4,14 @@ repos: hooks: - id: black language_version: python3.7 + exclude: ^pandas/io/sql_scratch.py$|^vendor/$ - repo: https://gitlab.com/pycqa/flake8 rev: 3.7.7 hooks: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions] - exclude: ^pandas/io/sql_scratch.py$ + exclude: ^pandas/io/sql_scratch.py$|^vendor/$ - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.20 hooks: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e1630fbc9a6f4..5f77be5cd56b4 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -695,14 +695,15 @@ def _upsert_ignore_processing(self): pkeys_from_database = _wrap_result(data=result, columns=primary_keys) - # Delete rows from self.frame where primary keys match - self.frame = self._get_index_formatted_dataframe() - + # Get temporary dataframe so as not to delete values from main df + temp = self._get_index_formatted_dataframe() + # Delete rows from dataframe where primary keys match 
to_be_deleted_mask = ( - self.frame[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) + temp[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) ) + temp.drop(self.frame[to_be_deleted_mask].index, inplace=True) - self.frame.drop(self.frame[to_be_deleted_mask].index, inplace=True) + return temp def _get_primary_key_data(self): """ @@ -778,7 +779,7 @@ def _get_index_formatted_dataframe(self): # The following check ensures that the method can be called multiple times, # without the dataframe getting wrongfully formatted if all(idx in self.frame.columns for idx in self.index): - temp = self.frame + temp = self.frame.copy() else: temp = self.frame.copy() temp.index.names = self.index @@ -787,19 +788,16 @@ except ValueError as err: raise ValueError("duplicate name in index/columns: {0}".format(err)) else: - temp = self.frame + temp = self.frame.copy() return temp - def insert_data(self): - - temp = self._get_index_formatted_dataframe() - - # TODO: column_names by list comprehension? - column_names = list(map(str, temp.columns)) + @staticmethod + def insert_data(data): + column_names = list(map(str, data.columns)) ncols = len(column_names) data_list = [None] * ncols - blocks = temp._data.blocks + blocks = data._data.blocks for b in blocks: if b.is_datetime: @@ -827,17 +825,19 @@ def insert_data(self): def insert(self, chunksize=None, method=None): if self.if_exists == "upsert_ignore": - self._upsert_ignore_processing() - self._insert(chunksize=chunksize, method=method) + data = self._upsert_ignore_processing() + self._insert(data=data, chunksize=chunksize, method=method) elif self.if_exists == "upsert_delete": delete_statement = self._upsert_delete_processing() + # nested transaction to ensure delete is + # rolled back in case of poor data with self.pd_sql.run_transaction() as trans: trans.execute(delete_statement) self._insert(chunksize=chunksize, method=method) else: self._insert(chunksize=chunksize, method=method) - def _insert(self, chunksize=None, method=None): + def _insert(self, data=None, chunksize=None, method=None): # set insert method if method is None: exec_insert = self._execute_insert @@ -848,9 +848,10 @@ else: raise ValueError("Invalid parameter `method`: {}".format(method)) - keys, data_list = self.insert_data() + data_to_add = data if data is not None else self.frame + keys, data_list = self.insert_data(data=data_to_add) - nrows = len(self.frame) + nrows = len(data) if nrows == 0: return
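The fix in the patch above is easier to see on a toy frame: `_upsert_ignore_processing` now masks and drops clashing rows on a copy, leaving the caller's DataFrame intact. A self-contained sketch of that mask-and-drop step (table contents invented for illustration; this mirrors the logic rather than calling the patched pandas API, and a dict is passed to `isin` so it does per-column membership):

```python
import pandas as pd

incoming = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
pkeys_from_database = pd.DataFrame({"id": [2, 3]})  # pretend DB contents

temp = incoming.copy()  # stand-in for _get_index_formatted_dataframe()
# rows whose primary-key values already exist in the database...
mask = temp[["id"]].isin(pkeys_from_database[["id"]].to_dict("list")).all(1)
# ...are dropped from the copy only
temp = temp.drop(temp[mask].index)

assert list(temp["id"]) == [1]   # only genuinely new rows remain
assert len(incoming) == 3        # the caller's original frame is untouched
```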
From 78f8e863395b86f45fdac7fba59048a860027f55 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 10 Nov 2019 13:39:31 +0000 Subject: [PATCH 22/75] tidying up sql.py code and adding helpers to sql_scratch.py --- pandas/io/sql.py | 68 +++++++++++++++++++++++++--------------- pandas/io/sql_scratch.py | 22 +++++++++++++ 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/pandas/io/sql.py 
+++ b/pandas/io/sql.py @@ -657,8 +657,10 @@ def create(self): elif self.if_exists == "append": pass elif self.if_exists == "upsert_delete": + # Pass here, upsert is handled in self.insert() method pass elif self.if_exists == "upsert_ignore": + # Pass here, upsert is handled in self.insert() method pass else: raise ValueError( @@ -668,6 +670,16 @@ def create(self): self._execute_create() def _upsert_delete_processing(self): + """ + Upsert delete prioritizes incoming data over what is already in the DB. + This method generates the Delete statement which is to be executed + in the same transaction as the ensuing data insert. + + Returns + ---------- + delete_statement : sqlalchemy.sql.dml.Delete object + - Delete statement to be executed against DB + """ from sqlalchemy import tuple_ # Primary key data @@ -679,29 +691,36 @@ def _upsert_delete_processing(self): return delete_statement def _upsert_ignore_processing(self): + """ + Upsert Ignore prioritizes data in DB over incoming data. + This method creates a copy of the incoming dataframe, + fetches matching data from DB, deletes matching data from copied frame, + and returns that frame to be inserted. + + Returns + ---------- + temp : DataFrame + - Filtered dataframe, with values that are already in DB removed. + """ from sqlalchemy import tuple_, select # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() - # Fetch matching pkey values from database columns_to_fetch = [self.table.c[key] for key in primary_keys] - select_statement = select(columns_to_fetch).where( tuple_(*columns_to_fetch).in_(primary_key_values) ) - - result = self.pd_sql.execute(select_statement) - - pkeys_from_database = _wrap_result(data=result, columns=primary_keys) - + pkeys_from_database = _wrap_result( + data=self.pd_sql.execute(select_statement), columns=primary_keys + ) # Get temporary dataframe so as not to delete values from main df temp = self._get_index_formatted_dataframe() # Delete rows from dataframe where primary keys match to_be_deleted_mask = ( temp[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) ) - temp.drop(self.frame[to_be_deleted_mask].index, inplace=True) + temp.drop(temp[to_be_deleted_mask].index, inplace=True) return temp @@ -717,7 +736,6 @@ def _get_primary_key_data(self): - primary_key_values : Iterable of dataframe rows corresponding to primary_key columns """ - # reflect MetaData object and assign contents of db to self.table attribute self.pd_sql.meta.reflect(only=[self.name], views=True) self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) @@ -734,7 +752,8 @@ def _get_primary_key_data(self): if len(primary_keys) == 0: raise ValueError(f"No primary keys found for table {self.name}") - primary_key_values = zip(*[self.frame[key] for key in primary_keys]) + temp = self._get_index_formatted_dataframe() + primary_key_values = zip(*[temp[key] for key in primary_keys]) return primary_keys, primary_key_values def _execute_insert(self, conn, keys, data_iter): @@ -774,21 +793,13 @@ def _get_index_formatted_dataframe(self): # Originally this functionality formed the first step of the insert_data method. # It will be useful to have in other places, so moved here to keep code DRY. 
- 
+ temp = self.frame.copy()
 if self.index is not None:
- # The following check ensures that the method can be called multiple times,
- # without the dataframe getting wrongfully formatted
- if all(idx in self.frame.columns for idx in self.index):
- temp = self.frame.copy()
- else:
- temp = self.frame.copy()
- temp.index.names = self.index
- try:
- temp.reset_index(inplace=True)
- except ValueError as err:
- raise ValueError("duplicate name in index/columns: {0}".format(err))
- else:
- temp = self.frame.copy()
+ temp.index.names = self.index
+ try:
+ temp.reset_index(inplace=True)
+ except ValueError as err:
+ raise ValueError("duplicate name in index/columns: {0}".format(err))
 return temp
@@ -824,6 +835,9 @@ def insert_data(data):
 return column_names, data_list
 def insert(self, chunksize=None, method=None):
+ """
+ Determines what data to pass to the underlying insert method.
+ """
 if self.if_exists == "upsert_ignore":
 data = self._upsert_ignore_processing()
 self._insert(data=data, chunksize=chunksize, method=method)
 elif self.if_exists == "upsert_delete":
 delete_statement = self._upsert_delete_processing()
- # nested transaction to ensure delete is
- # rolled back in case of poor data
+ # nested transaction to ensure delete is rolled back in case of poor data
 with self.pd_sql.run_transaction() as trans:
 trans.execute(delete_statement)
 self._insert(chunksize=chunksize, method=method)
 else:
 self._insert(chunksize=chunksize, method=method)
@@ -848,8 +862,10 @@ def _insert(self, data=None, chunksize=None, method=None):
 else:
 raise ValueError("Invalid parameter `method`: {}".format(method))
- data_to_add = data if data is not None else self.frame
- keys, data_list = self.insert_data(data=data_to_add)
+ if data is None:
+ data = self._get_index_formatted_dataframe()
+
+ keys, data_list = self.insert_data(data=data)
 nrows = len(data)
 diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index e0c683813618c..fb81e527d8826 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -83,6 +83,28 @@ def generate_mask_of_masks(list_of_masks): engine.execute( "create table charterers(id text primary key, name text, energy integer)" )
+def create_test_df(df):
+ df2 = df.head().copy()
+ df2['name'] = df2['name'].apply(lambda x: x + '_NEW')
+ return df2
+
+def read_table(table):
+ with engine.connect() as conn:
+ result = conn.execute(f'select * from {table}')
+ return result.fetchall()
+
+def clear_table(table):
+ with engine.connect() as conn:
+ conn.execute(f'delete from {table}')
+
+def top_up_table(table):
+ df.to_sql(table, con=engine, if_exists='append', index=False)
+ return read_table(table)
+
+def reset_table(table):
+ clear_table(table)
+ top_up_table(table)
+
 df.to_sql(table_name, index=False, if_exists="append", con=engine) db = SQLDatabase(engine, schema=None, meta=None)
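The index handling consolidated into ``_get_index_formatted_dataframe`` above reduces to a copy-and-reset of the frame's index. A minimal standalone sketch of the same logic (the frame, index name and assertion are illustrative, not taken from the patch)::

    import pandas as pd

    # A frame with a named index, standing in for self.frame
    df = pd.DataFrame({"name": ["a", "b"]}, index=pd.Index([1, 2], name="id"))

    temp = df.copy()                # work on a copy, never the caller's frame
    temp.index.names = ["id"]       # align index names, as self.index would
    temp.reset_index(inplace=True)  # the index becomes an ordinary column
    assert list(temp.columns) == ["id", "name"]

If the frame already holds a column with the index's name, ``reset_index`` raises ``ValueError``, which the method above re-raises with the "duplicate name in index/columns" message.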
+ """ if self.if_exists == "upsert_ignore": data = self._upsert_ignore_processing() self._insert(data=data, chunksize=chunksize, method=method) @@ -848,8 +862,10 @@ def _insert(self, data=None, chunksize=None, method=None): else: raise ValueError("Invalid parameter `method`: {}".format(method)) - data_to_add = data if data is not None else self.frame - keys, data_list = self.insert_data(data=data_to_add) + if data is None: + data = self._get_index_formatted_dataframe() + + keys, data_list = self.insert_data(data=data) nrows = len(data) diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py index e0c683813618c..fb81e527d8826 100644 --- a/pandas/io/sql_scratch.py +++ b/pandas/io/sql_scratch.py @@ -83,6 +83,28 @@ def generate_mask_of_masks(list_of_masks): engine.execute( "create table charterers(id text primary key, name text, energy integer)" ) +def create_test_df(df): + df2 = df.head().copy() + df2['name'] = df2['name'].apply(lambda x: x + '_NEW') + return df2 + +def read_table(table): + with engine.connect() as conn: + result = conn.execute(f'select * from {table}') + return result.fetchall() + +def clear_table(table): + with engine.connect() as conn: + conn.execute(f'delete from {table}') + +def top_up_table(table): + df.to_sql(table, con=engine, if_exists='append', index=False) + return read_table() + +def reset_table(table): + clear_table(table) + top_up_table(table) + df.to_sql(table_name, index=False, if_exists="append", con=engine) db = SQLDatabase(engine, schema=None, meta=None) From bbcf92b5a8d1f18178a35ac11a2374ae0cbc1c8a Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 11 Nov 2019 11:51:53 +0000 Subject: [PATCH 24/75] made upsert_delete mask index agnostic --- pandas/io/sql.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b77e2921961a2..c532e4daf668d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -717,8 +717,11 @@ def _upsert_ignore_processing(self): # Get temporary dataframe so as not to delete values from main df temp = self._get_index_formatted_dataframe() # Delete rows from dataframe where primary keys match + # Method requires tuples, to account for cases where indexes do not match to_be_deleted_mask = ( - temp[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) + temp[primary_keys].apply(tuple, 1).isin( + pkeys_from_database[primary_keys].apply(tuple, 1) + ) ) temp.drop(temp[to_be_deleted_mask].index, inplace=True) From 73fea73ccc103a6c49b6cb044d3d1f66619ceeb7 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 11 Nov 2019 11:51:53 +0000 Subject: [PATCH 25/75] made upsert_delete mask index agnostic --- pandas/io/sql.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b77e2921961a2..c532e4daf668d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -717,8 +717,11 @@ def _upsert_ignore_processing(self): # Get temporary dataframe so as not to delete values from main df temp = self._get_index_formatted_dataframe() # Delete rows from dataframe where primary keys match + # Method requires tuples, to account for cases where indexes do not match to_be_deleted_mask = ( - temp[primary_keys].isin(pkeys_from_database[primary_keys]).all(1) + temp[primary_keys].apply(tuple, 1).isin( + pkeys_from_database[primary_keys].apply(tuple, 1) + ) ) temp.drop(temp[to_be_deleted_mask].index, inplace=True) From d8b76860c103d619171b8e55640611d930723cd9 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Wed, 13 Nov 2019 10:23:07 +0000 Subject: 
[PATCH 26/75] updated documentation for to_sql method --- pandas/io/sql.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c532e4daf668d..5613334c21936 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -466,10 +466,15 @@ def to_sql(
 schema : str, optional
 Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default).
- if_exists : {'fail', 'replace', 'append'}, default 'fail'
+ if_exists : {'fail', 'replace', 'append', 'upsert_delete', 'upsert_ignore'},
+ default 'fail'.
 - fail: If table exists, do nothing.
 - replace: If table exists, drop it, recreate it, and insert data.
 - append: If table exists, insert data. Create if does not exist.
+ - upsert_ignore: If table exists, perform an UPSERT (based on primary keys),
+ prioritising records already in the database over incoming duplicates.
+ - upsert_delete: If table exists, perform an UPSERT (based on primary keys),
+ prioritising incoming records over duplicates already in the database.
 index : boolean, default True
 Write DataFrame index as a column.
 index_label : str or sequence, optional
@@ -502,7 +507,7 @@ def to_sql(
 "append",
 "upsert_ignore",
 "upsert_delete",
- ): # TODO: add upserts
+ ):
 raise ValueError("'{0}' is not valid for if_exists".format(if_exists))
 pandas_sql = pandasSQL_builder(con, schema=schema)
@@ -719,8 +724,8 @@ def _upsert_ignore_processing(self):
 # Delete rows from dataframe where primary keys match
 # Method requires tuples, to account for cases where indexes do not match
 to_be_deleted_mask = (
- temp[primary_keys].apply(tuple, 1).isin(
- pkeys_from_database[primary_keys].apply(tuple, 1)
+ temp[primary_keys].apply(tuple, axis=1).isin(
+ pkeys_from_database[primary_keys].apply(tuple, axis=1)
 )
 )
 temp.drop(temp[to_be_deleted_mask].index, inplace=True)
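The two new modes documented above are driven entirely through the existing ``to_sql`` interface. A minimal sketch of how they differ, assuming this branch is installed (the engine, table and frames are illustrative)::

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine("sqlite://")  # illustrative in-memory database
    engine.execute("create table charterers (id text primary key, name text)")
    pd.DataFrame({"id": ["x1"], "name": ["old"]}).to_sql(
        "charterers", con=engine, if_exists="append", index=False
    )

    new = pd.DataFrame({"id": ["x1", "x2"], "name": ["new", "other"]})
    # upsert_delete: incoming rows win, so "x1" ends up as "new";
    # upsert_ignore would keep "old" and insert only "x2".
    new.to_sql("charterers", con=engine, if_exists="upsert_delete", index=False)

Both modes require the target table to exist and to declare a primary key; ``_get_primary_key_data`` raises a ``ValueError`` when no primary keys are found.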
From 0dfe913e526c58b0f85ebde9b26d407bf57e1a64 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Wed, 13 Nov 2019 18:01:20 +0000 Subject: [PATCH 28/75] Added basic tests - need to figure out why postgres tests aren't working --- pandas/tests/io/test_sql.py | 128 +++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 89bc98b5a1006..e7f514b583e84 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -200,6 +200,26 @@ SELECT * FROM iris """ },
+ "create_pkey_table": {
+ "sqlite": """CREATE TABLE pkey_table (
+ "A" Integer Primary Key,
+ "B" TEXT
+ )""",
+ "mysql": """CREATE TABLE pkey_table (
+ `A` INTEGER,
+ `B` TEXT,
+ PRIMARY KEY (A)
+ )""",
+ "postgresql": """CREATE TABLE pkey_table (
+ "A" INTEGER PRIMARY KEY,
+ "B" TEXT
+ )"""
+ },
+ "insert_pkey_table": {
+ "sqlite": """INSERT INTO pkey_table VALUES (?, ?)""",
+ "mysql": """INSERT INTO pkey_table VALUES (%s, %s)""",
+ "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""",
+ }
 }
@@ -306,6 +326,17 @@ def _check_iris_loaded_frame(self, iris_frame):
 assert issubclass(pytype, np.floating)
 tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
+ def _create_pkey_table(self):
+ self.drop_table("pkey_table")
+ self._get_exec().execute(SQL_STRINGS["create_pkey_table"][self.flavor])
+ ins = SQL_STRINGS["insert_pkey_table"][self.flavor]
+ data = [
+ (1, 'name1'),
+ (2, 'name2'),
+ (3, 'name3')
+ ]
+ self._get_exec().execute(ins, data)
+
 def _load_test1_data(self):
 columns = ["index", "A", "B", "C", "D"]
 data = [
@@ -365,7 +396,8 @@ def _load_test3_data(self):
 ]
 self.test_frame3 = DataFrame(data, columns=columns)
- 
+
+ 
 def _load_raw_sql(self):
 self.drop_table("types_test_data")
 self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor])
@@ -401,6 +433,17 @@ def _load_raw_sql(self):
 self._get_exec().execute(
 ins["query"], [d[field] for field in ins["fields"]]
 )
+
+ def _load_pkey_table_data(self):
+ columns = ["A", "B"]
+ data = [
+ (1, 'new_name1'),
+ (2, 'new_name2'),
+ (4, 'name4'),
+ (5, 'name5')
+ ]
+
+ self.pkey_table_frame = DataFrame(data, columns=columns)
 def _count_rows(self, table_name):
 result = (
@@ -509,6 +552,81 @@ def sample(pd_table, conn, keys, data_iter):
 # Nuke table
 self.drop_table("test_frame1")
+ def _to_sql_upsert_ignore(self):
+ """
+ Original table: 3 rows
+ pkey_table_frame: 4 rows (2 duplicate keys)
+ Expected after upsert:
+ - table len = 5
+ - Original database values for rows with duplicate keys
+ - dataframe has all original values
+ """
+ # Nuke
+ self.drop_table('pkey_table')
+ # Re-create original table
+ self._create_pkey_table()
+ # Original table exists and has 3 rows
+ assert self.pandasSQL.has_table("pkey_table")
+ assert
self._count_rows("pkey_table") == 3
+ # Insert new dataframe
+ self.pandasSQL.to_sql(
+ self.pkey_table_frame,
+ "pkey_table",
+ if_exists="upsert_ignore",
+ index=False
+ )
+ # Check table len correct
+ assert self._count_rows("pkey_table") == 5
+ # Check original DB values maintained for duplicate keys
+ duplicate_keys = [1, 2]
+ duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)"""
+ duplicate_val = self._get_exec().execute(
+ duplicate_key_query, duplicate_keys
+ )
+ data_from_db = sorted([val[0] for val in duplicate_val])
+ expected = sorted(["name1", "name2"])
+ assert data_from_db == expected
+ # Finally, confirm that duplicate values are not removed from original df object
+ assert len(self.pkey_table_frame.index) == 4
+
+ def _to_sql_upsert_delete(self):
+ """
+ Original table: 3 rows
+ pkey_table_frame: 4 rows (2 duplicate keys)
+ Expected after upsert:
+ - table len = 5
+ - dataframe values for rows with duplicate keys
+ """
+ # Nuke
+ self.drop_table('pkey_table')
+ # Re-create original table
+ self._create_pkey_table()
+ # Original table exists and has 3 rows
+ assert self.pandasSQL.has_table("pkey_table")
+ assert self._count_rows("pkey_table") == 3
+ # Insert new dataframe
+ self.pandasSQL.to_sql(
+ self.pkey_table_frame,
+ "pkey_table",
+ if_exists="upsert_delete",
+ index=False
+ )
+ # Check table len correct
+ assert self._count_rows("pkey_table") == 5
+ # Check incoming dataframe values kept for duplicate keys
+ duplicate_keys = [1, 2]
+ duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)"""
+ duplicate_val = self._get_exec().execute(
+ duplicate_key_query, duplicate_keys
+ )
+ data_from_db = sorted([val[0] for val in duplicate_val])
+ data_from_df = sorted(
+ list(
+ self.pkey_table_frame.loc[self.pkey_table_frame['A'].isin(duplicate_keys), 'B']
+ )
+ )
+ assert data_from_db == data_from_df
+
 def _roundtrip(self):
 self.drop_table("test_frame_roundtrip")
 self.pandasSQL.to_sql(self.test_frame1, "test_frame_roundtrip")
@@ -594,6 +712,7 @@ def load_test_data_and_sql(self):
 self._load_test1_data()
 self._load_test2_data()
 self._load_test3_data()
+ self._load_pkey_table_data()
 self._load_raw_sql()
 def test_read_sql_iris(self):
@@ -1243,6 +1362,7 @@ def setup_class(cls):
 def load_test_data_and_sql(self):
 self._load_raw_sql()
 self._load_test1_data()
+ self._load_pkey_table_data()
 @pytest.fixture(autouse=True)
 def setup_method(self, load_iris_data):
@@ -1301,6 +1421,12 @@ def test_to_sql_method_multi(self):
 def test_to_sql_method_callable(self):
 self._to_sql_method_callable()
+ def test_to_sql_upsert_ignore(self):
+ self._to_sql_upsert_ignore()
+
+ def test_to_sql_upsert_delete(self):
+ self._to_sql_upsert_delete()
+
 def test_create_table(self):
 temp_conn = self.connect()
 temp_frame = DataFrame(
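The row counts asserted above (3 existing rows, 4 incoming, 2 clashing keys, 5 rows after ``upsert_ignore``) follow from the tuple mask built in ``_upsert_ignore_processing``. A standalone sketch using the same fixture values (the frames are illustrative)::

    import pandas as pd

    incoming = pd.DataFrame(
        {"A": [1, 2, 4, 5], "B": ["new_name1", "new_name2", "name4", "name5"]}
    )
    in_db = pd.DataFrame({"A": [1, 2, 3]})  # primary keys already in the table
    pkeys = ["A"]

    # Row-wise tuples keep multi-column keys comparable even when indexes differ
    mask = incoming[pkeys].apply(tuple, axis=1).isin(
        in_db[pkeys].apply(tuple, axis=1)
    )
    filtered = incoming.drop(incoming[mask].index)
    assert list(filtered["A"]) == [4, 5]  # 3 existing + 2 new rows = 5 total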
+ )""", + "posgresql": """CREATE TABLE pkey_table ( + "A" INTEGER PRIMARY KEY, + "B" TEXT + )""" + }, + "insert_pkey_table": { + "sqlite": """INSERT INTO pkey_table VALUES (?, ?)""", + "mysql": """INSERT INTO pkey_table VALUES (%s, %s)""", + "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""", + } } @@ -306,6 +326,17 @@ def _check_iris_loaded_frame(self, iris_frame): assert issubclass(pytype, np.floating) tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) + def _create_pkey_table(self): + self.drop_table("pkey_table") + self._get_exec().execute(SQL_STRINGS["create_pkey_table"][self.flavor]) + ins = SQL_STRINGS["insert_pkey_table"][self.flavor] + data = [ + (1, 'name1'), + (2, 'name2'), + (3, 'name3') + ] + self._get_exec().execute(ins, data) + def _load_test1_data(self): columns = ["index", "A", "B", "C", "D"] data = [ @@ -365,7 +396,8 @@ def _load_test3_data(self): ] self.test_frame3 = DataFrame(data, columns=columns) - + + def _load_raw_sql(self): self.drop_table("types_test_data") self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor]) @@ -401,6 +433,17 @@ def _load_raw_sql(self): self._get_exec().execute( ins["query"], [d[field] for field in ins["fields"]] ) + + def _load_pkey_table_data(self): + columns = ["A", "B"] + data = [ + (1, 'new_name1'), + (2, 'new_name2'), + (4, 'name4'), + (5, 'name5') + ] + + self.pkey_table_frame = DataFrame(data, columns=columns) def _count_rows(self, table_name): result = ( @@ -509,6 +552,81 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") + def _to_sql_upsert_ignore(self): + """ + Original table: 3 rows + pkey_table_frame: 4 rows (2 duplicate keys) + Expected after upsert: + - table len = 5 + - Original database values for rows with duplicate keys + - dataframe has all original values + """ + # Nuke + self.drop_table('pkey_table') + # Re-create original table + self._create_pkey_table() + # Original table exists and as 3 rows + assert self.pandasSQL.has_table("pkey_table") + assert self._count_rows("pkey_table") == 3 + # Insert new dataframe + self.pandasSQL.to_sql( + self.pkey_table_frame, + "pkey_table", + if_exists="upsert_ignore", + index=False + ) + # Check table len correct + assert self._count_rows("pkey_table") == 5 + # Check original DB values maintained for duplicate keys + duplicate_keys = [1, 2] + duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)""" + duplicate_val = self._get_exec().execute( + duplicate_key_query, duplicate_keys + ) + data_from_db = sorted([val[0] for val in duplicate_val]) + expected = sorted(["name1", "name2"]) + assert data_from_db == expected + # Finally, confirm that duplicate values are not removed from original df object + assert len(self.pkey_table_frame.index) == 4 + + def _to_sql_upsert_delete(self): + """ + Original table: 3 rows + pkey_table_frame: 4 rows (2 duplicate keys) + Expected after upsert: + - table len = 5 + - dataframe values for rows with duplicate keys + """ + # Nuke + self.drop_table('pkey_table') + # Re-create original table + self._create_pkey_table() + # Original table exists and as 3 rows + assert self.pandasSQL.has_table("pkey_table") + assert self._count_rows("pkey_table") == 3 + # Insert new dataframe + self.pandasSQL.to_sql( + self.pkey_table_frame, + "pkey_table", + if_exists="upsert_delete", + index=False + ) + # Check table len correct + assert self._count_rows("pkey_table") == 5 + # Check original DB values maintained for duplicate keys + duplicate_keys = [1, 2] + duplicate_key_query = 
"""SELECT B FROM pkey_table WHERE A IN (?, ?)""" + duplicate_val = self._get_exec().execute( + duplicate_key_query, duplicate_keys + ) + data_from_db = sorted([val[0] for val in duplicate_val]) + data_from_df = sorted( + list( + self.pkey_table_frame.loc[self.pkey_table_frame['A'].isin(duplicate_keys), 'B'] + ) + ) + assert data_from_db == data_from_df + def _roundtrip(self): self.drop_table("test_frame_roundtrip") self.pandasSQL.to_sql(self.test_frame1, "test_frame_roundtrip") @@ -594,6 +712,7 @@ def load_test_data_and_sql(self): self._load_test1_data() self._load_test2_data() self._load_test3_data() + self._load_pkey_table_data() self._load_raw_sql() def test_read_sql_iris(self): @@ -1243,6 +1362,7 @@ def setup_class(cls): def load_test_data_and_sql(self): self._load_raw_sql() self._load_test1_data() + self._load_pkey_table_data() @pytest.fixture(autouse=True) def setup_method(self, load_iris_data): @@ -1301,6 +1421,12 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() + def test_to_sql_upsert_ignore(self): + self._to_sql_upsert_ignore() + + def test_to_sql_upsert_delete(self): + self._to_sql_upsert_delete() + def test_create_table(self): temp_conn = self.connect() temp_frame = DataFrame( From 3fafc955467ea35878cdf16a0fb81b677bfc08ca Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 14 Nov 2019 18:27:34 +0000 Subject: [PATCH 30/75] updated docstrings and added desription to --- doc/source/whatsnew/v1.0.0.rst | 6 ++-- pandas/io/sql.py | 52 +++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a5af4e727391a..73440ff180642 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -26,9 +26,7 @@ Enhancements .. _whatsnew_1000.enhancements.other: -Other enhancements -^^^^^^^^^^^^^^^^^^ - +Other enhancement - :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) - :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - The :ref:`integer dtype ` with support for missing values can now be converted to @@ -37,7 +35,7 @@ Other enhancements pandas (so it will become an integer or float dtype depending on the presence of missing data). (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - +- :meth:`DataFrame.to_sql` now supports upsert statements. To facilitate this, the ``if_exists`` argument of now accepts ``upsert_delete`` and ``upsert_ignore`` parameters (:issue:`14553`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5613334c21936..d71b6bf23d42c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -662,10 +662,8 @@ def create(self): elif self.if_exists == "append": pass elif self.if_exists == "upsert_delete": - # Pass here, upsert is handled in self.insert() method pass elif self.if_exists == "upsert_ignore": - # Pass here, upsert is handled in self.insert() method pass else: raise ValueError( @@ -676,14 +674,16 @@ def create(self): def _upsert_delete_processing(self): """ - Upsert delete prioritizes incoming data over what is already in the DB. - This method generates the Delete statement which is to be executed - in the same transaction as the ensuing data insert. + Generate delete statement, to remove rows with clashing primary key from database. 
+
+ `upsert_delete` prioritizes incoming data over existing data in the DB.
+ This method generates the Delete statement for duplicate rows,
+ which is to be executed in the same transaction as the ensuing data insert.
 Returns
 ----------
- delete_statement : sqlalchemy.sql.dml.Delete object
- - Delete statement to be executed against DB
+ sqlalchemy.sql.dml.Delete
+ Delete statement to be executed against DB
 """
 from sqlalchemy import tuple_
@@ -697,15 +697,17 @@ def _upsert_delete_processing(self):
 def _upsert_ignore_processing(self):
 """
- Upsert Ignore prioritizes data in DB over incoming data.
+ Delete clashing values from a copy of the incoming dataframe.
+
+ `upsert_ignore` prioritizes data in DB over incoming data.
 This method creates a copy of the incoming dataframe,
 fetches matching data from DB, deletes matching data from copied frame,
 and returns that frame to be inserted.
 Returns
 ----------
- temp : DataFrame
- - Filtered dataframe, with values that are already in DB removed.
+ DataFrame
+ Filtered dataframe, with values that are already in DB removed.
 """
 from sqlalchemy import tuple_, select
@@ -734,15 +736,19 @@ def _upsert_ignore_processing(self):
 def _get_primary_key_data(self):
 """
- Upsert workflows require knowledge of what is already in the database
- this method reflects the meta object and gets a list of primary keys
+ Get primary key names from database, and return columns with the same names from the dataframe.
+
+ Upsert workflows require knowledge of what is already in the database.
+ This method reflects the meta object and gets a list of primary keys;
+ it then returns all columns from the incoming dataframe with names matching
+ these keys.
 Returns
 -------
- primary_keys, primary_key_values : Tuple[List[str], Iterable]
- - primary_keys : List of primary key column names
- - primary_key_values : Iterable of dataframe rows
- corresponding to primary_key columns
+ primary_keys : list of str
+ Primary key names
+ primary_key_values : iterable
+ DataFrame rows, for columns corresponding to `primary_key` names
 """
 # reflect MetaData object and assign contents of db to self.table attribute
@@ -790,15 +796,16 @@ def _execute_insert_multi(self, conn, keys, data_iter):
 def _get_index_formatted_dataframe(self):
 """
- Method that checks whether the dataframe index is also to be added to the
- database table. If it is, it takes care of formatting the incoming dataframe
- accordingly
+ Format index of incoming dataframe to be aligned with a database table.
+
+ Copy original dataframe, and check whether the dataframe index
+ is to be added to the database table.
+ If it is, reset the index so that it becomes a normal column; else return the copy unchanged.
 Returns
 -------
- DataFrame object
+ DataFrame
 """
-
 # Originally this functionality formed the first step of the insert_data method.
 # It will be useful to have in other places, so moved here to keep code DRY.
 temp = self.frame.copy()
@@ -851,8 +858,7 @@ def insert(self, chunksize=None, method=None):
 self._insert(data=data, chunksize=chunksize, method=method)
 elif self.if_exists == "upsert_delete":
 delete_statement = self._upsert_delete_processing()
- # nested transaction to ensure delete is
- # rolled back in case of poor data
+ # nested transaction to ensure delete is rolled back in case of poor data
 with self.pd_sql.run_transaction() as trans:
 trans.execute(delete_statement)
 self._insert(chunksize=chunksize, method=method)
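The ``upsert_delete`` branch above relies on the delete statement and the insert running inside one transaction. A sketch of the same pattern in plain SQLAlchemy (the engine, table and rows are illustrative; row-value ``IN`` needs backend support, which the patch already assumes via ``tuple_``)::

    from sqlalchemy import Column, MetaData, Table, Text, create_engine, tuple_

    engine = create_engine("sqlite://")  # illustrative in-memory engine
    meta = MetaData()
    table = Table(
        "charterers", meta,
        Column("id", Text, primary_key=True),
        Column("name", Text),
    )
    meta.create_all(engine)

    rows = [{"id": "x1", "name": "new"}]
    pkey_cols = [table.c[k] for k in ("id",)]
    delete_statement = table.delete().where(tuple_(*pkey_cols).in_([("x1",)]))

    # One transaction for both statements: if the insert fails,
    # the preceding delete is rolled back as well.
    with engine.begin() as conn:
        conn.execute(delete_statement)
        conn.execute(table.insert(), rows)

``engine.begin()`` plays the role of ``self.pd_sql.run_transaction()`` here.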
From c38f9005321caaa362d50ebce2f343a5bfa1ad93 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 14 Nov 2019 18:35:25 +0000 Subject: [PATCH 32/75] remove vendor --- .../V0RT3X4/python_utils/.circleci/config.yml | 35 ---- .../V0RT3X4/python_utils/.gitignore | 111 ----------- .../github.com/V0RT3X4/python_utils/README.md | 108 ----------- .../V0RT3X4/python_utils/aws/README.md | 1 - .../V0RT3X4/python_utils/aws/README.rst | 0 .../V0RT3X4/python_utils/aws/requirements.txt | 5 - .../V0RT3X4/python_utils/aws/setup.py | 50 ----- .../python_utils/aws/tests/__init__.py | 4 - .../aws/tests/lambda_types/__init__.py | 0 .../aws/tests/lambda_types/message_eg.py | 19 -- .../aws/tests/lambda_types/repeat_eg.py | 19 -- .../tests/lambda_types/test_lambda_types.py | 89 --------- .../s3_client_encryption_tests/__init__.py | 4 - .../test_IOAuthDecrypter.py | 22 --- .../test_IOAuthTagLength.py | 42 ---- .../test_IODecrypter.py | 94 --------- .../test_decrypt_s3_mime_with_attachment.py | 68 ------- .../test_decrypt_s3_object.py | 65 ------- .../test_kms_cipher_provider.py | 39 ---- .../aws/tests/ses_inbox/__init__.py | 0 .../tests/ses_inbox/test_get_attachments.py | 0 .../aws/tests/ses_inbox/test_list_inbox.py | 25 --- .../tests/ses_notification_types/__init__.py | 0 .../ses_notification_types/test_action.py | 16 -- .../test_lambda_record.py | 32 --- .../tests/ses_notification_types/test_mail.py | 85 -------- .../test_notification.py | 23 --- .../ses_notification_types/test_receipt.py | 34 ---- .../python_utils/aws/tests/utils/__init__.py | 0 .../utils/nested_data_classes/__init__.py | 0 .../test_nested_dataclass.py | 36 ---- .../aws/vortexa_utils/__init__.py | 5 - .../aws/vortexa_utils/aws/__init__.py | 4 - .../aws/vortexa_utils/aws/lambdr/__init__.py | 5 - .../aws/vortexa_utils/aws/lambdr/types.py | 45 ----- .../aws/vortexa_utils/aws/s3/__init__.py | 0 .../aws/vortexa_utils/aws/s3/client.py | 50 ----- .../client_side_encryption/IOAuthDecrypter.py | 40 ---- .../IOAuthDecrypterTagLength.py | 65 ------- .../s3/client_side_encryption/IODecrypter.py | 61 ------ .../s3/client_side_encryption/IONocrypter.py | 38 ---- .../aws/s3/client_side_encryption/__init__.py | 183 ------------------ .../client_side_encryption/cipher_provider.py | 17 -- .../aws/s3/client_side_encryption/client.py | 103 ---------- .../decrypt_handeler.py | 121 ------------ .../aws/s3/client_side_encryption/get.py | 75 ------- .../kms_cipher_provider.py | 61 ------ .../aws/vortexa_utils/aws/ses/__init__.py | 4 - .../aws/ses/application_mapper.py | 102 ---------- .../aws/vortexa_utils/aws/ses/attachments.py | 15 -- .../aws/vortexa_utils/aws/ses/inbox.py | 141 -------------- .../aws/ses/notification/__init__.py | 0 .../aws/ses/notification/types/__init__.py | 5 - .../aws/ses/notification/types/action.py | 56 ------ .../ses/notification/types/lambda_record.py | 18 -- .../aws/ses/notification/types/mail.py | 44 ----- .../ses/notification/types/notification.py | 29 --- .../aws/ses/notification/types/receipt.py | 65 -------
.../aws/ses/notification/types/verdicts.py | 43 ---- .../aws/vortexa_utils/aws/utils/__init__.py | 4 - .../aws/utils/dataclasses/__init__.py | 1 - .../aws/utils/dataclasses/nested.py | 20 -- .../collections/tests/__init__.py | 0 .../tests/collections/types/__init__.py | 0 .../types/test_instance_caching_abc.py | 130 ------------- .../vortexa_utils/collections/__inti__.py | 0 .../collections/types/__init__.py | 0 .../collections/types/instance_caching_abc.py | 45 ----- .../V0RT3X4/python_utils/database/README.md | 21 -- .../V0RT3X4/python_utils/database/README.rst | 28 --- .../V0RT3X4/python_utils/database/setup.py | 40 ---- .../python_utils/database/tests/__init__.py | 0 .../database/tests/test_database_factory.py | 16 -- .../database/tests/test_querey_cache.py | 21 -- .../database/vortexa_utils/__init__.py | 5 - .../vortexa_utils/database/__init__.py | 7 - .../vortexa_utils/database/database.py | 118 ----------- .../database/default_factories.py | 20 -- .../vortexa_utils/database/query_cache.py | 77 -------- .../database/vortexa_utils/database/utils.py | 62 ------ .../V0RT3X4/python_utils/deployment/setup.py | 20 -- .../deployment/vortexa_utils/__init__.py | 5 - .../vortexa_utils/portainer/Readme.md | 1 - .../vortexa_utils/portainer/__init__.py | 8 - .../deployment/vortexa_utils/portainer/api.py | 56 ------ .../vortexa_utils/portainer/stacks.py | 61 ------ .../vortexa_utils/portainer/update_stack.py | 90 --------- .../python_utils/docker/pandas/Dockerfile | 25 --- .../V0RT3X4/python_utils/general/README.rst | 0 .../V0RT3X4/python_utils/general/setup.py | 40 ---- .../general/vortexa_utils/__init__.py | 5 - .../general/vortexa_utils/general/__init__.py | 0 .../python_utils/general/vortexa_utils/git.py | 14 -- .../V0RT3X4/python_utils/logging/README.md | 55 ------ .../V0RT3X4/python_utils/logging/setup.py | 38 ---- .../logging/vortexa_utils/__init__.py | 1 - .../logging/vortexa_utils/logging/__init__.py | 1 - .../logging/exception_decorator.py | 12 -- .../vortexa_utils/logging/resources.py | 38 ---- .../utils/vortexa_utils/utils/__init__.py | 0 .../utils/byte_stream_spliter.py | 31 --- .../utils/sockets/socket_client.py | 24 --- .../utils/sockets/socket_server.py | 17 -- .../V0RT3X4/python_utils/versioning/VERSION | 1 - .../V0RT3X4/python_utils/versioning/setup.py | 34 ---- .../python_utils/versioning/tests/__init__.py | 0 .../versioning/tests/test_versioner.py | 47 ----- .../versioning/vortexa_utils/__init__.py | 5 - .../vortexa_utils/versioning/__init__.py | 10 - .../vortexa_utils/versioning/__main__.py | 9 - .../vortexa_utils/versioning/cli.py | 46 ----- .../vortexa_utils/versioning/utils.py | 22 --- .../vortexa_utils/versioning/versioner.py | 99 ---------- .../python_utils/youve_got_mail/README.md | 0 .../python_utils/youve_got_mail/README.rst | 0 .../youve_got_mail/requirements.txt | 2 - .../python_utils/youve_got_mail/setup.py | 48 ----- .../youve_got_mail/tests/__init__.py | 0 .../youve_got_mail/vortexa_utils/__init__.py | 0 .../vortexa_utils/youve_got_mail.py | 43 ---- 120 files changed, 3944 deletions(-) delete mode 100644 vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml delete mode 100644 vendor/github.com/V0RT3X4/python_utils/.gitignore delete mode 100644 vendor/github.com/V0RT3X4/python_utils/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt delete mode 100644 
vendor/github.com/V0RT3X4/python_utils/aws/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py delete mode 
100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/application_mapper.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py delete mode 
100644 vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/VERSION delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py delete mode 100644 
vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py delete mode 100644 vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py diff --git a/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml b/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml deleted file mode 100644 index c44edbe3b610c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/.circleci/config.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -jobs: - build: - working_directory: ~/project - docker: - - image: circleci/python:3.7 - steps: - - checkout - - restore_cache: &restore_cache - keys: - - v1-{{ .Branch }}- - # - run: - # name: "Install Python3" - # command: | - # apk add --no-cache \ - # python3 \ - # libc-dev \ - # gcc - - run: - name: "Test aws" - working_directory: ~/project/aws - command: | - python3.7 -m venv .venv; - . .venv/bin/activate - pip install -U pip - pip install -U -r requirements.txt - nose2 - python setup.py test - - save_cache: &save_cache - key: v1-{{ .Branch }}-{{ epoch }} - paths: - - ~/project/aws/.venv - - ~/project/database/.venv - - ~/project/deployment/.venv - - ~/.cache/pip diff --git a/vendor/github.com/V0RT3X4/python_utils/.gitignore b/vendor/github.com/V0RT3X4/python_utils/.gitignore deleted file mode 100644 index 2c06c5a32bbdb..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/.gitignore +++ /dev/null @@ -1,111 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# pyenv
-.python-version
-
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-
-# VSCODE
-.vscode
-
-.idea
-*.iml
-scratch.py
diff --git a/vendor/github.com/V0RT3X4/python_utils/README.md b/vendor/github.com/V0RT3X4/python_utils/README.md
deleted file mode 100644
index 028c6e96fb015..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/README.md
+++ /dev/null
@@ -1,108 +0,0 @@
-# python_utils [![CircleCI](https://circleci.com/gh/V0RT3X4/python_utils.svg?style=svg&circle-token=30fa8fb22fa45a521a5d728e9accde63c242c2b4)](https://circleci.com/gh/V0RT3X4/python_utils)
-Python utilities and helper functions/classes/modules
-
-## Sub Packages
-
-- [AWS](#aws)
-- [Database](#database)
-- [Deployment](#deployment)
-
-## Installation
-
-Installation is done by using [submodule vendoring](#vendoring).
-Vendor the package into your project as [below](#vendoring) then you can install
-with
-```
-pip install vendor/github.com/V0RT3X4/python_utils/
-```
-or
-```
-echo vendor/github.com/V0RT3X4/python_utils/ >> requirements.txt
-pip install -r requirements.txt
-```
-
-## Aws
-
-Helper modules for `s3` client side encryption, `ses` email processing
-(s3 as an inbox) and `lambda` function handler types.
-
-## Database
-
-Database connection helpers to get you a
-[`SQLAlchemy`](https://www.sqlalchemy.org/) connection [`Engine`](https://docs.sqlalchemy.org/en/latest/core/engines_connections.html)
-to an RDS or RedShift database using
-`aws secretsmanager` for managing connection credentials and rotation, and with
-SSL encryption.
-
-## Deployment
-
-Custom Deployment Jazz
-
-## Installation - Vendoring the subtree
-To install the scripts into your project it is recommended to vendor this module as a `git subtree` as opposed to a `git submodule`. You will have a version of this code in your repo, and you can easily update and push changes back upstream.
-
-To make your life easier install [git-vendor](https://github.com/brettlangdon/git-vendor)
-
-Then you can vendor the module into your repo and run installation scripts:
-```
-git vendor add python_utils git@github.com:V0RT3X4/python_utils.git master
-```
-
-finally you can install the modules you want
-```
-pip install vendor/github.com/V0RT3X4/python_utils/
-```
-
-to update the reference
-```
-git vendor update python_utils master
-```
-
-## AS Submodule
-
-In the project directory
-```
-git submodule add \
-    --name github.com/V0RT3X4/python_utils \
-    git@github.com:V0RT3X4/python_utils.git \
-    vendor/github.com/V0RT3X4/python_utils
-```
-
-Subsequently when you check out the source code (say in
-[circleCI](https://circleci.com) or locally).
-``` -git clone git@github.com:/V0RT3X4/.git -cd -git submodule init -git submodule update --remote -``` - -finally you can install the modules you want -``` -pip install vendor/github.com/V0RT3X4/python_utils/ -``` - -## Contributing -To contribute and push changes back upstream add this repo as a remote. -``` -git remote add -f python_utils git@github.com:V0RT3X4/python_utils.git -``` -Push changes in the sub tree -``` -git subtree push --prefix=vendor/github.com/V0RT3X4/python_utils python_utils some_branch -``` - -## [git-vendor](https://github.com/brettlangdon/git-vendor) installation - -``` -cd $(mktemp -d) && \ -git clone https://github.com/brettlangdon/git-vendor &> /dev/null && \ -cd git-vendor && \ -sudo make install -``` - -or - -``` -brew install git-vendor -``` diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/README.md b/vendor/github.com/V0RT3X4/python_utils/aws/README.md deleted file mode 100644 index f9e28102b5fbf..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/README.md +++ /dev/null @@ -1 +0,0 @@ -# Vortexa AWS Python Utils diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/README.rst b/vendor/github.com/V0RT3X4/python_utils/aws/README.rst deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt b/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt deleted file mode 100644 index 34a10a130c16c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -boto3 -pycryptodomex -nose2 -pandas -logzero diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/setup.py b/vendor/github.com/V0RT3X4/python_utils/aws/setup.py deleted file mode 100644 index 1e69b1cb89ad6..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:16:54+00:00 -import os -import io -from setuptools import setup, find_packages - -namespace = 'vortexa_utils' -name = 'vortexa_utils_aws' -version = '1' -description = 'Vortexa AWS utils helper library', - -dependencies = [ - 'boto3', - 'pycryptodomex' -] - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, 'README.rst') -with io.open(readme_filename, encoding='utf-8') as readme_file: - readme = readme_file.read() - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name=name, - version=version, - description=description, - long_description=readme, - - author='Richard Mathie', - author_email='richard.mathie@vortexa.com', - - zip_safe=False, - test_suite='nose2.collector.collector', - tests_require=['nose2', 'pandas'], - - packages=packages, - install_requires=dependencies, - extras_require={ - 'pandas': ['pandas'] - } -) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py deleted file mode 100644 index b0f42e4b71cc9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T18:10:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-11-28T18:10:18+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py 
b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py deleted file mode 100644 index 9cf39d5a99d58..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/message_eg.py +++ /dev/null @@ -1,19 +0,0 @@ -""" Example #1 """ -import os -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext - -MSG_TEMPLATE: str = os.environ.get('MSG_TEMPLATE') or 'Hello {} {}!' -STAGE: str = os.environ.get('stage') or 'dev' - - -def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: - print('Received event {} for stage {}'.format(event, STAGE)) - first_name: str = event.get('first_name') # optional - last_name: str = event.get('last_name') # optional - return { - 'message': get_message(first_name, last_name), - } - - -def get_message(first_name: str = 'John', last_name: str = 'Smith'): - return MSG_TEMPLATE.format(first_name, last_name) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py deleted file mode 100644 index 95d5331e8f5f9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/repeat_eg.py +++ /dev/null @@ -1,19 +0,0 @@ -""" Example #2 """ -import os -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext - -N: int = int(os.environ.get('N') or 10) -STAGE: str = os.environ.get('stage') or 'dev' - - -def handler(event: LambdaDict, context: LambdaContext) -> LambdaDict: - print('Received event {} for stage {}'.format(event, STAGE)) - input: str = event['input'] # required - return { - 'output': get_output(input, N), - } - - -def get_output(input: str, num: int): - """ Return the input string repeated N times. 
""" - return input * num diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py deleted file mode 100644 index 0cdad796b76dd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/lambda_types/test_lambda_types.py +++ /dev/null @@ -1,89 +0,0 @@ -import unittest -from vortexa_utils.aws.lambdr.types import LambdaDict, LambdaContext -from .message_eg import handler as handler_message, get_message -from .repeat_eg import handler as handler_repeat, get_output - - -class TestMessageFunction(unittest.TestCase): - - def setUp(self): - self.context = LambdaContext() - - def test_handler(self) -> None: - event: LambdaDict = { - "first_name": "Alex", - "last_name": "Casalboni", - } - result = handler_message(event, self.context) - self.assertIn('message', result) - - def test_handler_empty(self) -> None: - event: LambdaDict = {} - result = handler_message(event, self.context) - self.assertIn('message', result) - - def test_message_default(self) -> None: - msg = get_message() - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('John', msg) - self.assertIn('Smith', msg) - self.assertTrue(msg.endswith('!')) - - def test_message_firstname(self) -> None: - msg = get_message(first_name='Charlie') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('Charlie', msg) - self.assertIn('Smith', msg) - self.assertTrue(msg.endswith('!')) - - def test_message_lastname(self) -> None: - msg = get_message(last_name='Brown') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('John', msg) - self.assertIn('Brown', msg) - self.assertTrue(msg.endswith('!')) - - def test_message(self) -> None: - msg = get_message(first_name='Charlie', last_name='Brown') - self.assertIsInstance(msg, str) - self.assertIn('Hello', msg) - self.assertIn('Charlie', msg) - self.assertIn('Brown', msg) - self.assertTrue(msg.endswith('!')) - - -class TestRepeatFunction(unittest.TestCase): - - def setUp(self): - self.context = LambdaContext() - - def test_handler(self) -> None: - event: LambdaDict = { - "input": "NaN", - } - result = handler_repeat(event, self.context) - self.assertIn('output', result) - self.assertEqual(30, len(result['output'])) - - def test_handler_empty(self) -> None: - event: LambdaDict = {} - with self.assertRaises(KeyError): - handler_repeat(event, self.context) - - def test_repeat_empty_string(self) -> None: - output = get_output('', 100) - self.assertIsInstance(output, str) - self.assertEqual(0, len(output)) - - def test_repeat_zero(self) -> None: - output = get_output('hello', 0) - self.assertIsInstance(output, str) - self.assertEqual(0, len(output)) - - def test_repeat(self) -> None: - output = get_output('hello', 10) - self.assertIsInstance(output, str) - self.assertEqual(50, len(output)) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py deleted file mode 100644 index 2e9b828ec304c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-11-28T18:10:35+00:00 -# @Last modified by: richard -# @Last modified time: 2018-11-28T18:10:36+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py 
b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py
deleted file mode 100644
index bf64d13548ac0..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthDecrypter.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T18:11:28+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T13:06:33+00:00
-from vortexa_utils.aws.s3.client_side_encryption import IOAuthDecrypter
-from nose2.tools import params
-from .test_IODecrypter import DummyChunksIO, IODecrypterTestCase
-
-
-class IOAuthDecrypter(IODecrypterTestCase):
-    io_decrypter_class = IOAuthDecrypter.IOAuthDecrypter
-
-    def get_decrypter(self, cypher, io, content_length):
-        return self.io_decrypter_class(cypher, io, content_length)
-
-    def get_io(self, content_length):
-        tag_length = 128
-        return DummyChunksIO(content_length + tag_length)
-
-    def invalid_decryption(self, content_length):
-        with self.assertRaises(ValueError):
-            super().invalid_decryption(content_length)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py
deleted file mode 100644
index 51685c22d13bd..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IOAuthTagLength.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import unittest
-import io
-from vortexa_utils.aws.s3.client_side_encryption.IOAuthDecrypterTagLength \
-    import StreamChunker
-from nose2.tools import params
-
-
-class StreamChunkerTestCase(unittest.TestCase):
-
-    def get_chunker(self, io, tag_length):
-        return StreamChunker(io, tag_length)
-
-    def test_tagged(self):
-        fixture = io.BytesIO(b'1234567890')
-        chunker = StreamChunker(fixture, 3)
-        bytes = chunker.read()
-        self.assertEqual(chunker.tag, b'890')
-        self.assertEqual(bytes, b'1234567')
-
-    @params(*range(1, 11))
-    def test_read_in_chunks(self, chunk):
-        bytes = b'1234567890'
-        fixture = io.BytesIO(bytes)
-        tag_length = 3
-        chunker = StreamChunker(fixture, tag_length)
-        result = []
-        index = 0
-        while True:
-            byte = chunker.read(chunk)
-            if byte == b'':
-                break
-            result.append(byte)
-            self.assertEqual(bytes[index:index + len(byte)], byte)
-            index += len(byte)
-        print(result)
-        self.assertEqual(bytes[-tag_length:], chunker.tag)
-        self.assertEqual(b''.join(result), bytes[:-tag_length])
-        # check that subsequent reads return nothing and tag is correct
-        for i in range(10):
-            byte = chunker.read(chunk)
-            self.assertEqual(b'', byte)
-            self.assertEqual(bytes[-tag_length:], chunker.tag)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py
deleted file mode 100644
index cadab6acdaeae..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_IODecrypter.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T18:11:28+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T13:07:14+00:00
-from io import IOBase
-
-from vortexa_utils.aws.s3.client_side_encryption.IODecrypter import IODecrypter
-import unittest
-from nose2.tools import params
-
-
-class DummyCipher(object):
-    def __init__(self, valid: bool = True):
-        self.valid = valid
-
-    def decrypt(self, bytes):
-        return bytes
-
-    def verify(self, tag):
-        if
not self.valid: - raise ValueError("MAC check failed") - pass - - -class DummyChunksIO(IOBase): - _DEFAULT_CHUNK_SIZE = 1024 - - def __init__(self, size): - self.bytes_read = 0 - self.size = size - - def read(self, chunk=-1): - if chunk < 0: - chunk = self.size - self.bytes_read - else: - chunk = min(chunk, abs(self.size - self.bytes_read)) - self.bytes_read += chunk - return b' ' * chunk - - def __iter__(self): - """Return an iterator to yield 1k chunks from the raw stream. - """ - return self.iter_chunks(self._DEFAULT_CHUNK_SIZE) - - def iter_chunks(self, chunk_size=_DEFAULT_CHUNK_SIZE): - """Return an iterator to yield chunks of chunk_size bytes from the raw - stream. - """ - while True: - bytes = self.read(chunk_size) - if bytes == b'': - break - yield bytes - - def close(self): - pass - - def readable(self): - return True - - def seekable(self): - return False - - def writable(self): - return False - - -class IODecrypterTestCase(unittest.TestCase): - io_decrypter_class = IODecrypter - - def get_decrypter(self, cypher, io, content_length): - return self.io_decrypter_class(cypher, io) - - def get_io(self, content_length): - return DummyChunksIO(content_length) - - def make_decrypter(self, content_length, valid=True): - io = DummyChunksIO(content_length) - cypher = DummyCipher(valid=valid) - return self.get_decrypter(cypher, io, content_length) - - @params(123, 1024, 1024*3, 1024*3+123, 1, 0) - def test_read(self, content_length): - with self.make_decrypter(content_length) as decrypter: - bytes = list(decrypter) - self.assertEqual(b''.join(bytes), b' ' * content_length) - - @params(123, 1024, 1024*3, 1024*3+123, 1, 0) - def test_invalid(self, content_length): - self.invalid_decryption(content_length) - - def invalid_decryption(self, content_length): - with self.make_decrypter(content_length, valid=False) as decrypter: - list(decrypter) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py deleted file mode 100644 index 0be487412d5c2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_mime_with_attachment.py +++ /dev/null @@ -1,68 +0,0 @@ -# @Author: richard -# @Date: 2018-12-06T17:26:08+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T19:36:16+00:00 -# cd aws/vortexa_utils/ -# import aws.s3.client_side_encryption.client as client -import logging -import vortexa_utils.aws.s3.client_side_encryption.client as client -import io -import email.parser -from email import policy -from email.iterators import _structure -import base64 -from nose2.tools.such import helper - -import pandas as pd - -logger = logging.getLogger(__name__) - -Bucket = 'ops-data.incoming-emails' -Key = 'incoming_email/akrk0l8sq4lm7qkgj8hpurfshpnj8frgqpqe9mg1' -Key = 'incoming_email/8ej2ldqnsmako2tgsbdpqg8tdi6tdnduoscojdo1' - - -def test_get_attachment(): - cl = client.Client() - parser = email.parser.BytesParser(policy=policy.default) - with cl.get_object(Bucket, Key) as io: - parsed = parser.parse(io) - _structure(parsed) - - # with open("/home/richard/an_email", 'wb') as f: - # for b in io: - # f.write(b) - # - # atts = list(parsed.iter_attachments()) - # [a.get_filename() for a in atts] - # [a.get_content_type() for a in atts] - # att = atts[2] - # att - # att.get_content_type() - # pd.read_excel(io.BytesIO(att.get_content())) - - target = 
parsed['to'] - source = parsed['from'] - helper.assertEqual(target, 'test@opsdata.vortexa.com') - helper.assertEqual(source, 'Richard Mathie ') - - parsed['subject'] - - for part in parsed.walk(): - print(part.get_content_type()) - att = parsed.get_payload() - att[0].get_content_type() - att[0].get_payload()[1].get_payload() - - logger.debug('\nwalking message') - for part in parsed.walk(): - content_type = part.get_content_type() - if content_type.startswith('text'): - logger.debug(content_type) - payload = part.get_payload() - if content_type == 'text/csv': - csv = base64.decodebytes(payload.encode('utf-8')) - for line in csv.splitlines(): - logger.debug(line) - else: - logger.debug('\n%s', payload) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py deleted file mode 100644 index a33346502b0a2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_decrypt_s3_object.py +++ /dev/null @@ -1,65 +0,0 @@ -# @Author: richard -# @Date: 2018-12-06T13:27:47+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T17:24:43+00:00 -import logging -import sys -# cd aws/vortexa_utils/ -# import aws.s3.client_side_encryption.client as client -import vortexa_utils.aws.s3.client_side_encryption.client as client -import email.parser -from nose2.tools.such import helper - - -logger = logging.getLogger(__name__) - -Bucket = 'ops-data.incoming-emails' -Key = 'incoming_email/4pnlhtml86pobumjn9d59mbkcq3to1i43sjbd201' - - -def test_get_obj(): - self = client.Client() - location_info = self.s3.get_bucket_location(Bucket=Bucket) - logger.info('location %s', location_info) - - obj = self.s3.get_object(Bucket=Bucket, Key=Key) - handeler = client.DecryptHandeler(obj, self) - envelop = handeler.envelope_v2(handeler.metadata) - cipher = self.cipher_provider.decryptor(envelop) - assert handeler.auth_tag() - io = handeler.decrypt_auth(cipher) - - bytes = [] - while True: - byte = io.read(1024) - if byte == b'': - break - logger.info("Bytes Read %s/%s", io.bytes_read, io.content_length) - logger.debug("Bytes %s", byte) - bytes.append(byte) - io.verify() - io.close() - # logger.info('bytes %s', str(bytes)) - - -def test_get_obj_io(): - cl = client.Client() - with cl.get_object(Bucket, Key) as io: - list(io) - - -def test_get_obj_mime(): - cl = client.Client() - parser = email.parser.BytesParser() - with cl.get_object(Bucket, Key) as io: - parsed = parser.parse(io) - - target = parsed['to'] - source = parsed['from'] - helper.assertEqual(target, 'test@opsdata.vortexa.com') - helper.assertEqual(source, 'Richard Mathie ') - - logger.info('\twalking message') - for part in parsed.walk(): - if part.get_content_type().startswith('text'): - logger.info('\t%s', part.get_payload()) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py deleted file mode 100644 index 7da39f7a34166..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/s3_client_encryption_tests/test_kms_cipher_provider.py +++ /dev/null @@ -1,39 +0,0 @@ -# @Author: richard -# @Date: 2018-12-05T16:23:13+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-05T19:43:28+00:00 -import unittest -from vortexa_utils.aws.s3.client_side_encryption import 
kms_cipher_provider -import logging - - -logger = logging.getLogger(__name__) - - -def log_bytes(*bytes): - logger.info(f' bytes: {bytes}') - - -class KMSCipherProviderTest(unittest.TestCase): - test_key_id = 'alias/python_utils_test_key' - - def get_cipher(self): - return kms_cipher_provider.KMSCipherProvider(self.test_key_id) - - def test_encrypt(self): - envelope, cipher = self.get_cipher().encryptor() - plaintext = b"The quick brown fox jumped over the lazy dog" - self.plaintext = plaintext - ciphertext, tag = cipher.encrypt_and_digest(plaintext) - log_bytes(ciphertext, tag) - self.assertNotEqual(ciphertext, plaintext) - package = (envelope, ciphertext, tag) - return package - - def test_decrypt(self): - envelope, ciphertext, tag = self.test_encrypt() - cipher = kms_cipher_provider.KMSCipherProvider().decryptor(envelope) - plaintext = cipher.decrypt(ciphertext) - log_bytes(ciphertext, tag, plaintext) - self.assertEqual(plaintext, self.plaintext) - cipher.verify(tag) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_get_attachments.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py deleted file mode 100644 index a8ff2a0bd81ee..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_inbox/test_list_inbox.py +++ /dev/null @@ -1,25 +0,0 @@ -# cd aws/vortexa_utils -# cd .. 
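The round trip in `test_kms_cipher_provider.py` above condenses to a few calls; a minimal sketch, assuming the `alias/python_utils_test_key` KMS alias used by the test suite exists and is reachable:

```python
# Hedged sketch of the envelope round trip exercised above; the KMS
# alias is the test fixture's and must exist in the target account.
from vortexa_utils.aws.s3.client_side_encryption.kms_cipher_provider import (
    KMSCipherProvider,
)

envelope, cipher = KMSCipherProvider("alias/python_utils_test_key").encryptor()
ciphertext, tag = cipher.encrypt_and_digest(b"The quick brown fox")

decryptor = KMSCipherProvider().decryptor(envelope)  # unwraps the data key via KMS
assert decryptor.decrypt(ciphertext) == b"The quick brown fox"
decryptor.verify(tag)  # raises ValueError on a failed MAC check
```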
-from typing import Iterable -from vortexa_utils.aws.ses.inbox import Inbox -from email.message import EmailMessage -from itertools import islice - - -Path = 'incoming_email/' - -inbox = Inbox(default_bucket='ops-data.incoming-emails') - - -def test_list_inbox(): - inbox = Inbox(default_bucket='ops-data.incoming-emails') - emails: Iterable[EmailMessage] = islice( - inbox.list_emails(Path=Path), - 10 - ) - - for email in emails: - # print(email.as_string()) - attachments = list(email.iter_attachments()) - print(list(a.get_filename() for a in attachments)) - print(list(a.get_content_type() for a in attachments)) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py deleted file mode 100644 index 1110fda3de888..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_action.py +++ /dev/null @@ -1,16 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Action -from json import loads - - -action_json_sns = """ -{ - "type": "SNS", - "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" -} -""" - - -def test_sns_action(): - action = Action(**loads(action_json_sns)) - assert action.type == "SNS" - assert action.topicArn == "arn:aws:sns:us-east-1:012345678912:example-topic" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py deleted file mode 100644 index c489d6cd84e42..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_lambda_record.py +++ /dev/null @@ -1,32 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Record -from json import loads -from .test_mail import mail_json -from .test_receipt import receipt_json - - -ses = dict( - receipt=receipt_json, - mail=mail_json -) - - -record_json = loads(""" -{ - "eventSource": "aws:ses", - "eventVersion": "1.0", - "ses": { - "receipt": { - }, - "mail": { - } - } -} -""") - -record_json.update(ses=ses) - - -def test_record(): - record = Record(**record_json) - record.ses - assert record.eventSource == "aws:ses" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py deleted file mode 100644 index bb558b3639e48..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_mail.py +++ /dev/null @@ -1,85 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Mail -from json import loads - -mail_json = loads(""" -{ -"timestamp": "2015-09-11T20:32:33.936Z", -"source": "61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com", -"messageId": "d6iitobk75ur44p8kdnnp7g2n800", -"destination": [ - "recipient@example.com" -], -"headersTruncated": false, -"headers": [ - { - "name": "Return-Path", - "value": "<0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com>" - }, - { - "name": "Received", - "value": "from a9-183.smtp-out.amazonses.com (a9-183.smtp-out.amazonses.com [54.240.9.183]) by inbound-smtp.us-east-1.amazonaws.com with SMTP id 
d6iitobk75ur44p8kdnnp7g2n800 for recipient@example.com; Fri, 11 Sep 2015 20:32:33 +0000 (UTC)" - }, - { - "name": "DKIM-Signature", - "value": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=ug7nbtf4gccmlpwj322ax3p6ow6yfsug; d=amazonses.com; t=1442003552; h=From:To:Subject:MIME-Version:Content-Type:Content-Transfer-Encoding:Date:Message-ID:Feedback-ID; bh=DWr3IOmYWoXCA9ARqGC/UaODfghffiwFNRIb2Mckyt4=; b=p4ukUDSFqhqiub+zPR0DW1kp7oJZakrzupr6LBe6sUuvqpBkig56UzUwc29rFbJF hlX3Ov7DeYVNoN38stqwsF8ivcajXpQsXRC1cW9z8x875J041rClAjV7EGbLmudVpPX 4hHst1XPyX5wmgdHIhmUuh8oZKpVqGi6bHGzzf7g=" - }, - { - "name": "From", - "value": "sender@example.com" - }, - { - "name": "To", - "value": "recipient@example.com" - }, - { - "name": "Subject", - "value": "Example subject" - }, - { - "name": "MIME-Version", - "value": "1.0" - }, - { - "name": "Content-Type", - "value": "text/plain; charset=UTF-8" - }, - { - "name": "Content-Transfer-Encoding", - "value": "7bit" - }, - { - "name": "Date", - "value": "Fri, 11 Sep 2015 20:32:32 +0000" - }, - { - "name": "Message-ID", - "value": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>" - }, - { - "name": "X-SES-Outgoing", - "value": "2015.09.11-54.240.9.183" - }, - { - "name": "Feedback-ID", - "value": "1.us-east-1.Krv2FKpFdWV+KUYw3Qd6wcpPJ4Sv/pOPpEPSHn2u2o4=:AmazonSES" - } -], -"commonHeaders": { - "returnPath": "0000014fbe1c09cf-7cb9f704-7531-4e53-89a1-5fa9744f5eb6-000000@amazonses.com", - "from": [ - "sender@example.com" - ], - "date": "Fri, 11 Sep 2015 20:32:32 +0000", - "to": [ - "recipient@example.com" - ], - "messageId": "<61967230-7A45-4A9D-BEC9-87CBCF2211C9@example.com>", - "subject": "Example subject" -} -} -""") - - -def test_init(): - mail = Mail(**mail_json) - mail.headers diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py deleted file mode 100644 index 56884ad7463dd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_notification.py +++ /dev/null @@ -1,23 +0,0 @@ -from vortexa_utils.aws.ses.notification.types import Notification -from json import loads -from .test_mail import mail_json -from .test_action import action_json_sns -from .test_receipt import receipt_json - - -nodification_json = loads(""" -{ -"notificationType": "Received", -"content": "blarblarblar" -} -""" -) - -nodification_json.update( - mail=mail_json, - receipt=receipt_json -) - - -def test_init(): - Notification(**nodification_json) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py deleted file mode 100644 index e41ea7f8ce24d..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/ses_notification_types/test_receipt.py +++ /dev/null @@ -1,34 +0,0 @@ -from json import loads -from vortexa_utils.aws.ses.notification.types import Receipt - - -receipt_json = loads(""" -{ -"timestamp": "2015-09-11T20:32:33.936Z", -"processingTimeMillis": 222, -"recipients": [ - "recipient@example.com" -], -"spamVerdict": { - "status": "PASS" -}, -"virusVerdict": { - "status": "PASS" -}, -"spfVerdict": { - "status": "PASS" -}, -"dkimVerdict": { - "status": "PASS" -}, -"action": { - "type": "SNS", - "topicArn": "arn:aws:sns:us-east-1:012345678912:example-topic" -} -} -""") - - -def test_receipt(): - receipt = Receipt(**receipt_json) - receipt.dkimVerdict.status == 
"PASS" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py b/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py deleted file mode 100644 index e15dffd75cc4d..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/tests/utils/nested_data_classes/test_nested_dataclass.py +++ /dev/null @@ -1,36 +0,0 @@ -from dataclasses import dataclass -# cd vortexa_utils/ -# from aws.utils.dataclasses import nested_dataclass -from vortexa_utils.aws.utils.dataclasses import nested_dataclass - - -@dataclass -class Foo: - a: str - b: int - - -@nested_dataclass -class Bar: - foo: Foo - baz: str - - -@nested_dataclass -class Bill: - bar: Bar - - -def test_init_class(): - data = dict( - bar=dict( - foo=dict(a="hello", b=1), - baz="world" - ) - ) - foo = Foo(**data['bar']['foo']) - bar = Bar(**data['bar']) - bill = Bill(**data) - - assert bill.bar == bar - assert bill.bar.foo == foo diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py deleted file mode 100644 index a7712f632a766..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:12:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:12:57+00:00 -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py deleted file mode 100644 index dda33076e9246..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:13:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py deleted file mode 100644 index 4dcf5531789e7..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Vortexa python utils aws lambda helper functions and types. - -This module is called lambdr as `lambda` is a reserved word in python - -""" diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py deleted file mode 100644 index a1af1904a954b..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/lambdr/types.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Note: this code is used only by the static type checker! 
- -_see: -_and: - -""" -from typing import Dict, Any - -LambdaDict = Dict[str, Any] - - -class LambdaCognitoIdentity(object): - cognito_identity_id: str - cognito_identity_pool_id: str - - -class LambdaClientContextMobileClient(object): - installation_id: str - app_title: str - app_version_name: str - app_version_code: str - app_package_name: str - - -class LambdaClientContext(object): - client: LambdaClientContextMobileClient - custom: LambdaDict - env: LambdaDict - - -class LambdaContext(object): - function_name: str - function_version: str - invoked_function_arn: str - memory_limit_in_mb: int - aws_request_id: str - log_group_name: str - log_stream_name: str - deadline_ms: int - identity: LambdaCognitoIdentity - client_context: LambdaClientContext - - @staticmethod - def get_remaining_time_in_millis() -> int: - return 0 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py deleted file mode 100644 index da8e4814d10cd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client.py +++ /dev/null @@ -1,50 +0,0 @@ -from tempfile import NamedTemporaryFile - -import boto3 -from logzero import logger -from pandas import DataFrame, read_hdf, read_csv - - -class S3Client: - def __init__(self, s3_bucket: str): - self.s3 = boto3.client("s3") - self.s3_bucket = s3_bucket - - def upload(self, filename: str, s3_key: str, owner_acl: bool = True): - logger.info("[s3] Started uploading: %s", s3_key) - self.s3.upload_file(filename, self.s3_bucket, s3_key) - logger.info("[s3] Finished uploading: %s", s3_key) - if owner_acl: - self.s3.put_object_acl( - ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=s3_key - ) - logger.info("[s3] bucket-owner-full-control ACL set") - - def hdf_pd(self, filename) -> DataFrame: - return self.__s3_pd__(filename, "hdf") - - def csv_pd(self, filename) -> DataFrame: - return self.__s3_pd__(filename, "csv") - - def copy(self, src, dest, owner_acl: bool = True): - copy_source = {"Bucket": self.s3_bucket, "Key": src} - self.s3.copy_object(CopySource=copy_source, Bucket=self.s3_bucket, Key=dest) - if owner_acl: - self.s3.put_object_acl( - ACL="bucket-owner-full-control", Bucket=self.s3_bucket, Key=dest - ) - logger.info("[s3] bucket-owner-full-control ACL set") - - def __s3_pd__(self, filename, filetype) -> DataFrame: - with NamedTemporaryFile("wb") as f: - logger.info(f"[s3] Started downloading: s3://{self.s3_bucket}/{filename}") - self.s3.download_fileobj(self.s3_bucket, filename, f) - f.flush() - logger.info(f"[s3] Finished downloading: s3://{self.s3_bucket}/{filename}") - logger.info("[pandas] Started loading: %s", filename) - if filetype == "hdf": - df: DataFrame = read_hdf(f.name) - elif filetype == "csv": - df: DataFrame = read_csv(f.name) - logger.info("[pandas] Finished loading: %s", filename) - return df diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py deleted file mode 100644 index 6e948f7032109..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypter.py +++ 
/dev/null
@@ -1,40 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:36+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:00:31+00:00
-import logging
-from .IODecrypter import IODecrypter
-
-logger = logging.getLogger(__name__)
-
-
-class IOAuthDecrypter(IODecrypter):
-    def __init__(self, cipher, io, content_length, chunk_size=16*1024):
-        super().__init__(cipher, io)
-        self.bytes_read = 0
-        self.content_length = content_length
-
-    def read(self, chunk=None):
-        chunk = min(chunk, self.content_length - self.bytes_read)
-        bytes = super().read(chunk)
-        logger.debug("Bytes Read %s/%s", self.bytes_read, self.content_length)
-        self.bytes_read += len(bytes)
-        return bytes
-
-    def verify(self):
-        # the remaining bytes should be the auth tag
-        tag = self.io.read()
-        logger.debug("Verifying Tag %s", tag)
-        self.cipher.verify(tag)
-
-    def iter_chunks(self, chunk_size=None):
-        """Return an iterator to yield chunks of chunk_size bytes from the raw
-        stream.
-        """
-        if chunk_size is None:
-            chunk_size = self._DEFAULT_CHUNK_SIZE
-
-        while self.bytes_read < self.content_length:
-            bytes = self.read(chunk_size)
-            yield bytes
-        self.verify()
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
deleted file mode 100644
index c120281198139..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IOAuthDecrypterTagLength.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:36+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:00:31+00:00
-import logging
-from .IODecrypter import IODecrypter
-from io import BytesIO, IOBase
-logger = logging.getLogger(__name__)
-
-
-class StreamChunker(IOBase):
-    """StreamChunker, a class to keep the last tag bytes of a file.
-
-    Keeps hold of the last `tag_length` bytes in `self.tag`
-    when reading from a `BytesIO` object.
-    """
-
-    def __init__(self, io: BytesIO, tag_length: int):
-        self.io = io
-        self.tag_length = tag_length
-        # prime the tag buffer with the first `tag_length` bytes
-        self.tag = self.io.read(self.tag_length)
-
-    def read(self, chunk=None):
-        bytes = self.tag + self.io.read(chunk)
-        bytes, self.tag = bytes[:-self.tag_length], bytes[-self.tag_length:]
-        return bytes
-
-    def close(self):
-        """Close the underlying http response stream."""
-        self.io.close()
-
-    def readable(self):
-        return True
-
-    def seekable(self):
-        return False
-
-    def writable(self):
-        return False
-
-
-class IOAuthDecrypterTagLength(IODecrypter):
-    def __init__(self, cipher, io, tag_length, chunk_size=16*1024):
-        super().__init__(cipher, StreamChunker(io, tag_length))
-
-    def verify(self):
-        # the remaining bytes should be the auth tag
-        tag = self.io.tag
-        logger.debug("Verifying Tag %s", tag)
-        self.cipher.verify(tag)
-
-    def iter_chunks(self, chunk_size=None):
-        """Return an iterator to yield chunks of chunk_size bytes from the raw
-        stream.
-        """
-        if chunk_size is None:
-            chunk_size = self._DEFAULT_CHUNK_SIZE
-
-        while True:
-            bytes = self.read(chunk_size)
-            if bytes == b'':
-                break
-            yield bytes
-        self.verify()
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py
deleted file mode 100644
index 9346aafcbe053..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IODecrypter.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:20+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:57:10+00:00
-# from typing import Iterable
-
-from io import IOBase
-from botocore.response import StreamingBody
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class IODecrypter(IOBase):
-    _DEFAULT_CHUNK_SIZE = 1024
-
-    def __init__(self, cipher, io: StreamingBody):
-        self.cipher: object = cipher
-        self.io: StreamingBody = io
-
-    def read(self, chunk=None):
-        bytes = self.io.read(chunk)
-        return self.cipher.decrypt(bytes)
-
-    def __iter__(self):
-        """Return an iterator to yield 1k chunks from the raw stream."""
-        return self.iter_chunks(self._DEFAULT_CHUNK_SIZE)
-
-    def iter_chunks(self, chunk_size: int = _DEFAULT_CHUNK_SIZE):
-        # type: (...) -> Iterable[bytes]
-        """Return an iterator to yield chunks of bytes from the raw `io` stream.
-
-        Parameters
-        ----------
-        chunk_size : int
-            iterates over no more than chunk_size bytes. If `None` use
-            `self._DEFAULT_CHUNK_SIZE`.
-
-        Returns
-        -------
-        Iterator[bytes]
-
-        """
-        decrypt = self.cipher.decrypt
-        chunks = self.io.iter_chunks(chunk_size)
-
-        return (decrypt(bytes) for bytes in chunks)
-
-    def close(self):
-        """Close the underlying http response stream."""
-        self.io.close()
-
-    def readable(self):
-        return True
-
-    def seekable(self):
-        return False
-
-    def writable(self):
-        return False
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py
deleted file mode 100644
index 3f613f19550c5..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/IONocrypter.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T17:01:20+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T16:57:10+00:00
-from typing import Iterable
-from botocore.response import StreamingBody
-from .IODecrypter import IODecrypter
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class IONocrypter(IODecrypter):
-
-    def __init__(self, io):
-        self.io: StreamingBody = io
-
-    def read(self, chunk=None):
-        return self.io.read(chunk)
-
-    def iter_chunks(self, chunk_size: int = None) -> Iterable[bytes]:
-        """Return an iterator to yield chunks of bytes from the raw `io` stream.
-
-        Parameters
-        ----------
-        chunk_size : int
-            iterates over no more than chunk_size bytes. If `None` use
-            `self._DEFAULT_CHUNK_SIZE`.
-
-        Returns
-        -------
-        Iterator[bytes]
-
-        """
-        if chunk_size is None:
-            chunk_size = self._DEFAULT_CHUNK_SIZE
-        return self.io.iter_chunks(chunk_size)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py
deleted file mode 100644
index 628c41928cecc..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/__init__.py
+++ /dev/null
@@ -1,183 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T15:15:44+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-11-28T15:15:44+00:00
-
-"""
-# From the RUBY Docs.
-
-Provides an encryption client that encrypts and decrypts data client-side,
-storing the encrypted data in Amazon S3.
-
-This client uses a process called "envelope encryption". Your private
-encryption keys and your data's plain-text are **never** sent to
-Amazon S3. **If you lose your encryption keys, you will not be able to
-decrypt your data.**
-
-## Envelope Encryption Overview
-
-The goal of envelope encryption is to combine the performance of
-fast symmetric encryption while maintaining the secure key management
-that asymmetric keys provide.
-
-A one-time-use symmetric key (envelope key) is generated client-side.
-This is used to encrypt the data client-side. This key is then
-encrypted by your master key and stored alongside your data in Amazon
-S3.
-
-When accessing your encrypted data with the encryption client,
-the encrypted envelope key is retrieved and decrypted client-side
-with your master key. The envelope key is then used to decrypt the
-data client-side.
-
-One of the benefits of envelope encryption is that if your master key
-is compromised, you have the option of just re-encrypting the stored
-envelope symmetric keys, instead of re-encrypting all of the
-data in your account.
-
-## Basic Usage
-
-The encryption client requires an {Aws::S3::Client}. If you do not
-provide a `:client`, then a client will be constructed for you.
-
-    require 'openssl'
-    key = OpenSSL::PKey::RSA.new(1024)
-
-    # encryption client
-    s3 = aws.s3.client_side_encryption.Client(encryption_key: key)
-
-    # round-trip an object, encrypted/decrypted locally
-    s3.put_object(bucket:'aws-sdk', key:'secret', body:'handshake')
-    s3.get_object(bucket:'aws-sdk', key:'secret').body.read
-    #=> 'handshake'
-
-    # reading encrypted object without the encryption client
-    # results in getting the cipher text
-    Aws::S3::Client.new.get_object(bucket:'aws-sdk', key:'secret').body.read
-    #=> "... cipher text ..."
-
-## Keys
-
-For client-side encryption to work, you must provide one of the following:
-
-* An encryption key
-* A {KeyProvider}
-* A KMS encryption key id
-
-### An Encryption Key
-
-You can pass a single encryption key. This is used as a master key
-encrypting and decrypting all object keys.
-
-    key = OpenSSL::Cipher.new("AES-256-ECB").random_key # symmetric key
-    key = OpenSSL::PKey::RSA.new(1024) # asymmetric key pair
-
-    s3 = Aws::S3::Encryption::Client.new(encryption_key: key)
-
-### Key Provider
-
-Alternatively, you can use a {KeyProvider}. A key provider makes
-it easy to work with multiple keys and simplifies key rotation.
-
-### KMS Encryption Key Id
-
-If you pass the id of an AWS Key Management Service (KMS) key,
-then KMS will be used to generate, encrypt and decrypt object keys.
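In this vendored Python port the same KMS-backed flow looks roughly as follows; a minimal sketch, assuming placeholder bucket, object key and alias names:

```python
# Hedged sketch of the KMS flow with this package's client; the bucket,
# object key and KMS alias below are placeholders, not real resources.
from vortexa_utils.aws.s3.client_side_encryption.client import Client

s3 = Client(key_id="alias/my-master-key")
with s3.get_object(Bucket="my-bucket", Key="secret") as body:
    plaintext = body.read()  # envelope key unwrapped via KMS, body decrypted locally
```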
-
-    # keep track of the kms key id
-    kms = Aws::KMS::Client.new
-    key_id = kms.create_key.key_metadata.key_id
-
-    Aws::S3::Encryption::Client.new(
-      kms_key_id: key_id,
-      kms_client: kms,
-    )
-
-## Custom Key Providers
-
-A {KeyProvider} is any object that responds to:
-
-* `#encryption_materials`
-* `#key_for(materials_description)`
-
-Here is a trivial implementation of an in-memory key provider.
-This is provided as a demonstration of the key provider interface,
-and should not be used in production:
-
-    class KeyProvider
-
-      def initialize(default_key_name, keys)
-        @keys = keys
-        @encryption_materials = Aws::S3::Encryption::Materials.new(
-          key: @keys[default_key_name],
-          description: JSON.dump(key: default_key_name),
-        )
-      end
-
-      attr_reader :encryption_materials
-
-      def key_for(matdesc)
-        key_name = JSON.load(matdesc)['key']
-        if key = @keys[key_name]
-          key
-        else
-          raise "encryption key not found for: #{matdesc.inspect}"
-        end
-      end
-    end
-
-Given the above key provider, you can create an encryption client that
-chooses the key to use based on the materials description stored with
-the encrypted object. This makes it possible to use multiple keys
-and simplifies key rotation.
-
-    # uses "new-key" for encrypting objects, uses either for decrypting
-    keys = KeyProvider.new('new-key', {
-      "old-key" => Base64.decode64("kM5UVbhE/4rtMZJfsadYEdm2vaKFsmV2f5+URSeUCV4="),
-      "new-key" => Base64.decode64("w1WLio3agRWRTSJK/Ouh8NHoqRQ6fn5WbSXDTHjXMSo="),
-    }),
-
-    # chooses the key based on the materials description stored
-    # with the encrypted object
-    s3 = Aws::S3::Encryption::Client.new(key_provider: keys)
-
-## Materials Description
-
-A materials description is a JSON document string that is stored
-in the metadata (or instruction file) of an encrypted object.
-The {DefaultKeyProvider} uses the empty JSON document `"{}"`.
-
-When building a key provider, you are free to store whatever
-information you need to identify the master key that was used
-to encrypt the object.
-
-## Envelope Location
-
-By default, the encryption client stores the encryption envelope
-with the object, as metadata. You can choose to have the envelope
-stored in a separate "instruction file". An instruction file
-is an object, with the key of the encrypted object, suffixed with
-`".instruction"`.
-
-Specify the `:envelope_location` option as `:instruction_file` to
-use an instruction file for storing the envelope.
-
-    # default behavior
-    s3 = Aws::S3::Encryption::Client.new(
-      key_provider: ...,
-      envelope_location: :metadata,
-    )
-
-    # store envelope in a separate object
-    s3 = Aws::S3::Encryption::Client.new(
-      key_provider: ...,
-      envelope_location: :instruction_file,
-      instruction_file_suffix: '.instruction' # default
-    )
-
-When using an instruction file, multiple requests are made when
-putting and getting the object. **This may cause issues if you are
-issuing concurrent PUT and GET requests to an encrypted object.**
-"""
-
-from .client import Client
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
deleted file mode 100644
index 954b2276986b2..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/cipher_provider.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-27T18:22:34+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-05T16:25:56+00:00
-from Cryptodome.Cipher import AES  # pycryptodomex
-
-
-class CipherProvider(object):
-    def __init__(self, key):
-        self.key = key
-
-    def decryptor(self, envelope):
-        pass
-
-    def encryptor(self):
-        cipher = AES.new(self.key, AES.MODE_GCM)
-        return cipher
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
deleted file mode 100644
index 6ebccdba9b9cd..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/client.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# @Author: richard
-# @Date: 2018-11-28T15:15:54+00:00
-# @Last modified by: richard
-# @Last modified time: 2018-12-06T18:07:33+00:00
-import boto3
-from .kms_cipher_provider import KMSCipherProvider
-from .decrypt_handeler import DecryptHandeler
-
-
-class Client(object):
-    """
-    Client Side Encryption S3 Client.
-
-    Attributes
-    ----------
-    s3 : botocore.client.S3
-    cipher_provider : .cipher_provider.CipherProvider
-
-    Methods
-    -------
-    get_object(Bucket, Key)
-        get and decrypt an object from s3
-
-    """
-
-    def __init__(
-            self,
-            client=None,
-            cipher_provider=None,
-            key_id=None,
-            strict=None,
-            **kwargs):
-        """Initialises the client side encryption s3 client.
-
-        Parameters
-        ----------
-        client : botocore.client.S3
-            Optional S3 client to use for s3 interaction.
-            Will create a client if not set.
-
-        cipher_provider : CipherProvider
-            Optional `CipherProvider` to provide the encryption cipher.
-            Will default to `KMSCipherProvider()` if not set.
-
-        key_id : str
-            The kms `key id`, `alias` or `aws::arn`
-            for the `KMSCipherProvider`.
-
-        region_name : str
-            The region for the kms and s3 client resources.
-
-        """
-        region_name = kwargs.get('region')
-        self.s3 = client or boto3.client('s3', **kwargs)
-        self.cipher_provider = (
-            cipher_provider or KMSCipherProvider(
-                key_id=key_id,
-                region_name=region_name
-            )
-        )
-        self.strict = strict
-
-    def get_object(self, Bucket, Key):
-        """Retrieve object from Amazon S3.
-
-        See also:
-        `AWS API Documentation `_
-
-        `AWS Client Side Encryption `_
-
-        Parameters
-        ----------
-        Bucket : str
-            **[REQUIRED]** The Bucket
-        Key : str
-            **[REQUIRED]** The Path Key in the Bucket
-
-        """
-        # location_info = self.s3.get_bucket_location(Bucket=Bucket)
-        # bucket_region = location_info['LocationConstraint']
-
-        obj = self.s3.get_object(Bucket=Bucket, Key=Key)
-        handeler = DecryptHandeler(obj, self, self.strict)
-        return handeler.decrypt()
-
-    def object_encrypted(self, Bucket, Key) -> bool:
-        """Check if object has encryption envelope.
- - Parameters - ---------- - Bucket : str - **[REQUIRED]** The Bucket - Key : str - **[REQUIRED]** The Path Key in the Bucket - - Returns - ------- - bool - - """ - obj = self.s3.head_object(Bucket=Bucket, Key=Key) - handeler = DecryptHandeler(obj, self) - return handeler.extract_envelop() is not None diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py deleted file mode 100644 index 464fc3c872642..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/decrypt_handeler.py +++ /dev/null @@ -1,121 +0,0 @@ -# @Author: richard -# @Date: 2018-11-27T17:24:50+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-06T16:38:19+00:00 -import boto3 -import logging - -from .IODecrypter import IODecrypter -from .IONocrypter import IONocrypter -from .IOAuthDecrypter import IOAuthDecrypter -from .IOAuthDecrypterTagLength import IOAuthDecrypterTagLength - -logger = logging.getLogger(__name__) -kms = boto3.client('kms') - - -class DecryptionException(Exception): - pass - - -class DecryptHandeler(object): - - V1_ENVELOPE_KEYS = { - 'x-amz-key', - 'x-amz-iv', - 'x-amz-matdesc' - } - - V2_ENVELOPE_KEYS = { - 'x-amz-key-v2', - 'x-amz-iv', - 'x-amz-cek-alg', - 'x-amz-wrap-alg', - 'x-amz-matdesc' - } - - POSSIBLE_ENVELOPE_KEYS = V1_ENVELOPE_KEYS | V2_ENVELOPE_KEYS - - POSSIBLE_ENCRYPTION_FORMATS = { - 'AES/GCM/NoPadding', - 'AES/CBC/PKCS5Padding', - 'AES/CBC/PKCS7Padding' - } - - def __init__(self, obj, context, strict=False): - self.obj = obj - self.context = context - self.metadata = obj['Metadata'] - self.body = obj['Body'] - self.strict = strict - - def decrypt(self): - cipher = self.decryption_cipher() - logger.debug(self.metadata) - if cipher: - logger.debug(cipher) - if self.auth_tag(): - return self.decrypt_auth(cipher) - return IODecrypter(cipher=cipher, io=self.body) - # Object not encrypted with an envelope - mesg = f"Unencrypted Object at {self.obj['ETag']}" - if self.strict: - logger.error(mesg) - raise ValueError(mesg) - else: - logger.warning(mesg) - return IONocrypter(io=self.body) - - def auth_tag(self): - return 'x-amz-tag-len' in self.metadata - - def decryption_cipher(self): - envelope = self.extract_envelop(self.metadata) - if envelope: - return self.context.cipher_provider.decryptor(envelope) - - def extract_envelop(self, meta): - if 'x-amz-key' in meta: - return self.envelope_v1(meta) - elif 'x-amz-key-v2' in meta: - return self.envelope_v2(meta) - - key_prefix = 'x-amz-key' - key = next((k for k in meta.keys() if k.startswith(key_prefix)), None) - if key is not None: - key_version = key[len(key_prefix):] - mesg = f'Unknown envelope encryption version {key_version}' - raise DecryptionException(mesg) - # no envelope found - return None - - def envelope_v2(self, meta): - if meta['x-amz-cek-alg'] not in self.POSSIBLE_ENCRYPTION_FORMATS: - alg = meta['x-amz-cek-alg'] - msg = f'unsuported content encrypting key format: {alg}' - raise DecryptionException(msg) - if meta['x-amz-wrap-alg'] != 'kms': - alg = meta['x-amz-wrap-alg'] - msg = f'unsupported key wrapping algorithm: {alg}' - raise DecryptionException(msg) - if not self.V2_ENVELOPE_KEYS <= set(meta.keys()): - msg = "incomplete v2 encryption envelope:\n" - msg += f" expected: #{', '.join(self.V2_ENVELOPE_KEYS)}\n" - msg += f" got: #{', '.join(meta.keys)}" - return meta - - def envelope_v1(self, meta): - 
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
deleted file mode 100644
index 7f961e62c814e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/get.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# @Author: richard
-# @Date:   2018-11-27T14:58:39+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-11-30T12:09:27+00:00
-
-# see https://medium.com/@samnco/reading-aws-ses-encrypted-emails-with-boto3-9c177f8ba130
-# and https://github.com/boto/boto3/issues/38
-
-import base64
-import json
-from Cryptodome.Cipher import AES  # pycryptodomex
-import boto3
-
-
-s3 = boto3.client('s3')
-kms = boto3.client('kms')
-
-
-def chunker(length, chunk_size):
-    index = 0
-    while index < length:
-        chunk = min(chunk_size, length - index)
-        index += chunk
-        yield chunk
-
-# e.g. list(chunker(40, 16)) == [16, 16, 8]
-
-
-def content_streamer(bytes_io, content_length, chunk_size=16*1024):
-    for chunk in chunker(content_length, chunk_size):
-        yield bytes_io.read(chunk)
-
-
-def decrypt_object(obj):
-    metadata = obj['Metadata']
-    key_alg = metadata['x-amz-cek-alg']
-
-    envelope_key = base64.b64decode(metadata['x-amz-key-v2'])
-    envelope_iv = base64.b64decode(metadata['x-amz-iv'])
-    encrypt_ctx = json.loads(metadata['x-amz-matdesc'])
-
-    # x-amz-tag-len is in bits, so // 8 to get bytes
-    tag_len = int(metadata['x-amz-tag-len']) // 8
-    original_size = int(metadata['x-amz-unencrypted-content-length'])
-
-    decrypted_envelope_key = kms.decrypt(
-        CiphertextBlob=envelope_key,
-        EncryptionContext=encrypt_ctx
-    )
-    key = decrypted_envelope_key['Plaintext']
-
-    if key_alg == 'AES/GCM/NoPadding':
-        cipher = AES.new(key, AES.MODE_GCM, envelope_iv)
-    elif key_alg == 'AES/CBC/PKCS5Padding':
-        cipher = AES.new(key, AES.MODE_CBC, envelope_iv)
-    else:
-        raise Exception('unknown encryption algorithm')
-
-    body = obj['Body'].read()
-    # a GCM payload is ciphertext || tag, so split on the plaintext length
-    body, tag = body[:original_size], body[original_size:]
-    email = cipher.decrypt(body)
-    cipher.verify(tag)
-    return email
-
-
-def get_object(bucket, key):
-    obj = s3.get_object(Bucket=bucket, Key=key)
-    location_info = s3.get_bucket_location(Bucket=bucket)
-    bucket_region = location_info['LocationConstraint']
-    return decrypt_object(obj)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
deleted file mode 100644
index 6700eedb5e0b4..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/s3/client_side_encryption/kms_cipher_provider.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# @Author: richard
-# @Date:   2018-11-27T18:20:28+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-05T17:20:59+00:00
-import base64
-import boto3
-import json
-
-from Cryptodome.Cipher import AES  # pycryptodomex
-from .cipher_provider import CipherProvider
-
-
-class KMSCipherProvider(CipherProvider):
-    aes_mode_map = {
-        'AES/GCM/NoPadding': AES.MODE_GCM,
-        'AES/CBC/PKCS5Padding': AES.MODE_CBC,
-        'AES/CBC/PKCS7Padding': AES.MODE_CBC
-    }
-
-    def __init__(self, key_id=None, **kwargs):
-        self.kms = boto3.client('kms', **kwargs)
-        self.key_id = key_id
-
-    def decryptor(self, envelope):
-        key_alg = envelope['x-amz-cek-alg']
-        aes_mode = self.aes_mode_map.get(key_alg)
-        if aes_mode is None:
-            raise Exception(f'unknown encryption algorithm {key_alg}')
-
-        envelope_key = base64.b64decode(envelope['x-amz-key-v2'])
-        iv = base64.b64decode(envelope['x-amz-iv'])
-        encryption_context = json.loads(envelope['x-amz-matdesc'])
-
-        decrypted_envelope = self.kms.decrypt(
-            CiphertextBlob=envelope_key,
-            EncryptionContext=encryption_context
-        )
-        key = decrypted_envelope['Plaintext']
-        cipher = AES.new(key, aes_mode, iv)
-        return cipher
-
-    def encryptor(self):
-        encryption_context = {"kms_cmk_id": self.key_id}
-
-        # ask KMS for a fresh data key; the CiphertextBlob goes in the envelope
-        key_data = self.kms.generate_data_key(
-            KeyId=self.key_id,
-            EncryptionContext=encryption_context,
-            KeySpec='AES_256'
-        )
-
-        key = key_data['Plaintext']
-        cipher = AES.new(key, AES.MODE_GCM)
-
-        envelope = {
-            'x-amz-key-v2': base64.encodebytes(key_data['CiphertextBlob']),
-            'x-amz-iv': base64.encodebytes(cipher.nonce),
-            'x-amz-cek-alg': 'AES/GCM/NoPadding',
-            'x-amz-wrap-alg': 'kms',
-            'x-amz-matdesc': json.dumps(encryption_context)
-        }
-        return envelope, cipher
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py
deleted file mode 100644
index 520cb4033d38a..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-06T18:06:14+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-06T18:06:14+00:00
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/application_mapper.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/application_mapper.py
deleted file mode 100644
index 1e910af5a7b9c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/application_mapper.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import io
-from typing import Callable
-from collections.abc import Mapping
-from functools import wraps
-import pandas as pd
-
-
-def read_input_wrapper(read_func=None, **kwargs):
-    """A decorator to make the `pandas.io.parser.read` functions
-    take `bytes` as input.
-
-    Parameters
-    ----------
-    `read_func` : `Callable[..., pd.DataFrame]`
-        The `pandas.io.parsers` function to decorate.
-        If not set `read_input_wrapper` will return a decorator.
-    **`kwargs` : `dict`
-        `kwargs` to pass on to `read_func`.
-
-    Returns
-    -------
-    function : `Callable[input: bytes, pd.DataFrame]` |
-        `Callable[[Callable[..., pd.DataFrame]],
-                  Callable[input: bytes, pd.DataFrame]]`
-        either a decorator which will wrap a pandas parser function,
-        or the wrapped parser function itself:
-
-    Examples
-    --------
-    >>> read_csv = read_input_wrapper(pd.read_csv)
-    >>> read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
-
-    or as a decorator
-
-    @read_input_wrapper
-    def read_foo(file, **kwargs) -> pd.DataFrame:
-        # some custom foo
-        return pd.DataFrame()
-
-    or
-
-    @read_input_wrapper(sep='\t')
-    def read_bar(file, **kwargs) -> pd.DataFrame:
-        # some custom bar
-        return pd.DataFrame()
-    """
-
-    def wrapper(func: Callable[..., pd.DataFrame]):
-
-        @wraps(func)
-        def reader(input: bytes) -> pd.DataFrame:
-            return func(io.BytesIO(input), **kwargs)
-        return reader
-
-    if read_func is None:
-        return wrapper
-    return wrapper(read_func)
-
-
-read_csv = read_input_wrapper(pd.read_csv)
-read_tsv = read_input_wrapper(pd.read_csv, sep='\t')
-read_excel = read_input_wrapper(pd.read_excel, sheet_name=None)
-
-
-class ApplicationMapper(Mapping):
-    """A `Mapping` class to map MIME application types to a pandas reader."""
-
-    application_mapping = {
-        "text/plain": read_tsv,
-        "text/csv": read_csv,
-        "application/vnd.ms-excel": read_excel
-    }
-
-    aplication_prefixed = (
-        (
-            'application/vnd.ms-excel.sheet',
-            read_excel
-        ),
-        (
-            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-            read_excel
-        )
-    )
-
-    def __getitem__(self, key):
-        func = self.application_mapping.get(key)
-        if func is not None:
-            return func
-        for prefix, func in self.aplication_prefixed:
-            if key.startswith(prefix):
-                return func
-
-    def __iter__(self):
-        return iter(self.application_mapping)
-
-    def __len__(self):
-        return len(self.application_mapping)
-
-
-application_mapping = ApplicationMapper()
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py
deleted file mode 100644
index d5ef58684ee7f..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/attachments.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import email
-from .application_mapper import application_mapping
-
-
-class Attachment(object):
-
-    def __init__(self, attachment: email.message.EmailMessage):
-        self.attachment = attachment
-
-    def to_df(self):
-        content_type = self.attachment.get_content_type()
-        reader = application_mapping.get(content_type)
-        if reader is None:
-            raise TypeError(f"unknown content_type {content_type}")
-        return reader(self.attachment.get_content())
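A short sketch of how these SES helpers compose with the `Inbox` class from the next file; the bucket and prefix names are illustrative, and `iter_attachments` is the standard `email.message.EmailMessage` API:

```python
from vortexa_utils.aws.ses.inbox import Inbox
from vortexa_utils.aws.ses.attachments import Attachment

inbox = Inbox(default_bucket="my-ses-inbound-bucket")  # illustrative bucket
for message in inbox.list_emails(Path="inbound/"):
    for part in message.iter_attachments():
        # pick the reader by MIME type and parse straight into DataFrames
        df = Attachment(part).to_df()
```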
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py
deleted file mode 100644
index 4c3664093d938..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/inbox.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-06T18:06:25+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-06T19:36:09+00:00
-from typing import Iterable
-import logging
-from datetime import datetime
-from functools import wraps
-import boto3
-# cd aws/vortexa_utils
-# import aws.s3.client_side_encryption.client as client
-import vortexa_utils.aws.s3.client_side_encryption.client as client
-import email
-import email.policy
-import email.parser
-from email.message import EmailMessage
-
-logger = logging.getLogger(__name__)
-
-
-class Inbox(object):
-    """Short summary.
-
-    Parameters
-    ----------
-    default_bucket : str
-        Default s3 Bucket to associate the inbox with.
-
-    """
-
-    def __init__(self, default_bucket: str = None, **kwargs):
-        """Short summary.
-
-        Parameters
-        ----------
-        default_bucket : str
-            Default s3 Bucket to associate the inbox with.
-        strict : bool
-            When True will not fetch unencrypted emails. Defaults to False.
-        **kwargs : dict
-            **`kwargs` to pass to `s3.client`.
-
-        """
-        self.bucket = default_bucket
-        self.s3crypto = client.Client(**kwargs)
-        self.s3 = self.s3crypto.s3
-        # Specify the default policy for email parsing else Parser defaults to
-        # email.policy.compat32 for python 3 and 2 compatibility
-        self.parser = email.parser.BytesParser(policy=email.policy.default)
-
-    def get_email(self, Key: str, Bucket: str = None) -> EmailMessage:
-        """Get `EmailMessage` Object from `Bucket`.
-
-        Parameters
-        ----------
-        Key : str
-            `Key` name of email in s3.
-        Bucket : str
-            s3 `Bucket` to look for email, will search `self.bucket` if `None`.
-
-        Returns
-        -------
-        email.message.EmailMessage
-            Email object.
-
-        """
-        Bucket = Bucket or self.bucket
-        if Bucket is None:
-            raise ValueError("Bucket not set")
-        with self.s3crypto.get_object(Bucket=Bucket, Key=Key) as io:
-            return self.parser.parse(io)
-
-    def list_objects(
-            self,
-            Bucket: str = None,
-            Path: str = None,
-            Begin: datetime = None,
-            Until: datetime = None):
-        # type: (...) -> Iterable['boto3.resources.factory.s3.ObjectSummary']
-        """List all objects in `Bucket` prefixed by `Path`.
-
-        Parameters
-        ----------
-        Bucket : str
-            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
-        Path : str
-            The `Path` prefix to filter the emails by, no filter if `None`.
-        Begin : datetime
-            Filter objects from this datetime.
-        Until : datetime
-            Filter objects until this datetime.
-
-        Returns
-        -------
-        iterable boto3.resources.factory.s3.ObjectSummary
-            List of matching email objects.
-
-        """
-        bucket = boto3.resource('s3').Bucket(Bucket or self.bucket)
-        # boto3 rejects Prefix=None, so fall back to an empty prefix
-        objs = bucket.objects.filter(Prefix=Path or "")
-        if Begin:
-            objs = (obj for obj in objs if obj.last_modified >= Begin)
-        if Until:
-            objs = (obj for obj in objs if obj.last_modified <= Until)
-
-        if Begin is None and Until is None:
-            # if there are no timestamps, don't bother sorting
-            return objs
-        return sorted(objs, key=lambda o: o.last_modified)
-
-    @wraps(list_objects, assigned=('__annotations__',))
-    def list_emails(self, **kwargs) -> Iterable[EmailMessage]:
-        """List all emails in `Bucket` prefixed by `Path`.
-
-        Parameters
-        ----------
-        Bucket : str
-            S3 `Bucket` to look for emails, will search `self.bucket` if `None`.
-        Path : str
-            The `Path` prefix to filter the emails by, no filter if `None`.
-        Begin : datetime
-            Filter objects from this datetime.
-        Until : datetime
-            Filter objects until this datetime.
-
-        Returns
-        -------
-        iterable emails
-            List of matching email objects.
-
-        Examples
-        --------
-        >>> inbox = Inbox()
-        >>> inbox.list_emails(Path='some/sub/folder')
-
-        """
-        objects = self.list_objects(**kwargs)
-        for obj in objects:
-            yield self.get_email(obj.key, obj.bucket_name)
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py
deleted file mode 100644
index 7eb901a004212..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .action import Action
-from .mail import Mail
-from .receipt import Receipt
-from .notification import Notification
-from .lambda_record import Record
diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py
deleted file mode 100644
index d62791f941960..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/action.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Action:
-    """Action Object.
-
-    Attributes
-    ----------
-    type : str
-        action that was executed. [S3, SNS, Bounce, Lambda, Stop, WorkMail].
-    topicArn : str
-        Amazon Resource Name (ARN) of the SNS topic of the notification.
-    bucketName : str
-        S3 bucket to which the message was published.
-        *Present only for the S3 action type.*
-    objectKey : str
-        name that uniquely identifies the email in the Amazon S3 bucket.
-        This is the same as the messageId in the mail Object.
-        *Present only for the S3 action type.*
-    smtpReplyCode : str
-        SMTP reply code, as defined by RFC 5321.
-        *Present only for the bounce action type.*
-    statusCode : str
-        SMTP enhanced status code, as defined by RFC 3463.
-        *Present only for the bounce action type.*
-    message : str
-        human-readable text to include in the bounce message.
-        *Present only for the bounce action type.*
-    sender : str
-        The email address of the sender of the email that bounced.
-        This is the address from which the bounce message was sent.
-        *Present only for the bounce action type.*
-    functionArn : str
-        ARN of the Lambda function that was triggered.
-        *Present only for the Lambda action type.*
-    invocationType : str
-        invocation type of the Lambda function. [RequestResponse, Event]
-        *Present only for the Lambda action type.*
-    organizationArn : str
-        ARN of the Amazon WorkMail organization.
- *Present only for the WorkMail action type.* - - _see - """ - type: str - topicArn: str = None - bucketName: str = None - objectKey: str = None - smtpReplyCode: str = None - statusCode: str = None - message: str = None - sender: str = None - functionArn: str = None - invocationType: str = None - organizationArn: str = None diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py deleted file mode 100644 index 3eecd720fedf8..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/lambda_record.py +++ /dev/null @@ -1,18 +0,0 @@ -from vortexa_utils.aws.utils.dataclasses import nested_dataclass -from .mail import Mail -from .receipt import Receipt - - -@nested_dataclass -class SESRecord: - receipt: Receipt - mail: Mail - - -@nested_dataclass -class Record: - """ - """ - eventSource: str # "aws:ses", - eventVersion: str # "1.0", - ses: SESRecord diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py deleted file mode 100644 index 49252ed6610f3..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/mail.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import List, Dict, Any -from dataclasses import dataclass - - -@dataclass -class Mail: - """Mail Object. - - Attributes - ---------- - destination: List[str] - A complete list of all recipient addresses (including To: and CC:) - from the MIME headers of the incoming email. - messageId: str - String that contains the unique ID assigned to the email by Amazon SES. - If the email was delivered to Amazon S3, the message ID is also the - Amazon S3 object key that was used to write the message to your Amazon - S3 bucket. - source: str - String that contains the email address (the envelope MAIL FROM address) - that the email was sent from. - timestamp: - String that contains the time at which the email was received, - in ISO8601 format. - headers: List[List[str]] - A list of Amazon SES headers and your custom headers. - Each header in the list has a name field and a value field. - commonHeaders: List[List[str]] - A list of headers common to all emails. - Each header in the list is composed of a name and a value. - headersTruncated: str - String that specifies whether the headers were truncated, - which will happen if the headers are larger than 10 KB. - Possible values are true and false. - - """ - - destination: List[str] - messageId: str - source: str - timestamp: str - headers: List[Dict[str, str]] - commonHeaders: Dict[str, Any] - headersTruncated: str diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py deleted file mode 100644 index 19fee6d3060d4..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/notification.py +++ /dev/null @@ -1,29 +0,0 @@ -from vortexa_utils.aws.utils.dataclasses import nested_dataclass -from . import Mail, Receipt - - -@nested_dataclass -class Notification: - """Notification Object. - - Attributes - ---------- - notificationType: str - The notification type. For this type of notification, - the value is always Received. 
- receipt : Recipt - Object that contains information about the email delivery. - mail : Mail - Object that contains information about the email - associated with the notification. - content : str - String that contains the raw, unmodified email, which is typically - in Multipurpose Internet Mail Extensions (MIME) format. - *Only if the notification was triggered by an SNS action.* - - """ - - notificationType: str - receipt: Receipt - mail: Mail - content: str diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py deleted file mode 100644 index b5d1a3857508d..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/receipt.py +++ /dev/null @@ -1,65 +0,0 @@ -from vortexa_utils.aws.utils.dataclasses import nested_dataclass -from .action import Action -from .verdicts import (DKIMVerdict, - DMARCVerdict, - SPFVerdict, - SpamVerdict, - VirusVerdict) - - -@nested_dataclass -class Receipt: - """SNS Recipt object. - - Attributes - ---------- - action : Action - Encapsulates information about the action that was executed. - - dkimVerdict : DKIMVerdict - Indicates whether the DomainKeys Identified Mail (DKIM) check passed. - - dmarcPolicy : str - Domain-based Message Authentication, Reporting & Conformance (DMARC) - settings for the sending domain. - This field only appears if the message fails DMARC authentication. - Possible values for this field are: - - none: no specific action be taken on messages that fail DMARC. - - quarantine: messages that fail DMARC be treated as suspicious. - - reject: messages that fail DMARC authentication be rejected. - - dmarcVerdict : DMARCVerdict - Indicates whether the DMARC check passed. - - processingTimeMillis : str - `str` specifies the period, in milliseconds, from the time Amazon SES - received the message to the time it triggered the action. - - recipients : list[str] - list of recipients that were matched by the active receipt rule. - The addresses may differ from those listed by the destination field - in the mail Object. - - spamVerdict : SpamVerdict - Indicates whether the message is spam - - spfVerdict : SPFVerdict - Whether the Sender Policy Framework (SPF) check passed - - timestamp : str - ISO 8601 format string representing when the action was triggered. - - virusVerdict : VirusVerdict - Whether the message contains a virus. - For a list of possible values, see virusVerdict Object. - """ - action: Action - processingTimeMillis: str - recipients: str - timestamp: str - dmarcPolicy: str = None - dmarcVerdict: DMARCVerdict = None - dkimVerdict: DKIMVerdict = None - spamVerdict: SpamVerdict = None - spfVerdict: SPFVerdict = None - virusVerdict: VirusVerdict = None diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py deleted file mode 100644 index a4a47e06ce02f..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/ses/notification/types/verdicts.py +++ /dev/null @@ -1,43 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class Verdict(object): - """Verdict object. - - Attributes - ---------- - status : str - String that contains the verdict. Possible values are: - - PASS: The message passed the given test. - - FAIL: The message failed the given test. 
- - GRAY: The message failed the given test, - - PROCESSING_FAILED: There is an issue that prevents Amazon SES - from providing a verdict to the given test. - """ - status: str - - -@dataclass -class DKIMVerdict(Verdict): - ... - - -@dataclass -class DMARCVerdict(Verdict): - ... - - -@dataclass -class SpamVerdict(Verdict): - ... - - -@dataclass -class SPFVerdict(Verdict): - ... - - -@dataclass -class VirusVerdict(Verdict): - ... diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py deleted file mode 100644 index dda33076e9246..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:13:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:13:18+00:00 diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py deleted file mode 100644 index 0b443f83003f7..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .nested import * diff --git a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py b/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py deleted file mode 100644 index 22e1b071fd8d0..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/aws/vortexa_utils/aws/utils/dataclasses/nested.py +++ /dev/null @@ -1,20 +0,0 @@ -from dataclasses import dataclass, is_dataclass -from functools import wraps - - -def nested_dataclass(*args, **kwargs): - def wrapper(cls): - cls = dataclass(cls, **kwargs) - original_init = cls.__init__ - - @wraps(original_init) - def __init__(self, *args, **kwargs): - for name, value in kwargs.items(): - field_type = cls.__annotations__.get(name, None) - if is_dataclass(field_type) and isinstance(value, dict): - new_obj = field_type(**value) - kwargs[name] = new_obj - original_init(self, *args, **kwargs) - cls.__init__ = __init__ - return cls - return wrapper(args[0]) if args else wrapper diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py b/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py deleted file mode 100644 index 3b8f0c0e2ec81..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/collections/tests/collections/types/test_instance_caching_abc.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon Nov 19 14:02:03 2018 -@author: richard -""" -import unittest - -from vortexa_utils.collections.types.instance_caching_abc import ( - InstanceCachingABC, - instance_caching) - - -class InstanceCachingABCTests(unittest.TestCase): - - def register_class(self, klass): - setattr(self, klass.__name__, 
klass) - return klass - - def setUp(self): - @self.register_class - class Foo(object, metaclass=InstanceCachingABC): - pass - - @self.register_class - class Bar(object): - pass - - def test_signiture(self): - self.assertEqual(repr(self.Foo), repr(self.Bar).replace('Bar', 'Foo')) - - def test_instance_cache(self): - # no instances - self.assertFalse(list(self.Foo)) - - # one instance - foo = self.Foo() - foos = list(self.Foo) - self.assertEqual(len(foos), 1) - klass_name, instance = foos[0] - self.assertEqual(instance, foo) - self.assertEqual(klass_name, 'Foo') - - # more instances - foo2 = self.Foo() - foos = list(self.Foo) - self.assertEqual(len(foos), 2) - klass_name, instance = foos[-1] - self.assertEqual(instance, foo2) - self.assertEqual(klass_name, 'Foo') - - -class InstanceCachingDecoratorTests(InstanceCachingABCTests): - - def setUp(self): - register = self.register_class - - @register - class Foo(object): - pass - - self._Foo = Foo - self.Foo = Foo = instance_caching(Foo) - - @register - class Bar(Foo): - pass - - @register - class Baz(Bar): - pass - - @register - class Bo(Foo): - pass - - @register - class Bill(Bo): - pass - - def test_signiture(self): - self.assertEqual(repr(self.Foo), repr(self._Foo)) - - def test_list_subclasses(self): - self.assertEqual( - set(self.Foo._allsubclasses()), - set((self.Foo, self.Bar, self.Baz, self.Bo, self.Bill)) - ) - self.assertEqual( - set(self.Bar._allsubclasses()), - set((self.Bar, self.Baz)) - ) - self.assertEqual( - set(self.Bo._allsubclasses()), - set((self.Bill, self.Bo)) - ) - - def test_instance_cache(self): - super().test_instance_cache() - # no instances in subclasses - for klass in self.Bar._allsubclasses(): - self.assertFalse(list(klass)) - - for klass in self.Bo._allsubclasses(): - self.assertFalse(list(klass)) - - self.assertEqual(len(list(self.Foo)), 2) - # one instance - bar = self.Bar() - foos = list(self.Foo) - bars = list(self.Bar) - self.assertEqual(len(foos), 3) - self.assertEqual(len(bars), 1) - klass_name, instance = bars[0] - self.assertEqual(instance, bar) - self.assertEqual(klass_name, 'Bar') - - baz = self.Baz() - foos = list(self.Foo) - bars = list(self.Bar) - bazs = list(self.Baz) - self.assertEqual(len(foos), 4) - self.assertEqual(len(bars), 2) - self.assertEqual(len(bazs), 1) - klass_name, instance = bazs[0] - self.assertEqual(instance, baz) - self.assertEqual(klass_name, 'Baz') - - for klass in self.Bo._allsubclasses(): - self.assertFalse(list(klass)) diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/__inti__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py b/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py deleted file mode 100644 index cdc6c556c07be..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/collections/vortexa_utils/collections/types/instance_caching_abc.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon Nov 19 09:57:05 2018 -@author: richard -""" -from abc 
import ABCMeta - - -class InstanceCachingABC(ABCMeta): - """Metaclass for defining Instance Caching Abstract Base Classs (ICABC) - Use this metaclass to create an ICABC. An ICABC will remember the instances - created from it and can be iterated over to return all instances and sub - class instances - """ - - def __init__(cls, name, bases, namespace): - super().__init__(name, bases, namespace) - cls._instances = list() - - def __call__(cls, *args, **kwargs): - instance = super().__call__(*args, **kwargs) - cls._instances.append(instance) - return instance - - def _allsubclasses(cls): - yield cls - for subclass in cls.__subclasses__(): - yield from subclass._allsubclasses() - - # Metamethods, called on class objects: - def __iter__(cls): - return ((klass.__name__, instance) - for klass in cls._allsubclasses() - for instance in klass._instances) - - -def instance_caching(klass): - class Decorated(klass, metaclass=InstanceCachingABC): - pass - - Decorated.__name__ = klass.__name__ - Decorated.__qualname__ = klass.__qualname__ - Decorated.__module__ = klass.__module__ - return Decorated diff --git a/vendor/github.com/V0RT3X4/python_utils/database/README.md b/vendor/github.com/V0RT3X4/python_utils/database/README.md deleted file mode 100644 index 4c64ed6286b79..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Vortexa Utils DatabaseFactory - -Small factory class to give you a `SqlAlchemy` engine connection to an -`AWS rds` instance ensuring SSL and credentials are obtained with the secrets manager -## Usage - -```python -db_factory = DatabaseFactory() -engine = db_factory.engine(dbname='rolling_backup') - -sql = """ -SELECT - name -FROM new_polygons where name is not Null; -""" - -engine.execute(sql) -``` -## TODO Other utility functions - -- [ ] create a `~/.dbpass` file diff --git a/vendor/github.com/V0RT3X4/python_utils/database/README.rst b/vendor/github.com/V0RT3X4/python_utils/database/README.rst deleted file mode 100644 index 5f2775e7ba207..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/README.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. contents:: - :depth: 3 -.. - -Vortexa Utils DatabaseFactory -============================= - -Small factory class to give you a ``SqlAlchemy`` engine connection to an -``AWS rds`` instance ensuring SSL and credentials are obtained with the -secrets manager ## Usage - -.. 
code:: python - - db_factory = DatabaseFactory() - engine = db_factory.engine(dbname='rolling_backup') - - sql = """ - SELECT - name - FROM new_polygons where name is not Null; - """ - - engine.execute(sql) - -TODO Other utility functions ----------------------------- - -- [ ] create a ``~/.dbpass`` file diff --git a/vendor/github.com/V0RT3X4/python_utils/database/setup.py b/vendor/github.com/V0RT3X4/python_utils/database/setup.py deleted file mode 100644 index 4ea029d37a074..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T19:14:15+00:00 -import io -import os - -from setuptools import find_packages, setup - -namespace = "vortexa_utils" -description = ("Vortexa Database Engine Factory",) - -dependencies = ["boto3", "SqlAlchemy", "psycopg2-binary", "requests"] - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.rst") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -packages = [ - package for package in find_packages() if package.startswith(namespace) -] - -setup( - name="vortexa_utils_database", - version="0.0.1", - description=description, - long_description=readme, - author="Richard Mathie", - author_email="richard.mathie@vortexa.com", - zip_safe=False, - tests_require=["nose2"], - test_suite="nose2.collector.collector", - packages=packages, - install_requires=dependencies, - extras_require={"query_cache": ["pandas", "pyarrow"]}, -) diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py deleted file mode 100644 index 45ad343c6c796..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_database_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -import unittest - -from vortexa_utils.database import DatabaseFactory - - -class TestEngineFactory(unittest.TestCase): - def test_create_factory(self): - db_factory = DatabaseFactory() - return db_factory - - def test_get_cert(self): - db_factory = self.test_create_factory() - cert_file = db_factory.fetch_cert() - self.assertEqual(cert_file, db_factory.cert_file) - assert os.path.isfile(cert_file) diff --git a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py b/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py deleted file mode 100644 index 2e441f58cdb2e..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/tests/test_querey_cache.py +++ /dev/null @@ -1,21 +0,0 @@ -# cd database -import logging - -from vortexa_utils.database.default_factories import DevFactory -from vortexa_utils.database.query_cache import QueryCache - -logger = logging.getLogger(__name__) - -logging.basicConfig(level=logging.DEBUG) - -# factory = DevFactory() -# engine = factory.engine() -# qc = QueryCache() - -# %time df = qc.read_sql("clarksons", engine) - - -def test_filename(): - qc = QueryCache() - assert qc.filename("some random query") == "qAdzxvMgeSc=.parquet.snappy" - assert qc.filename("banned_words") == "LoRkfDuNmuA=.parquet.snappy" diff --git 
a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py deleted file mode 100644 index a2ae790eb1d2c..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T19:01:36+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T19:01:39+00:00 -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py deleted file mode 100644 index 5c67964aad121..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:55:58+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T17:55:58+00:00 - -from .database import DatabaseFactory -from .default_factories import DevFactory, ProdFactory, RedFactory diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py deleted file mode 100644 index 8634168939edd..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/database.py +++ /dev/null @@ -1,118 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:58:19+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T17:58:19+00:00 -import json -import logging -import os -from dataclasses import dataclass, field -from typing import Dict - -import boto3 -import requests -from sqlalchemy import create_engine - -logger = logging.getLogger(__name__) - -secretsmanager = boto3.client("secretsmanager") - -DEFAULT_CERT_URL = ( - "https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem" -) -DEFAULT_CERT_PATH = "/tmp/vortexa_utils_py/rds/ca-bundle.pem" - -DEFAULT_CREDENTIAL = "rds/dev/default" -DEFAULT_CREDENTIAL_MAPPING = dict( - host="host", username="user", port="port", password="password" -) - - -@dataclass -class DatabaseFactory(object): - """DatabaseFactory Class. - - Class for createing a database engine factory. - - usage:: - - factory = DatabaseFactory() - engine = factory.engine() - - Parameters - ---------- - secret_id : str - `secret_id` of the database credential. 
- (the default is 'rds/dev/default' wich points to the dev database host) - cert_file : str - The location to store the ssl certificate file - cert_url : str - The url to fetch the aws rds ssl certificates from - credential_mapping : Dict[str, str] - A mapping between the `psycopg` connection args and the credential keys - """ - - secret_id: str = DEFAULT_CREDENTIAL - cert_file: str = DEFAULT_CERT_PATH - cert_url: str = DEFAULT_CERT_URL - credential_mapping: Dict[str, str] = field( - default_factory=lambda: dict(DEFAULT_CREDENTIAL_MAPPING) - ) - - def __post_init__(self): - logger.debug(f"Created {self.secret_id} factory object") - - def fetch_cert(self, force: bool = False): - if not os.path.isfile(self.cert_file) or force: - logger.info("getting cert") - os.makedirs(os.path.dirname(self.cert_file), exist_ok=True) - cert = requests.get(self.cert_url) - with open(self.cert_file, "w") as f: - f.write(cert.text) - return self.cert_file - - def get_credential(self): - secret = secretsmanager.get_secret_value(SecretId=self.secret_id) - return json.loads(secret["SecretString"]) - - def engine(self, dbname: str = None, echo: bool = False, **kwargs): - # type (...) -> sqlalchemy.engine.Engine - """`sqlalchemy.engine.Engine` instance factory. - - Parameters - ---------- - dbname : str - database name `dbname` to connect to. - (the default is `None`, which will use the dbname in the secret - credential). - echo : bool - `echo` (the default is False). - - Returns - ------- - sqlalchemy.engine.Engine - SQLalchemy connection engine - - Examples - ------- - >>> factory = DatabaseFactory() - >>> engine = factory.engine() - - """ - cert_filename = self.fetch_cert() - credential = self.get_credential() - connect_args = { - v: credential[k] for k, v in self.credential_mapping.items() - } - - dbname = dbname or os.environ.get("DBNAME") or credential["dbname"] - host = connect_args.pop("host") - port = connect_args.pop("port") - - connect_args.update(sslmode="verify-full", sslrootcert=cert_filename) - engine = create_engine( - f"postgresql://{host}:{port}/{dbname}", - echo=echo, - connect_args=connect_args, - **kwargs, - ) - return engine diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py deleted file mode 100644 index d4f8ae0ca09e1..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/default_factories.py +++ /dev/null @@ -1,20 +0,0 @@ -from dataclasses import dataclass - -from .database import DatabaseFactory - - -@dataclass -class DevFactory(DatabaseFactory): - secret_id: str = "rds/dev/default" - - -@dataclass -class ProdFactory(DatabaseFactory): - secret_id: str = "rds/prod/default" - - -@dataclass -class RedFactory(DatabaseFactory): - cert_url: str = "https://s3.amazonaws.com/redshift-downloads/redshift-ca-bundle.crt" - cert_file: str = "/tmp/vortexa_utils_py/rds/redshift-ca-bundle.pem" - secret_id: str = "redshift/prod/default" diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py deleted file mode 100644 index ea86e9a914cd5..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/query_cache.py +++ /dev/null @@ -1,77 +0,0 @@ -import hashlib -import logging -import os -import time -from base64 import urlsafe_b64encode -from dataclasses import dataclass 
-from functools import wraps
-from typing import Union
-
-import pandas as pd
-from sqlalchemy.engine import Connection, Engine
-
-from pyarrow.lib import ArrowIOError
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class QueryCache(object):
-    result_extension: str = ".parquet.snappy"
-    cache_dir: str = os.path.join(
-        "/tmp", "python_utils", "database", "query_cache", "df_cache"
-    )
-    ttl: int = 3600
-
-    def __post_init__(self):
-        os.makedirs(self.cache_dir, exist_ok=True)
-
-    def path(self, url):
-        return os.path.join(
-            self.cache_dir,
-            url.drivername,
-            f"{url.host}:{url.port}",
-            url.database,
-        )
-
-    def filename(self, query):
-        query_digest = urlsafe_b64encode(
-            hashlib.blake2s(str(query).encode(), digest_size=8).digest()
-        )
-        return query_digest.decode("ascii") + self.result_extension
-
-    @wraps(pd.read_sql)
-    def read_sql(
-        self,
-        query: str,
-        con: Union[Engine, Connection],
-        ttl: int = None,
-        invalidate_cache: bool = False,
-        *args,
-        **kwargs,
-    ) -> pd.DataFrame:
-
-        # formulate a path
-        path = self.path(con.engine.url)
-        filename = self.filename(query)
-        filepath = os.path.join(path, filename)
-        os.makedirs(path, exist_ok=True)
-
-        # check if the cache exists, is valid, and has not been invalidated
-        ttl = self.ttl if ttl is None else ttl
-
-        if (
-            not invalidate_cache
-            and os.path.isfile(filepath)
-            and time.time() - os.path.getmtime(filepath) < ttl
-        ):
-            try:
-                logger.debug("reading from cache %s", filepath)
-                df = pd.read_parquet(filepath)
-            except ArrowIOError as e:
-                logger.error("Invalid Cache file, error: %s", e)
-            else:
-                return df
-        logger.debug("reading from database")
-        df = pd.read_sql(query, con, *args, **kwargs)
-        df.to_parquet(filepath)
-        return df
diff --git a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py b/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py
deleted file mode 100644
index 811e36443265d..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/database/vortexa_utils/database/utils.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from typing import Iterable, List
-
-import sqlalchemy
-from pandas.io.sql import SQLTable
-from sqlalchemy.engine import Connectable
-
-
-def upsert(
-    table: SQLTable, conn: Connectable, keys: List[str], data_iter: Iterable
-):
-    """Upsert method to be used with `pandas.DataFrame.to_sql`.
-
-    In pandas > 0.24.0 you can specify a method to control the insertion clause
-    used by `pandas.DataFrame.to_sql`.
-
-    Parameters
-    ----------
-    table : pandas.io.sql.SQLTable
-        Target table wrapper, supplied by `to_sql`.
-    conn : sqlalchemy.engine.Connectable
-        Database connection or engine, supplied by `to_sql`.
-    keys : List[str]
-        Names of the columns being inserted.
-    data_iter : Iterable
-        Iterable of row tuples to insert.
-
-    Returns
-    -------
-    None
-        Rows are inserted as a side effect; rows that clash with an
-        existing key are ignored.
-
-    Examples
-    --------
-    >>> df.to_sql("my_table", engine, if_exists="append",
-    ...           index=False, method=upsert)
-
-    """
-    cols = ", ".join(f'"{k}"' for k in keys)
-    if table.schema:
-        tname = "{}.{}".format(table.schema, table.name)
-    else:
-        tname = table.name
-
-    # placeholder = ", ".join(["?"] * len(keys))
-    placeholder = ", ".join([f":{k}" for k in keys])
-    datas = ({k: d for k, d in zip(keys, data)} for data in data_iter)
-    if conn.engine.driver.endswith("sqlite"):
-        # sqlite
-        sql = f"INSERT or IGNORE INTO {tname} ({cols}) VALUES ({placeholder})"
-    else:
-        # postgresql
-        sql = sqlalchemy.text(
-            f"""
-            INSERT INTO {tname}
-            ({cols})
-            VALUES ({placeholder})
-            ON CONFLICT DO NOTHING
-            """
-        )
-
-    conn.execute(sql, *datas)
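The helper above only covers the keep-existing (`DO NOTHING`) path. For the overwrite flavour, a hedged sketch of a PostgreSQL-only variant — not part of this library — which assumes the caller names the table's conflict (unique index) columns, since `to_sql` does not pass them in, and that at least one inserted column is not a conflict column:

```python
from functools import partial

import sqlalchemy


def upsert_update(table, conn, keys, data_iter, conflict_cols=("id",)):
    """Overwrite clashing rows instead of ignoring them (PostgreSQL only).

    conflict_cols is an assumption: the caller must name the unique index.
    Schema qualification is omitted for brevity.
    """
    cols = ", ".join(f'"{k}"' for k in keys)
    placeholder = ", ".join(f":{k}" for k in keys)
    # every non-key column is refreshed from the incoming row
    updates = ", ".join(
        f'"{k}" = EXCLUDED."{k}"' for k in keys if k not in conflict_cols
    )
    sql = sqlalchemy.text(
        f"INSERT INTO {table.name} ({cols}) VALUES ({placeholder}) "
        f"ON CONFLICT ({', '.join(conflict_cols)}) DO UPDATE SET {updates}"
    )
    conn.execute(sql, [dict(zip(keys, row)) for row in data_iter])


# e.g. df.to_sql("my_table", engine, if_exists="append", index=False,
#                method=partial(upsert_update, conflict_cols=("id",)))
```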
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py b/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py
deleted file mode 100644
index 6432302dac087..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/setup.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:54:43+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:16:08+00:00
-from setuptools import setup, find_packages
-
-
-setup(
-    name='vortexa_utils_deploy',
-    version='0.0.1',
-    description='',
-    long_description='',
-
-    author='Richard Mathie',
-    author_email='richard.mathie@vortexa.com',
-
-    zip_safe=False,
-
-    packages=find_packages(),
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py
deleted file mode 100644
index a2ae790eb1d2c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T19:01:36+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T19:01:39+00:00
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md
deleted file mode 100644
index f1d2dcd78744e..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/Readme.md
+++ /dev/null
@@ -1 +0,0 @@
-# Portainer API Helper Module
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py
deleted file mode 100644
index 26e33c55820aa..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T17:56:21+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:11:43+00:00
-
-
-def notNone(x):
-    return x is not None
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py
deleted file mode 100644
index 456ace9496cba..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/api.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T18:05:38+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:14:40+00:00
-import os
-import requests
-from functools import wraps
-from urllib.parse import urlparse
-from .stacks import Stacks
-from . import notNone
-
-
-class RequestHelper(object):
-    def __init__(self, api, base_url='api'):
-        self.api = api
-        self.base_url = base_url
-
-    def wrapper(self, func):
-        @wraps(func)
-        def caller(url=None, *args, **kwargs):
-            parts = filter(notNone, (self.api.host, self.base_url, url))
-            parts = map(str, parts)
-            headers = kwargs.get("headers", {})
-            headers.update(self.api.get_header())
-            kwargs["headers"] = headers
-            return func(os.path.join(*parts),
-                        *args, **kwargs).json()
-        return caller
-
-    def __getattr__(self, name, *args, **kwargs):
-        method = getattr(requests, name, *args, **kwargs)
-        return self.wrapper(method)
-
-
-class PortainerAPI(object):
-    def __init__(self, host, user=None, pw=None):
-        self.host = urlparse(host, scheme='http').geturl()
-        self.user = user
-        self.pw = pw
-        if any(value is not None for value in (host, user, pw)):
-            self.get_jwt()
-        self.requests = RequestHelper(self)
-        self.stacks = Stacks(self)
-
-    def get_jwt(self):
-        """
-        http POST :9000/api/auth Username="admin" Password="adminpassword"
-        """
-        url = f'{self.host}/api/auth'
-        resp = requests.post(url, json=dict(Username=self.user,
-                                            Password=self.pw))
-        self.token = resp.json().get('jwt')
-        return self.token
-
-    def get_header(self):
-        return {"Authorization": f"Bearer {self.token}"}
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py
deleted file mode 100644
index 8eaf2f8d7482d..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/stacks.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T18:04:55+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:34:09+00:00
-from .api import RequestHelper
-
-
-class Stacks(object):
-    def __init__(self, api):
-        self.api = api
-        self.requests = RequestHelper(api, 'api/stacks')
-
-    def get(self, stack_id):
-        return self.requests.get(stack_id)
-
-    def list(self):
-        return self.get(None)
-
-    def filter(self, **kwargs):
-        def filter_kwargs(stack):
-            return all(str(stack[k]) == str(v) for k, v in kwargs.items())
-
-        return filter(filter_kwargs, self.list())
-
-    def first(self, **kwargs):
-        return next(self.filter(**kwargs))
-
-    def get_file(self, stack_id):
-        return self.requests.get(f'{stack_id}/file')
-
-    def update(self, stack_id=None, endpointId=None, name=None,
-               Env=None, StackFileContent=None, Prune=False):
-        # get the stack by filtering on name or stack_id
-        if name is not None:
-            stack = self.first(Name=name)
-            stack_id = stack['Id']
-        elif stack_id is not None:
-            stack = self.get(stack_id)
-
-        endpointId = stack.get('EndpointId', endpointId)
-        if endpointId is None:
-            raise Exception("no endpointId found or set")
-
-        # update the old Env with the new Env
-        old_Env = stack.get('Env')
-        if old_Env is not None:
-            update_keys = set(e['name'] for e in Env)
-            old_Env = list(e for e in old_Env if e['name'] not in update_keys)
-            Env += old_Env
-
-        if StackFileContent is None:
-            StackFileContent = self.get_file(stack_id)['StackFileContent']
-        body = dict(StackFileContent=StackFileContent,
-                    Env=Env,
-                    Prune=Prune)
-
-        return self.requests.put(
-            stack_id,
-            params=dict(endpointId=endpointId),
-            json=body
-        )
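A usage sketch of the two classes above; the host, credentials, and stack/variable names are all illustrative:

```python
from vortexa_utils.portainer.api import PortainerAPI

api = PortainerAPI(
    host="https://portainer.example.com:9000",  # illustrative
    user="deploy",
    pw="s3cret",
)
# bump one environment variable on a named stack; other Env entries are kept
api.stacks.update(name="my-stack", Env=[{"name": "BUILD_NUM", "value": "42"}])
```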
diff --git a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py b/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py
deleted file mode 100644
index 275f8e6dd8604..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/deployment/vortexa_utils/portainer/update_stack.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# @Author: richard
-# @Date:   2018-12-04T18:10:07+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T18:31:45+00:00
-import argparse
-from pprint import pprint
-from . import notNone
-from .api import PortainerAPI
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Portainer API CLI')
-    parser.add_argument('--host', '-H', type=str,
-                        help='Host name of Portainer API',
-                        default='https://lawgiver.vortexa.com:9000')
-    parser.add_argument('--user', '-u', type=str,
-                        help='User name',
-                        default='kraftwork_updater')
-    parser.add_argument('--pass', '-p', type=str, dest='password',
-                        help='Password name')
-
-    parser.add_argument('--name', '-n', type=str,
-                        help='Stack name to filter')
-
-    parser.add_argument('--env', '-e', nargs=2, action='append',
-                        help='key value pairs of config to update')
-
-    parser.add_argument('--filter', '-f', nargs=2, action='append',
-                        help='key value pairs of config to filter by')
-
-    def add_cmd(flag):
-        def command(func):
-            parser.add_argument(
-                flag,
-                action='store_const',
-                const=func,
-                dest='cmd'
-            )
-            return func
-        return command
-
-    def get_filter():
-        Filter = {}
-        if args.filter is not None:
-            Filter.update(args.filter)
-        if args.name is not None:
-            Filter.update(Name=args.name)
-        return Filter
-
-    @add_cmd('--list')
-    def list_stacks():
-        if any(map(notNone, ((args.name, args.filter)))):
-            Filter = get_filter()
-            return list(api.stacks.filter(**Filter))
-        else:
-            return api.stacks.list()
-
-    @add_cmd('--update')
-    def update_stacks():
-        env = [dict(name=k, value=v) for k, v in args.env]
-        return api.stacks.update(name=args.name, Env=env)
-
-    args = parser.parse_args()
-
-    api = PortainerAPI(host=args.host,
-                       user=args.user,
-                       pw=args.password)
-
-    pprint(args.cmd())
-
-# api.stacks.list()
-# api.stacks.update(
-#     1, 1,
-#     Env=[{
-#         "name": "KFAFTWERK_BUILD_NUM",
-#         "value": '376'
-#     }]
-# )
-#
-#
-# content = Path('docker/scripts/docker-compose.yml').read_text()
-#
-# api.requests.post('stacks?type=1&method=string&endpointId=1',
-#                   json=dict(
-#                       Name="myStack",
-#                       StackFileContent=content,
-#                       Env=[dict(name="Hello", value="world")],
-#                       SwarmID='729a4f2h5kj2sd42x34pl3uu1'
-#                   )
-#                   )
diff --git a/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile b/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile
deleted file mode 100644
index 76155dd44eb33..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/docker/pandas/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-FROM python:3.7-alpine
-ARG PANDAS_VERSION=0.24.1
-
-RUN apk add libstdc++ \
-    && apk add --no-cache --virtual .build-deps \
-        g++ \
-        gcc \
-        gfortran \
-        build-base \
-        wget \
-        freetype-dev \
-        libpng-dev \
-        openblas-dev \
-        postgresql-dev \
-        musl-dev \
-    && ln -s /usr/include/locale.h /usr/include/xlocale.h \
-    && pip install wheel \
-    && pip install --no-cache \
-        numpy==1.15.1 \
-        scipy \
-        psycopg2-binary \
-        sqlalchemy \
-    && pip install --no-cache \
-        pandas==${PANDAS_VERSION} \
-    && apk del .build-deps
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/README.rst b/vendor/github.com/V0RT3X4/python_utils/general/README.rst
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/setup.py b/vendor/github.com/V0RT3X4/python_utils/general/setup.py
deleted file mode 100644
index c4a958adb103c..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/setup.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import io
-import os
-
-from setuptools import setup, find_packages
-
-namespace = 'vortexa_utils'
-name = 'vortexa_utils_general'
-version = '1.0.0'
-description = 'Vortexa general utils helper library'
-
-dependencies = [
-    'gitpython',
-    'logzero',
-    'tenacity'
-]
-
-# Setup boilerplate below
-
-package_root = os.path.abspath(os.path.dirname(__file__))
-
-readme_filename = os.path.join(package_root, 'README.rst')
-with io.open(readme_filename, encoding='utf-8') as readme_file:
-    readme = readme_file.read()
-
-packages = [
-    package for package in find_packages()
-    if package.startswith(namespace)
-]
-
-setup(
-    name=name,
-    version=version,
-    description=description,
-    long_description=readme,
-    author='Marcin Szymanski',
-    author_email='marcin.szymanski@vortexa.com',
-    zip_safe=False,
-    packages=packages,
-    install_requires=dependencies,
-)
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py
deleted file mode 100644
index a7712f632a766..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# @Author: richard
-# @Date:   2018-12-04T20:12:18+00:00
-# @Last modified by:   richard
-# @Last modified time: 2018-12-04T20:12:57+00:00
-__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/general/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py b/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py
deleted file mode 100644
index 79bc2365032ed..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/general/vortexa_utils/git.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import os
-
-from git import Repo
-from logzero import logger
-from tenacity import retry, wait_fixed, stop_after_attempt
-
-
-@retry(wait=wait_fixed(10), stop=stop_after_attempt(3))
-def clone_repo(repo_url: str, path: str, ssh_key: str):
-    # git reads GIT_SSH_COMMAND from the process environment during the clone
-    os.environ['GIT_SSH_COMMAND'] = f'ssh -i {ssh_key}'
-    logger.info('Cloning git repo %s to %s', repo_url, path)
-    Repo.clone_from(repo_url, path, branch='master')
-    logger.info('Repo cloned successfully')
diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/README.md b/vendor/github.com/V0RT3X4/python_utils/logging/README.md
deleted file mode 100644
index 28b90c8686b22..0000000000000
--- a/vendor/github.com/V0RT3X4/python_utils/logging/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Vortexa Utils Logging Helpers
-
-Small helper scripts to abstract logging-related boilerplate code.
-
-
-## log_unhandled_exceptions
-
-A function decorator designed to wrap any `main()` (or equivalent) function, capturing errors, prefixing them with `ERROR`, and re-raising them in-line when executed in AWS Batch.
-
-### Problem:
-
-AWS Batch jobs all output logs onto a CloudWatch Log Group (`/aws/batch/job`). Therefore, to raise specific alarms, Python jobs should use logging, with the logger pattern containing a unique identifier for the job (such as the job/repo name), so that CloudWatch can filter logs and look for specific exceptions.
-
-When errors are raised by a Python program logging to CloudWatch, the logger pattern and the error/stacktrace are output on 2 consecutive lines. CloudWatch Alarm triggers can only look for pattern combinations which are in-line; therefore, for a CloudWatch Alarm to be raised when a job fails, the logger pattern and some form of identifiable error key must be printed in-line.
-
-
-### Solution:
-
-The `log_unhandled_exceptions` decorator can be wrapped around main executing functions; if any errors are raised during run-time, it will capture these errors and raise them in-line with the logging pattern, using the common pattern `ERROR: `. CloudWatch alerts can now be set to look for (1) the unique logging pattern of the project (i.e. name) and (2) the key `ERROR`, to raise targeted alerts. The full stacktrace will still be output to CloudWatch logs.
-
-### Usage:
-
-```python
-from vortexa_utils.logging import log_unhandled_exceptions
-
-# The following is the logger set-up boilerplate code.
-# This can be done as below, or imported from a project-logger dir.
-# The following is only intended as a sample and should not be copied without understanding what is happening.
-import logging
-
-logger = logging.getLogger(__name__)
-log_format = logging.Formatter(
-    "PROJECT_NAME:%(name)s:%(message)s"
-)  # Only a sample format, can be designed at will, as long as a unique identifier (e.g. PROJECT_NAME) is included
-handler = logging.StreamHandler()
-handler.setFormatter(log_format)
-logger.addHandler(handler)
-logger.setLevel(logging.INFO)
-
-@log_unhandled_exceptions(logger)
-def main():
-    return int(1) + str('two')
-
-if __name__ == "__main__":
-    main()
-```
-
-The code snippet above would return:
-
-```
-PROJECT_NAME:__main__:ERROR: unsupported operand type(s) for +: 'int' and 'str'
-    Traceback (most recent call last):
-    ... ...
- TypeError: unsupported operand type(s) for +: 'int' and 'str' -``` - -As a result, a cloudwatch alarm can now be set on the pattern `PROJECT_NAME ERROR` diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/setup.py b/vendor/github.com/V0RT3X4/python_utils/logging/setup.py deleted file mode 100644 index 7081b7db26c4e..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/logging/setup.py +++ /dev/null @@ -1,38 +0,0 @@ -import io -import os - -from setuptools import find_packages, setup - -namespace = "vortexa_utils" -description = ("Vortexa Error Logging",) - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.md") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -packages = [ - package for package in find_packages() if package.startswith(namespace) -] - -requirements = [ - "logzero", - "psutil" -] - -setup( - name="vortexa_utils_logging", - version="0.0.1", - description=description, - long_description=readme, - author="Tino von Stegmann", - author_email="constantin.vonstegmann@vortexa.com", - zip_safe=False, - tests_require=["nose2"], - install_requires=requirements, - test_suite="nose2.collector.collector", - packages=packages, -) diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py deleted file mode 100644 index 69e3be50dac40..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py deleted file mode 100644 index 14783dcbadd01..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .exception_decorator import log_unhandled_exceptions diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py deleted file mode 100644 index 52b49bac513e0..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/exception_decorator.py +++ /dev/null @@ -1,12 +0,0 @@ -def log_unhandled_exceptions(logger): - def outer_wrapper(main): - def wrapper(*args, **kwargs): - try: - main(*args, **kwargs) - except Exception as e: - logger.exception(f"ERROR: {e}") - raise e - - return wrapper - - return outer_wrapper diff --git a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py b/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py deleted file mode 100644 index de2bac29e6c44..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/logging/vortexa_utils/logging/resources.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import resource - -import psutil -from logzero import logger - -RESOURCE_LOG = """---RESOURCE--- -User time: {0} -System time: {1} -Max resident size: {2} -Block input operations: {3} -Block output operations: {4} ----MEMORY_INFO--- -RSS: {5} -VMS: {6} -Data: {7} -""" - - -def log_resource_usage(step: str): - mem = psutil.Process(os.getpid()).memory_info() - res = resource.getrusage(resource.RUSAGE_SELF) - # MacOs only - try: - data = 
mem.data - except AttributeError: - data = 0 - res_log = RESOURCE_LOG.format( - res.ru_utime, - res.ru_stime, - res.ru_maxrss, - res.ru_inblock, - res.ru_oublock, - mem.rss, - mem.vms, - data, - ) - logger.info(f"[resource][{step}] {res_log}") diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py deleted file mode 100644 index 64e537577cd9a..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/byte_stream_spliter.py +++ /dev/null @@ -1,31 +0,0 @@ -import io - - -socket_bytes = io.BytesIO(b"So\x01me\r\nbytes\rto\nparsB") - -byte_joiner = b''.join - -list(socket_bytes) - -def split_on(buffer, *spliters): - if not spliters: - spliters = {b'\n', b'\r'} - else: - spliters = set(spliters) - line = [] - while True: - b = buffer.read(1) - split = b in {b'\n', b'\r'} - - if split or not b: - if line: - yield byte_joiner(line) - if split: - line = [] - elif not b: - return - else: - line.append(b) - -gen = split_on(socket_bytes) -list(gen) diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py deleted file mode 100644 index fdc97e08c2cb1..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_client.py +++ /dev/null @@ -1,24 +0,0 @@ -import socket - - -s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -s.connect(("www.python.org", 80)) - -import socket - -HOST = '127.0.0.1' # The server's hostname or IP address -PORT = 65432 # The port used by the server - -with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.connect((HOST, PORT)) - s.sendall(b'Hello, world') - data = s.recv(1024) - -print('Received', repr(data)) - -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -sock.connect((HOST, PORT)) -sio = sock.makefile('r', encoding='ascii', errors='backslashreplace', newline=None) -next(sio) -sock.close() -sio.close() diff --git a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py b/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py deleted file mode 100644 index c1d427b6b0882..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/utils/vortexa_utils/utils/sockets/socket_server.py +++ /dev/null @@ -1,17 +0,0 @@ -import socket - -HOST = '127.0.0.1' # Standard loopback interface address (localhost) -PORT = 65432 # Port to listen on (non-privileged ports are > 1023) - -with - -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - -sock.bind((HOST, PORT)) -sock.listen() - -while True: - conn, addr = sock.accept() - with conn: - while True: - conn.sendall(b'some\rdata\nbyt\1\xffest\r\nadslfkja\n\raslkdj') diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION b/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION deleted file mode 100644 index 7bcd0e3612da7..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.2 \ No newline at end of file diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py 
b/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py deleted file mode 100644 index 2a6c50ab207b1..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T19:14:15+00:00 -import os -from setuptools import setup, find_packages -from vortexa_utils.versioning import __version__ - -namespace = 'vortexa_utils' - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name="vortexa_utils_versioning", - version=__version__, - description="", - long_description="", - - author="Richard Mathie", - author_email="richard.mathie@vortexa.com", - - zip_safe=False, - tests_require=['nose2'], - test_suite='nose2.collector.collector', - - packages=packages, -) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py b/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py deleted file mode 100644 index 27be1a07217a9..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/tests/test_versioner.py +++ /dev/null @@ -1,47 +0,0 @@ -import unittest -import os -import tempfile -from nose2.tools import params -from vortexa_utils.versioning.versioner import Versioner - -specs = [ - ((0, 0, 0), (0, 0, 1)), - ((0, 0, 1), (0, 0, 2)), - ((0, 1, 0), (0, 1, 0)), - ((0, 1, 1), (0, 1, 0)), - ((1, 0, 0), (1, 0, 0)), - ((1, 0, 1), (1, 0, 0)), - ((1, 1, 0), (1, 0, 0)), - ((1, 1, 1), (1, 0, 0)) -] - - -class TestVersioner(unittest.TestCase): - def setUp(self): - fh, filename = tempfile.mkstemp() - os.fdopen(fh).close() - self.version: Versioner = Versioner(filename) - - def tearDown(self): - os.remove(self.version.VERSION_FILE) - - def test_version_none(self): - self.assertEqual(self.version.__version__, None) - - def test_version_init(self): - self.assertEqual( - self.version.version, - self.version.SemanticVersion(0, 0, 1) - ) - self.assertTrue(os.path.isfile(self.version.VERSION_FILE)) - with open(self.version.VERSION_FILE, "r") as f: - self.assertEqual(f.readline(), "0.0.1") - - @params(*specs) - def test_version_incriment(self, flags, output): - self.test_version_init() - self.version.update_version(flags) - self.assertEqual( - self.version.version, - self.version.SemanticVersion(*output) - ) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py deleted file mode 100644 index a7712f632a766..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T20:12:18+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:12:57+00:00 -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py deleted file mode 100644 index 977291bcc6396..0000000000000 --- 
a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .versioner import Versioner - -version = Versioner("../../VERSION", __file__) -__version_numeric__ = version.version -__version__ = str(version) - - -if __name__ == "__main__": - from .cli import VersionCLI - VersionCLI(version).parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py deleted file mode 100644 index c9ce8d27293a2..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/__main__.py +++ /dev/null @@ -1,9 +0,0 @@ -from . import version -from .cli import VersionCLI - -__version_numeric__ = version.version -__version__ = str(version) - - -if __name__ == "__main__": - VersionCLI(version).parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py deleted file mode 100644 index 8e414bb5e7c08..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/cli.py +++ /dev/null @@ -1,46 +0,0 @@ -from argparse import ArgumentParser -from dataclasses import dataclass, field -from vortexa_utils.versioning.versioner import Versioner - - -@dataclass -class VersionCLI(object): - versioner: Versioner - parser: ArgumentParser = field(default=None, init=False) - - def __post_init__(self): - self.parser = ArgumentParser( - description='Package Version Tool.' - ) - self.specs = self.versioner.VERSION_SPEC.split( - self.versioner.VERSION_SEP - ) - for spec in self.specs: - self.parser.add_argument( - f'--bump-{spec.lower()}', - f'-{spec[0]}', - action='store_true' - ) - - def parse_args(self): - args = self.parser.parse_args() - spec_flags = list( - getattr(args, f'bump_{spec.lower()}') - for spec in self.specs - ) - if any(spec_flags): - print(f"Current Version: {self.versioner}") - if sum(spec_flags) > 1: - print("You can only bump one spec at a time") - self.parser.print_help() - else: - self.versioner.update_version(spec_flags) - print(f"New Version {self.versioner}") - else: - print(f"{self.versioner}") - - -if __name__ == "__main__": - version = Versioner() - cli = VersionCLI(version) - cli.parse_args() diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py deleted file mode 100644 index 0d3f9b544b13e..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -# @Author: richard -# @Date: 2018-12-21T16:37:39+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-21T16:37:56+00:00 - - -class cached_property(object): - """ - A property that is only computed once per instance and then replaces itself - with an ordinary attribute. Deleting the attribute resets the property. 
- """ # noqa - - def __init__(self, func): - self.__doc__ = getattr(func, "__doc__") - self.func = func - - def __get__(self, obj, cls): - if obj is None: - return self - - value = obj.__dict__[self.func.__name__] = self.func(obj) - return value diff --git a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py b/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py deleted file mode 100644 index 285481c05ad1a..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/versioning/vortexa_utils/versioning/versioner.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Class to track the version of a package.""" -import os -from dataclasses import dataclass, field, InitVar -from collections import namedtuple -from .utils import cached_property - - -@dataclass -class Versioner(object): - VERSION_FILE: str = "VERSION" - MODULE_FILE: InitVar[str] = None - VERSION_SEP: str = "." - VERSION_SPEC: str = "Major.minor.patch" - __version__: namedtuple = field(default=None, init=False) - __version_file__: namedtuple = field(default=None, init=False) - - def __post_init__(self, MODULE_FILE): - parts = [] - if MODULE_FILE is not None: - dir = os.path.dirname(os.path.abspath(MODULE_FILE)) - parts.append(dir) - parts.append(self.VERSION_FILE) - path = os.path.join(*parts) - self.__version_file__ = os.path.abspath(path) - - @cached_property - def SemanticVersion(self): - version_type = namedtuple( - "SemanticVersion", - self.VERSION_SPEC.lower().split(self.VERSION_SEP) - ) - return version_type - - def init_version(self): - fields = self.SemanticVersion._fields - version = ( - 1 if i == len(fields) - 1 else 0 - for i, field in enumerate(fields) - ) - self.version = self.SemanticVersion(*version) - self.write() - return self.version - - def new_version(self, spec_flags): - bumped = False - for spec, ver in zip(spec_flags, self.version): - if bumped: - yield 0 - elif spec: - bumped = True - yield ver + 1 - else: - yield ver - - def update_version(self, spec_flags): - version = self.SemanticVersion(*self.new_version(spec_flags)) - self.version = version - self.write() - return version - - def read(self): - try: - with open(self.__version_file__, "r") as file: - version_string = file.readline().strip() - except FileNotFoundError: - version = self.init_version() - else: - if version_string == "": - version = self.init_version() - else: - version = self.parse_verion(version_string) - self.version = version - return version - - def write(self): - with open(self.__version_file__, "w") as file: - file.write(str(self)) - - @property - def version(self): - if self.__version__ is None: - self.read() - return self.__version__ - - @version.setter - def version(self, version): - if isinstance(version, str): - version = self.parse_verion(version) - if isinstance(version, self.SemanticVersion): - self.__version__ = version - else: - raise TypeError("Version is not str or self.SemanticVersion") - - def parse_verion(self, version: str): - parts = (int(v) for v in version.split(self.VERSION_SEP)) - return self.SemanticVersion(*parts) - - def __str__(self): - return self.VERSION_SEP.join(str(v) for v in self.version) diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.md deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/README.rst deleted file mode 
100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt deleted file mode 100644 index dfedbe37089fc..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sendgrid < 6.0.0 -boto3 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py deleted file mode 100644 index 1b998bcd47eba..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/setup.py +++ /dev/null @@ -1,48 +0,0 @@ -# @Author: richard -# @Date: 2018-12-04T17:54:43+00:00 -# @Last modified by: richard -# @Last modified time: 2018-12-04T20:16:54+00:00 -import os -import io -from setuptools import setup, find_packages - -namespace = 'vortexa_utils' -name = 'vortexa_utils_youve_got_mail' -version = '1' -description = 'Vortexa E-mail utils helper library', - -dependencies = [ - 'boto3', - 'sendgrid<6.0.0' -] - -# Setup boilerplate below - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, 'README.rst') -with io.open(readme_filename, encoding='utf-8') as readme_file: - readme = readme_file.read() - -packages = [ - package for package in find_packages() - if package.startswith(namespace) -] - -setup( - name=name, - version=version, - description=description, - long_description=readme, - - author='Richard Mathie', - author_email='richard.mathie@vortexa.com', - - zip_safe=False, - test_suite='nose2.collector.collector', - tests_require=['nose2'], - - packages=packages, - install_requires=dependencies, - extras_require={} -) diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py b/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py deleted file mode 100644 index aae86d37cf7e6..0000000000000 --- a/vendor/github.com/V0RT3X4/python_utils/youve_got_mail/vortexa_utils/youve_got_mail.py +++ /dev/null @@ -1,43 +0,0 @@ -import base64 -import boto3 -import json -import mimetypes -import sendgrid -from sendgrid.helpers.mail import * -from typing import List - - -secretsmanager = boto3.client('secretsmanager') - - -def create_sendgrid_client(): - secret = secretsmanager.get_secret_value(SecretId='prod/sendgrid') - api_key = json.loads(secret['SecretString'])['SENDGRID_API_KEY'] - - return sendgrid.SendGridAPIClient(apikey=api_key) - - -def build_attachment(buf: bytes, filename: str, disposition: str = "attachment", content_id: str = None): - encoded = base64.b64encode(buf).decode() - - mime_type, encoding = mimetypes.guess_type(filename) - - attachment = Attachment() - attachment.content = encoded - attachment.type = mime_type - attachment.filename = filename - attachment.disposition = disposition - attachment.content_id = content_id - - return attachment - - -def add_recipients(recipients: List[str], mail: Mail): - personalization = Personalization() - - for rec in 
recipients: - personalization.add_to(Email(rec)) - - mail.add_personalization(personalization) - - return mail From a15fc2f56b7a0b388f4f7df3d51b526dbe553514 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 15 Nov 2019 08:52:39 +0000 Subject: [PATCH 33/75] clean up for PR --- .gitignore | 4 -- .pre-commit-config.yaml | 2 - doc/source/whatsnew/v1.0.0.rst | 6 +- pandas/io/sql_scratch.py | 119 --------------------------------- 4 files changed, 4 insertions(+), 127 deletions(-) delete mode 100644 pandas/io/sql_scratch.py diff --git a/.gitignore b/.gitignore index d17a87294796b..6c3c275c48fb7 100644 --- a/.gitignore +++ b/.gitignore @@ -118,7 +118,3 @@ doc/build/html/index.html doc/tmp.sv env/ doc/source/savefig/ -# pyenv files -.python-version - -vendor/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd297bc9b4cbf..3f98273a336cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,14 +4,12 @@ repos: hooks: - id: black language_version: python3.7 - exclude: ^pandas/io/sql_scratch.py$|^vendor/$ - repo: https://gitlab.com/pycqa/flake8 rev: 3.7.7 hooks: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions] - exclude: ^pandas/io/sql_scratch.py$|^vendor/$ - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.20 hooks: diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 73440ff180642..a5af4e727391a 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -26,7 +26,9 @@ Enhancements .. _whatsnew_1000.enhancements.other: -Other enhancement +Other enhancements +^^^^^^^^^^^^^^^^^^ + - :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) - :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - The :ref:`integer dtype ` with support for missing values can now be converted to @@ -35,7 +37,7 @@ Other enhancement pandas (so it will become an integer or float dtype depending on the presence of missing data). (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) -- :meth:`DataFrame.to_sql` now supports upsert statements. 
To facilitate this, the ``if_exists`` argument of now accepts ``upsert_delete`` and ``upsert_ignore`` parameters (:issue:`14553`) + Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/sql_scratch.py b/pandas/io/sql_scratch.py deleted file mode 100644 index fb81e527d8826..0000000000000 --- a/pandas/io/sql_scratch.py +++ /dev/null @@ -1,119 +0,0 @@ -### REPRODUCIBLE SQLTable Creation:table -import sqlalchemy -from sqlalchemy import Table, create_engine, select -from sqlalchemy.engine.base import Connection -from sqlalchemy.sql import tuple_ - -import pandas as pd -from vortexa_utils.database import ProdFactory - -from pandas.io.sql import SQLDatabase, SQLTable - - -def get_pkey(table: Table): - return [pkey.name for pkey in table.primary_key.columns.values()] - - -def get_pkey_values(table: Table, conn: Connection): - pkeys = get_pkey(table) - statement = select([table.c[name] for name in pkeys]) - return [row for row in conn.execute(statement)] - # for row in conn.execute(statement): - # yield row - - -def pkey_generator(table, engine): - pkeys = get_pkey(table) - statement = select([table.c[name] for name in pkeys]) - with engine.connect() as conn: - for row in conn.execute(statement): - yield row - - -# Leaves connection open -def pkey_results_proxy(table, engine): - pkeys = get_pkey(table) - statement = select([table.c[name] for name in pkeys]) - with engine.connect() as conn: - result = conn.execute(statement) - return result - - -def pkey_generator2(table, engine): - pkeys = get_pkey(table) - statement = select([table.c[name] for name in pkeys]) - with engine.connect() as conn: - result = conn.execute(statement) - try: - for row in result: - yield result.fetchone() - finally: - result.close() - - -# replace table with self -def get_pkey_values(table: SQLTable): - pkeys = [pkey.name for pkey in table.table.primary_key.columns.values()] - statement = select([table.table.c[name] for name in pkeys]) - table.pd_sql.execute(statement) - - -def generate_mask(df, dictionary): - return [df[key] == value for key, value in dictionary.items()] - - -def generate_mask_of_masks(list_of_masks): - return pd.concat([mask for mask in list_of_masks], axis=1).all(1) - - -engine = sqlalchemy.create_engine("enter string here") -meta = MetaData(engine) -table_name = "charterers" # or wtv -meta.reflect(only=[table_name], views=True) -db = SQLDatabase(engine, meta=meta) -table = SQLTable(table_name, db, index=None, schema=None) - - -engine_v = ProdFactory().engine() -engine = create_engine("sqlite:///:memory:") -table_name = "charterers" -df = pd.read_sql_table(table_name, engine_v) -df_test = df.head().copy() -df_test["name"] = df_test["name"].apply(lambda x: x + "_TEST") -engine.execute( - "create table charterers(id text primary key, name text, energy integer)" -) -def create_test_df(df): - df2 = df.head().copy() - df2['name'] = df2['name'].apply(lambda x: x + '_NEW') - return df2 - -def read_table(table): - with engine.connect() as conn: - result = conn.execute(f'select * from {table}') - return result.fetchall() - -def clear_table(table): - with engine.connect() as conn: - conn.execute(f'delete from {table}') - -def top_up_table(table): - df.to_sql(table, con=engine, if_exists='append', index=False) - return read_table() - -def reset_table(table): - clear_table(table) - top_up_table(table) - -df.to_sql(table_name, index=False, if_exists="append", con=engine) - -db = SQLDatabase(engine, schema=None, meta=None) -new_data = SQLTable(table_name, db, frame=df_test, index=False) - - -def 
delete_matching_keys(sql_table, key_columns, value_iter): - delete_expression = sql_table.table.delete().where( - tuple_(*(table.table.c[col] for col in key_columns)).in_(list(zip(value_iter))) - ) - with sql_table.pd_sql.run_transaction() as conn: - conn.execute(delete_expression) From 6c4450623f477db634d8b9214b04b933c3c07018 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 15 Nov 2019 11:56:34 +0000 Subject: [PATCH 34/75] wrapped whole insert workflow in transaction to avoid postgres freezing --- pandas/io/sql.py | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index d71b6bf23d42c..5c129f6c7abac 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -853,19 +853,19 @@ def insert(self, chunksize=None, method=None): """ Determines what data to pass to the underlying insert method. """ - if self.if_exists == "upsert_ignore": - data = self._upsert_ignore_processing() - self._insert(data=data, chunksize=chunksize, method=method) - elif self.if_exists == "upsert_delete": - delete_statement = self._upsert_delete_processing() - # nested transaction to ensure delete is rolled back in case of poor data - with self.pd_sql.run_transaction() as trans: + with self.pd_sql.run_transaction() as trans: + if self.if_exists == "upsert_ignore": + data = self._upsert_ignore_processing() + self._insert(data=data, chunksize=chunksize, method=method, conn=trans) + elif self.if_exists == "upsert_delete": + delete_statement = self._upsert_delete_processing() + # nested transaction to ensure delete is rolled back in case of poor data trans.execute(delete_statement) - self._insert(chunksize=chunksize, method=method) - else: - self._insert(chunksize=chunksize, method=method) + self._insert(chunksize=chunksize, method=method, conn=trans) + else: + self._insert(chunksize=chunksize, method=method, conn=trans) - def _insert(self, data=None, chunksize=None, method=None): + def _insert(self, data=None, chunksize=None, method=None, conn=None): # set insert method if method is None: exec_insert = self._execute_insert @@ -893,15 +893,14 @@ def _insert(self, data=None, chunksize=None, method=None): chunks = int(nrows / chunksize) + 1 - with self.pd_sql.run_transaction() as conn: - for i in range(chunks): - start_i = i * chunksize - end_i = min((i + 1) * chunksize, nrows) - if start_i >= end_i: - break - - chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list]) - exec_insert(conn, keys, chunk_iter) + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, nrows) + if start_i >= end_i: + break + + chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list]) + exec_insert(conn, keys, chunk_iter) def _query_iterator( self, result, chunksize, columns, coerce_float=True, parse_dates=None From e409bda4bd14399a49251845611a419ba5425e4d Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 15 Nov 2019 14:19:39 +0000 Subject: [PATCH 35/75] ENH: black file --- pandas/io/sql.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5c129f6c7abac..0bd81f48de02b 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -507,7 +507,7 @@ def to_sql( "append", "upsert_ignore", "upsert_delete", - ): + ): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -726,9 +726,9 @@ def _upsert_ignore_processing(self): # Delete rows from dataframe where primary keys match # Method requires tuples, to 
account for cases where indexes do not match to_be_deleted_mask = ( - temp[primary_keys].apply(tuple, axis=1).isin( - pkeys_from_database[primary_keys].apply(tuple, axis=1) - ) + temp[primary_keys] + .apply(tuple, axis=1) + .isin(pkeys_from_database[primary_keys].apply(tuple, axis=1)) ) temp.drop(temp[to_be_deleted_mask].index, inplace=True) From d35e145c4d8aab8b6e2cd10bf1b722360aa8fdbb Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 15 Nov 2019 14:55:47 +0000 Subject: [PATCH 36/75] pep8 formatting --- pandas/io/sql.py | 17 +++++----- pandas/tests/io/test_sql.py | 68 +++++++++++++------------------------ 2 files changed, 31 insertions(+), 54 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0bd81f48de02b..6ac20f4025886 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -674,10 +674,10 @@ def create(self): def _upsert_delete_processing(self): """ - Generate delete statement, to remove rows with clashing primary key from database. + Generate delete statement for rows with clashing primary key from database. `upsert_delete` prioritizes incoming data, over existing data in the DB. - This method generates the Delete statement for duplicate rows, + This method generates the Delete statement for duplicate rows, which is to be executed in the same transaction as the ensuing data insert. Returns @@ -736,7 +736,7 @@ def _upsert_ignore_processing(self): def _get_primary_key_data(self): """ - Get primary key names from database, and yield columns with same names from dataframe. + Get primary keys from database, and yield dataframe columns with same names. Upsert workflows require knowledge of what is already in the database. This method reflects the meta object and gets a list of primary keys, @@ -745,7 +745,7 @@ def _get_primary_key_data(self): Returns ------- - primary_keys : list of str + primary_keys : list of str Primary key names primary_key_values : iterable DataFrame rows, for columns corresponding to `primary_key` names @@ -797,10 +797,10 @@ def _execute_insert_multi(self, conn, keys, data_iter): def _get_index_formatted_dataframe(self): """ Format index of incoming dataframe to be aligned with a database table. - - Copy original dataframe, and check whether the dataframe index - is to be added to the database table. - If it is, reset the index so that it becomes a normal column, else return + + Copy original dataframe, and check whether the dataframe index + is to be added to the database table. 
+ If it is, reset the index so that it becomes a normal column, else return Returns ------- @@ -859,7 +859,6 @@ def insert(self, chunksize=None, method=None): self._insert(data=data, chunksize=chunksize, method=method, conn=trans) elif self.if_exists == "upsert_delete": delete_statement = self._upsert_delete_processing() - # nested transaction to ensure delete is rolled back in case of poor data trans.execute(delete_statement) self._insert(chunksize=chunksize, method=method, conn=trans) else: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e7f514b583e84..adf866779b82d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -213,13 +213,13 @@ "posgresql": """CREATE TABLE pkey_table ( "A" INTEGER PRIMARY KEY, "B" TEXT - )""" + )""", }, "insert_pkey_table": { "sqlite": """INSERT INTO pkey_table VALUES (?, ?)""", "mysql": """INSERT INTO pkey_table VALUES (%s, %s)""", "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""", - } + }, } @@ -330,11 +330,7 @@ def _create_pkey_table(self): self.drop_table("pkey_table") self._get_exec().execute(SQL_STRINGS["create_pkey_table"][self.flavor]) ins = SQL_STRINGS["insert_pkey_table"][self.flavor] - data = [ - (1, 'name1'), - (2, 'name2'), - (3, 'name3') - ] + data = [(1, "name1"), (2, "name2"), (3, "name3")] self._get_exec().execute(ins, data) def _load_test1_data(self): @@ -396,8 +392,7 @@ def _load_test3_data(self): ] self.test_frame3 = DataFrame(data, columns=columns) - - + def _load_raw_sql(self): self.drop_table("types_test_data") self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor]) @@ -433,15 +428,10 @@ def _load_raw_sql(self): self._get_exec().execute( ins["query"], [d[field] for field in ins["fields"]] ) - + def _load_pkey_table_data(self): columns = ["A", "B"] - data = [ - (1, 'new_name1'), - (2, 'new_name2'), - (4, 'name4'), - (5, 'name5') - ] + data = [(1, "new_name1"), (2, "new_name2"), (4, "name4"), (5, "name5")] self.pkey_table_frame = DataFrame(data, columns=columns) @@ -504,9 +494,7 @@ def _to_sql_fail(self): def _to_sql_replace(self): self.drop_table("test_frame1") - self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="fail") - # Add to table again self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="replace") assert self.pandasSQL.has_table("test_frame1") @@ -562,7 +550,7 @@ def _to_sql_upsert_ignore(self): - dataframe has all original values """ # Nuke - self.drop_table('pkey_table') + self.drop_table("pkey_table") # Re-create original table self._create_pkey_table() # Original table exists and as 3 rows @@ -570,21 +558,16 @@ def _to_sql_upsert_ignore(self): assert self._count_rows("pkey_table") == 3 # Insert new dataframe self.pandasSQL.to_sql( - self.pkey_table_frame, - "pkey_table", - if_exists="upsert_ignore", - index=False - ) + self.pkey_table_frame, "pkey_table", if_exists="upsert_ignore", index=False + ) # Check table len correct assert self._count_rows("pkey_table") == 5 # Check original DB values maintained for duplicate keys duplicate_keys = [1, 2] duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)""" - duplicate_val = self._get_exec().execute( - duplicate_key_query, duplicate_keys - ) - data_from_db = sorted([val[0] for val in duplicate_val]) - expected = sorted(["name1", "name2"]) + duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) + data_from_db = [val[0] for val in duplicate_val].sort() + expected = ["name1", "name2"].sort() assert data_from_db == expected # Finally, confirm that 
duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 @@ -597,8 +580,8 @@ def _to_sql_upsert_delete(self): - table len = 5 - dataframe values for rows with duplicate keys """ - # Nuke - self.drop_table('pkey_table') + # Nuke + self.drop_table("pkey_table") # Re-create original table self._create_pkey_table() # Original table exists and as 3 rows @@ -606,25 +589,20 @@ def _to_sql_upsert_delete(self): assert self._count_rows("pkey_table") == 3 # Insert new dataframe self.pandasSQL.to_sql( - self.pkey_table_frame, - "pkey_table", - if_exists="upsert_delete", - index=False - ) + self.pkey_table_frame, "pkey_table", if_exists="upsert_delete", index=False + ) # Check table len correct assert self._count_rows("pkey_table") == 5 # Check original DB values maintained for duplicate keys duplicate_keys = [1, 2] duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)""" - duplicate_val = self._get_exec().execute( - duplicate_key_query, duplicate_keys - ) - data_from_db = sorted([val[0] for val in duplicate_val]) - data_from_df = sorted( - list( - self.pkey_table_frame.loc[self.pkey_table_frame['A'].isin(duplicate_keys), 'B'] - ) - ) + duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) + data_from_db = [val[0] for val in duplicate_val].sort() + data_from_df = list( + self.pkey_table_frame.loc[ + self.pkey_table_frame["A"].isin(duplicate_keys), "B" + ] + ).sort() assert data_from_db == data_from_df def _roundtrip(self): From 8a57126b6bbd88bc9d3e54b71bafa62ad094380b Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 16 Nov 2019 14:39:18 +0000 Subject: [PATCH 37/75] black formatting sql.py --- pandas/io/sql.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 6ac20f4025886..3a09a484717c7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -501,13 +501,7 @@ def to_sql( .. versionadded:: 0.24.0 """ - if if_exists not in ( - "fail", - "replace", - "append", - "upsert_ignore", - "upsert_delete", - ): + if if_exists not in ("fail", "replace", "append", "upsert_ignore", "upsert_delete"): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) From 2b1c79733edc34e44aafa429d74cbb239505c609 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 17:44:27 +0000 Subject: [PATCH 38/75] reformatted tests, added requested changes, and updated generic docstring --- pandas/core/generic.py | 5 +++- pandas/io/sql.py | 51 +++++++++++++++++++++---------------- pandas/tests/io/test_sql.py | 32 ++++++++++++++--------- 3 files changed, 53 insertions(+), 35 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e2ae4e1dfa0a..0828876c37e33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2604,12 +2604,15 @@ def to_sql( schema : str, optional Specify the schema (if database flavor supports this). If None, use default schema. - if_exists : {'fail', 'replace', 'append'}, default 'fail' + if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, + default 'fail' How to behave if the table already exists. * fail: Raise a ValueError. * replace: Drop the table before inserting new values. * append: Insert new values to the existing table. + * upsert_overwrite: Overwrite matches in database with incoming data. + * upsert_keep: Keep matches in database instead of incoming data. index : bool, default True Write DataFrame index as a column. 
Uses `index_label` as the column diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 162dc61827ba9..de1b0362a7c4a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -467,15 +467,15 @@ def to_sql( schema : str, optional Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). - if_exists : {'fail', 'replace', 'append', 'upsert_delete', 'upsert_ignore'}, + if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, default 'fail'. - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - - upsert_ignore: If table exists, perform an UPSERT (based on primary keys), - prioritising records already in the database over incoming duplicates. - - upsert_delete: If table exists, perform an UPSERT (based on primary keys), + - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys), prioritising incoming records over duplicates already in the database. + - upsert_keep: If table exists, perform an UPSERT (based on primary keys), + prioritising records already in the database over incoming duplicates. index : boolean, default True Write DataFrame index as a column. index_label : str or sequence, optional @@ -502,7 +502,13 @@ def to_sql( .. versionadded:: 0.24.0 """ - if if_exists not in ("fail", "replace", "append", "upsert_ignore", "upsert_delete"): + if if_exists not in ( + "fail", + "replace", + "append", + "upsert_keep", + "upsert_overwrite", + ): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) pandas_sql = pandasSQL_builder(con, schema=schema) @@ -654,11 +660,7 @@ def create(self): elif self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() - elif self.if_exists == "append": - pass - elif self.if_exists == "upsert_delete": - pass - elif self.if_exists == "upsert_ignore": + elif self.if_exists in {"append", "upsert_overwrite", "upsert_keep"}: pass else: raise ValueError( @@ -667,11 +669,11 @@ def create(self): else: self._execute_create() - def _upsert_delete_processing(self): + def _upsert_overwrite_processing(self): """ Generate delete statement for rows with clashing primary key from database. - `upsert_delete` prioritizes incoming data, over existing data in the DB. + `upsert_overwrite` prioritizes incoming data, over existing data in the DB. This method generates the Delete statement for duplicate rows, which is to be executed in the same transaction as the ensuing data insert. @@ -690,11 +692,11 @@ def _upsert_delete_processing(self): ) return delete_statement - def _upsert_ignore_processing(self): + def _upsert_keep_processing(self): """ Delete clashing values from a copy of the incoming dataframe. - `upsert_ignore` prioritizes data in DB over incoming data. + `upsert_keep` prioritizes data in DB over incoming data. This method creates a copy of the incoming dataframe, fetches matching data from DB, deletes matching data from copied frame, and returns that frame to be inserted. @@ -849,11 +851,11 @@ def insert(self, chunksize=None, method=None): Determines what data to pass to the underlying insert method. 
""" with self.pd_sql.run_transaction() as trans: - if self.if_exists == "upsert_ignore": - data = self._upsert_ignore_processing() + if self.if_exists == "upsert_keep": + data = self._upsert_keep_processing() self._insert(data=data, chunksize=chunksize, method=method, conn=trans) - elif self.if_exists == "upsert_delete": - delete_statement = self._upsert_delete_processing() + elif self.if_exists == "upsert_overwrite": + delete_statement = self._upsert_overwrite_processing() trans.execute(delete_statement) self._insert(chunksize=chunksize, method=method, conn=trans) else: @@ -1401,10 +1403,15 @@ def to_sql( frame : DataFrame name : string Name of SQL table. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - - fail: If table exists, do nothing. - - replace: If table exists, drop it, recreate it, and insert data. - - append: If table exists, insert data. Create if does not exist. + if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, + default 'fail'. + - fail: If table exists, do nothing. + - replace: If table exRsts, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys), + prioritising incoming records over duplicates already in the database. + - upsert_keep: If table exists, perform an UPSERT (based on primary keys), + prioritising records already in the database over incoming duplicates. index : boolean, default True Write DataFrame index as a column. index_label : string or sequence, default None diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 8db47f280b8d3..af086ee55019d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -220,6 +220,11 @@ "mysql": """INSERT INTO pkey_table VALUES (%s, %s)""", "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""", }, + "read_pkey_table": { + "sqlite": """SELECT B FROM pkey_table WHERE A IN (?, ?)""", + "mysql": """SELECT B FROM pkey_table WHERE A IN (%s, %s)""", + "postgresql": """SELECT B FROM pkey_table WHERE A IN (%s, %s)""", + }, } @@ -540,7 +545,7 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") - def _to_sql_upsert_ignore(self): + def _to_sql_upsert_keep(self): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -558,13 +563,13 @@ def _to_sql_upsert_ignore(self): assert self._count_rows("pkey_table") == 3 # Insert new dataframe self.pandasSQL.to_sql( - self.pkey_table_frame, "pkey_table", if_exists="upsert_ignore", index=False + self.pkey_table_frame, "pkey_table", if_exists="upsert_keep", index=False ) # Check table len correct assert self._count_rows("pkey_table") == 5 # Check original DB values maintained for duplicate keys duplicate_keys = [1, 2] - duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)""" + duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) data_from_db = [val[0] for val in duplicate_val].sort() expected = ["name1", "name2"].sort() @@ -572,7 +577,7 @@ def _to_sql_upsert_ignore(self): # Finally, confirm that duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 - def _to_sql_upsert_delete(self): + def _to_sql_upsert_overwrite(self): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -589,13 +594,16 @@ def _to_sql_upsert_delete(self): assert 
self._count_rows("pkey_table") == 3 # Insert new dataframe self.pandasSQL.to_sql( - self.pkey_table_frame, "pkey_table", if_exists="upsert_delete", index=False + self.pkey_table_frame, + "pkey_table", + if_exists="upsert_overwrite", + index=False, ) # Check table len correct assert self._count_rows("pkey_table") == 5 # Check original DB values maintained for duplicate keys duplicate_keys = [1, 2] - duplicate_key_query = """SELECT B FROM pkey_table WHERE A IN (?, ?)""" + duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) data_from_db = [val[0] for val in duplicate_val].sort() data_from_df = list( @@ -679,7 +687,7 @@ class _TestSQLApi(PandasSQLTest): """ flavor = "sqlite" - mode: str + mode = None # type: str def setup_connect(self): self.conn = self.connect() @@ -1331,7 +1339,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ - flavor: str + flavor = None # type: str @pytest.fixture(autouse=True, scope="class") def setup_class(cls): @@ -1402,11 +1410,11 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() - def test_to_sql_upsert_ignore(self): - self._to_sql_upsert_ignore() + def test_to_sql_upsert_keep(self): + self._to_sql_upsert_keep() - def test_to_sql_upsert_delete(self): - self._to_sql_upsert_delete() + def test_to_sql_upsert_overwrite(self): + self._to_sql_upsert_overwrite() def test_create_table(self): temp_conn = self.connect() From b23f5282c54e073cdacbff6a53664e5bbd6892ce Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 17:47:44 +0000 Subject: [PATCH 39/75] reformatted tests, added requested changes, and updated generic docstring --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b34f5dfdd1a83..47acd87124b4a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,13 +5,13 @@ repos: - id: black language_version: python3.7 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.7 + rev: 3.7.9 hooks: - id: flake8 language: python_venv additional_dependencies: [flake8-comprehensions>=3.1.0] - repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.20 + rev: v4.3.21 hooks: - id: isort language: python_venv From 899da90ce8694a230078d97a0ecf1ae7e47915ce Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 19:19:12 +0000 Subject: [PATCH 40/75] line-separated docstring in generc and removed unwatned pattern from tests --- pandas/core/generic.py | 2 +- pandas/tests/io/test_sql.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0828876c37e33..dad1dc8663de8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2604,7 +2604,7 @@ def to_sql( schema : str, optional Specify the schema (if database flavor supports this). If None, use default schema. - if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, + if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'},\ default 'fail' How to behave if the table already exists. 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index af086ee55019d..02de3aa2d9f44 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -687,7 +687,7 @@ class _TestSQLApi(PandasSQLTest): """ flavor = "sqlite" - mode = None # type: str + mode = None def setup_connect(self): self.conn = self.connect() @@ -1339,7 +1339,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ - flavor = None # type: str + flavor = None @pytest.fixture(autouse=True, scope="class") def setup_class(cls): From 8ebc2566d3e91ce3cfcb69f831cdb2acefbd6c3e Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 20:27:17 +0000 Subject: [PATCH 41/75] postgresql type-o and type-checking in sql tests --- pandas/tests/io/test_sql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 02de3aa2d9f44..d959e8f2e1969 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -210,7 +210,7 @@ `B` TEXT, PRIMARY KEY (A) )""", - "posgresql": """CREATE TABLE pkey_table ( + "postgresql": """CREATE TABLE pkey_table ( "A" INTEGER PRIMARY KEY, "B" TEXT )""", @@ -687,7 +687,7 @@ class _TestSQLApi(PandasSQLTest): """ flavor = "sqlite" - mode = None + mode: str def setup_connect(self): self.conn = self.connect() @@ -1339,7 +1339,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ - flavor = None + flavor: str @pytest.fixture(autouse=True, scope="class") def setup_class(cls): From baad9e3b07f06f1a24e883e4eed22b13af975c7e Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 22:20:31 +0000 Subject: [PATCH 42/75] reformatting tests --- pandas/tests/io/test_sql.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d959e8f2e1969..a70fde4df7358 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -202,17 +202,17 @@ }, "create_pkey_table": { "sqlite": """CREATE TABLE pkey_table ( - "A" Integer Primary Key, - "B" TEXT + "a" Integer Primary Key, + "b" TEXT )""", "mysql": """CREATE TABLE pkey_table ( - `A` INTEGER, - `B` TEXT, - PRIMARY KEY (A) + `a` INTEGER, + `b` TEXT, + PRIMARY KEY (a) )""", "postgresql": """CREATE TABLE pkey_table ( - "A" INTEGER PRIMARY KEY, - "B" TEXT + "a" INTEGER PRIMARY KEY, + "b" TEXT )""", }, "insert_pkey_table": { @@ -221,9 +221,9 @@ "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""", }, "read_pkey_table": { - "sqlite": """SELECT B FROM pkey_table WHERE A IN (?, ?)""", - "mysql": """SELECT B FROM pkey_table WHERE A IN (%s, %s)""", - "postgresql": """SELECT B FROM pkey_table WHERE A IN (%s, %s)""", + "sqlite": """SELECT b FROM pkey_table WHERE A IN (?, ?)""", + "mysql": """SELECT b FROM pkey_table WHERE A IN (%s, %s)""", + "postgresql": """SELECT b FROM pkey_table WHERE A IN (%s, %s)""", }, } @@ -435,7 +435,7 @@ def _load_raw_sql(self): ) def _load_pkey_table_data(self): - columns = ["A", "B"] + columns = ["a", "b"] data = [(1, "new_name1"), (2, "new_name2"), (4, "name4"), (5, "name5")] self.pkey_table_frame = DataFrame(data, columns=columns) @@ -608,7 +608,7 @@ def _to_sql_upsert_overwrite(self): data_from_db = [val[0] for val in duplicate_val].sort() data_from_df = list( self.pkey_table_frame.loc[ - self.pkey_table_frame["A"].isin(duplicate_keys), "B" + self.pkey_table_frame["a"].isin(duplicate_keys), "b" ] ).sort() assert data_from_db == data_from_df From 79ef9c0923e93ed9294c27ee229c3059ce47fbff Mon 
Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 22:28:20 +0000 Subject: [PATCH 43/75] remove quotations from postgres queries --- pandas/tests/io/test_sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a70fde4df7358..a022a0756bad5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -211,8 +211,8 @@ PRIMARY KEY (a) )""", "postgresql": """CREATE TABLE pkey_table ( - "a" INTEGER PRIMARY KEY, - "b" TEXT + a INTEGER PRIMARY KEY, + b TEXT )""", }, "insert_pkey_table": { From d0eb251075883902280cba6cd0dd9a1a1c4a69a4 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 24 Nov 2019 22:29:28 +0000 Subject: [PATCH 44/75] postgres formatting --- pandas/tests/io/test_sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a022a0756bad5..a70fde4df7358 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -211,8 +211,8 @@ PRIMARY KEY (a) )""", "postgresql": """CREATE TABLE pkey_table ( - a INTEGER PRIMARY KEY, - b TEXT + "a" INTEGER PRIMARY KEY, + "b" TEXT )""", }, "insert_pkey_table": { From efd23828bb56a587a1d69bfb17f3fbf49c9f22cc Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 9 Jan 2020 13:55:26 +0000 Subject: [PATCH 45/75] removed temp=self.frame in _get_index_formatted_dataframe --- pandas/io/sql.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index a5889f7d269e4..2160334e87938 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -808,8 +808,6 @@ def _get_index_formatted_dataframe(self): temp.reset_index(inplace=True) except ValueError as err: raise ValueError(f"duplicate name in index/columns: {err}") - else: - temp = self.frame return temp From 4822ce0aa433d66b76da701a9adbf19ef0a63b3a Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 16 Jan 2020 11:42:38 +0000 Subject: [PATCH 46/75] Commenting out tests to confirm that stalled builds are caused by upserts --- pandas/tests/io/test_sql.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 208dcf7250f5d..aaa51275a8c6c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1404,11 +1404,13 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() + """ def test_to_sql_upsert_keep(self): self._to_sql_upsert_keep() def test_to_sql_upsert_overwrite(self): self._to_sql_upsert_overwrite() + """ def test_create_table(self): temp_conn = self.connect() From 0fba1b65a757ad0d5a8cf01e1488322ea0c41487 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 16 Jan 2020 12:55:27 +0000 Subject: [PATCH 47/75] re-enabling to_sql_upsert_keep test --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index aaa51275a8c6c..9c75fdcb7d524 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1404,10 +1404,10 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() - """ def test_to_sql_upsert_keep(self): self._to_sql_upsert_keep() + """ def test_to_sql_upsert_overwrite(self): self._to_sql_upsert_overwrite() """ From c230d16619d507f7ba80063b5d25217dc65e2bd3 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Thu, 16 Jan 2020 13:42:28 +0000 Subject: [PATCH 48/75] re-enabling to_upsert_ignore, 
whilst disabling to_upsert_keep --- pandas/tests/io/test_sql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 9c75fdcb7d524..649bf0a492f85 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1404,13 +1404,14 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() + """ def test_to_sql_upsert_keep(self): self._to_sql_upsert_keep() """ + def test_to_sql_upsert_overwrite(self): self._to_sql_upsert_overwrite() - """ def test_create_table(self): temp_conn = self.connect() From c3c6ed1c2694d8c873c97ed77ba89e322408b350 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 31 Jan 2020 12:06:19 +0000 Subject: [PATCH 49/75] clean up flake8 version, re-activate test --- .pre-commit-config.yaml | 2 +- pandas/tests/io/test_sql.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f7dacbcb43c0..139b9e31df46c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: black language_version: python3.7 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 + rev: 3.7.7 hooks: - id: flake8 language: python_venv diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 649bf0a492f85..2740d05111930 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -570,6 +570,8 @@ def _to_sql_upsert_keep(self): assert data_from_db == expected # Finally, confirm that duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 + # Clean up + self.drop_table("pkey_table") def _to_sql_upsert_overwrite(self): """ @@ -606,6 +608,8 @@ def _to_sql_upsert_overwrite(self): ] ).sort() assert data_from_db == data_from_df + # Clean up + self.drop_table("pkey_table") def _roundtrip(self): self.drop_table("test_frame_roundtrip") @@ -1404,12 +1408,9 @@ def test_to_sql_method_multi(self): def test_to_sql_method_callable(self): self._to_sql_method_callable() - """ def test_to_sql_upsert_keep(self): self._to_sql_upsert_keep() - """ - def test_to_sql_upsert_overwrite(self): self._to_sql_upsert_overwrite() From 79becdcd25f902e0b4c7362652da19d4a0eb9643 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 3 Feb 2020 08:54:27 +0000 Subject: [PATCH 50/75] remove SQLAlchemyConn tests to confirm hypothesis --- pandas/tests/io/test_sql.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2740d05111930..d31785a1311cc 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2184,10 +2184,12 @@ class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass +""" @pytest.mark.single @pytest.mark.db class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): pass +""" @pytest.mark.single From 3b6ca764760e3691ebdca17abdff4d97880f70ad Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 3 Feb 2020 10:03:04 +0000 Subject: [PATCH 51/75] relaunc TestMySQLAlchemyConn - remove TestMySQLAlchemy --- pandas/tests/io/test_sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d31785a1311cc..f47af5ea9359e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2178,18 +2178,18 @@ def psql_insert_copy(table, conn, keys, data_iter): tm.assert_frame_equal(result, expected) +""" @pytest.mark.single @pytest.mark.db 
class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass +""" -""" @pytest.mark.single @pytest.mark.db class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): pass -""" @pytest.mark.single From 61d998f45a3e47096053cec49449eff689efe781 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 3 Feb 2020 10:52:41 +0000 Subject: [PATCH 52/75] added all MySQL test classes back --- pandas/tests/io/test_sql.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f47af5ea9359e..2740d05111930 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2178,12 +2178,10 @@ def psql_insert_copy(table, conn, keys, data_iter): tm.assert_frame_equal(result, expected) -""" @pytest.mark.single @pytest.mark.db class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass -""" @pytest.mark.single From 529e5fbc10bc75590360abea270f8773d416c82d Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 24 Aug 2020 21:17:52 +0100 Subject: [PATCH 53/75] chore: re-order imports --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 59724f2adc8ac..42d39fcfdd947 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -780,7 +780,7 @@ def _upsert_keep_processing(self): DataFrame Filtered dataframe, with values that are already in DB removed. """ - from sqlalchemy import tuple_, select + from sqlalchemy import select, tuple_ # Primary key data primary_keys, primary_key_values = self._get_primary_key_data() From 6d718dc61ec2f497752acbdacdb5324ae2d29b9c Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Tue, 25 Aug 2020 09:02:59 +0100 Subject: [PATCH 54/75] remove unnecessary sort --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 08e2c68efca66..829fe42dbfbd2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -581,7 +581,7 @@ def _to_sql_upsert_keep(self, method): duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) data_from_db = [val[0] for val in duplicate_val].sort() - expected = ["name1", "name2"].sort() + expected = ["name1", "name2"] assert data_from_db == expected # Finally, confirm that duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 From 53e35654d1e65d72c072359d2c98002246e452e3 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Tue, 25 Aug 2020 10:01:23 +0100 Subject: [PATCH 55/75] undo sort rmv --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 829fe42dbfbd2..08e2c68efca66 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -581,7 +581,7 @@ def _to_sql_upsert_keep(self, method): duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) data_from_db = [val[0] for val in duplicate_val].sort() - expected = ["name1", "name2"] + expected = ["name1", "name2"].sort() assert data_from_db == expected # Finally, confirm that duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 From 26c0b0f6a5e57db9e694f2f22f582db86c710157 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 9 Nov 2020 21:39:42 +0000 Subject: [PATCH 56/75] 
feat: use updates instead of deletes --- pandas/io/sql.py | 223 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 155 insertions(+), 68 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 59724f2adc8ac..5e3e05e82f918 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -523,6 +523,7 @@ def to_sql( con, schema=None, if_exists="fail", + on_conflict=None, index=True, index_label=None, chunksize=None, @@ -545,15 +546,21 @@ def to_sql( schema : str, optional Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). - if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, + if_exists : {'fail', 'replace', 'append'}, default 'fail'. - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys), - prioritising incoming records over duplicates already in the database. - - upsert_keep: If table exists, perform an UPSERT (based on primary keys), - prioritising records already in the database over incoming duplicates. + on_conflict : {None, 'do_nothing', 'do_update'}, optional + Determine insertion behaviour in case of a primary key clash. + If the table being written has primary key constraints, attempting + to insert new rows with the same values in the primary key columns, + will cause an error. In this case the conflicting records can either + be updated in the database or ignored from the incoming dataframe. + - do_nothing: Ignore incoming rows with primary key clashes, and + insert only the incoming rows with non-conflicting primary keys + - do_update: Update existing rows in database with primary key clashes, + and append the remaining rows with non-conflicting primary keys index : boolean, default True Write DataFrame index as a column. index_label : str or sequence, optional @@ -584,10 +591,8 @@ def to_sql( "fail", "replace", "append", - "upsert_keep", - "upsert_overwrite", ): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) + raise ValueError(f"'{if_exists}' is not valid for if_exists") pandas_sql = pandasSQL_builder(con, schema=schema) @@ -602,6 +607,7 @@ def to_sql( frame, name, if_exists=if_exists, + on_conflict=on_conflict, index=index, index_label=index_label, schema=schema, @@ -690,6 +696,7 @@ def __init__( frame=None, index=True, if_exists="fail", + on_conflict=None, prefix="pandas", index_label=None, schema=None, @@ -703,6 +710,7 @@ def __init__( self.index = self._index_name(index, index_label) self.schema = schema self.if_exists = if_exists + self.on_conflict = (on_conflict,) self.keys = keys self.dtype = dtype @@ -736,74 +744,142 @@ def create(self): elif self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() - elif self.if_exists in {"append", "upsert_overwrite", "upsert_keep"}: + elif self.if_exists == "append": pass else: raise ValueError(f"'{self.if_exists}' is not valid for if_exists") else: self._execute_create() - def _upsert_overwrite_processing(self): + def _load_existing_pkeys(self, primary_keys, primary_key_values): """ - Generate delete statement for rows with clashing primary key from database. + Load existing primary keys from Database - `upsert_overwrite` prioritizes incoming data, over existing data in the DB. 
-        This method generates the Delete statement for duplicate rows,
-        which is to be executed in the same transaction as the ensuing data insert.
+        Parameters
+        ----------
+        primary_keys : list of str
+            List of primary key column names
+        primary_key_values : list of str
+            List of primary key values already present in incoming dataframe

         Returns
-        ----------
-        sqlalchemy.sql.dml.Delete
-            Delete statement to be executed against DB
+        -------
+        list of str
+            primary key values in incoming dataframe which already exist in database
         """
-        from sqlalchemy import tuple_
+        from sqlalchemy import select, tuple_

-        # Primary key data
-        primary_keys, primary_key_values = self._get_primary_key_data()
-        # Generate delete statement
-        delete_statement = self.table.delete().where(
-            tuple_(*(self.table.c[col] for col in primary_keys)).in_(primary_key_values)
+        cols_to_fetch = [self.table.c[key] for key in primary_keys]
+        select_stmt = select(cols_to_fetch).where(
+            tuple_(*cols_to_fetch).in_(primary_key_values)
         )
-        return delete_statement
+        return self.pd_sql.execute(select_stmt).fetchall()

-    def _upsert_keep_processing(self):
+    def _split_incoming_data(self, primary_keys, keys_in_db):
         """
-        Delete clashing values from a copy of the incoming dataframe.
+        Split incoming dataframe based on whether primary key already exists in db.

-        `upsert_keep` prioritizes data in DB over incoming data.
-        This method creates a copy of the incoming dataframe,
-        fetches matching data from DB, deletes matching data from copied frame,
-        and returns that frame to be inserted.
+        Parameters
+        ----------
+        primary_keys : list of str
+            Primary key columns
+        keys_in_db : list of str
+            Primary key values which already exist in database table

         Returns
-        ----------
-        DataFrame
-            Filtered dataframe, with values that are already in DB removed.
+        -------
+        tuple of DataFrame, DataFrame
+            DataFrame of rows with duplicate pkey, DataFrame of rows with new pkey
         """
-        from sqlalchemy import tuple_, select
-
-        # Primary key data
-        primary_keys, primary_key_values = self._get_primary_key_data()
-        # Fetch matching pkey values from database
-        columns_to_fetch = [self.table.c[key] for key in primary_keys]
-        select_statement = select(columns_to_fetch).where(
-            tuple_(*columns_to_fetch).in_(primary_key_values)
-        )
-        pkeys_from_database = _wrap_result(
-            data=self.pd_sql.execute(select_statement), columns=primary_keys
-        )
+        in_db = _wrap_result(data=keys_in_db, columns=primary_keys)
         # Get temporary dataframe so as not to delete values from main df
         temp = self._get_index_formatted_dataframe()
-        # Delete rows from dataframe where primary keys match
-        # Method requires tuples, to account for cases where indexes do not match
-        to_be_deleted_mask = (
+        exists_mask = (
             temp[primary_keys]
             .apply(tuple, axis=1)
-            .isin(pkeys_from_database[primary_keys].apply(tuple, axis=1))
+            .isin(in_db[primary_keys].apply(tuple, axis=1))
         )
-        temp.drop(temp[to_be_deleted_mask].index, inplace=True)
+        return temp.loc[exists_mask], temp.loc[~exists_mask]

-        return temp
+    def _generate_update_statements(self, primary_keys, keys_in_db, rows_to_update):
+        """
+        Generate SQL Update statements for rows with existing primary keys.
+
+        Currently, SQL Update statements do not support a multi-statement query,
+        therefore this method returns a list of individual update queries which
+        will need to be executed in one transaction.
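+
+        As an illustrative sketch only (the ``pkey_table`` name and its
+        columns ``a``/``b``/``c`` are borrowed from this PR's test fixtures):
+        for primary key column ``a`` and an incoming row
+        ``{"a": 1, "b": "name1", "c": "new_val1"}``, each generated
+        statement is roughly equivalent to::
+
+            UPDATE pkey_table
+            SET a = 1, b = 'name1', c = 'new_val1'
+            WHERE (pkey_table.a) IN ((1))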
+
+        Parameters
+        ----------
+        primary_keys : list of str
+            Primary key columns
+        keys_in_db : list of str
+            Primary key values which already exist in database table
+        rows_to_update : DataFrame
+            DataFrame of rows containing data with which to update existing pkeys
+
+        Returns
+        -------
+        list of sqlalchemy.sql.dml.Update
+            List of update queries
+        """
+        from sqlalchemy import tuple_
+
+        new_records = rows_to_update.to_dict(orient="records")
+        # TODO: Move this or remove entirely
+        assert len(new_records) == len(
+            keys_in_db
+        ), "Mismatch between new records and existing keys"
+        stmts = []
+        for i, keys in enumerate(keys_in_db):
+            stmt = (
+                self.table.update()
+                .where(tuple_(*(self.table.c[key] for key in primary_keys)).in_([keys]))
+                .values(new_records[i])
+            )
+            stmts.append(stmt)
+        return stmts
+
+    def _on_conflict_do_update(self):
+        """
+        Generate update statements for rows with clashing primary key from database.
+
+        `on_conflict do_update` prioritizes incoming data over existing data in the DB.
+        This method splits the incoming dataframe between rows with new and existing
+        primary key values.
+        For existing values, Update statements are generated, while new values are passed
+        on to be inserted as usual.
+
+        Updates are executed in the same transaction as the ensuing data insert.
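+
+        Illustrative walk-through (values taken from this PR's test fixtures,
+        not a normative example): with keys 1-3 already in the table and
+        incoming keys ``[1, 2, 4, 5]``, rows 1 and 2 are routed to UPDATE
+        statements while rows 4 and 5 fall through to the regular INSERT path.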
""" - if self.if_exists == "upsert_keep": - data = self._upsert_keep_processing() - self._insert(data=data, chunksize=chunksize, method=method) - elif self.if_exists == "upsert_overwrite": - delete_statement = self._upsert_overwrite_processing() + if self.on_conflict == "do_update": + new_data, update_stmts = self._on_conflict_do_update() self._insert( - chunksize=chunksize, method=method, other_stmts=[delete_statement] + data=new_data, + chunksize=chunksize, + method=method, + other_stmts=[update_stmts], ) + elif self.on_conflict == "do_nothing": + new_data = self._on_conflict_do_nothing() + self._insert(data=new_data, chunksize=chunksize, method=method) else: self._insert(chunksize=chunksize, method=method) @@ -1467,6 +1546,7 @@ def to_sql( frame, name, if_exists="fail", + on_conflict=None, index=True, index_label=None, schema=None, @@ -1482,15 +1562,21 @@ def to_sql( frame : DataFrame name : string Name of SQL table. - if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, - default 'fail'. - - fail: If table exists, do nothing. - - replace: If table exRsts, drop it, recreate it, and insert data. - - append: If table exists, insert data. Create if does not exist. - - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys), - prioritising incoming records over duplicates already in the database. - - upsert_keep: If table exists, perform an UPSERT (based on primary keys), - prioritising records already in the database over incoming duplicates. + if_exists : {'fail', 'replace', 'append'}, + default 'fail'. + - fail: If table exists, do nothing. + - replace: If table exRsts, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + on_conflict : {None, 'do_nothing', 'do_update'}, optional + Determine insertion behaviour in case of a primary key clash. + If the table being written has primary key constraints, attempting + to insert new rows with the same values in the primary key columns, + will cause an error. In this case the conflicting records can either + be updated in the database or ignored from the incoming dataframe. + - do_nothing: Ignore incoming rows with primary key clashes, and + insert only the incoming rows with non-conflicting primary keys + - do_update: Update existing rows in database with primary key clashes, + and append the remaining rows with non-conflicting primary keys index : boolean, default True Write DataFrame index as a column. 
index_label : string or sequence, default None @@ -1536,6 +1622,7 @@ def to_sql( frame=frame, index=index, if_exists=if_exists, + on_conflict=on_conflict, index_label=index_label, schema=schema, dtype=dtype, From 59c76ac9c6e901b1a198bfc0ccff6221bf769c3b Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Wed, 11 Nov 2020 20:52:16 +0000 Subject: [PATCH 57/75] chore: update tests to reflect new api --- pandas/tests/io/test_sql.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 28bed3ae58167..884f7c0b36d39 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -549,7 +549,7 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") - def _to_sql_upsert_keep(self, method): + def _to_sql_on_conflict_update(self, method): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -569,7 +569,8 @@ def _to_sql_upsert_keep(self, method): self.pandasSQL.to_sql( self.pkey_table_frame, "pkey_table", - if_exists="upsert_keep", + if_exists="append", + on_conflict="do_nothing", index=False, method=method, ) @@ -587,7 +588,7 @@ def _to_sql_upsert_keep(self, method): # Clean up self.drop_table("pkey_table") - def _to_sql_upsert_overwrite(self, method): + def _to_sql_on_conflict_nothing(self, method): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -606,7 +607,8 @@ def _to_sql_upsert_overwrite(self, method): self.pandasSQL.to_sql( self.pkey_table_frame, "pkey_table", - if_exists="upsert_overwrite", + if_exists="append", + on_conflict="do_update", index=False, method=method, ) From 286e8b8f1e94e4c03c3d7c0b107cf4f3e31ce05a Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 16 Nov 2020 08:02:19 +0000 Subject: [PATCH 58/75] feat: use on_conflict update methodology --- pandas/io/sql.py | 234 +++++++++++++++++++++++++----------- pandas/tests/io/test_sql.py | 18 +-- 2 files changed, 176 insertions(+), 76 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 42d39fcfdd947..d9a9bff9122c3 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -523,6 +523,7 @@ def to_sql( con, schema=None, if_exists="fail", + on_conflict=None, index=True, index_label=None, chunksize=None, @@ -545,15 +546,21 @@ def to_sql( schema : str, optional Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). - if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'}, + if_exists : {'fail', 'replace', 'append'}, default 'fail'. - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys), - prioritising incoming records over duplicates already in the database. - - upsert_keep: If table exists, perform an UPSERT (based on primary keys), - prioritising records already in the database over incoming duplicates. + on_conflict : {None, 'do_nothing', 'do_update'}, optional + Determine insertion behaviour in case of a primary key clash. + If the table being written has primary key constraints, attempting + to insert new rows with the same values in the primary key columns, + will cause an error. In this case the conflicting records can either + be updated in the database or ignored from the incoming dataframe. 
+ - do_nothing: Ignore incoming rows with primary key clashes, and + insert only the incoming rows with non-conflicting primary keys + - do_update: Update existing rows in database with primary key clashes, + and append the remaining rows with non-conflicting primary keys index : boolean, default True Write DataFrame index as a column. index_label : str or sequence, optional @@ -584,10 +591,8 @@ def to_sql( "fail", "replace", "append", - "upsert_keep", - "upsert_overwrite", ): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) + raise ValueError(f"'{if_exists}' is not valid for if_exists") pandas_sql = pandasSQL_builder(con, schema=schema) @@ -602,6 +607,7 @@ def to_sql( frame, name, if_exists=if_exists, + on_conflict=on_conflict, index=index, index_label=index_label, schema=schema, @@ -690,6 +696,7 @@ def __init__( frame=None, index=True, if_exists="fail", + on_conflict=None, prefix="pandas", index_label=None, schema=None, @@ -703,6 +710,7 @@ def __init__( self.index = self._index_name(index, index_label) self.schema = schema self.if_exists = if_exists + self.on_conflict = on_conflict self.keys = keys self.dtype = dtype @@ -736,74 +744,142 @@ def create(self): elif self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() - elif self.if_exists in {"append", "upsert_overwrite", "upsert_keep"}: + elif self.if_exists == "append": pass else: raise ValueError(f"'{self.if_exists}' is not valid for if_exists") else: self._execute_create() - def _upsert_overwrite_processing(self): + def _load_existing_pkeys(self, primary_keys, primary_key_values): """ - Generate delete statement for rows with clashing primary key from database. + Load existing primary keys from Database - `upsert_overwrite` prioritizes incoming data, over existing data in the DB. - This method generates the Delete statement for duplicate rows, - which is to be executed in the same transaction as the ensuing data insert. + Parameters + ---------- + primary_keys : list of str + List of primary key column names + primary_key_values : list of str + List of primary key values already present in incoming dataframe Returns - ---------- - sqlalchemy.sql.dml.Delete - Delete statement to be executed against DB + ------- + list of str + primary key values in incoming dataframe which already exist in database """ - from sqlalchemy import tuple_ + from sqlalchemy import select, tuple_ - # Primary key data - primary_keys, primary_key_values = self._get_primary_key_data() - # Generate delete statement - delete_statement = self.table.delete().where( - tuple_(*(self.table.c[col] for col in primary_keys)).in_(primary_key_values) + cols_to_fetch = [self.table.c[key] for key in primary_keys] + select_stmt = select(cols_to_fetch).where( + tuple_(*cols_to_fetch).in_(primary_key_values) ) - return delete_statement + return self.pd_sql.execute(select_stmt).fetchall() - def _upsert_keep_processing(self): + def _split_incoming_data(self, primary_keys, keys_in_db): """ - Delete clashing values from a copy of the incoming dataframe. + Split incoming dataframe based off whether primary key already exists in db. - `upsert_keep` prioritizes data in DB over incoming data. - This method creates a copy of the incoming dataframe, - fetches matching data from DB, deletes matching data from copied frame, - and returns that frame to be inserted. 
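+        The lookup issues a single SELECT over the primary key columns; as a
+        hedged illustration (table and column names are examples only, taken
+        from the test fixtures), for a lone key column ``a`` the emitted
+        query is roughly::
+
+            SELECT pkey_table.a FROM pkey_table
+            WHERE (pkey_table.a) IN ((1), (2), (4), (5))
+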
+        Parameters
+        ----------
+        primary_keys : list of str
+            List of primary key column names
+        primary_key_values : list of str
+            List of primary key values already present in incoming dataframe

         Returns
-        ----------
-        sqlalchemy.sql.dml.Delete
-            Delete statement to be executed against DB
+        -------
+        list of str
+            primary key values in incoming dataframe which already exist in database
         """
-        from sqlalchemy import tuple_
+        from sqlalchemy import select, tuple_

-        # Primary key data
-        primary_keys, primary_key_values = self._get_primary_key_data()
-        # Generate delete statement
-        delete_statement = self.table.delete().where(
-            tuple_(*(self.table.c[col] for col in primary_keys)).in_(primary_key_values)
+        cols_to_fetch = [self.table.c[key] for key in primary_keys]
+        select_stmt = select(cols_to_fetch).where(
+            tuple_(*cols_to_fetch).in_(primary_key_values)
         )
-        return delete_statement
+        return self.pd_sql.execute(select_stmt).fetchall()

-    def _upsert_keep_processing(self):
+    def _split_incoming_data(self, primary_keys, keys_in_db):
         """
-        Delete clashing values from a copy of the incoming dataframe.
+        Split incoming dataframe based on whether primary key already exists in db.

-        `upsert_keep` prioritizes data in DB over incoming data.
-        This method creates a copy of the incoming dataframe,
-        fetches matching data from DB, deletes matching data from copied frame,
-        and returns that frame to be inserted.
+        Parameters
+        ----------
+        primary_keys : list of str
+            Primary key columns
+        keys_in_db : list of str
+            Primary key values which already exist in database table

         Returns
-        ----------
-        DataFrame
-            Filtered dataframe, with values that are already in DB removed.
+        -------
+        tuple of DataFrame, DataFrame
+            DataFrame of rows with duplicate pkey, DataFrame of rows with new pkey
         """
-        from sqlalchemy import select, tuple_
-
-        # Primary key data
-        primary_keys, primary_key_values = self._get_primary_key_data()
-        # Fetch matching pkey values from database
-        columns_to_fetch = [self.table.c[key] for key in primary_keys]
-        select_statement = select(columns_to_fetch).where(
-            tuple_(*columns_to_fetch).in_(primary_key_values)
-        )
-        pkeys_from_database = _wrap_result(
-            data=self.pd_sql.execute(select_statement), columns=primary_keys
-        )
+        in_db = _wrap_result(data=keys_in_db, columns=primary_keys)
         # Get temporary dataframe so as not to delete values from main df
         temp = self._get_index_formatted_dataframe()
-        # Delete rows from dataframe where primary keys match
-        # Method requires tuples, to account for cases where indexes do not match
-        to_be_deleted_mask = (
+        exists_mask = (
             temp[primary_keys]
             .apply(tuple, axis=1)
-            .isin(pkeys_from_database[primary_keys].apply(tuple, axis=1))
+            .isin(in_db[primary_keys].apply(tuple, axis=1))
         )
-        temp.drop(temp[to_be_deleted_mask].index, inplace=True)
+        return temp.loc[exists_mask], temp.loc[~exists_mask]

-        return temp
+    def _generate_update_statements(self, primary_keys, keys_in_db, rows_to_update):
+        """
+        Generate SQL Update statements for rows with existing primary keys.
+
+        Currently, SQL Update statements do not support a multi-statement query,
+        therefore this method returns a list of individual update queries which
+        will need to be executed in one transaction.
+
+        Parameters
+        ----------
+        primary_keys : list of str
+            Primary key columns
+        keys_in_db : list of str
+            Primary key values which already exist in database table
+        rows_to_update : DataFrame
+            DataFrame of rows containing data with which to update existing pkeys
+
+        Returns
+        -------
+        list of sqlalchemy.sql.dml.Update
+            List of update queries
+        """
+        from sqlalchemy import tuple_
+
+        new_records = rows_to_update.to_dict(orient="records")
+        # TODO: Move this or remove entirely
+        assert len(new_records) == len(
+            keys_in_db
+        ), "Mismatch between new records and existing keys"
+        stmts = []
+        for i, keys in enumerate(keys_in_db):
+            stmt = (
+                self.table.update()
+                .where(tuple_(*(self.table.c[key] for key in primary_keys)).in_([keys]))
+                .values(new_records[i])
+            )
+            stmts.append(stmt)
+        return stmts
+
+    def _on_conflict_do_update(self):
+        """
+        Generate update statements for rows with clashing primary key from database.
+
+        `on_conflict do_update` prioritizes incoming data over existing data in the DB.
+        This method splits the incoming dataframe between rows with new and existing
+        primary key values.
+        For existing values, Update statements are generated, while new values are passed
+        on to be inserted as usual.
+
+        Updates are executed in the same transaction as the ensuing data insert.
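+
+        From the caller's side this corresponds to the following sketch
+        (``df`` and ``con`` are hypothetical names for the incoming frame
+        and the SQLAlchemy connectable)::
+
+            from pandas.io import sql
+            sql.to_sql(df, "pkey_table", con, if_exists="append",
+                       on_conflict="do_update", index=False)
+
+        which updates the clashing rows and appends the rest.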
+
+        Returns
+        -------
+        tuple of DataFrame and list of sqlalchemy.sql.dml.Update
+            Rows with new primary keys to be inserted, and the UPDATE
+            statements for rows whose primary keys already exist in the DB
+        """
+        # Primary key data
+        pk_cols, pk_values = self._get_primary_key_data()
+        existing_keys = self._load_existing_pkeys(pk_cols, pk_values)
+        existing_data, new_data = self._split_incoming_data(pk_cols, existing_keys)
+        update_stmts = self._generate_update_statements(
+            pk_cols, existing_keys, existing_data
+        )
+
+        return new_data, update_stmts
+
+    def _on_conflict_do_nothing(self):
+        """
+        Split incoming dataframe so that only rows with new primary keys are inserted.
+
+        `on_conflict` set to `do_nothing` prioritizes existing data in the DB.
+        This method identifies incoming records in the primary key columns
+        which correspond to existing primary key constraints in the db table, and
+        prevents them from being inserted.
+        """
+        pk_cols, pk_values = self._get_primary_key_data()
+        existing_keys = self._load_existing_pkeys(pk_cols, pk_values)
+        existing_data, new_data = self._split_incoming_data(pk_cols, existing_keys)
+        return new_data

     def _get_primary_key_data(self):
         """
@@ -818,7 +894,7 @@
         -------
         primary_keys : list of str
             Primary key names
-        primary_key_values : iterable
+        primary_key_values : list of str
             DataFrame rows, for columns corresponding to `primary_key` names
         """
         # reflect MetaData object and assign contents of db to self.table attribute
@@ -838,7 +914,7 @@
             raise ValueError(f"No primary keys found for table {self.name}")

         temp = self._get_index_formatted_dataframe()
-        primary_key_values = zip(*[temp[key] for key in primary_keys])
+        primary_key_values = list(zip(*[temp[key] for key in primary_keys]))
         return primary_keys, primary_key_values

     def _execute_insert(self, conn, keys, data_iter):
@@ -921,14 +997,17 @@ def insert(self, chunksize=None, method=None):
         """
         Determines what data to pass to the underlying insert method.
         """
-        if self.if_exists == "upsert_keep":
-            data = self._upsert_keep_processing()
-            self._insert(data=data, chunksize=chunksize, method=method)
-        elif self.if_exists == "upsert_overwrite":
-            delete_statement = self._upsert_overwrite_processing()
+        if self.on_conflict == "do_update":
+            new_data, update_stmts = self._on_conflict_do_update()
             self._insert(
-                chunksize=chunksize, method=method, other_stmts=[delete_statement]
+                data=new_data,
+                chunksize=chunksize,
+                method=method,
+                other_stmts=update_stmts,
+            )
+        elif self.on_conflict == "do_nothing":
+            new_data = self._on_conflict_do_nothing()
+            self._insert(data=new_data, chunksize=chunksize, method=method)
         else:
             self._insert(chunksize=chunksize, method=method)

@@ -1467,6 +1546,7 @@
         frame,
         name,
         if_exists="fail",
+        on_conflict=None,
         index=True,
         index_label=None,
         schema=None,
@@ -1482,15 +1562,21 @@
         frame : DataFrame
         name : string
             Name of SQL table.
-        if_exists : {'fail', 'replace', 'append', 'upsert_overwrite', 'upsert_keep'},
-            default 'fail'.
-            - fail: If table exists, do nothing.
-            - replace: If table exRsts, drop it, recreate it, and insert data.
-            - append: If table exists, insert data. Create if does not exist.
-            - upsert_overwrite: If table exists, perform an UPSERT (based on primary keys),
-            prioritising incoming records over duplicates already in the database.
-            - upsert_keep: If table exists, perform an UPSERT (based on primary keys),
-            prioritising records already in the database over incoming duplicates.
+        if_exists : {'fail', 'replace', 'append'},
+            default 'fail'.
+            - fail: If table exists, do nothing.
+            - replace: If table exists, drop it, recreate it, and insert data.
+            - append: If table exists, insert data. Create if does not exist.
+        on_conflict : {None, 'do_nothing', 'do_update'}, optional
+            Determine insertion behaviour in case of a primary key clash.
+            If the table being written has primary key constraints, attempting
+            to insert new rows with the same values in the primary key columns
+            will cause an error. In this case the conflicting records can either
+            be updated in the database or ignored from the incoming dataframe.
+            - do_nothing: Ignore incoming rows with primary key clashes, and
+            insert only the incoming rows with non-conflicting primary keys
+            - do_update: Update existing rows in database with primary key clashes,
+            and append the remaining rows with non-conflicting primary keys
         index : boolean, default True
             Write DataFrame index as a column.
         index_label : string or sequence, default None
@@ -1536,6 +1622,7 @@ def to_sql(
             frame=frame,
             index=index,
             if_exists=if_exists,
+            on_conflict=on_conflict,
             index_label=index_label,
             schema=schema,
             dtype=dtype,
@@ -1910,6 +1997,7 @@ def to_sql(
         frame,
         name,
         if_exists="fail",
+        on_conflict=None,
         index=True,
         index_label=None,
         schema=None,
@@ -1929,6 +2017,16 @@
         fail: If table exists, do nothing.
         replace: If table exists, drop it, recreate it, and insert data.
         append: If table exists, insert data. Create if it does not exist.
+        on_conflict : {None, 'do_nothing', 'do_update'}, optional
+            Determine insertion behaviour in case of a primary key clash.
+            If the table being written has primary key constraints, attempting
+            to insert new rows with the same values in the primary key columns
+            will cause an error. In this case the conflicting records can either
+            be updated in the database or ignored from the incoming dataframe.
+ - do_nothing: Ignore incoming rows with primary key clashes, and + insert only the incoming rows with non-conflicting primary keys + - do_update: Update existing rows in database with primary key clashes, + and append the remaining rows with non-conflicting primary keys index : boolean, default True Write DataFrame index as a column index_label : string or sequence, default None diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 28bed3ae58167..bdfb0efab6390 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -549,7 +549,7 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") - def _to_sql_upsert_keep(self, method): + def _to_sql_on_conflict_update(self, method): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -569,7 +569,8 @@ def _to_sql_upsert_keep(self, method): self.pandasSQL.to_sql( self.pkey_table_frame, "pkey_table", - if_exists="upsert_keep", + if_exists="append", + on_conflict="do_update", index=False, method=method, ) @@ -587,7 +588,7 @@ def _to_sql_upsert_keep(self, method): # Clean up self.drop_table("pkey_table") - def _to_sql_upsert_overwrite(self, method): + def _to_sql_on_conflict_nothing(self, method): """ Original table: 3 rows pkey_table_frame: 4 rows (2 duplicate keys) @@ -606,7 +607,8 @@ def _to_sql_upsert_overwrite(self, method): self.pandasSQL.to_sql( self.pkey_table_frame, "pkey_table", - if_exists="upsert_overwrite", + if_exists="append", + on_conflict="do_nothing", index=False, method=method, ) @@ -1422,12 +1424,12 @@ def test_to_sql_method_callable(self): self._to_sql_method_callable() @pytest.mark.parametrize("method", [None, "multi"]) - def test_to_sql_upsert_keep(self, method): - self._to_sql_upsert_keep(method) + def test_to_sql_conflict_nothing(self, method): + self._to_sql_on_conflict_nothing(method) @pytest.mark.parametrize("method", [None, "multi"]) - def test_to_sql_upsert_overwrite(self, method): - self._to_sql_upsert_overwrite(method) + def test_to_sql_conflict_update(self, method): + self._to_sql_on_conflict_update(method) def test_create_table(self): temp_conn = self.connect() From 70e0eb153a0e692867e224edfdfcd865c5587a0d Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 22 Nov 2020 12:57:47 +0000 Subject: [PATCH 59/75] testing: add tests for single and composite primary keys --- pandas/tests/io/test_sql.py | 145 ++++++++++++++++++++++++------------ 1 file changed, 98 insertions(+), 47 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index bdfb0efab6390..d1c6665045583 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -205,30 +205,53 @@ SELECT * FROM iris """ }, - "create_pkey_table": { + "create_single_pkey_table": { "sqlite": """CREATE TABLE pkey_table ( "a" Integer Primary Key, - "b" TEXT + "b" TEXT, + "c" TEXT )""", "mysql": """CREATE TABLE pkey_table ( `a` INTEGER, `b` TEXT, + `c` TEXT PRIMARY KEY (a) )""", "postgresql": """CREATE TABLE pkey_table ( "a" INTEGER PRIMARY KEY, - "b" TEXT + "b" TEXT, + "c" TEXT + )""", + }, + "create_comp_pkey_table": { + "sqlite": """CREATE TABLE pkey_table ( + "a" Integer, + "b" TEXT, + "c" TEXT, + PRIMARY KEY ("a", "b") + )""", + "mysql": """CREATE TABLE pkey_table ( + `a` INTEGER, + `b` TEXT, + `c` TEXT + PRIMARY KEY (a, b) + )""", + "postgresql": """CREATE TABLE pkey_table ( + "a" INTEGER PRIMARY KEY, + "b" TEXT, + "c" TEXT, + PRIMARY KEY(a, b) )""", }, "insert_pkey_table": { - "sqlite": """INSERT INTO pkey_table VALUES (?, ?)""", - 
"mysql": """INSERT INTO pkey_table VALUES (%s, %s)""", - "postgresql": """INSERT INTO pkey_table VALUES (%s, %s)""", + "sqlite": """INSERT INTO pkey_table VALUES (?, ?, ?)""", + "mysql": """INSERT INTO pkey_table VALUES (%s, %s, %s)""", + "postgresql": """INSERT INTO pkey_table VALUES (%s, %s, %s)""", }, "read_pkey_table": { - "sqlite": """SELECT b FROM pkey_table WHERE A IN (?, ?)""", - "mysql": """SELECT b FROM pkey_table WHERE A IN (%s, %s)""", - "postgresql": """SELECT b FROM pkey_table WHERE A IN (%s, %s)""", + "sqlite": """SELECT c FROM pkey_table WHERE A IN (?, ?)""", + "mysql": """SELECT c FROM pkey_table WHERE A IN (%s, %s)""", + "postgresql": """SELECT c FROM pkey_table WHERE A IN (%s, %s)""", }, } @@ -334,11 +357,13 @@ def _check_iris_loaded_frame(self, iris_frame): assert issubclass(pytype, np.floating) tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) - def _create_pkey_table(self): + def _create_pkey_table(self, pkey_type): self.drop_table("pkey_table") - self._get_exec().execute(SQL_STRINGS["create_pkey_table"][self.flavor]) + self._get_exec().execute( + SQL_STRINGS[f"create_{pkey_type}_pkey_table"][self.flavor] + ) ins = SQL_STRINGS["insert_pkey_table"][self.flavor] - data = [(1, "name1"), (2, "name2"), (3, "name3")] + data = [(1, "name1", "val1"), (2, "name2", "val2"), (3, "name3", "val3")] self._get_exec().execute(ins, data) def _load_test1_data(self): @@ -438,8 +463,13 @@ def _load_raw_sql(self): ) def _load_pkey_table_data(self): - columns = ["a", "b"] - data = [(1, "new_name1"), (2, "new_name2"), (4, "name4"), (5, "name5")] + columns = ["a", "b", "c"] + data = [ + (1, "name1", "new_val1"), + (2, "name2", "new_val2"), + (4, "name4", "val4"), + (5, "name5", "val5"), + ] self.pkey_table_frame = DataFrame(data, columns=columns) @@ -549,19 +579,21 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") - def _to_sql_on_conflict_update(self, method): + def _to_sql_on_conflict_update(self, method, pkey_type): """ - Original table: 3 rows - pkey_table_frame: 4 rows (2 duplicate keys) - Expected after upsert: - - table len = 5 - - Original database values for rows with duplicate keys - - dataframe has all original values + GIVEN: + - Original database table: 3 rows + - new dataframe: 4 rows (2 duplicate keys) + WHEN: + - on conflict update insert + THEN: + - DB table len = 5 + - Conflicting primary keys in DB updated """ # Nuke self.drop_table("pkey_table") # Re-create original table - self._create_pkey_table() + self._create_pkey_table(pkey_type) # Original table exists and as 3 rows assert self.pandasSQL.has_table("pkey_table") assert self._count_rows("pkey_table") == 3 @@ -576,33 +608,46 @@ def _to_sql_on_conflict_update(self, method): ) # Check table len correct assert self._count_rows("pkey_table") == 5 - # Check original DB values maintained for duplicate keys + # Check conflicting primary keys have been updated + # Get new values for conflicting keys duplicate_keys = [1, 2] duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) - data_from_db = [val[0] for val in duplicate_val].sort() - expected = ["name1", "name2"].sort() + data_from_db = sorted(val[0] for val in duplicate_val) + # Expected values from pkey_table_frame + expected = sorted(["new_val1", "new_val2"]) assert data_from_db == expected # Finally, confirm that duplicate values are not removed from original df object assert len(self.pkey_table_frame.index) == 4 # Clean up 
self.drop_table("pkey_table") - def _to_sql_on_conflict_nothing(self, method): + def _to_sql_on_conflict_nothing(self, method, pkey_type): """ - Original table: 3 rows - pkey_table_frame: 4 rows (2 duplicate keys) - Expected after upsert: - - table len = 5 - - dataframe values for rows with duplicate keys + GIVEN: + - Original table: 3 rows + - new dataframe: 4 rows (2 duplicate keys) + WHEN: + - on conflict do nothing insert + THEN: + - database table len = 5 + - conflicting keys in table not updated """ # Nuke self.drop_table("pkey_table") # Re-create original table - self._create_pkey_table() - # Original table exists and as 3 rows + self._create_pkey_table(pkey_type) + # Original table exists and has 3 rows assert self.pandasSQL.has_table("pkey_table") assert self._count_rows("pkey_table") == 3 + # Prepare SQL for reading duplicate keys + duplicate_keys = [1, 2] + duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] + #  get conflicting pkey values before insert + duplicate_val_before = self._get_exec().execute( + duplicate_key_query, duplicate_keys + ) + data_from_db_before = sorted(val[0] for val in duplicate_val_before) # Insert new dataframe self.pandasSQL.to_sql( self.pkey_table_frame, @@ -614,17 +659,21 @@ def _to_sql_on_conflict_nothing(self, method): ) # Check table len correct assert self._count_rows("pkey_table") == 5 - # Check original DB values maintained for duplicate keys - duplicate_keys = [1, 2] - duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor] - duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys) - data_from_db = [val[0] for val in duplicate_val].sort() - data_from_df = list( + # Get conflicting keys from DB after to_sql + duplicate_val_after = self._get_exec().execute( + duplicate_key_query, duplicate_keys + ) + data_from_db_after = sorted(val[0] for val in duplicate_val_after) + # Get data from incoming df + data_from_df = sorted( self.pkey_table_frame.loc[ - self.pkey_table_frame["a"].isin(duplicate_keys), "b" - ] - ).sort() - assert data_from_db == data_from_df + self.pkey_table_frame["a"].isin(duplicate_keys), "c" + ].tolist() + ) + # Check original DB values maintained for duplicate keys + assert data_from_db_before == data_from_db_after + # Check DB values not equal to new values + assert data_from_db_after != data_from_df # Clean up self.drop_table("pkey_table") @@ -1424,12 +1473,14 @@ def test_to_sql_method_callable(self): self._to_sql_method_callable() @pytest.mark.parametrize("method", [None, "multi"]) - def test_to_sql_conflict_nothing(self, method): - self._to_sql_on_conflict_nothing(method) + @pytest.mark.parametrize("pkey_type", ["single", "comp"]) + def test_to_sql_conflict_nothing(self, method, pkey_type): + self._to_sql_on_conflict_nothing(method, pkey_type) @pytest.mark.parametrize("method", [None, "multi"]) - def test_to_sql_conflict_update(self, method): - self._to_sql_on_conflict_update(method) + @pytest.mark.parametrize("pkey_type", ["single", "comp"]) + def test_to_sql_conflict_update(self, method, pkey_type): + self._to_sql_on_conflict_update(method, pkey_type) def test_create_table(self): temp_conn = self.connect() From d4fc6d4bb91f6f86b2ab783ae42de04743a3a4c2 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 23 Nov 2020 06:57:00 +0000 Subject: [PATCH 60/75] chore: add api tests for invalid if_exists, on_conflict args --- pandas/io/sql.py | 8 +++ pandas/tests/io/test_sql.py | 107 +++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 1 deletion(-) diff --git 
a/pandas/io/sql.py b/pandas/io/sql.py
index d9a9bff9122c3..f22d6dd63ce46 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -594,6 +594,14 @@
     ):
         raise ValueError(f"'{if_exists}' is not valid for if_exists")

+    if on_conflict:
+        # check that the on_conflict argument is valid
+        if on_conflict not in ("do_update", "do_nothing"):
+            raise ValueError(f"'{on_conflict}' is not valid for on_conflict")
+        # on_conflict only used with append
+        elif if_exists != "append":
+            raise ValueError("on_conflict can only be used with 'append' operations")
+
     pandas_sql = pandasSQL_builder(con, schema=schema)

     if isinstance(frame, Series):
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index d1c6665045583..e320b1e79915a 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -214,7 +214,7 @@
     "mysql": """CREATE TABLE pkey_table (
         `a` INTEGER,
         `b` TEXT,
-        `c` TEXT
+        `c` TEXT,
         PRIMARY KEY (a)
         )""",
     "postgresql": """CREATE TABLE pkey_table (
@@ -677,6 +677,79 @@ def _to_sql_on_conflict_nothing(self, method, pkey_type):
         # Clean up
         self.drop_table("pkey_table")

+    def _test_to_sql_on_conflict_with_index(self, method, pkey_type):
+        """
+        GIVEN:
+            - Original db table: 3 rows
+            - New dataframe: 4 rows (2 duplicate keys), pkey as index
+        WHEN:
+            - inserting new data, with the primary key in the index
+            - on conflict do update
+        THEN:
+            - DB table len = 5
+            - Conflicting primary keys in DB updated
+        """
+        # Nuke
+        self.drop_table("pkey_table")
+        # Re-create table
+        self._create_pkey_table(pkey_type)
+        # Original table exists and has 3 rows
+        assert self.pandasSQL.has_table("pkey_table")
+        assert self._count_rows("pkey_table") == 3
+        if pkey_type == "single":
+            index_pkey_table = self.pkey_table_frame.set_index("a")
+        else:
+            index_pkey_table = self.pkey_table_frame.set_index(["a", "b"])
+        # Insert new dataframe
+        self.pandasSQL.to_sql(
+            index_pkey_table,
+            "pkey_table",
+            if_exists="append",
+            on_conflict="do_update",
+            index=True,
+            method=method,
+        )
+        # Check table len correct
+        assert self._count_rows("pkey_table") == 5
+        # Check conflicting primary keys have been updated
+        # Get new values for conflicting keys
+        duplicate_keys = [1, 2]
+        duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor]
+        duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys)
+        data_from_db = sorted(val[0] for val in duplicate_val)
+        # Expected values from pkey_table_frame
+        expected = sorted(["new_val1", "new_val2"])
+        assert data_from_db == expected
+        # Finally, confirm that duplicate values are not removed from original df object
+        assert len(self.pkey_table_frame.index) == 4
+        # Clean up
+        self.drop_table("pkey_table")
+
+    def _to_sql_on_conflict_with_non_append(self, if_exists, on_conflict):
+        """
+        GIVEN:
+            - to_sql is called
+        WHEN:
+            - `on_conflict` is not None
+            - `if_exists` is set to a value other than `append`
+        THEN:
+            - ValueError is raised
+        """
+        # Nuke table and re-create
+        self.drop_table("pkey_table")
+        self._create_pkey_table("single")
+        # Attempt insert
+        assert if_exists != "append"
+        with pytest.raises(ValueError):
+            # Insert new dataframe
+            self.pandasSQL.to_sql(
+                self.pkey_table_frame,
+                "pkey_table",
+                if_exists=if_exists,
+                on_conflict=on_conflict,
+                index=False,
+            )
+
     def _roundtrip(self):
         self.drop_table("test_frame_roundtrip")
         self.pandasSQL.to_sql(self.test_frame1, "test_frame_roundtrip")
@@ -823,6 +896,28 @@ def test_to_sql_series(self):
         s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
         tm.assert_frame_equal(s.to_frame(), s2)

+    def
test_to_sql_invalid_on_conflict(self): + msg = "'update' is not valid for on_conflict" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + self.pkey_table_frame, + "pkey_frame1", + self.conn, + if_exists="append", + on_conflict="update", + ) + + def test_to_sql_on_conflict_non_append(self): + msg = "on_conflict can only be used with 'append' operations" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + self.pkey_table_frame, + "pkey_frame1", + self.conn, + if_exists="replace", + on_conflict="do_update", + ) + def test_roundtrip(self): sql.to_sql(self.test_frame1, "test_frame_roundtrip", con=self.conn) result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn) @@ -1482,6 +1577,16 @@ def test_to_sql_conflict_nothing(self, method, pkey_type): def test_to_sql_conflict_update(self, method, pkey_type): self._to_sql_on_conflict_update(method, pkey_type) + @pytest.mark.parametrize("method", [None, "multi"]) + @pytest.mark.parametrize("pkey_type", ["single", "comp"]) + def test_to_sql_on_conflict_with_index(self, method, pkey_type): + self._test_to_sql_on_conflict_with_index(method, pkey_type) + + @pytest.mark.parametrize("if_exists", ["fail", "replace"]) + @pytest.mark.parametrize("on_conflict", ["do_update", "do_nothing"]) + def test_to_sql_conflict_with_non_append(self, if_exists, on_conflict): + self._to_sql_on_conflict_with_non_append(if_exists, on_conflict) + def test_create_table(self): temp_conn = self.connect() temp_frame = DataFrame( From c7834966580925907f07461d379009f45ce03343 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 28 Nov 2020 20:50:10 +0000 Subject: [PATCH 61/75] chore: missing comma in tests --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e320b1e79915a..a744f765b3de5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -233,7 +233,7 @@ "mysql": """CREATE TABLE pkey_table ( `a` INTEGER, `b` TEXT, - `c` TEXT + `c` TEXT, PRIMARY KEY (a, b) )""", "postgresql": """CREATE TABLE pkey_table ( From 4f4e9d9067679f0a1f785ee7e71585ab7499ba27 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 29 Nov 2020 14:27:09 +0000 Subject: [PATCH 62/75] chore: use backticks --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a744f765b3de5..af906557a5d4f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -234,7 +234,7 @@ `a` INTEGER, `b` TEXT, `c` TEXT, - PRIMARY KEY (a, b) + PRIMARY KEY (`a`, `b`) )""", "postgresql": """CREATE TABLE pkey_table ( "a" INTEGER PRIMARY KEY, From f2bc1219cc81c1ad65e859ec920cd373d3569048 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 30 Nov 2020 08:06:26 +0000 Subject: [PATCH 63/75] chore: use add constraint syntax --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 94ee5734e8581..64fef9982824e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -234,7 +234,7 @@ `a` INTEGER, `b` TEXT, `c` TEXT, - PRIMARY KEY (`a`, `b`) + CONSTRAINT pk_PkeyTable PRIMARY KEY (a,b) )""", "postgresql": """CREATE TABLE pkey_table ( "a" INTEGER PRIMARY KEY, From b9bfedc0ecaafc43af801eb788c07ce987b61844 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 18 Dec 2020 19:08:30 +0000 Subject: [PATCH 64/75] tidy up --- pandas/io/sql.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5e3e05e82f918..c180857638bec 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -710,7 +710,7 @@ def __init__( self.index = self._index_name(index, index_label) self.schema = schema self.if_exists = if_exists - self.on_conflict = (on_conflict,) + self.on_conflict = on_conflict self.keys = keys self.dtype = dtype From b27449e417ab2f8f83804757a6e89518de859f44 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Fri, 18 Dec 2020 19:25:24 +0000 Subject: [PATCH 65/75] fix: add not null constraints to mysql tables --- pandas/tests/io/test_sql.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 67ab23c02c2e7..66ac2a3f01ade 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -231,16 +231,16 @@ PRIMARY KEY ("a", "b") )""", "mysql": """CREATE TABLE pkey_table ( - `a` INTEGER, - `b` TEXT, + `a` INTEGER NOT NULL, + `b` TEXT NOT NULL, `c` TEXT, - CONSTRAINT pk_PkeyTable PRIMARY KEY (a,b) + PRIMARY KEY (`a`,`b`) )""", "postgresql": """CREATE TABLE pkey_table ( - "a" INTEGER PRIMARY KEY, + "a" INTEGER, "b" TEXT, "c" TEXT, - PRIMARY KEY(a, b) + PRIMARY KEY("a", "b") )""", }, "insert_pkey_table": { From c8c18269b1b3a1faf6ced197a1cfb32978764c91 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sat, 19 Dec 2020 13:19:39 +0000 Subject: [PATCH 66/75] fix: change mysql type from TEXT to VARCHAR for indexing --- pandas/tests/io/test_sql.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 66ac2a3f01ade..53971701e4f97 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -207,14 +207,14 @@ }, "create_single_pkey_table": { "sqlite": """CREATE TABLE pkey_table ( - "a" Integer Primary Key, + "a" Primary Key, "b" TEXT, "c" TEXT )""", "mysql": """CREATE TABLE pkey_table ( `a` INTEGER, - `b` TEXT, - `c` TEXT, + `b` VARCHAR(200), + `c` VARCHAR(200), PRIMARY KEY (a) )""", "postgresql": """CREATE TABLE pkey_table ( @@ -232,8 +232,8 @@ )""", "mysql": """CREATE TABLE pkey_table ( `a` INTEGER NOT NULL, - `b` TEXT NOT NULL, - `c` TEXT, + `b` VARCHAR(200) NOT NULL, + `c` VARCHAR(200), PRIMARY KEY (`a`,`b`) )""", "postgresql": """CREATE TABLE pkey_table ( From 197e172029358b21d8b5598cfc525c6c08dbd25d Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Mon, 15 Mar 2021 07:54:19 +0000 Subject: [PATCH 67/75] chore: use and_ inplace of tuple_ --- pandas/io/sql.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f4d2440486aa6..e734e9966bbf5 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -862,11 +862,17 @@ def _load_existing_pkeys(self, primary_keys, primary_key_values): list of str primary key values in incoming dataframe which already exist in database """ - from sqlalchemy import select + from sqlalchemy import and_, select cols_to_fetch = [self.table.c[key] for key in primary_keys] + # select_stmt = select(cols_to_fetch).where( + # tuple_(*cols_to_fetch).in_(primary_key_values) + # ) select_stmt = select(cols_to_fetch).where( - (*cols_to_fetch).in_(primary_key_values) + and_( + col.in_(key[i] for key in primary_key_values) + for i, col in enumerate(cols_to_fetch) + ) ) return self.pd_sql.execute(select_stmt).fetchall() From 8fa8e0eb3ca29badab33ee4b9991b61595c2a9c9 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Tue, 16 Mar 2021 08:47:52 +0000 
Subject: [PATCH 68/75] chore: same change for update stmts --- pandas/io/sql.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1fec6b266ed0e..1670db0cf489c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -883,7 +883,10 @@ def _load_existing_pkeys(self, primary_keys, primary_key_values): list of str primary key values in incoming dataframe which already exist in database """ - from sqlalchemy import and_, select + from sqlalchemy import ( + and_, + select, + ) cols_to_fetch = [self.table.c[key] for key in primary_keys] # select_stmt = select(cols_to_fetch).where( @@ -945,18 +948,21 @@ def _generate_update_statements(self, primary_keys, keys_in_db, rows_to_update): list of sqlalchemy.sql.dml.Update List of update queries """ - from sqlalchemy import tuple_ + from sqlalchemy import and_ new_records = rows_to_update.to_dict(orient="records") + pk_cols = [self.table.c[key] for key in primary_keys] + # TODO: Move this or remove entirely assert len(new_records) == len( keys_in_db ), "Mismatch between new records and existing keys" + stmts = [] for i, keys in enumerate(keys_in_db): stmt = ( self.table.update() - .where(tuple_(*(self.table.c[key] for key in primary_keys)).in_([keys])) + .where(and_(col == keys[j] for j, col in enumerate(pk_cols))) .values(new_records[i]) ) stmts.append(stmt) From 1dee40904190bedcbcff5b40b0cab7d53a64fea6 Mon Sep 17 00:00:00 2001 From: cvonsteg Date: Sun, 21 Mar 2021 10:30:06 +0000 Subject: [PATCH 69/75] fix: primary keys no longer require names getter --- pandas/io/sql.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1670db0cf489c..eece65e8a176d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1029,10 +1029,7 @@ def _get_primary_key_data(self): self.pd_sql.meta.reflect(only=[self.name], views=True) self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema) - primary_keys = [ - str(primary_key.name) - for primary_key in self.table.primary_key.columns.values() - ] + primary_keys = self.table.primary_key.columns.keys() # For the time being, this method is defensive and will break if # no pkeys are found. 
From 1dee40904190bedcbcff5b40b0cab7d53a64fea6 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Sun, 21 Mar 2021 10:30:06 +0000
Subject: [PATCH 69/75] fix: primary keys no longer require names getter

---
 pandas/io/sql.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 1670db0cf489c..eece65e8a176d 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -1029,10 +1029,7 @@ def _get_primary_key_data(self):
         self.pd_sql.meta.reflect(only=[self.name], views=True)
         self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema)
 
-        primary_keys = [
-            str(primary_key.name)
-            for primary_key in self.table.primary_key.columns.values()
-        ]
+        primary_keys = self.table.primary_key.columns.keys()
 
         # For the time being, this method is defensive and will break if
         # no pkeys are found. If desired this default behaviour could be

From a05937e12265acb3bac942b4ff99789f6af94fe6 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Sat, 26 Jun 2021 09:27:39 +0000
Subject: [PATCH 70/75] chore: remove wrong kwarg

---
 pandas/io/sql.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index b710dc5a21ed8..d31d2683f54c0 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -2009,7 +2009,6 @@ def to_sql(
             frame=frame,
             name=name,
             if_exists=if_exists,
-            on_conflict=on_conflict,
             index=index,
             index_label=index_label,
             schema=schema,

From 11f201fa8f68f19be2dfad0260df0323cf52d0c0 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Sun, 18 Jul 2021 12:46:43 +0000
Subject: [PATCH 71/75] tests: fix on conflict with non append tests

---
 pandas/tests/io/test_sql.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index ad74148dc18f1..4457106bbbc7f 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -807,9 +807,10 @@ def _to_sql_on_conflict_with_non_append(self, if_exists, on_conflict):
             ValueError, match="on_conflict can only be used with 'append' operations"
         ):
             # Insert new dataframe
-            self.pandasSQL.to_sql(
+            sql.to_sql(
                 self.pkey_table_frame,
-                "pkey_table",
+                "some_table",
+                con=self.conn,
                 if_exists=if_exists,
                 on_conflict=on_conflict,
                 index=False,

From 7f0b5ddb8e97bb364f0d6e0b7826e37d21f94805 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Mon, 19 Jul 2021 05:59:05 +0000
Subject: [PATCH 72/75] fix: pass on_conflict into prep_table

---
 pandas/io/sql.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 8f85ffa186674..0d2968b2c7a60 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -934,7 +934,7 @@ def _split_incoming_data(self, primary_keys, keys_in_db):
         tuple of DataFrame, DataFrame
             DataFrame of rows with duplicate pkey, DataFrame of rows with new pkey
         """
-        from pandas.core.indexex.multi import MultiIndex
+        from pandas.core.indexes.multi import MultiIndex
 
         in_db = _wrap_result(data=keys_in_db, columns=primary_keys)
         # Get temporary dataframe so as not to delete values from main df
@@ -1840,6 +1840,7 @@ def prep_table(
         frame,
         name,
         if_exists="fail",
+        on_conflict=None,
         index=True,
         index_label=None,
         schema=None,
@@ -1875,6 +1876,7 @@
             frame=frame,
             index=index,
             if_exists=if_exists,
+            on_conflict=on_conflict,
             index_label=index_label,
             schema=schema,
             dtype=dtype,
@@ -1996,6 +1998,7 @@ def to_sql(
             frame=frame,
             name=name,
             if_exists=if_exists,
+            on_conflict=on_conflict,
             index=index,
             index_label=index_label,
             schema=schema,
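Patches 70-72 are plumbing: they thread the new on_conflict keyword through to_sql and prep_table, and patch 71's test exercises the argument validation. Roughly, the checks those tests pin down look like this (the helper name and placement are illustrative, not the actual pandas internals, but the error strings match the ones asserted above):

VALID_ON_CONFLICT = (None, "do_nothing", "do_update")


def validate_conflict_args(if_exists: str, on_conflict: str | None) -> None:
    # Reject unknown values, e.g. on_conflict="update" in the test above.
    if on_conflict not in VALID_ON_CONFLICT:
        raise ValueError(f"'{on_conflict}' is not valid for on_conflict")
    # Upsert semantics only make sense when appending to an existing table.
    if on_conflict is not None and if_exists != "append":
        raise ValueError("on_conflict can only be used with 'append' operations")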
From 8123cd7e1a8c76731123f2f696af069b60110493 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Wed, 8 Sep 2021 07:09:37 +0000
Subject: [PATCH 73/75] fix: tests working

---
 pandas/io/sql.py            |  5 ++-
 pandas/tests/io/test_sql.py | 74 +++++++++++++++++++++++++------------
 2 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index ddd096f57e494..95d2fe30e62bd 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -1001,7 +1001,10 @@ def _get_primary_key_data(self):
             DataFrame rows, for columns corresponding to `primary_key` names
         """
         # reflect MetaData object and assign contents of db to self.table attribute
-        self.pd_sql.meta.reflect(only=[self.name], views=True)
+        # add self.pd_sql.meta.is_bound() check here?
+        self.pd_sql.meta.reflect(
+            bind=self.pd_sql.connectable, only=[self.name], views=True
+        )
         self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema)
 
         primary_keys = self.table.primary_key.columns.keys()

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index bcaf1c976d6f0..c8d58dc9ddedc 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -94,9 +94,16 @@
     "postgresql": "SELECT * FROM iris WHERE \"Name\" LIKE '%'",
     },
     "read_pkey_table": {
-        "sqlite": """SELECT c FROM pkey_table WHERE A IN (?, ?)""",
-        "mysql": """SELECT c FROM pkey_table WHERE A IN (%s, %s)""",
-        "postgresql": """SELECT c FROM pkey_table WHERE A IN (%s, %s)""",
+        "pkey_table_single": {
+            "sqlite": """SELECT c FROM pkey_table_single WHERE A IN (?, ?)""",
+            "mysql": """SELECT c FROM pkey_table_single WHERE A IN (%s, %s)""",
+            "postgresql": """SELECT c FROM pkey_table_single WHERE A IN (%s, %s)""",
+        },
+        "pkey_table_comp": {
+            "sqlite": """SELECT c FROM pkey_table_comp WHERE A IN (?, ?)""",
+            "mysql": """SELECT c FROM pkey_table_comp WHERE A IN (%s, %s)""",
+            "postgresql": """SELECT c FROM pkey_table_comp WHERE A IN (%s, %s)""",
+        },
     },
 }
@@ -171,7 +178,7 @@ def create_and_load_pkey(conn):
 
 
 def create_and_load_pkey_sqlite3(conn: sqlite3.Connection):
-    cur = conn.cusror()
+    cur = conn.cursor()
     stmt_single = """
         CREATE TABLE pkey_table_single (
             "a" Primary Key,
@@ -191,7 +198,7 @@ def create_and_load_pkey_sqlite3(conn: sqlite3.Connection):
     cur.execute(stmt_comp)
     data = [(1, "name1", "val1"), (2, "name2", "val2"), (3, "name3", "val3")]
     for tbl in ["pkey_table_single", "pkey_table_comp"]:
-        stmt = f"INSERT INTO {tbl} VALUE (?, ?, ?)"
+        stmt = f"INSERT INTO {tbl} VALUES (?, ?, ?)"
         cur.executemany(stmt, data)
@@ -365,6 +372,31 @@ def count_rows(conn, table_name: str):
     return result.fetchone()[0]
 
 
+def read_pkeys_from_database(conn, tbl_name: str, duplicate_keys: list[int]):
+    if isinstance(conn, sqlite3.Connection):
+        stmt = f"""SELECT c FROM {tbl_name} WHERE A IN (?, ?)"""
+        cur = conn.cursor()
+        result = cur.execute(stmt, duplicate_keys)
+    else:
+        from sqlalchemy import (
+            MetaData,
+            Table,
+            select,
+        )
+        from sqlalchemy.engine import Engine
+
+        meta = MetaData()
+        tbl = Table(tbl_name, meta, autoload_with=conn)
+        stmt = select([tbl.c.c]).where(tbl.c.a.in_(duplicate_keys))
+
+        if isinstance(conn, Engine):
+            with conn.connect() as conn:
+                result = conn.execute(stmt)
+        else:
+            result = conn.execute(stmt)
+    return sorted(val[0] for val in result.fetchall())
+
+
 @pytest.fixture
 def iris_path(datapath):
     iris_path = datapath("io", "data", "csv", "iris.csv")
@@ -475,7 +507,6 @@ def pkey_frame():
         (4, "name4", "val4"),
         (5, "name5", "val5"),
     ]
-
     return DataFrame(data, columns=columns)
@@ -577,12 +608,6 @@ def load_pkey_data(self):
         else:
             create_and_load_pkey(self.conn)
 
-    def _load_pkeys_from_database(self):
-        duplicate_keys = [1, 2]
-        query = SQL_STRINGS["read_pkey_table"][self.flavor]
-        records = self._get_exec().execute(query, duplicate_keys)
-        return sorted(val[0] for val in records)
-
     def _check_iris_loaded_frame(self, iris_frame):
@@ -717,7 +742,7 @@ def _to_sql_on_conflict_update(self, method, tbl_name, pkey_frame):
         assert count_rows(self.conn, tbl_name) == 5
         # Check conflicting primary keys have been updated
         # Get new values for conflicting keys
-        data_from_db = self._load_pkeys_from_database()
+        data_from_db = read_pkeys_from_database(self.conn, tbl_name, [1, 2])
         # duplicate_keys = [1, 2]
         # duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor]
         # duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys)
@@ -753,7 +778,10 @@ def _to_sql_on_conflict_nothing(self, method, tbl_name, pkey_frame):
         #     duplicate_key_query, duplicate_keys
         # )
         # data_from_db_before = sorted(val[0] for val in duplicate_val_before)
-        data_from_db_before = self._load_pkeys_from_database()
+        duplicate_keys = [1, 2]
+        data_from_db_before = read_pkeys_from_database(
+            self.conn, tbl_name, duplicate_keys
+        )
         # Insert new dataframe
         self.pandasSQL.to_sql(
             pkey_frame,
@@ -770,12 +798,12 @@ def _to_sql_on_conflict_nothing(self, method, tbl_name, pkey_frame):
         #     duplicate_key_query, duplicate_keys
         # )
         # data_from_db_after = sorted(val[0] for val in duplicate_val_after)
-        data_from_db_after = self._load_pkeys_from_database()
+        data_from_db_after = read_pkeys_from_database(
+            self.conn, tbl_name, duplicate_keys
+        )
         # Get data from incoming df
         data_from_df = sorted(
-            self.pkey_table_frame.loc[
-                self.pkey_table_frame["a"].isin([1, 2]), "c"
-            ].tolist()
+            pkey_frame.loc[pkey_frame["a"].isin(duplicate_keys), "c"].tolist()
        )
         # Check original DB values maintained for duplicate keys
         assert data_from_db_before == data_from_db_after
@@ -820,7 +848,7 @@ def _test_to_sql_on_conflict_with_index(self, method, tbl_name, pkey_frame):
         # duplicate_key_query = SQL_STRINGS["read_pkey_table"][self.flavor]
         # duplicate_val = self._get_exec().execute(duplicate_key_query, duplicate_keys)
         # data_from_db = sorted(val[0] for val in duplicate_val)
-        data_from_db = self._load_pkeys_from_database()
+        data_from_db = read_pkeys_from_database(self.conn, tbl_name, [1, 2])
         # Expected values from pkey_table_frame
         expected = sorted(["new_val1", "new_val2"])
         assert data_from_db == expected
@@ -1029,22 +1057,22 @@ def test_to_sql_series(self):
         s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
         tm.assert_frame_equal(s.to_frame(), s2)
 
-    def test_to_sql_invalid_on_conflict(self):
+    def test_to_sql_invalid_on_conflict(self, pkey_frame):
         msg = "'update' is not valid for on_conflict"
         with pytest.raises(ValueError, match=msg):
             sql.to_sql(
-                self.pkey_frame,
+                pkey_frame,
                 "pkey_frame1",
                 self.conn,
                 if_exists="append",
                 on_conflict="update",
             )
 
-    def test_to_sql_on_conflict_non_append(self):
+    def test_to_sql_on_conflict_non_append(self, pkey_frame):
         msg = "on_conflict can only be used with 'append' operations"
         with pytest.raises(ValueError, match=msg):
             sql.to_sql(
-                self.pkey_frame,
+                pkey_frame,
                 "pkey_frame1",
                 self.conn,
                 if_exists="replace",
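The core library fix in patch 73 is reflecting the target table with an explicit bind before reading its primary key, since newer SQLAlchemy releases deprecate bound MetaData. Standalone, the pattern looks roughly like this (illustrative table name, SQLAlchemy 1.4 assumed):

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

engine = create_engine("sqlite://")
setup = MetaData()
Table(
    "pkey_table",
    setup,
    Column("a", Integer, primary_key=True),
    Column("b", String(200), primary_key=True),
    Column("c", String(200)),
)
setup.create_all(engine)

# Reflect into a fresh MetaData, telling it explicitly which engine to use.
meta = MetaData()
meta.reflect(bind=engine, only=["pkey_table"], views=True)
table = meta.tables["pkey_table"]
primary_keys = table.primary_key.columns.keys()  # ['a', 'b']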
From 26faabee3412e36f77fd87d6187eb18f954200cb Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Wed, 8 Sep 2021 07:15:46 +0000
Subject: [PATCH 74/75] chore: check metadata bind before reflect

---
 pandas/io/sql.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 95d2fe30e62bd..58bae68dd0459 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -1001,10 +1001,10 @@ def _get_primary_key_data(self):
             DataFrame rows, for columns corresponding to `primary_key` names
         """
         # reflect MetaData object and assign contents of db to self.table attribute
-        # add self.pd_sql.meta.is_bound() check here?
-        self.pd_sql.meta.reflect(
-            bind=self.pd_sql.connectable, only=[self.name], views=True
-        )
+        bind = None
+        if not self.pd_sql.meta.is_bound():
+            bind = self.pd_sql.connectable
+        self.pd_sql.meta.reflect(bind=bind, only=[self.name], views=True)
         self.table = self.pd_sql.get_table(table_name=self.name, schema=self.schema)
 
         primary_keys = self.table.primary_key.columns.keys()
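Patch 74's guard only supplies a bind when the MetaData does not already carry one, which keeps old-style MetaData(bind=...) objects working on SQLAlchemy 1.x (is_bound() no longer exists in 2.0). A minimal sketch of the guard in isolation:

from sqlalchemy import MetaData, create_engine

engine = create_engine("sqlite://")
meta = MetaData()  # unbound; MetaData(bind=engine) would make is_bound() True

bind = None
if not meta.is_bound():  # only pass a bind when the MetaData has none of its own
    bind = engine
meta.reflect(bind=bind, views=True)  # reflects nothing on an empty DB, but runs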
From 9260eca23a30a942d0c131dfcbed74896e126466 Mon Sep 17 00:00:00 2001
From: cvonsteg
Date: Thu, 9 Sep 2021 06:41:07 +0000
Subject: [PATCH 75/75] clean up docstrings

---
 pandas/io/sql.py | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 58bae68dd0459..7cdec43728212 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -634,13 +634,10 @@ def to_sql(
         - append: If table exists, insert data. Create if does not exist.
     on_conflict : {None, 'do_nothing', 'do_update'}, optional
         Determine insertion behaviour in case of a primary key clash.
-        If the table being written has primary key constraints, attempting
-        to insert new rows with the same values in the primary key columns,
-        will cause an error. In this case the conflicting records can either
-        be updated in the database or ignored from the incoming dataframe.
-        - do_nothing: Ignore incoming rows with primary key clashes, and
+        - None: Do nothing to handle primary key clashes; a clash will raise an error.
+        - 'do_nothing': Ignore incoming rows with primary key clashes, and
          insert only the incoming rows with non-conflicting primary keys
-        - do_update: Update existing rows in database with primary key clashes,
+        - 'do_update': Update existing rows in database with primary key clashes,
          and append the remaining rows with non-conflicting primary keys
     index : bool, default True
         Write DataFrame index as a column.
@@ -981,7 +978,7 @@ def _on_conflict_do_nothing(self):
         """
         pk_cols, pk_values = self._get_primary_key_data()
         existing_keys = self._load_existing_pkeys(pk_cols, pk_values)
-        existing_data, new_data = self._split_incoming_data(pk_cols, existing_keys)
+        _, new_data = self._split_incoming_data(pk_cols, existing_keys)
         return new_data
 
     def _get_primary_key_data(self):
@@ -1477,6 +1474,7 @@ def to_sql(
         frame,
         name,
         if_exists="fail",
+        on_conflict=None,
         index=True,
         index_label=None,
         schema=None,
@@ -1881,7 +1879,7 @@ def to_sql(
         frame,
         name,
         if_exists="fail",
-        on_conflict=None,
+        on_conflict: str | None = None,
         index=True,
         index_label=None,
         schema=None,
@@ -1902,18 +1900,15 @@ def to_sql(
         if_exists : {'fail', 'replace', 'append'}, default 'fail'.
             - fail: If table exists, do nothing.
-            - replace: If table exRsts, drop it, recreate it, and insert data.
+            - replace: If table exists, drop it, recreate it, and insert data.
             - append: If table exists, insert data. Create if does not exist.
         on_conflict : {None, 'do_nothing', 'do_update'}, optional
             Determine insertion behaviour in case of a primary key clash.
-            If the table being written has primary key constraints, attempting
-            to insert new rows with the same values in the primary key columns,
-            will cause an error. In this case the conflicting records can either
-            be updated in the database or ignored from the incoming dataframe.
-            - do_nothing: Ignore incoming rows with primary key clashes, and
-              insert only the incoming rows with non-conflicting primary keys
-            - do_update: Update existing rows in database with primary key clashes,
-              and append the remaining rows with non-conflicting primary keys
+            - None: Do nothing to handle primary key clashes; a clash will raise an error.
+            - 'do_nothing': Ignore incoming rows with primary key clashes, and
+              insert only the incoming rows with non-conflicting primary keys
+            - 'do_update': Update existing rows in database with primary key clashes,
+              and append the remaining rows with non-conflicting primary keys
         index : boolean, default True
             Write DataFrame index as a column.
         index_label : string or sequence, default None
@@ -2352,7 +2347,7 @@ def to_sql(
         frame,
         name,
         if_exists="fail",
-        on_conflict=None,
+        on_conflict: str | None = None,
         index=True,
         index_label=None,
         schema=None,
@@ -2375,14 +2370,11 @@
             append: If table exists, insert data. Create if it does not exist.
         on_conflict : {None, 'do_nothing', 'do_update'}, optional
             Determine insertion behaviour in case of a primary key clash.
-            If the table being written has primary key constraints, attempting
-            to insert new rows with the same values in the primary key columns,
-            will cause an error. In this case the conflicting records can either
-            be updated in the database or ignored from the incoming dataframe.
-            - do_nothing: Ignore incoming rows with primary key clashes, and
-              insert only the incoming rows with non-conflicting primary keys
-            - do_update: Update existing rows in database with primary key clashes,
-              and append the remaining rows with non-conflicting primary keys
+            - None: Do nothing to handle primary key clashes; a clash will raise an error.
+            - 'do_nothing': Ignore incoming rows with primary key clashes, and
+              insert only the incoming rows with non-conflicting primary keys
+            - 'do_update': Update existing rows in database with primary key clashes,
+              and append the remaining rows with non-conflicting primary keys
         index : bool, default True
            Write DataFrame index as a column
        index_label : string or sequence, default None
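Taken together, the series builds toward the following user-facing behaviour. The sketch below assumes a pandas build with these patches applied -- the on_conflict keyword does not exist in released pandas -- and a table that already has a primary key, since the upsert paths deliberately fail without one:

import pandas as pd
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(text(
        'CREATE TABLE pkey_table ("a" INTEGER, "b" TEXT, "c" TEXT, '
        'PRIMARY KEY ("a", "b"))'
    ))

df = pd.DataFrame({"a": [1, 2], "b": ["name1", "name2"], "c": ["val1", "val2"]})
df.to_sql("pkey_table", engine, if_exists="append", index=False)

updated = df.assign(c=["new_val1", "new_val2"])
# Keep the rows already in the database and skip the clashing incoming ones:
updated.to_sql("pkey_table", engine, if_exists="append", index=False,
               on_conflict="do_nothing")
# Or overwrite the clashing rows with the incoming values:
updated.to_sql("pkey_table", engine, if_exists="append", index=False,
               on_conflict="do_update")
print(pd.read_sql("SELECT * FROM pkey_table ORDER BY a", engine))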