From ce097a522b44dbdc0ef405a1c900587da4e74960 Mon Sep 17 00:00:00 2001 From: Frances Hartwell Date: Mon, 26 Sep 2022 10:18:36 -0400 Subject: [PATCH] initial commit --- .github/ISSUE_TEMPLATE.md | 15 + .github/workflows/tests.yml | 31 + .gitignore | 114 +++ CONTRIBUTING.rst | 237 +++++++ MANIFEST.in | 11 + Makefile | 244 +++++++ README.md | 163 +++++ docs/Makefile | 20 + docs/authors.rst | 1 + docs/conf.py | 200 ++++++ docs/contributing.rst | 1 + docs/history.rst | 1 + docs/images/dai-logo-white-200.png | Bin 0 -> 33432 bytes docs/images/dai-logo-white.ico | Bin 0 -> 4286 bytes docs/index.rst | 22 + docs/make.bat | 36 + docs/readme.rst | 1 + notebooks/data/alarms.csv | 3 + notebooks/data/notifications.csv | 3 + notebooks/data/pidata.csv | 5 + notebooks/data/scada.csv | 3 + notebooks/data/stoppages.csv | 3 + notebooks/data/turbines.csv | 2 + notebooks/data/work_orders.csv | 3 + notebooks/feature_engineering.ipynb | 651 ++++++++++++++++++ setup.cfg | 49 ++ setup.py | 105 +++ tests/__init__.py | 0 tests/labeling/__init__.py | 0 tests/labeling/test_data_labeler.py | 21 + tests/labeling/test_helpers.py | 151 ++++ tests/test___init__.py | 116 ++++ tests/test_entityset.py | 219 ++++++ tests/test_metadata.py | 61 ++ tox.ini | 20 + zephyr_ml/__init__.py | 10 + zephyr_ml/entityset.py | 135 ++++ zephyr_ml/labeling/__init__.py | 40 ++ zephyr_ml/labeling/data_labeler.py | 66 ++ .../labeling/labeling_functions/__init__.py | 4 + .../labeling_functions/brake_pad_presence.py | 49 ++ .../converter_replacement_presence.py | 55 ++ .../labeling_functions/total_power_loss.py | 46 ++ zephyr_ml/labeling/utils.py | 232 +++++++ zephyr_ml/metadata.py | 150 ++++ 45 files changed, 3299 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/workflows/tests.yml create mode 100644 .gitignore create mode 100644 CONTRIBUTING.rst create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 docs/Makefile create mode 100644 docs/authors.rst create mode 100644 docs/conf.py create mode 100644 docs/contributing.rst create mode 100644 docs/history.rst create mode 100644 docs/images/dai-logo-white-200.png create mode 100644 docs/images/dai-logo-white.ico create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/readme.rst create mode 100644 notebooks/data/alarms.csv create mode 100644 notebooks/data/notifications.csv create mode 100644 notebooks/data/pidata.csv create mode 100644 notebooks/data/scada.csv create mode 100644 notebooks/data/stoppages.csv create mode 100644 notebooks/data/turbines.csv create mode 100644 notebooks/data/work_orders.csv create mode 100644 notebooks/feature_engineering.ipynb create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/labeling/__init__.py create mode 100644 tests/labeling/test_data_labeler.py create mode 100644 tests/labeling/test_helpers.py create mode 100644 tests/test___init__.py create mode 100644 tests/test_entityset.py create mode 100644 tests/test_metadata.py create mode 100644 tox.ini create mode 100644 zephyr_ml/__init__.py create mode 100644 zephyr_ml/entityset.py create mode 100644 zephyr_ml/labeling/__init__.py create mode 100644 zephyr_ml/labeling/data_labeler.py create mode 100644 zephyr_ml/labeling/labeling_functions/__init__.py create mode 100644 zephyr_ml/labeling/labeling_functions/brake_pad_presence.py create mode 100644 zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py create mode 
100644 zephyr_ml/labeling/labeling_functions/total_power_loss.py create mode 100644 zephyr_ml/labeling/utils.py create mode 100644 zephyr_ml/metadata.py diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..3bd5e8c --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* Zephyr version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..e60b99b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,31 @@ +name: Run Tests + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.7, 3.8] + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + sudo apt-get install pandoc + python -m pip install --upgrade pip + pip install tox tox-gh-actions + + - name: Test with tox + run: tox diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c5de81b --- /dev/null +++ b/.gitignore @@ -0,0 +1,114 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Backup files +*~ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/api/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Vim +.*.swp + +notebooks +notebooks-private + +.DS_Store \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..3693f9c --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,237 @@ +.. highlight:: shell + +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at the `GitHub Issues page`_. + +If you are reporting a bug, please include: + +* Your operating system name and version. 
+* Any details about your local setup that might be helpful in troubleshooting.
+* Detailed steps to reproduce the bug.
+
+Fix Bugs
+~~~~~~~~
+
+Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
+wanted" is open to whoever wants to implement it.
+
+Implement Features
+~~~~~~~~~~~~~~~~~~
+
+Look through the GitHub issues for features. Anything tagged with "enhancement"
+and "help wanted" is open to whoever wants to implement it.
+
+Write Documentation
+~~~~~~~~~~~~~~~~~~~
+
+Zephyr could always use more documentation, whether as part of the
+official Zephyr docs, in docstrings, or even on the web in blog posts,
+articles, and such.
+
+Submit Feedback
+~~~~~~~~~~~~~~~
+
+The best way to send feedback is to file an issue at the `GitHub Issues page`_.
+
+If you are proposing a feature:
+
+* Explain in detail how it would work.
+* Keep the scope as narrow as possible, to make it easier to implement.
+* Remember that this is a volunteer-driven project, and that contributions
+  are welcome :)
+
+Get Started!
+------------
+
+Ready to contribute? Here's how to set up `Zephyr` for local development.
+
+1. Fork the `Zephyr` repo on GitHub.
+2. Clone your fork locally::
+
+    $ git clone git@github.com:your_name_here/zephyr.git
+
+3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed,
+   this is how you set up your fork for local development::
+
+    $ mkvirtualenv zephyr
+    $ cd zephyr/
+    $ make install-develop
+
+4. Create a branch for local development::
+
+    $ git checkout -b name-of-your-bugfix-or-feature
+
+   Try to use the naming scheme of prefixing your branch with ``gh-X`` where X is
+   the number of the associated issue, such as ``gh-3-fix-foo-bug``. And if you are not
+   developing on your own fork, further prefix the branch with your GitHub
+   username, like ``githubusername/gh-3-fix-foo-bug``.
+
+   Now you can make your changes locally.
+
+5. While working on your changes, make sure to cover all your developments with the required
+   unit tests, and that none of the old tests fail as a consequence of your changes.
+   For this, make sure to run the test suite and check the code coverage::
+
+    $ make lint       # Check code styling
+    $ make test       # Run the tests
+    $ make coverage   # Get the coverage report
+
+6. When you're done making changes, check that your changes pass all the styling checks and
+   tests, including all the supported Python versions, using::
+
+    $ make test-all
+
+7. Also make sure to include the necessary documentation in the code as docstrings following
+   the `Google docstrings style`_ (see the example after this list).
+   If you want to see what your documentation will look like when it is published, you can
+   generate and view the docs with this command::
+
+    $ make view-docs
+
+8. Commit your changes and push your branch to GitHub::
+
+    $ git add .
+    $ git commit -m "Your detailed description of your changes."
+    $ git push origin name-of-your-bugfix-or-feature
+
+9. Submit a pull request through the GitHub website.
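+
+As a reference, a Google-style docstring looks roughly like the following
+sketch (the function is purely illustrative and not part of the codebase)::
+
+    def power_loss(value, baseline):
+        """Compute the power lost relative to a baseline.
+
+        Args:
+            value (float):
+                Observed power output.
+            baseline (float):
+                Expected power output.
+
+        Returns:
+            float:
+                Difference between the baseline and the observed output.
+        """
+        return baseline - value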
+Pull Request Guidelines
+-----------------------
+
+Before you submit a pull request, check that it meets these guidelines:
+
+1. It resolves an open GitHub Issue and contains its reference in the title or
+   the comment. If there is no associated issue, feel free to create one.
+2. Whenever possible, it resolves only **one** issue. If your PR resolves more than
+   one issue, try to split it into more than one pull request.
+3. The pull request should include unit tests that cover all the changed code.
+4. If the pull request adds functionality, the docs should be updated. Put
+   your new functionality into a function with a docstring, and add the
+   feature to the documentation in an appropriate place.
+5. The pull request should work for all the supported Python versions. Check the `Travis Build
+   Status page`_ and make sure that all the checks pass.
+
+Unit Testing Guidelines
+-----------------------
+
+All the Unit Tests should comply with the following requirements:
+
+1. Unit Tests should be based only on the ``unittest`` and ``pytest`` modules.
+
+2. The tests that cover a module called ``zephyr/path/to/a_module.py``
+   should be implemented in a separate module called
+   ``tests/zephyr/path/to/test_a_module.py``.
+   Note that the module name has the ``test_`` prefix and is located in a path similar
+   to the one of the tested module, just inside the ``tests`` folder.
+
+3. Each method of the tested module should have at least one associated test method, and
+   each test method should cover only **one** use case or scenario.
+
+4. Test case methods should start with the ``test_`` prefix and have descriptive names
+   that indicate which scenario they cover.
+   Names such as ``test_some_method_input_none``, ``test_some_method_value_error`` or
+   ``test_some_method_timeout`` are good, but names like ``test_some_method_1``,
+   ``some_method`` or ``test_error`` are not.
+
+5. Each test should validate only what the code of the method being tested does, and not
+   cover the behavior of any third party package or tool being used, which is assumed to
+   work properly as long as it is passed the right values.
+
+6. Any third party tool that may have any kind of random behavior, such as some Machine
+   Learning models, databases or Web APIs, will be mocked using the ``mock`` library, and
+   the only thing that will be tested is that our code passes the right values to them
+   (see the sketch after this list).
+
+7. Unit tests should not use anything from outside the test and the code being tested. This
+   includes not reading or writing to any file system or database, which will be properly
+   mocked.
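+
+For illustration, a test that mocks a third party tool could look like the
+following sketch, where ``zephyr_ml.some_module`` and everything it contains
+is hypothetical::
+
+    from unittest import mock
+
+    from zephyr_ml import some_module  # hypothetical module
+
+    @mock.patch('zephyr_ml.some_module.ApiClient')  # hypothetical third party client
+    def test_fetch_readings_passes_turbine_id(api_client_mock):
+        some_module.fetch_readings('T001')
+
+        api_client_mock.return_value.get.assert_called_once_with('T001')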
+Tips
+----
+
+To run a subset of tests::
+
+    $ python -m pytest tests.test_zephyr
+    $ python -m pytest -k 'foo'
+
+Release Workflow
+----------------
+
+The process of releasing a new version involves several steps combining both ``git`` and
+``bumpversion``, which, briefly, are:
+
+1. Merge what is in the ``main`` branch into the ``stable`` branch.
+2. Update the version in the ``setup.cfg``, ``zephyr_ml/__init__.py`` and
+   ``HISTORY.md`` files.
+3. Create a new git tag pointing at the corresponding commit in the ``stable`` branch.
+4. Merge the new commit from ``stable`` into ``main``.
+5. Update the version in ``setup.cfg`` and ``zephyr_ml/__init__.py``
+   to open the next development iteration.
+
+.. note:: Before starting the process, make sure that ``HISTORY.md`` has been updated with a new
+   entry that explains the changes that will be included in the new version.
+   Normally this is just a list of the Pull Requests that have been merged to main
+   since the last release.
+
+Once this is done, run one of the following commands:
+
+1. If you are releasing a patch version::
+
+    make release
+
+2. If you are releasing a minor version::
+
+    make release-minor
+
+3. If you are releasing a major version::
+
+    make release-major
+
+Release Candidates
+~~~~~~~~~~~~~~~~~~
+
+Sometimes it is necessary or convenient to upload a release candidate to PyPI as a pre-release,
+in order to make some of the new features available for testing on other projects before they
+are included in an actual full-blown release.
+
+In order to perform such an action, you can execute::
+
+    make release-candidate
+
+This will perform the following actions:
+
+1. Build and upload the current version to PyPI as a pre-release, with the format ``X.Y.Z.devN``
+
+2. Bump the current version to the next release candidate, ``X.Y.Z.dev(N+1)``
+
+After this is done, the new pre-release can be installed by including the ``dev`` section in the
+dependency specification, either in ``setup.py``::
+
+    install_requires = [
+        ...
+        'zephyr_ml>=X.Y.Z.dev',
+        ...
+    ]
+
+or on the command line::
+
+    pip install 'zephyr_ml>=X.Y.Z.dev'
+
+
+.. _GitHub Issues page: https://github.com/D3-AI/zephyr/issues
+.. _Travis Build Status page: https://travis-ci.org/D3-AI/zephyr/pull_requests
+.. _Google docstrings style: https://google.github.io/styleguide/pyguide.html?showone=Comments#Comments
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..469520f
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,11 @@
+include AUTHORS.rst
+include CONTRIBUTING.rst
+include HISTORY.md
+include LICENSE
+include README.md
+
+recursive-include tests *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.md *.rst conf.py Makefile make.bat *.jpg *.png *.gif
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..77dc8f0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,244 @@
+.DEFAULT_GOAL := help
+
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+
+try:
+	from urllib import pathname2url
+except ImportError:
+	from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+	if match:
+		target, help = match.groups()
+		print("%-20s %s" % (target, help))
+endef
+export PRINT_HELP_PYSCRIPT
+
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+.PHONY: help
+help:
+	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+# CLEAN TARGETS
+
+.PHONY: clean-build
+clean-build: ## remove build artifacts
+	rm -fr build/
+	rm -fr dist/
+	rm -fr .eggs/
+	find . -name '*.egg-info' -exec rm -fr {} +
+	find . -name '*.egg' -exec rm -f {} +
+
+.PHONY: clean-pyc
+clean-pyc: ## remove Python file artifacts
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+	find . -name '__pycache__' -exec rm -fr {} +
+
+.PHONY: clean-docs
+clean-docs: ## remove previously built docs
+	rm -f docs/api/*.rst
+	rm -rf docs/tutorials
+	-$(MAKE) -C docs clean 2>/dev/null # this fails if sphinx is not yet installed
+
+.PHONY: clean-coverage
+clean-coverage: ## remove coverage artifacts
+	rm -f .coverage
+	rm -f .coverage.*
+	rm -fr htmlcov/
+
+.PHONY: clean-test
+clean-test: ## remove test artifacts
+	rm -fr .tox/
+	rm -fr .pytest_cache
+
+.PHONY: clean
+clean: clean-build clean-pyc clean-test clean-coverage clean-docs ## remove all build, test, coverage, docs and Python artifacts
+
+# INSTALL TARGETS
+
+.PHONY: install
+install: clean-build clean-pyc ## install the package to the active Python's site-packages
+	pip install .
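+
+# For example, a typical first-time setup could be the following sequence
+# (illustrative only, not a target defined in this Makefile):
+#
+#   make install-develop    # editable install with development dependencies
+#   make test-unit          # quick check that the environment works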
+ +.PHONY: install-test +install-test: clean-build clean-pyc ## install the package and test dependencies + pip install .[test] + +.PHONY: install-develop +install-develop: clean-build clean-pyc ## install the package in editable mode and dependencies for development + pip install -e .[dev] + +# LINT TARGETS + +.PHONY: lint +lint: ## check style with flake8 and isort + flake8 zephyr_ml tests + isort -c --recursive zephyr_ml tests + +.PHONY: fix-lint +fix-lint: ## fix lint issues using autoflake, autopep8, and isort + find zephyr_ml tests -name '*.py' | xargs autoflake --in-place --ignore-init-module-imports --remove-all-unused-imports --remove-unused-variables + autopep8 --in-place --recursive --aggressive zephyr_ml tests + isort --apply --atomic --recursive zephyr_ml tests + +# TEST TARGETS + +.PHONY: test-unit +test-unit: ## run tests quickly with the default Python + python -m pytest --cov=zephyr_ml + +.PHONY: test-readme +test-readme: ## run the readme snippets + rm -rf tests/readme_test && mkdir -p tests/readme_test/notebooks + cp -r notebooks/data tests/readme_test/notebooks/ + cd tests/readme_test && rundoc run --single-session python3 -t python3 ../../README.md + rm -rf tests/readme_test + + +.PHONY: test-tutorials +test-tutorials: ## run the tutorial notebooks + jupyter nbconvert --execute --ExecutePreprocessor.timeout=3600 --to=html --stdout notebooks/*.ipynb > /dev/null + + +.PHONY: test +test: test-unit test-readme test-tutorials ## test everything that needs test dependencies + +.PHONY: check-dependencies +check-dependencies: ## test if there are any broken dependencies + pip check + +.PHONY: test-devel +test-devel: check-dependencies lint docs ## test everything that needs development dependencies + +.PHONY: test-all +test-all: + tox -r + +.PHONY: coverage +coverage: ## check code coverage quickly with the default Python + coverage run --source zephyr_ml -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +# DOCS TARGETS + +.PHONY: docs +docs: clean-docs ## generate Sphinx HTML documentation, including API docs + sphinx-apidoc --separate --no-toc -o docs/api/ zephyr_ml + $(MAKE) -C docs html + +.PHONY: view-docs +view-docs: docs ## view docs in browser + $(BROWSER) docs/_build/html/index.html + +.PHONY: serve-docs +serve-docs: view-docs ## compile the docs watching for changes + watchmedo shell-command -W -R -D -p '*.rst;*.md' -c '$(MAKE) -C docs html' docs + +# RELEASE TARGETS + +.PHONY: dist +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +.PHONY: publish-confirm +publish-confirm: + @echo "WARNING: This will irreversibly upload a new version to PyPI!" 
+	@echo -n "Please type 'confirm' to proceed: " \
+		&& read answer \
+		&& [ "$${answer}" = "confirm" ]
+
+.PHONY: publish
+publish: dist publish-confirm ## package and upload a release
+	twine upload --repository-url https://pypi.dailab.ml:8080 dist/*
+
+.PHONY: bumpversion-release
+bumpversion-release: ## Merge main to stable and bumpversion release
+	git checkout stable || git checkout -b stable
+	git merge --no-ff main -m"make release-tag: Merge branch 'main' into stable"
+	bumpversion release
+	git push --tags origin stable
+
+.PHONY: bumpversion-patch
+bumpversion-patch: ## Merge stable to main and bumpversion patch
+	git checkout main
+	git merge stable
+	bumpversion --no-tag patch
+	git push
+
+.PHONY: bumpversion-minor
+bumpversion-minor: ## Bump the version to the next minor version, skipping the release
+	bumpversion --no-tag minor
+
+.PHONY: bumpversion-major
+bumpversion-major: ## Bump the version to the next major version, skipping the release
+	bumpversion --no-tag major
+
+.PHONY: bumpversion-revert
+bumpversion-revert: ## Undo a previous bumpversion-release
+	git checkout main
+	git branch -D stable
+
+.PHONY: bumpversion-candidate
+bumpversion-candidate: ## Bump the version to the next candidate
+	bumpversion candidate --no-tag
+
+CLEAN_DIR := $(shell git status --short | grep -v ??)
+CURRENT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
+CHANGELOG_LINES := $(shell git diff HEAD..origin/stable HISTORY.md 2>&1 | wc -l)
+
+.PHONY: check-main
+check-main: ## Check if we are in the main branch
+ifneq ($(CURRENT_BRANCH),main)
+	$(error Please make the release from main branch\n)
+endif
+
+.PHONY: check-history
+check-history: ## Check if HISTORY.md has been modified
+ifeq ($(CHANGELOG_LINES),0)
+	$(error Please insert the release notes in HISTORY.md before releasing)
+endif
+
+.PHONY: check-release
+check-release: check-main check-history ## Check if the release can be made
+
+.PHONY: release
+release: check-release bumpversion-release docker-push publish bumpversion-patch
+
+.PHONY: release-candidate
+release-candidate: check-main publish bumpversion-candidate
+
+.PHONY: release-minor
+release-minor: check-release bumpversion-minor release
+
+.PHONY: release-major
+release-major: check-release bumpversion-major release
+
+# DOCKER TARGETS
+
+.PHONY: docker-build
+docker-build:
+	docker build -f docker/Dockerfile -t zephyr_ml .
+
+.PHONY: docker-push
+docker-push: docker-build
+	@$(eval VERSION := $(shell python -c 'import zephyr_ml; print(zephyr_ml.__version__)'))
+	docker tag zephyr_ml docker.pkg.github.com/signals-dev/zephyr_ml/zephyr_ml:$(VERSION)
+	docker push docker.pkg.github.com/signals-dev/zephyr_ml/zephyr_ml:$(VERSION)
+	docker tag zephyr_ml docker.pkg.github.com/signals-dev/zephyr_ml/zephyr_ml
+	docker push docker.pkg.github.com/signals-dev/zephyr_ml/zephyr_ml
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..437cded
--- /dev/null
+++ b/README.md
@@ -0,0 +1,163 @@
+
+*DAI-Lab: A project from Data to AI Lab at MIT.*
+
+# Zephyr Prediction Engineering
+
+Prediction engineering methods for Zephyr.
+
+- Homepage: https://github.com/signals-dev/zephyr
+
+# Overview
+
+The **Zephyr** library is a framework designed to assist in the
+generation of machine learning problems for wind farm operations data by analyzing past
+occurrences of events.
+
+The main features of **Zephyr** are:
+
+* **EntitySet creation**: tools designed to represent wind farm data and the relationships
+between different tables. We have functions to create EntitySets for datasets with PI data
+and datasets using SCADA data.
+* **Labeling Functions**: a collection of functions, as well as tools to create custom versions
+of them, ready to be used to analyze past operations data in search of occurrences of
+specific types of events.
+* **Prediction Engineering**: a flexible framework designed to apply labeling functions on
+wind turbine operations data in a number of different ways to create labels for custom
+Machine Learning problems.
+* **Feature Engineering**: a guide to using Featuretools to apply automated feature engineering
+to wind farm data.
+
+# Install
+
+## Requirements
+
+**Zephyr** has been developed and runs on Python 3.7 and 3.8.
+
+Also, although it is not strictly required, the usage of a [virtualenv](
+https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering
+with other software installed in the system where you are trying to run **Zephyr**.
+
+## Download and Install
+
+**Zephyr** can be installed locally using [pip](https://pip.pypa.io/en/stable/) with
+the following command:
+
+```bash
+pip install zephyr-ml
+```
+
+If you want to install from source or contribute to the project please read the
+[Contributing Guide](CONTRIBUTING.rst).
+
+# Quickstart
+
+In this short tutorial we will guide you through a series of steps that will help you
+get started with **Zephyr**.
+
+## 1. Loading the data
+
+The first step will be to use preprocessed data to create an EntitySet. Depending on the
+type of data, we will use either the `zephyr_ml.create_pidata_entityset` or the
+`zephyr_ml.create_scada_entityset` function.
+
+**NOTE**: if you cloned the **Zephyr** repository, you will find some demo data inside the
+`notebooks/data` folder which has been preprocessed to fit the `create_entityset` data
+requirements.
+
+```python3
+import os
+import pandas as pd
+from zephyr_ml import create_scada_entityset
+
+data_path = 'notebooks/data'
+
+data = {
+    'turbines': pd.read_csv(os.path.join(data_path, 'turbines.csv')),
+    'alarms': pd.read_csv(os.path.join(data_path, 'alarms.csv')),
+    'work_orders': pd.read_csv(os.path.join(data_path, 'work_orders.csv')),
+    'stoppages': pd.read_csv(os.path.join(data_path, 'stoppages.csv')),
+    'notifications': pd.read_csv(os.path.join(data_path, 'notifications.csv')),
+    'scada': pd.read_csv(os.path.join(data_path, 'scada.csv'))
+}
+
+scada_es = create_scada_entityset(data)
+```
+
+This will load the turbine, alarms, stoppages, work order, notifications, and SCADA data, and return it
+as an EntitySet:
+
+```
+Entityset: SCADA data
+  DataFrames:
+    turbines [Rows: 1, Columns: 10]
+    alarms [Rows: 2, Columns: 9]
+    work_orders [Rows: 2, Columns: 20]
+    stoppages [Rows: 2, Columns: 16]
+    notifications [Rows: 2, Columns: 15]
+    scada [Rows: 2, Columns: 5]
+  Relationships:
+    alarms.COD_ELEMENT -> turbines.COD_ELEMENT
+    stoppages.COD_ELEMENT -> turbines.COD_ELEMENT
+    work_orders.COD_ELEMENT -> turbines.COD_ELEMENT
+    scada.COD_ELEMENT -> turbines.COD_ELEMENT
+    notifications.COD_ORDER -> work_orders.COD_ORDER
+```
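+
+The individual dataframes can then be inspected by name. For example, a quick
+sanity check on the turbines table (a minimal sketch, assuming the EntitySet
+created above):
+
+```python3
+scada_es['turbines'].head()
+```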
+
+## 2. Selecting a Labeling Function
+
+The second step will be to choose an adequate **Labeling Function**.
+
+We can see the list of available labeling functions using the `zephyr_ml.labeling.get_labeling_functions`
+function.
+
+```python3
+from zephyr_ml import labeling
+
+labeling.get_labeling_functions()
+```
+
+This will return a dictionary with the name and a short description of each available
+function.
+
+```
+{'brake_pad_presence': 'Calculates the brake pad presence.',
+ 'converter_replacement_presence': 'Calculates the converter replacement presence.',
+ 'total_power_loss': 'Calculates the total power loss over the data slice.'}
+```
+
+In this case, we will choose the `total_power_loss` function, which calculates the total
+amount of power lost over a slice of time.
+
+## 3. Generating Target Times
+
+Once we have loaded the data and chosen a Labeling Function, we are ready to use
+the `zephyr_ml.DataLabeler` to generate a Target Times table.
+
+```python3
+from zephyr_ml import DataLabeler
+
+data_labeler = DataLabeler(labeling.labeling_functions.total_power_loss)
+target_times, metadata = data_labeler.generate_label_times(scada_es)
+```
+
+This will return a `compose.LabelTimes` table containing the three columns required to start
+working on a Machine Learning problem: the turbine ID (COD_ELEMENT), the cutoff time (time) and the label.
+
+```
+   COD_ELEMENT       time    label
+0            0 2022-01-01  45801.0
+```
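+
+With the target times in hand, the natural next step is automated feature
+engineering. As a sketch (the parameters shown are illustrative and depend on
+your problem), Featuretools can compute features at each cutoff time:
+
+```python3
+import featuretools as ft
+
+feature_matrix, feature_defs = ft.dfs(
+    entityset=scada_es,
+    target_dataframe_name='turbines',
+    cutoff_time=target_times,
+    cutoff_time_in_index=True,
+)
+```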
+
+# What's Next?
+
+If you want to continue learning about **Zephyr** and all its
+features please have a look at the tutorials found inside the [notebooks folder](
+https://github.com/signals-dev/zephyr/tree/main/notebooks).
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..2ba6011
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = python -msphinx
+SPHINXPROJ    = zephyr
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/authors.rst b/docs/authors.rst
new file mode 100644
index 0000000..e122f91
--- /dev/null
+++ b/docs/authors.rst
@@ -0,0 +1 @@
+.. include:: ../AUTHORS.rst
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..cc6fa4b
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Zephyr documentation build configuration file, created by
+# sphinx-quickstart on Fri Jun 9 13:47:02 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another
+# directory, add these directories to sys.path here. If the directory is
+# relative to the documentation root, use os.path.abspath to make it
+# absolute, like shown here.
+
+import sphinx_rtd_theme  # For read the docs theme
+
+import zephyr_ml
+
+# -- General configuration ---------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+    'm2r',
+    'nbsphinx',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.githubpages',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.napoleon',
+    'autodocsumm',
+]
+
+autodoc_default_options = {
+    'autosummary': True,
+}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+source_suffix = ['.rst', '.md']
+
+# The master toctree document.
+master_doc = 'index'
+
+# Jupyter Notebooks
+nbsphinx_execute = 'never'
+
+# General information about the project.
+project = 'Zephyr'
+slug = 'zephyr-ml'
+title = project + ' Documentation'
+copyright = '2022, MIT Data To AI Lab'
+author = 'MIT Data To AI Lab'
+description = 'Prediction engineering methods for wind turbine maintenance.'
+user = 'D3-AI'
+
+# The version info for the project you're documenting, acts as replacement
+# for |version| and |release|, also used in various other places throughout
+# the built documents.
+#
+# The short X.Y version.
+version = zephyr_ml.__version__
+# The full version, including alpha/beta/rc tags.
+release = zephyr_ml.__version__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['.py', '_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+# -- Options for HTML output -------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# Readthedocs additions
+html_context = {
+    'display_github': True,
+    'github_user': user,
+    'github_repo': project,
+    'github_version': 'main',
+    'conf_py_path': '/docs/',
+}
+
+# Theme options are theme-specific and customize the look and feel of a
+# theme further. For a list of options available for each theme, see the
+# documentation.
+html_theme_options = { + 'collapse_navigation': False, + 'display_version': False, +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = 'images/dai-logo-white.ico' + +# If given, this must be the name of an image file (path relative to the +# configuration directory) that is the logo of the docs. It is placed at +# the top of the sidebar; its width should therefore not exceed 200 pixels. +html_logo = 'images/dai-logo-white-200.png' + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = slug + 'doc' + + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]). +latex_documents = [( + master_doc, + slug + '.tex', + title, + author, + 'manual' +)] + + +# -- Options for manual page output ------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [( + master_doc, + slug, + title, + [author], + 1 +)] + + +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [( + master_doc, + slug, + title, + author, + slug, + description, + 'Miscellaneous' +)] diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..e582053 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1 @@ +.. include:: ../CONTRIBUTING.rst diff --git a/docs/history.rst b/docs/history.rst new file mode 100644 index 0000000..d26e5be --- /dev/null +++ b/docs/history.rst @@ -0,0 +1 @@ +.. 
mdinclude:: ../HISTORY.md
diff --git a/docs/images/dai-logo-white-200.png b/docs/images/dai-logo-white-200.png
new file mode 100644
index 0000000000000000000000000000000000000000..58bb9fdec120fdc5baa9a4a66a7c4fd7862e03ea
GIT binary patch
literal 33432
[binary image data omitted]
zmsv7X>zi_|F0AGoMtSev&=fL4w1YNpO5*#sR|ieJj}Rj&992r-gp&E9961t-p)K+I zkfE$Qxi}{NG$@aCJv7|;cjRf&0Tmt|+-~A~V3E43y}kWlkvfyG@8zN7ts-;Tf~xuR zgWs&ga=1!1e1G@8e9QTz^sytNtr}YZ9>G0_2$5FC^DZ9F+{_3kT-)m5zv!OJ^!S-% zY{-0$U?U=UQ~l$>Y%(=MY)haiQYVf!!M5j@*a-Ce#f#wAUk>FJ6lQ+;-+50?Pq+2; zVT0vJ8nk=>U?B(%6BE>d794=sk|4Vt)ajo`E-#cm>PLIw&s4;k51kM*h9hC=3LWNr z$Hc7Mq(vZ6(SuZ(-t(TnW_mUKB^R?f>-gZAam9~BB(}Ee(1LP3`M9|rn(QAHfrq%3 zodC{m^k-Hx>ZiVCWHtCjG&VLC1h|3?gNFBaFU6}$X6j^P&SpjBzG!OfC<@u#G;L>~ zR9msf8S=94y1v*si}&35FH}0u?Q$n@n9;Ze4+{5M_QqJ-Atgm7wq7+uE`<|Ty+%ev zVdSdy{03WIUq{Iu^5o&<#E0TOe-6r5NdEfsCt0p+`iU7G9o^#Enguc{g!hSuopjL=5)dGOCAIRu*;y{807?a|KN_Q{_2hIp5t$x{ z>SH~Qsy^f#afc42(vbJfyOK0Ap{l732Qh)|7e~viRDUFx%a}~p3XjhZb!bAbE>7Yx z?A_4df!y9=)sW46!-qaMw9EG)H!V5aanQ21Xip;ogk zZcZ-xM=Bi)XpW|KhVO1h9>=Jv}!B8#>>!ZiUD~s!~TR2kwv)ab4S-V^M{-CV} zrh$sOx;i_4Ksn_dE3pw ziDHgHe8IPImUo&VSY(svyZTxGB2u_4~= zdGAOlEn%gamE#N_F7B8;la$}b1MZ@x=J#b0o`eb`gJ`qeDlcvp>Di)~sSzNxyp zPlLQ>vn?y)AD7Rt%W3q)Sz^w(7w{0-boBH^RaL`j_&WOfZ`Fzt+S=L}!RzOuclhEJ z|MTb1zNi{xiHEWQa%bDq^GW)n(TzWc58f}~58SA#%tY2B-}<r;v~S8GQH za$;iQ%ENoAXXj>6%dht@*qp)*5>FPcsVIL-$jbPvSaRLqzPcx{`>Xr zIoRJT&HoX2`uMB>;I=tY{v?KqGmM#(ghba)9Z&>lNC;rA^O3z0Z~IzNj`)@Ofl6E{ zk)rbWJC2*9?M`h#VhL(C>Q6KsIE=^rc=&xPjWN8v@&^PXA7da$v z_yifs$jn??S_+)0a}pcY4(BlM!eb`-7;_nRZ${zw-g|QO#LcI_{xqhx&tUg63LrBA z2l8;}KFQ@li(6ahpX5FO*6MAxD7AMJ4JcwMX=!B3-jqNvj$hN_Ee?K>1pG2qJ3O40 z2*c%My-)0{MZ5UzWI~BkP1A-G3t7aW!V*1Z>NrxPdlL%!{OcMIPDEw(y2+Db#c;Qc z+lqQIGqg;$EwERW z!&RvVip%<6r*yH%FeFnzG{FWA4AvX=DB;CQ(r>p|q+NYph3kmZnv0vSLz><}Z_Qqd zJ$br&h5gcw#CGAg$Ri+9?R|ZJe_v5qDe?aO`_lzth;skd`0hc+OEOo}t$-s+_-1?A ztV&%lSnaRQs|D?K+HOIFcK^&%r31qDHv{CNW9g^;?fq|ywMG6)Fwgz7x`b3tdSQPC z29z%dQf@@(}RCp_%6Kuak5N+-A_-|U0 zI*8-)^d~)+;t(oJuhSN65hnY8Ojjzia87$C;x%G=!>H^llfJ(=?%8h4g;Kp-==_&`?K?+cg69g_ze9U7VVT=*xKQX_*+ zA$S7|_fKIdte7M+h~%)+#<%!U6##6X-JW4&7BLwaTBg#(o-|UYGVXg*+Y!!`I3XX~ za>59n+llAdY<7aphS@fx>$vQLMEgwzz4NcuZfF0I%CjLtB>MiiGc>zr-CAqk*Q&QNz}KMV~AL zJcxkjFkQ!gfCm86c!-gnUStgQD^qYDAE{r!iRS;TW*EH#Yi+c@%!xWnHJdoR70vg! 
z1uOJUI#%nwVJ*6*J8hR%`pdVRO2Pu@FLms&JBcgcrR9jf8hB5a+~1d(;bR4R;&OL= zGaNDP^P_Ho&CE=SD$agLPuFXTBZR;?lXQ1?AB;qcb9vg%2{=1#kKzNTQoh2t^5+?^ zR!=|dDU^#W)9G~kh;bRWaA3KPBEQ)a=|#mVv#lngJb3U7p?{P->X1uw(qLw1;ju!m z{E%g-R_I*kFZ@s2!XVnENHJ9S@^qD9V(rHVoALGRDpO7@v(=L;*W96SpkC0k4`7++ z4n4A8r3MWo=;-K^RTh{)wpiTTi~jnRPXDKIlJrYX1!eWoC)RHd>r7lqbEzS&9ehZD z?k5_#8wqXC;)T$NU4`IBim+9gj1rFEuB?Si;a`y{GVQ9ue^@`TB9gSEFuoQ*-}%FX zLpCNA?rgK7-ciL0hLtlQs;=QXv*e8N=l}sY-hYLy`i~)Y%4eFUVH?@MjTE!Rh4O3H z{O0W}*6I9|V;T!LuuO|1B?i+^wP0r_y-q>V8AFt`R9lsM+%a!iVdHZJ`r1bnfh}C6 znj3>|p(jWP_?%21{r!Hxj>!_C7J%hU%*+tAwcR}X0KIp3xFq?Efx)CF=_Q$fGwt$n zZ*6OTwwM#K1mHG5C5>0~+RD_TqY!e88=*155b<(~)cKFECs!PmeWWH&dUg26Ea9il zD~K{}%Wunc1UKMX53=f<4yI_3n=A%eSK974CWRM+S=P; z5CDbVOZ}GV?p|-IK5@6H?`E}dM9Fs%1{wcgRIKN4GEFojH|t3*+`as9fsXX*OPQw0 zZSCr;7Q!Oq`@2Mw6kUWBQ=Kq{a{pd+@=;R=ud?k}HY1yz!;OHF0zaWegcPc*u6}_4 z@9lkid3CjvpKOQ_+ZimMA7ofAYi-R82rAY0toi3IB#%8FrG zRk(t0b^b?_(g+P41QfQ{DF3K;TdLOe37`s6j-R9l@ZVVoQHR7|J>iI#4@;setmbi> z^q0w^sc<;7y0M{GpF6f}$@NI$+tvzgOT%Qo7q{3W^AyrieK_|L^SHV`{N0!vBb18+ zSs!2VUt2L({YHc{7lePK>h;Iqr0<5{%G|$A6>5pg(2+QmPb*VjZ6diRnyOfoTBiJB zK>*ly%&vsjHksQP(7~5i0eAfY6tqAH-GD8gZjBtCoJay9)7Q_h(te67yu*}Az!?Gz zdW9_AFWMl+xRU?PD9DEeFh+#{KaoL_a1cw`DY_%Vl z#DPj@&GIhsL|XXM?^=aXxbpOG?NaWoE6A9=rU3`iW^vS=NKYYN@V{Hudx@QakJUa3 ziAPH)BsqJ)dPrd{n+xA!OGY)NIkAG*hMfAm9HC*L+pR#(OKr%^L>MKWn*aS5{wP8L zc+wd~&iyDl_SE~2t6RcopFV{_&byP9s{G->|L+CxUO7J@g0MK&bg_SX7b1oq%2>B^ ztA~1G4))wNKjHBC-iCC4UtAz_qZ`D&=IVtA2N)}OMc{$-P2=@ZYmA8$(vye{i)ul` zv2+-sD80(=&b?W((akPlnN!{IA*YUfTtqKr#s~I#z0Q|pLG?UpvIAsmlC}1wI*k?> zu*LNA>=Y?s)t87UGF(b>x9;5u`tGf`g@Lc*Wk1P?h`6c!_BxSx87o1@n$# zI$y|;^&c>lTyD-Cu&4y^Y;0^ixAKz*b7gUzsltw3tF5O1uN^M+6=i9ej>LMSH@Gx3 zcv>z6pXEo=`V~FbT|~QbH80iIfx<(B{gGH}pS8~vj0nr@!gsS>$wvOY>MVh&pc(pOBxoSW53X3tA5UO!?G%+O~M}nG( z3;K$NUu+=2)1!OlaUFdzLQU}Kurtu0wc5mLAm=*?EF1UKy%e6elkI(oy|{}VjzmI` zpn>e?8_C6P4-o-lhXqYmTj7Fb8IOYEV?2}cSFouIc#rA}I!$~xFxDXfwg`vm8XFB& z7hPns@-FMV_SU$Rw$zz`6RcLx(Ws z3my;qbH41QEveE$GUJUp#{3J%1qF+V!&w+oizWmVblU%tIsy?<2f>rFpQ=JW*%+w% zs6sLz2-(m4z{YA9FQ3Q-WMbFAK$uLf4M#=7gJ)u%*tc`5F$kCJ!sm{*LxPmo^W@u) zEcKz+9;1wJ9}I%!m}taXd~~o0fAXzSv}|y0ZgGixMz7Z-x1pIJ6BafHe0ktAujLuBUnm1tJy-8B?ij>fu*F+`awnK_rk< z0z*6;3C^4xU8*M56Y{sIF4(zf+cV(n&-QY`pZ-Gl+kQPzu!XEfRiDjrd9-P2?<_RUHyhJeyAgo6deLSjCJJ= zuMAes|1eb}aW(L@C)h^P(3(?rj{oNKm6lWb{YpDjV8F*`Fc`-JN`sZeoiI_Dq$baC zRjAf1=SGNc`iy5*_gIPczmAUlRvR5u94cC<#%dsoksU$cy_hf(wCQT^>=l-KSN}fj zMh$>sN|8UOx#>INQe^J$uXo-w_oZ0a7`aquJqtzFp1)DZcG=Ho7{EbA$Hi4f>9xuL zZU~8I1OVQ{0b|kGiE7oKAp!LEO1lX*EE-`lg`|`x^UE%{n3%x@1x#Q|xGKnAY7ovm zd-?XXFlf1yQrGVAYt^TU_!11lZ(%9YaGOBOPp!+9bt>N4uk0OOO7Qbn*Y0{od~vz! 
zG`H3GQ(Tf5GE$)2jAuGt(E{_l*p}T~ndAMVxLU>0(qFFZTAqo{Jxs#0d84(;TH1e0 z00zi*WLYME>8lJU98&`-!3M)-P4)+u>-(=SV!wJaWu%W;KjKD@Jzx7c+rurFbTPY| z)@sB!v(A`1NA$|c3KPUfSsj=ibYE(avKvYPajz1%Bc45bW_-BV3QpkG))x4imzP)W z?c3XtF9%RXZVO!H6-i-9m`PyU6-MqD>GA6DoPjy?_JfJL^wJ0SFTe7YHQw1c-igXErlj)V?-*qeb@<%xE=p|qSF8t~41H)se2qAdi^&VB-H1K=~`ZfJd-4PqY&n|C*)sfzjK0YQbwa4H6uV0b^m3GsG-D*S3G8K0vCat~Mjqx1x7u#~ zJfYG4+v_Biq&&vgJZuWwm-)fe*WP@F7M$GN)k7>VlWf^YdgdZuzj@kz|Jmj5{rU7y zQ6^b^Bbuf{!<%!e>QTPk-nr78dym6S&Z(yi59xqzV(gXE$CEgv*@;c zF=d583+PllV;qBxlPhNzmsX&AKPAFA0$}auuV0VcDv&-dFIQqYfs@PJ++4hfM|KO$ zN0QfH@GwbOJ-4kg*tqFf>M7#OM{}k)8E9;K30*7M_O}z6x`W44&|*wc5Sx%7v|FYG zD>2Gys~xRc^oc}9*7_pkSY%PpH@EMO+GOEleU!~viHRYDgDQ5zL_JmG)$=g7^`*$y zzzlRhYL{){hy=YA5gP~q^+mnV^lKXBfM?zVINs>1#=OL|wh~lV2Kv%|3#}+r-_ZK- z%b+kAw-n{AA}B7&Vzp~2c$(5f)~CA7MTFnN`!dYB8^gAPG+Eo#QY1HpgRZi>b4;?H z!vzEdGZCROt;mcFDptK8SpWY0lT+q@V&Bby1(X;SUEPmWRa`){o@|UX;!U<1+j{4D zvsaf*ag77HG#8oBTgJ`)Bf-jUJ*9NNT0?@v> z^iT@cD$sypTRZUslrV;^c|cE=%@*}j0V@%ZYV2o!;+vS5gvao0D1?0YFsiUURd?{o zaHhfIU;~4eP*Qd|kvm@dt*BU?1tPNGvSx2{W2w5#Smvy$gUxowaZjRW-s?r@KpkB2 z?Ca~Wy2OhgZsz@5eVS{^=?k~Ki+;4vJ;|=K<$vjZE7@dNnz8oZace=Q6(w7|zStsM zwdkO>(%0vBW~BZz6ghG|#-wtsmUx3TyVdM|y3P9G$T~GOm6Ub8FY$%;V{HIlO-?}} zxVc#j^jv%@swkko=jc>hE*^P0ySoEh)(cvaNE5Eq!8#`+>ssC2xrRE`eHdMo9i3E0 zI)p$X_gx_=jheOX5W@dT?Pa|_s&cx@EgsbXP0KWfBl}Se&VE8=M-M(R$WNIFue&$; z^w0dYrY1293ybsVrYaH&ifL~OCmIIEYZ)07APMxj-rO9J0tn;$VeL1dvQJi)m%VEp z=e+%0U60)mV=xRDLbFm+o|in=O;B~;nC?!`zA-OxEt}}f`^_k*?Y+es*aDZRm0+Mr z;aWHNU)(UVP)=I>PQKM|tXnPiI42z*Dw+X zmL*a|i>hSoRSX}&A(^pX9<~6Fu;NJ%GbPdtpRXc2XRw}&8qt}%xX;EzdivKNKS&}; zUIuBfNCQRgX>yOt_ULn9@mT_66zs*pa;Ip{s;a8VT-@9QqL$(X6%A|6-}N5=rue1YkKbm$hzpRK0nWv&&7KZ zY*x^q%7cGqE#pfkuhZp5Gm(JM1gDt<7$U-_@w|M{s-VT?QTj2ode);ulnx#&!Ft7g=DchQfe<%qfg03L1 z{-DV06G>bj@PxR^>O@^Pjp|jEq z-0Djq$;!%>AZjqiBZC_>FxMRL0E{c;ssLFgfhXuQz1$J0I1p{^?%(gL6Ya~Ky(ZCx zp@BsfPHT$}l{kLygsYp)5nmUJLqCN!CSC@Opb%GzC+ZLJt&Nv^5kyE$l{m3SSN#x5 zfF_9}7tnXcZfPOs#R`^{$dta!amwlXA3qj`+|AU0D@z9SWBUcKTT-%jC>6#oH>Q@# z)y({R$GsJA#+%~A=`c|%JrAvIYn9Q+774hlfW> z&zrP`e?Th<3=F)y-YG9j^#m>mTDmAo{_pdb`*dzaS>sk3|LJWMcruk9z3Gao4okV1 z*tF(%oNYFI`&>ip9lE+DosQfyTJSxVYXnR`gyYCd)pr zyAibPLzbSl-&&tcdN{B|Bd^4Htp{^rz>k^E0vE1?RDp$wQpj($B3VZ8qK45RR8TN- z5+8<`{gUA>nt#3p0G55ga)UC%73Fwk3#?bbrt~o~@@b+xcrR;s)#(B#a}J8=acjds zaX9FRk7iCSSvu$hU0J3LJ74gCiVE+6$ZaR!H3I{~CGZLj)w@{%f2T2kcVuGz!v1$> z+P;UasDSzliHyWN_4X4f0FJE>a?f4^PZvq~caSEzL|sgLRDi)pe$L2*P_AfUYoa z5cHu^@+rZ`oZ`a2g+sYYg$czpa{*MR&!L|mGY%L9&c?3ggdqenQN$a0N-+Nr)7$-m{X0wt~8$;r>_kb5Qc5xE=|N zfCTq?Qu3Bmxc2pz7N7-kdL5f1At4zrwgeWIm31hK-F*PoZ?pZ`dZ6wGYn6b7)wkEB zqwVis6#0AtU`T2QQVKpHVKBEsC;)Z`AQ!h>+%bSXeT8K4gh=a88A3A8>RnA?c(N1VBmtX_M&;7UJ+b23kUi z=|4wbVSnd6p3Ev16%|LIK%zYhsL;mSWa=muL>gaj!UZgy1|ah&{a5FhX$GX|U+&x! z4TJw|RkC!E6jFT6YJT+%f941qOnt2t?rI?Y=Yv~yc2idfXgEJC$U>n(u&fk){TlQ+ zJ{}!1`0bkt2&__0`6S)n;B6Gh>MYNnKTUlr16Fn((?4hk;&qO5*9vwgS{fRU|J3Ga zxnp%}>kU|cc&rC$g+11sIXHCK5>#eA^GsC#+}@Y)Q2SRcq% z1`@-g>4_^f3N-p;h&EAv4JzEfa8tA!Pr==%7V)mJb9O#VhM3Qtg=+<;Bj*oAPL#V;?_*Zpk zvqy>dR5z;ILxZ4#iQS+8gdpiEY+b7vX3? 
zKlsLLHx3V9+;{ZXs?~MSG=?pXzjW(c0%mvM@6d@gqo;dpI{=M7{DbjGA%(ntl#y1- z>gI!TbLl~o;q<@t1aGwT^pA7274Q)eg@w$_iQV4%K_+B}p2CG=wzEw>q}7&vLCMLa zp}0?Qz-b;G)d1Eibl~C+Rbj57QDGs%sB_<)bonZue~bhV4-Xe@@;>wa4m(~?clrwa z(9{4DYb9l7iUi|V8DCGlGrwDkGUY%#&rKIW)NF=R<2a@)K8^W7;#pb7y{Sh*f8Q66>~ z%2O}FSbVlvKh1A5{HJeXV&av6z~llj5HHNli4^8Lndf9?8o*;?mEp~G%@sBy-+uf1 z`|ks#x_|DMd$k|fxT;-DDFN0M<@x#fcRS-R5yXMr z`YV{PpZ}R_=<3RYb5%2c!NWxn4GfoZUsP0dbjU5t%?CHK10LP8mJZPt&xM6EKwd!B zwLSwBle*xCH70iU*!_iO<%7l(96(Ny<*{$_M(0r~#Q<~Zm)8j+@s9j+U?#Nv-OP=T?{_ih@#h+?=5j(uh z0G{whLx*{=Qvhc8DKPC04Glf6#5@$(Ko>#>h5?X@p%)M!1Nh-@+*8R?rFww$=>!GI z0maZ!@3sMGZ0y`2>oqOl=WQR(S2)_MTL|i0(FsPSjI60mu>iiRM;rGe&?ziL1Khr3 zD3$~mANmFcWyFLpTf!P=D`j>c*y+b&I*Vaxv(3;H|C4u3xRc^8gk_oRu(tjA%6OM6NA?~j0P z)Y%vXccGuC(6LP*<1n>jkfYIH4d9=a_v}fP5G41e( zh*p4&mo_%SD$P3E=H{s7^M}0|>%nTjyu6G@N*V?n41lv~86O|FMXE^oOo$zLh`yyiyl01xAk5L06GA2MffCVj{Fy3CFjdSlf1kZ>?sVe(w}?KuvA43sDPg8q%I-Ry2>;&L%oTZl?iFf6o_l< zBSNbb8+{)d@sSnM(tlK;kQ4SlhpeMza$i6u`uZ7D1cf8*UF5Cgii4C`kM0BDk1ZX4 zn`Ch&1B&YT+q+ed?X4}Hj+LXT-MzhXQ2QCLypC64p(YgObNK}YdYDlUI@bz~AGeEqN&mM1vfC~axRsZC@iRS=$m1nls zZS(!Gmtj1uhxqL<_|nNG9)GQ*cZVjA8(QlZUY{>Gae_?`_cuzwz6{bTlTAMS^8pWj zfCxg-$phKBXLxAGZyDoBV>?%*Wn|a@bufhq2Zd|LL($>gWLP}0vzM8XK?iEu*~_aw zz}@|f5(b00fpw*yiGjiIr}fZR*B0L#`?qhg0LBTfsHm{Aas@qx?{N>~z)}aapCCwY zJW|*A*#mXDAA#Fn5ybPfcE-Sv0e<2*5OKP?yo}RlD|o5*iR~W_N6$7RSDS|IXCd)U*K1R+Qu55Hi!*^58jwI2iUc4eqHg1#skd(K9n! zD*@`)O4yCC5y($ZAb^6r+(7`M#-fJ=)H)sXIK2SAnfJeO0MW-6zzd^8d;$(HJ|wTC z#F(%42Gp#Efq`Ne4)xbsy9qU5r8H>t`~)o5vS6+=fMt&YBqVBW$38wIh&aPMytpU; z`{oE}W%A$^j-QKNySTgOZ*E#|&o{kdc=l`%RH)6>@!I1H5DK^#);2WE0%-)Q+@VQY zg72VBBSLYpu>%qi2wsf3VaCvicta4lUQE~^h;`Zg`^-Q@PJZQ-IvgJ#Z*zURwYs?} z2Z5}?IxGwoJ>>Og`FvauIb5It*@K>e(Psdl z&ANJcEIcQO_}F}VtUyNxRE#elJ|G<*9s(t_3bQ&W=& zpx5xB$MSo;mH%rmIz7z;wu#SbMKx)T;NEP3hIR@dsG7ZfDKQo>r2(`7Jn>n-uhuES z#_*-Mm<5D35#yaFdA`uFwri$~z8qw}V=wiX0#3jqr8V-2hvn3iG!JNfa&#Y2^5 ziPkuw32YFY#>Y~z9LQ1x5Gl92TYeiz&#)YDg5yCS1dR+du58*^@OLcfSA@f3C-JPE1I6?450Gue(8(43m^?5S(*eWf~GRBp&-o4l`Zd zX(`BhtT>T_g~b-c`1n9#HFqxlfEUR7I}Z-__QkM4;MPnRbE?!-rZ+NLP$+zr`189V z8|c#Is?H$($iIy{Y+I{kz`)EL4jRhexw)pxqY#6F@}cdqk~egp;?mP+bf6RwpC4v` z6j{H#xiNAF)dDtZQ1HbUesA*Cyg?ZE`KwnAS<0*o3?V>ixVpKC2h{=|MV9Y3COuj_ zvXzw;Sttj#9Kv|EL8;OBRd8|T^ z-=KpM;^RleZ4Q7?)Z~m`udk$|qw^@Q3DcAZhX1W%1wWBeuh#Z#e!VaK4%FOt@Uqsx z3ZBmR>=_6A>fO9d<0)(QKsikO>C>kpMnpp_e0<~cT>RjG8+xq~Bd92I)_s8AeyXm` z%ljkL45mVjB_fQ(6Gu9}gY? zCb~@3H8lw+xHLdG1nq=~hzRhI(yL2Lk3bEe2Eayv@&j2k5d?8CWwE!FL1Czr8#Wb! 
z^e{+)0&8J^F*8CAI4EksXu7t#ItX%YTOje4kdZOr)qME>UI2aN=B1OX+WPwX7^rkD zf&vy6*SRtVfkaWx9#r9bbxevt3vAcUnAq5g)clTAsc#?3n|5NHiVYVA{wc-3D~R7G zVtyz7<;P$LO%Q^b29lK2r>D-EY+r-Ib8_go){5aRi3}`3y>z*yT;}CC^4jnD4Q%4q z@9u`O-1L|W#g^u^j(tV1-er7R&A1RexR^UziF`~EDUtPAo`_5|)hBC}9Gsqu% z*aK!gnu7NuMUvX-ywAZ9Favk(RfFf^DA-4^306CB9;ZsruQ$XV{+<1&S7VJ2Qt#WN zMT9$*jR~JW4_8s1f%1Zfe*llus0O40*l#+(5L1li?E>W3&-^ja{-1Mj#DTax*aimy zOZWy7c%3Qi^%)ujcQmc(`GW5WwzRBlycFG2h$rCQce=rY-NMq+b>F;JSF!p2`o-Oj z(Zg3jEh2~qqvA3`T0ja0USE6r-9MJXZYjKWNph?t$uQb{wNz!P-W-YGdy-Ypr4Uje zzc~$nF7o3?5}1neP+>*IA+Ald%og9Qnl}JkL=+U=`S;M`ZUT6W@!(#FT+F)Z+x~le ziNGXamxKpk1CP(SeZPBlLV^^S2@G^!|F0s=YrAgBw%ZEw^1`8(si`XxkRqi!wqqSN z`x~`X<;M=hX)uiQCU)MiB&q@K{c%eM!|hGM$k1%P8~7xj&R1T40KB3a@BzMd;JaYq z=TDxo0=&P5n%bi?b9H&yl?(z<(G)B*3y+8YU~BQPmwVuJ>B!R@^xA4*twg`KR4LNH z1VQwzcR~o_U|oMk_tmNS?i@GX4EVl~-~c@qHa2F|=)SDA83_f3ThKw$0gZ8gxx%P0 z2Y#JGyu7^p{?iVinrs2~GY8nI5yU|r%>Hxw1^68T1a>tuQ&VFf!on`Bohj>nM?N7V z6F`H`wnnTfv`R!ey(vMIx#RC&H4vG$u(Engxd{dVi}NrjA~}laP>|$|CgThOMdY#I z?gVxmbg1ciyF@Vw0m$87PEX%~rpV(sE3kZW1=@3XH~_@bNbvqFkrLoOmaRtg`ww(N z_8-;N)!;kJ-h0sf1?}=^+OAyi@X8mYayweaHQ%V=m2 zY_bvI0##BnGOn*hTvz4#sf7V21k#-=KOT`T7;KyM8(Aw|C#&(w!yXHMoqN@88%o~m zsa8Mtu-YFR^0<~CBAfnC2y_!e2$Yx}Ogwi;qgwg%-OU9*{yWgKz*xysd!GO8n=8r% zsgbYCLK=2(t2lsYU)c475&xGpjR@}=8%qS;TUzYyl(5!m;rEpN`t7an z@XpyYA`BZ)UQ$vuzE`eFEfL{hi4qhNQUkv_;ldf*+;^*8_-R7!EPw|@5C>H?3bdu^ zpDy%|B+et&55^VE0qQG)xUZ;K&&-UEpOn-e^gwZyu`<0n3Fs$@dKt;10EG~kvzcm@ zwY32LISjuZyo4+Q0)of$lofdomTMJr^Jk=T=E>&RMKpOMTg;%AK@4?jPa43#63XTR zncVtM$;qKUr=T)FkJ;uwl?=tB3}K>s>=(&p&VV)kOVQHH5j8Y1 zQ8zd@mkG>i=;ANouOJMlaBgl8L_P7UBoW&mCWA*@N?4hlxSq~FeQzID3 zS0HT#7m7j}Vr`?Z}IH zANE_egPgl3SEW*a9%Y_@=5!9zKjDl4VWFY@@87)}1xybCM#wBM0=V5fL92_4;$UAp zjSaXB28^W)V3E7~y1Uc7&v$jM{->rdf#$MX-+l}kiZWHAkTN7mLIWX$N>NEN(||H$ zDiWEa2q_f_k$7c_LS)EHkz^)>P-ck;-?cmc?_Fn|b=K*P-|u<$zW04!)9%{62iV4w ztsKmgE(Q8pTJGV8d3gQTP{US}RTLb{uX1vBx_2@^(-B(}7x#CcfPkmL zc-G~=XtyOS#>Os;g7zir6#hbHdhZGe$FW&7dN1d*VutZjSBHdz@UL69?ss8P5%d>Y z^+%z;O18z?oeI}ds9x^q<^uWf7l+bYPWDD%Q)kiJp3~3>=tDIg&I1|{3kwb(khXW1 zE?<_5{CLqMrRm3%dh@bs*{Nh5zLkE0fbB;WfqzFTxbGJd5orX-&O{mIUh%@7m>Fpc zNA(L)tm_vQ6%F0ZqH_1{T>_oY2yv6AK0uF7b&=0u$Z0>p%P}x$Bg-LYe6%BH7*I^F z;V8DjmIw(WF{pRjP@9$I`9iq;GpAe%F8P~@xyT#KgjDWc<)=LL`*$L~_W1=+(k#r( zu8LUJCz5p-=H}zsQ>U;ML{F5%BF$h52 z#5WDg&tEtbvV9+Ll~sz0MUN@c<4Z|N?H!HN5o4xym=ykf+7o;HMSOd^{y*ea(BDiW zS?4pB<~CHrTCbnL{j0jhU57qx&Oc+~e){pFM@b_cIq43c6AmATqA7QrZ5~)$slPhC z;J2e&IfyI!VKc2~A;uLkQBfEV6fvyK_1hP!;p^|GegTRh=?a6kE0hrCP+(#8ym2@n z0Kjm@CiPqny5HBHpL8w5EVV4r5-@x1-5-c9;g@@)DGbW;ii$ff2ca;weyEA$1!y+cC*9*gsFAgxh0I=OG7eQ>x?r{w-15fc_Il%b)ao@oDp zzv(bCFr>ZF6Tc3E5$(1s5X%E@zlO}-9C&817agoW2xdB95{d11}kTZPO7!ecGO0@V;vZKp2T4zyW3dM{o8{-JUO{ zEfV_KL*1C$l37Y0QF_`j3o1A>qQdXpQ{J?3qc0+12H%!uL^Bykf$>XwwePs~8`>zA z_B10o3DHL}F@Sa5EIgMMlR`jp|B>!2H)pu%kjUjA;fg7*TDwgs|z(V4pgg`ZO&8yh>s z%2Of65%{0Cw|61%mK!M<7XW>&JUgnVtE;9K9|j^VH^?AjqpB*t4o;cvFEk%d)dQkX zpfn#zmIynguCP9_47axy2Fv{jCb5#{L2s}HLl43~nh(&MUG5j+U`EFpsv{;8sHzN? 
z49aNnb1k}9%<%JA7mzwT-j9h%x9!|^!LSkcE;;&1}h zkN=px)<*;gU3j(c?b|E4*01lwvAY-06!M{NO-(t#?ROc*2;{gJd11{s0~snMkVntp z;28!AGnJzPT3BqSz!2YkxEW@lL7!d|bq zEWd82F^im>9Iz=`oSUZ{9UOGq`ASo{aA;57P$)_O_V8=EUNKxp>^U~SAJ`CNBZOo8 zA8pHe5vXVq-*Ck%4drXuc2_8N2x7IsaDjRn0!E*0R?2{MGxmQ?`t@A!b5#RV(?^~= z*jQQ)IR+~Z7PwAtG^mj|io<1v5>m1@2dk;zOgGd&3~&=qod?`|UT*FUyYLL%m8ho= zeSCa!=wcrlUk@b1QCd=>Ns)&WGHwWj@@IOGlj4y&&$I|w%xUu5A!u{B(6B&fOoNI0 zGj`!oEHi**|A$-c%dv1g_LE2C2e@Z~I-!lwe|KHx%u?U1bp{wvnJ5p z`yl^kuP~?+z>G!!2`H#7*)ZM#ACoy6AnNUvsP56Fpa zd-pzidu{wU4hYN_R`AaLAHYE}KoB^Vh|TW}AiWwL5I(eOdco96EMsjf9V-aBNxS`{ zAzqzRu?WwdK#MphE3Ei_zi6;bKu1oRDT47L(sz+>|jzy@u1~@=Sx3* zI@7HNiY!1|Gyw<&wyR-_DwXXte3CHG2DAK@;cwU71%b;%d8RImwZHyJN&nE8!q4F> zmCSY+s1cSKe98VaB0>#BYa;d}i7Wx)D$A{by8N>Q?YTORP0aMbTuwXnRj}fXSUDVj zvl=yddp7*v=0;UGzd^RBL5zqT{4^&~#AqQ6Dju#*Jc47-KH`@i3&5@m46p<+ zN)AglDpJ~t1Egog-<rhc%JjJ?QX=tsCxH%)w7nL!`KT?Ja7CNcz7&A8wcO7KZ7xjDo*If zNp~H*ojdAD`Ysanf}fr8-1v?42Anbt7BTK>ETx|SNF^sEsQ#9G6|u~M(T|qW*3sd@ zT76?8K@)pCdOv$eWfqzsuVeH2lCAQ5d?6NN7{xR+G^#GoX`Q)}R%z>)dUI*$C;m^v zC-K-YhRn9;2y8lY!!9!tL`8A=?`{n53Rm_&0LTDHEpo5DU|_j<@AtDnhnHKUmfV5Q#SXc~BVMJZX+{6y9 zHh1apO}~vJoH0fYAP*a$ZA!kmykuxlD|9sC6E<$OrL@A8etnQ2Xai4yGeUq`Aeu7O z9J_AYXUEW7t-miWCid+|CXrK^$S4N{{6Ry=Ol{glo@b0jrj$l}l&Ak?ReZ$yK zU%x8s6chUq1vkntexU@IJ3@DTxVRfr9zB@VsSEyQxo3RXq!+SwwZ_STRzK+AAhnub zAqbtB=2Q6%S_fvhFv^=sJsy*Leq<|0Tw)>vws{H8NOoS{I{?N7MMarlk%QC+;Y`5~ zWi$>SE&-(wcQScm;Gv1&)~)m>ley3>J63=AAnq~!aS#9SjQEZnem*`b>ga$j6FtoA z_$^2>kywTg%UC%NLK})F#Zt9f>oh|#R&f&pG^O+!D6@;J>t@6Q!lJj*j~0_iXzg<2 zJr3 z3cvt+hdTCat9{9A)cx&nk&cadbUW&&>Y|dxqCYU@X4tlE8+NOy$dE^J%g-0`i@zHA zNdLH+{lJ%qM69q-jjf|tDK`Oya>0{F*zd5Ys8x{ZL51vuNnsfEWZ51H6j@nm5zZY0 zJ_SqjKfi%1w2W1Vj;tS4k$AX8R`z#T(KREZP3Y()p*(`Qx3sBgt%%msGUy;roH=vN z#6jcvXU2OzLPkzb(#k3-2UT6Lp-EaCx?f}nYCYX;ah&&^@pxJL$u*pKAs{`pghfR9 z&_=%pgon4{4@@6wFeSmT9xNMKhioVBmnKDg&`Dgqu&gV}CAw`}3G$=rHrxwQHo&X> z_PWK`2TVPtwA=hQ|FT1bvpv7>I3i9J22pfPRX`73EH#x_`dbBE3;|6Nm3%BHi!7vt}Wk?14c<1 zBmDtcFh!iXw8@IAApH+i^#<)t!W#pEf>Z$zU~0?2`3N&rx8uY22y21hrYL{B>pTk+ z6UA=!CFukb0ibgs8WG@x3$U5{;v z997)Xa;*>CgN?W)>rwIehPgGrlRq}^k=y?@O2`9ZX(&Jxg`P+~W&x|Cgmnu({5Bd& zBA!BrMhyO#gT{WAn+Kw=?CI^L5fKsj+cv-!Q-2HGGG&T`hlp$;Z38JstbVXqxxNFF zSPi}Rfov;z94;C9sXOhu-mZlNr33*#pnDl>{FGrQI7Fzwotar9HkKvyn*|F?OqqD4 zE!j|A{u#8MXv?hof(kj|QnZ{o;INN~Zh`;5udZgsv0jV<69$Pi`UruoTYcT0DFkq% zU;zZVa_!n6{m63G6V5USoZJ$uRTIT5e=PRMM=` zS0+DY6n`S%9QqVT2i}g+-z?+WxN*XZ7w6EOq1+U8ybB{y1HI6DAi7p#>&>pbt?TaW zLJBVe6TV?okc|4MhxQdxzs!ptc`T)Q{L0PQ@l!?lo1G7UNHWC6E?gLs`w+Ef4f^ud zY%5L-ZrFFfJF+Xs;}7M!o)$a33@~gILAqXs61Un6yRVI)+sA=HyaZrp5xhxi5>!WH zR&|mrP$HnW^GAu~AiyEULrGL3`RNb4(X-y@nHsh=4^S1{yLYdWxJRz6Ee{a&L(0muL{mX( zEqeXdJVy}_*B#c{;~pr+6H`-&t?f_|VO%h3{k0m(ZmbW)JD9>+1o`YC{Q8Q!n^xg- zAwT%%{F$#urG;zqeyR6En3SC$%1*Pp7?iZ#(ZDE z$O3}+y1mdn^!;AXM6U;^4?TkUxpVf|KsS~bXNjp54E2)aHcCl2yriRp7ka=oZL-UD zr@LZ@;Ue^biyOu!bfPTCQ-fM0;O^ZDb$)kx3k!=Or?+;LDOSny_$=O@ktI$=%!M+4 z;jW=PG2p$omMyiGjd>I3vE3V?nrMo`^izfTLe#O;={<3P0Wm&>NgXjJX_>F^N5G>Q z6XO(zp11rQf1J12?Wv!d2*UOg==>xkT-GKUq3OerauTI?&z~GPvig?18{-)bcrbGw z7YqvkX6=q{6D^p1xv+gI6aI}E6&2PwK;}_jM5QIPl}ouL{mn$d6<`b?Z1c_A+?aNv zye5l0OH5-S=yq^&N= zB+^zwO9u}>`m*~|P|WTB+wi>k z*`YfWg+XvLH30yWZoBSFiqeddWz|h-x(D<_0da3p{mTdc)*ls5NlEdcVlhCVzcUH9 z(M@^axN!Wj)C%G$Mb*Oo`h+f{|ND1`wzq4hGy&}_CRfeIdus}-VC2KlRdTVJZ@<&f zXRLKO0C`MUapk}e*}1u^nx9TUQoYmxW{_8NWX(V3_@dVS(P43fB!&VL@Xfc zns#>MUsBGb>!%sNyA=Mn`Po5m>&i4F_!Gx#fg7&~URy6}93!?jT2L@sw?BON(A~r& z_4wtXP@kp%RbQZJy*Lw?n3z2A72f9N1{p)SLzzM#L&RXS??8UVLz7TiYax~vmRdY5ijfEC8Qiwy- zrKbeLL?MeLA+|X0-=ohYc5iS=2=Wn-k#X*WG;jdU2G?#!lLD7XPQhGly!p7_g0wTl;}uk 
zWF!yL5G5WgqrR-t5qprcg7)C;BhTOCNZ954LkGyHDf%!N3b=t!*r9m&Lm{2oCh~rF zy?lICRn;R75HRTf5)!^BS|Hrjp9FOSFFiLNdCE@ZsECGx!>h|sdSUg&H4Ya`Y8rvD zmg3zoag6CRtla(m{XK~HM*lt3nz?1qQtZBLu8uxi??)dm;+e&vVo@+vHAqruj25Xc z#~w-25XmX_Jh3g1&IqTT%9Se_dxeB9s5lN1ohuLJvh-&#EHt!u(Ii>t89rRN*`p&- zxj9na;`-Sd1K+>5K8?~tM`bmN0~$3x*I{5K)->CuO~L4AyIYnlt!h&$ z5RIaaA_tlshmbd~Z=UW}jsu5O^?;#Zv;~zLl3ujL0}Pb3br57EVQBcTb(%D|hy}NK z`1tW3af;Y?5dYaza~~<$A)woxWlz{$px^aa^V6l*2N6#|Yz3GQ&Ft)MQl6piQ{Bhd zFpd!EI1Y7G9xQoAoGp&svvVdD@_a50uJV||7?JMfd*rz?j%7&-LvHcMk2d@vl}B^* z#Q!hJLlUTpb!&!~qeTpM@nK<0JDd8p0g%n8AF21Z(>#h?v^Nzc>O_@gBBF^Bj9o z(f<>&I5ZfLP&K^J;>T9@bsqkFVG3R|YmV;72t816bjT>#YgY?^F)IZnAVf&x^y#Jp zmGq2^j=tu+a@nc|eX}EMDNQSpS%!~-QBah}h!a_*U&Ps%5??*d#lc_(QQVUbyv!0y z>EqMCe;Yq|{5bEHyUPu@C{Y~dhT=m);mdigAB^Ap4QmC4uJ)I>Hz2AkXPr0Oi3Lmn zB5yc11D9}u>OfU|-TF-WeLGS;m#d|?5kaAFr*HOxBh-#KplGG z>+l(wfwWydO;7W~=nIYjCA3xC{`R?UZg}U=%sf5Mo#?-)fH4i)3SOLH5j#!3s+Rtz zBL-tb@4|QynWMF}wLPJIq@^T`3zz_zn`3WDuzo(3-r`74Q#>;}IWr?er*!C0`2kiA z1e9Sphv4w@sUL(gl1csUMk{A}_-}?jKVNlp-Edb&x%n=vHrR6VvzD{;#7T@kO8dV2 z6zBxGfpJ2~z8l&)r{T{lAc#RTSqX(KF{bwSpLZH}2wL?^Yr7()eBE z?m0&2chPT{VV99`aElZ`-~W6Y(a~MOC{|l!?d^o~O)nr2HlbfG^uKwu(QgoYNo;^kA+OJSUMP=tOcmu*HG0!Zh zmSjOW!t~$vLTJ#3W#!8M&dcGI15Z=8>=U+om3a|g_fA5n+SRtvF2DFT^M1$ zG&Pl=1O58d19Gp&Zs<3|j(sp-KB#9yQ+y&-m&9hx*jUl{lxSD3M87}B=WjNaa*(`Y zpg*X2G?;+BF?Nqh&|YGtx6;$6rHH&lc1^ohvJUBOAwA83ku0Wu@Ft9bb|ALp05_7P{p3BnUN;_BS?eTA8Ph)|Flgy1VJ;(K2Yp3zh>0@AzS4F+?}jgPkM=QYXS1 z=iDwJrdzP9au-ky>8S!x`m)S+Hb-Bgzdx+xe$5_oF^TB1jtBn(vbFWLE+5EM>2}AL~ zx=h(Bbk^g-_vSqgq+!QH=g-E$;SJdOu0ZhNhR^Tx#FdqmKbgAyS!{A7jMoalQg1UL zw~RWUqvnkRXChTMg_ETIASvQjN+2dao3Sb72bAaW7x{zokTPSiQis-)R8stdLVP8` zvO4hgwbJ^KzC)i%_YcaA7n}-@b?dQOQAd~W)lHFGaJIFPYv+3{uvggfd!v67&8 ze6T3esA9DEW0U7W#AC*-)2^?nq4gAYf2FUoF`=aOVl>w)=uM(j73#>Tym*!l-6^+2 z-bVZ|D49O}oKdFcz^w^F*L>B@)is*N6TL7dKIzFLATe-Jg6G;K0z#4y{My?5%8tg* zC9)|=DJh(7Ws|!^0-7$3)=S*fo3+J$mec-BM?wZ`p*)4dBlHGE3?L*iH-AeBps*(* z+ht_d65h)8YXLJAk|4LR4+mC&bsq1IWdY+<)6|O!zSNW^9Ay1fh%va#9cbQ1D}|p(J6%8UzC7C!2YL1wS;X z>Cz1SQe)j9Iu3kpAG`>zm40?Uqoxut~2v_9p(|HuNw{03^CH~3=V%0&z)?4I^klR5 zZ^mGHr|pHpQMTW()rbHBdJb8e_`w=HQ8(ZuGHy4{t(Q1T7FAOckfSZa!ilFje1j9| z1i)_N3(G&$GmkAymO2^)odI3ac>XFSCJNM_5Kh9?QJRSgxKjYq95G4&O|yZ1rs4*F zuOp|s1%LFE_zDY$n!MuV^fID$Kwq~9<)5Xw%Hrm$_s&ayUD3GUN&1*QQk3x@y~&#VOL2vt zwcJTwK1J_|wt+I}I%9Rj@>I+5#Bs+-T~%wY{W&opVcMcjGLJQz{V%LX>P!$G+g$+w zM*v&#kwkayBt8>+j8wYE*bJ`EpF0u?(x@<4TT~V98R|>r9>5P2h2s+a<+08vTCbk{ zA(fZUW$NWVJ)x!WXX~EUlfvN#MY)JuKfQ2X3^X(W9zlqBn>`Ya=hWZd`qfm{TfTz6 zt-v+Q1Jn`$z4msPegE*-^xWu+!-|lB)2d05o252zsG4@EZG+ttvLGw8L6(3Rk~_iAx;9{KYhI z1L8TMRf5kme%HB*jIeXGN+7B`X6xj}r5m=c^m~?sULYO)9d2KUZbWsqqlzmlE&31y zeB{#WNX!V)`yBmA&Mi1slR5x%(zS_%VtPvTpGilc0BI?Bj^G$U_v zaunCGv1K$@Ru29J?vT4H7eN#kVAYL7AC21$I9%P_8leIoLc4}LSdz1|f8A6L!eaeA z>~vm|dkCs?!Zv~*2bL#D6eFYUGC!b)ATr>*oZkGs|EvHVOjCQGB_&neb#F(r7X^`5 z^~aCFg?Jh5Fmp}*`RhhY0agJ-e=i1XHYAa9w379aGAWaeSn3c(LhN}@eInywxUlmUARauhAdXuviMfrt`<6oUv=hyoah znCd_AG;9@c(rt5w~e$ zT;3g#2Pg)|p6m_7IkF8VVTHdFDr0Un=U;WMQJ5>{Oj>jIYcOmbMU9d7#BE%1O@Ccq zd|#oDU2}`lHZMcl85laB*T%4SxR_xRl90Lj8>%g?O@PR8`E?ig!rjkV-T@AdUQ|B#AaPHr%=6&9D z36)nHo_}a*Zv9X14o1)kTnApq7VKFF#)#D$#{|762KySMXwKbt!nL8&8Tom=(_=1T1Nl5mcgFe_+-yyyqequyijv#(NEx))4}@ z=*J0DucGE{SdZ|}GXNRHN&|P-eWF3ZT16D7n!y|Exa@3cEYwan4fw0A-?H90O^-F^YGNgO^vXAKDi|M5w}NMda`C+M?2cD5OEK zko$Os@u%?oU0-7cNe-Vz5Ksdc{Sx7SJ5l7ZSXENu)!Er;36wb7b^2mWb#)wsW5R=9 zET!*9M<0XWeJfQ433;#kHAHaLyy7a?KnAkoxMbIf> z=D8lNc(brt^Bv$MN6rTn;c$g_?{>U`;H;oF>$_+eIL40dVnLU83(*$wco#U-I(wqA zQ_&a`SPp+gOtM%MsC_i>ghPL^SwdnpZlTc@<-+;*@w+;@aWi_f32nu-tPf 
zEV6dQ)u@FQ`L<(KGq?TmWho`;!sJe;d@i~42Ftf!-L*mX{Q(wQi(wVzxCEEvV9zNU zfsD+8TEZ27L`G11v_*h!@QKFsFgXAREk>d!xk>`vEP|SY5N)!7ho@}A%y@Uq?)NmGP!UO;?YLFh*Cu#FR$d-s z>UjXaVmo%&2qr*Pw3z~LaCK_z9I__TOyzb;v7D+LoF@Jd>^f2$VWv?A;*c74%d@NEGi)QPCBZoz~9O$|CJBerEaUYMark?sh;QDGgzau$BvI13ZLi`XR2} z_p0*Ae&^dW=aYdh)bJ>N9QOz;|0aC17J~9X)sJJm+}s+d02}{Va!??&VY_5!kKSvK ziyJ$07eSIha}(ZRxiwQy6M$p__$`NyO`A3~T3>7SzqJyVTC`PH2O>bdP4x~wfseL{ z&(pHi3G|*2T0EQVl|YGT&_adARA0*sPz*m!#Nq1I?^Y zlbE@_P<3^(yGe{cIeH5FNaYv~%VzgybjrDS^5 z!C^lSroiwgPde2k9`5XZ>zwkqvac6hn^%cxF=}j2Pmd?ShCaXb#>FIe0RzQKbm9LP zPOpFr_l2wSI6XmFpB}7_S(e|w|GFkx75}4K?{PG5Y0hQtf-|u9=g(DnhO3cSO~JMw z*(}vidv)Cu({KtD@6PiioN3{?n(&;rdTN7W!vM`mCTwC$@Ad8{K~vJ`Ydj)2K~x z0FhUI`lR_b^Lv=aR_SvUEZ9pz5O1@N-z>f=V&gIdWdgqhRD!sCFboRVX#DTo{ZlBg zkb3uzAT%~N@pW*PJM_yOi1{c2eBT6#J~+$sc6M5{UY;;7ZF_L!2Y3;jp$)JoYhnhdq#Tn0Wdt7>n;ijmAbhiGU5bc0l2C{<~)E#G8DpQk7e`ZdDGF*sF$9 zS_cMJdBVL;nq~lWlYeeV)KQ)ozvf03>(&B7z!gDDmbbqbh(iT`lGXrh&Yv=h^7Ha~ z!Mt-rRRP+u_)rn{&ZM(r#RP~VSeQ}@0eBpbQDCKj`5uA{Hj-Ck3mX%BG%Kkg0OMmS zY}7H{yv#QNKoMH}jKtS1U*z1vdytPA1dSo(8Snu@jHFYdT)3M}>hSZ=AMnDy!Oexy z7gsC%yZW-z17fe=*J3PyivSL?ISndm^~SxFzu6}^WN@S>wz>M@{J+ex2&2`yuEDI+ zG?t&^GI0Y8o&bd^ubMs(9}&EiLPzBUh#{mU*NnC22@OvOfg$wDS)f zh|G7&X$8skB{ZIGxMNDqQG__-YQ5$&ZIOWj`LjAklB~Ql){Z~@hhNR|T3XUl;niiE z;v0u^N4n4eFR@VI?r$FpvZi;1z?7Z?G$tFnKg)*CS*R^{X!Ky++n59Q<2cM1>odCQ zE=X;lVzaZq96(#sJLbr`Y5y18p}F02-b)@g76Jyyg{~)yJ~<_&@8bPEZe}j>r%I*R zSoK!})B*>JOE?jyL`xA3aI>%)#(0iLAHV;XojuPIjB~`j5Q=eqb@fU>XCng$gGocu z)g7n}mEowuW~nt) zK?e3>2b5v|(|lR42Xo1d?{M@)KB>p7sHi>8@J2rsxL_ID9f%G}>UvSIW}~kOhgbi{ z!}~p$S%@R>BHDdt;4Wun*@;s&-oA@D1Fc<1 zEZxUctb(|+%)roKfe;qfOa@BH)U*c@kxw3=xrvs983&{UG!#uzk4IV|eBGBbf{rWi z#UGUXhkLlkN8U*Jo4rI!0lWc{IVClB)cm*vi3j96tt%*KR|X@{cLu`e-UXH)wi9LwS9yH9 z$os?TZ30e<9E*Ci*+%NB&H(O6CNr{QVq&@vlsW}4fg6R*dcq?{3k;XE<-LnJElo|Q zVH290K&}bTqgJLVCL&eDTH$?i$nf+)6Ii=5!D^gCgM)+3mLKg*f0ms(d6EKu_00Ex zEecs}TlX41c|8%Xm8nFSL*#CtbigwZK6us=@|ySG4P;h}`8mx5Fjp+yogHM15E$|0+#dBj!QG*d?W<6{dS)B|g~0{d2Zef|>vfqm>aD zg4f7}Q@Tl%tMk5vA)5A46uw265*G&jUpU`wusq-Dj_2$}t}((bFt=EFn+?R%Vy#b++|5|9_rySRF~_kUJ;n;+8j?-J$hHs8)|soHW& zRd8Q%-(=5^A1m=F#+y?+K4E#rvnofPGBoTw6sP^t%H_X4w6=gZp~gCuh6P+nA)bC# z+N}`X-6ni%Ao2KG0QOwQ@&?CTyP>xNjQ6|HRzJR>it>bJuV`a-wnGc8SM})Vs2$WM zkI$}YVdTFNx(>dg;J)=AKZ+4M*PCb0z7^92If3%>)D*sa?{fLzbkVE8U_4QJfMgd~ zP5FU9#8VljCEonCH8od;KMVQ#`W9QulE&jO!5lFUN)UXlv(R1c#b$4#Yly(haNdZ` zQK)*~4e}IbYU1?MrXMv^Ok(!c{?@=@gE^8QrSPMS(<=|}fyf$lv;)_%5 zrll%}un1RR1C~<)9Dyro>F7*oJYh~S=a9RoB+7LU<}B3MTF5oq87Q!TA&5ij4^rcNdr3L70?V!_2r- zjy#X->M^UCnVE670F-;|YdiuNlN-cCcCs>*j#Tu6mEgTO?s#{@?(a{RGfR%}TDxo| z%1H_dgNu9k`D0?ZR+gLL5%YI-b@i>FrD10W#OTJw%{>=!M1d!(pQ31H4yzv2d9IJ7 zmZ5=>ksDaShBgzrlekkT;0XdBRir z0wwhE`x1mQ{x>P0Re&7-655YGU6#U-~p?0 zf_2{XX0Bjo#*N$?X6O#YFjAmiw-A@uLGyfNS^3}G$SjeJsTV%te4^xgL53 zZ(23i@LDV3!#=@PGZ*yq9@!+mVwII=!r=VMh@N-p=bWOzr*9jo%Obu|_~-DUs%bL08Zv%5sP(iYSMK zGRPsGD@F*CC^K9kSpgMI7DUvzDu@csu4q)u2+aKXdfcHn7S{^&R8RNo*Z==@|M%lK z9)2b!9Q)_a=y04a$8ihLvBWu|v-J;Y`uh5cZS&^MGt|-1iL|%3SN{;Y>D)7W_Uw;6 zJw30?oH;W}OH1pso}M0qzP>)|2M->oOefA{yynlJueE&n^1K-{W)z#5nH8KkapF{a zdpk)a60Mw^oIkdhy9Uc7uXP~mOGVR^FcULDS zCT24ne}Dg}!cCx0PfzDGH8s^&ty(3rwzht)qM|Zy!-frm7cX8UoWT)!~4LWXZYjgVtlZQZ&xb@S%U zj|~kCdA7E;X@?FSl3l%em7sTOY;2^esw!H%cyTF+e!&QFE;dYC1OFN)C#PavU0s(Q zJ9ebtSx%!G?2xWqyGA>A?)=u>-8~4vTGIv&MjJP7Y+bf&S%$W@_Km%J_bTo?3Vvid zJUmRWi+&Rm6XWT`udJ-Bv$V84!RPb4va_@4^5x4TnBym~6$!8nrxU-kvojO%CvDrd zZPJvK6zcBo9uGV6TwY#&57;UgxqyHG#W8$+eW!GsrluwbKR>^$l9CeIw{IWy_V$k8 zji02YrQK#=ckkZaf>@F7=H{k1C2$bq9@-uZ}sbC^P&$rF11{5i?xW89E#-n>a+VPP@{2ZvY&W^Qiox@OH9DW?3XfHPX;pd1!+ 
z+uGWwwzih)>+7kbql2D4eM&N!OmY9Bq9TI--$tHd>0Z$gH>B`KFXi+9JL>7_xwANX z=gu9fuC69#3+K+AQ=9?zNzgfkhlkVLxpU2cp#nQpKm`T{7FbwVl(2`X=)lOxNR!9o ztwQeWW3etXGm}zNQ&|q8GiT1cfnHu@WMt$jKu7)U+O^BV+uNHWBO}?l6wScEKso){ z5p~a)J&>)=rcImj;^X5L_eXB%f?T#EHm+bBVecdRk(W96gBteqLT3`(KYZln2TYXm zg74`f53hjEKZVY9gMxw_V`5@h&KqM_vu4ejz!f&g!#VQf;^OGSg$uND<;s^>`}Lwl zi#(vK6{x3B9lp2eBd`1u`lQm*QmU(~d$oW6{w@}WP>-d9QX4$%7zfmclQCM6oSaOk z*{GtTf*>P`ii-LHdGuL9LBai{OP9Vg7Eoh-3=h;aM-Lu6NXS71IZ{HxSHxm4{G9Ug z^4bv(qCg)k5D4Du;PA=rM~@%TpDNDEL&HLwEUhfFptC5{H2Z-04=XFH065VI_K$kh zhD~=e8$;};>#fan|GQgs{rbP?{P`*>gulV|SU!5@<>i$F;$Jcb@HaLnno=hk8X9_G z@j(xNTN3%oBh|6W`8W2>aq)3a&1_7g_09CWEbSM}hn^MngOV0G$0#^Bm`*u8_blIhVVhU^%6gxJ zt2YG{dwlL&$vz3nQZ!N=!F`rRRV^X*2W zR~-=BAm!%fvTn_YN8ew`?30rMF0f|xf<#HQYQQCDbH5tLz`#qc#vaE$wZ*w20>#3Q zlzWvUYTq*WJE8WX8_mb)zqe9ICha5H%g%T4UM3|P`VZtVL|Iu`(mi|jSSV?KY7DLs z+WCkakD>p;U+i+kTja1CFE9UY0tsaC3Ux2Bn&;vD4jSsKrnmR==}p@gH2kcI zq_=jGv~xS%>A1pTVlTdpCJijxTB5iBv8zF#$j!*7+QH4Y+9}mf?BoRe)j{IbvnPOK z0s3!XF*hkGnO;9U54r!Dh9tElfBTH&QV9(`Igk5)PvYt-hTjGfwV&cMdd*MdY#UrF zv|?)^weA*>MF{Xe8NEk2Z)j+k;o{=*edhiQ8n{IYl!;S0=1QQNJz-<5tH^Jza=96+2GsogTI9X z#jaX@BA1wm1|Ln>-<_U=qQSUmhhvcf=;L|1O p;=kZ`sY+EFc~V{@H;hry1sM5_Jh`w@RgU*^!3(}zAXO#7{{UA#lYsyL literal 0 HcmV?d00001 diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..16ad669 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,22 @@ +.. include:: readme.rst + +.. toctree:: + :hidden: + :maxdepth: 2 + + Overview + +.. toctree:: + :caption: Resources + :hidden: + + API Reference + contributing + authors + history + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..d2d6cc3 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=zephyr + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/readme.rst b/docs/readme.rst new file mode 100644 index 0000000..97d4958 --- /dev/null +++ b/docs/readme.rst @@ -0,0 +1 @@ +.. 
mdinclude:: ../README.md diff --git a/notebooks/data/alarms.csv b/notebooks/data/alarms.csv new file mode 100644 index 0000000..0de2b2c --- /dev/null +++ b/notebooks/data/alarms.csv @@ -0,0 +1,3 @@ +COD_ELEMENT,DAT_START,DAT_END,IND_DURATION,COD_ALARM,COD_ALARM_INT,DES_NAME,DES_TITLE,COD_STATUS +0,2022-01-01 00:00:00,2022-01-01 13:00:00,0.5417,12345,12345,Alarm1,Description of alarm 1,Alarm1 +0,2022-03-01 11:12:13,2022-03-02 11:12:13,1.0,98754,98754,Alarm2,Description of alarm 2,Alarm2 diff --git a/notebooks/data/notifications.csv b/notebooks/data/notifications.csv new file mode 100644 index 0000000..93fae5f --- /dev/null +++ b/notebooks/data/notifications.csv @@ -0,0 +1,3 @@ +COD_ELEMENT,COD_ORDER,IND_QUANTITY,COD_MATERIAL_SAP,DAT_POSTING,COD_MAT_DOC,DES_MEDIUM,COD_NOTIF,DAT_MALF_START,DAT_MALF_END,IND_BREAKDOWN_DUR,FUNCT_LOC_DES,COD_ALARM,DES_ALARM +0,12345,1,36052411,2022-01-01,77889900,Description of notification 1,567890123,2021-12-25 18:07:10,2022-01-08 11:07:17,14.1378,location description 1,12345,Alarm description +0,67890,-20,67890,2022-03-01,12345690,Description of notification 2,32109877,2022-02-28 06:04:00,2022-03-01 17:00:13,2.4792,location description 2,12345,Alarm description diff --git a/notebooks/data/pidata.csv b/notebooks/data/pidata.csv new file mode 100644 index 0000000..644a3f7 --- /dev/null +++ b/notebooks/data/pidata.csv @@ -0,0 +1,5 @@ +time,turbine,tag,val +2022-01-02 13:21:01,0,T0.val1,9872.0 +2022-01-02 13:21:01,0,T0.val2,10.0 +2022-03-08 13:21:01,0,T0.val1,559.0 +2022-03-08 13:21:01,0,T0.val2,-7.0 diff --git a/notebooks/data/scada.csv b/notebooks/data/scada.csv new file mode 100644 index 0000000..9aeb063 --- /dev/null +++ b/notebooks/data/scada.csv @@ -0,0 +1,3 @@ +TIMESTAMP,COD_ELEMENT,val1,val2 +2022-01-02 13:21:01,0,1002.0,-98.7 +2022-03-08 13:21:01,0,56.8,1004.2 diff --git a/notebooks/data/stoppages.csv b/notebooks/data/stoppages.csv new file mode 100644 index 0000000..31fc221 --- /dev/null +++ b/notebooks/data/stoppages.csv @@ -0,0 +1,3 @@ +COD_ELEMENT,DAT_START,DAT_END,DES_WO_NAME,DES_COMMENTS,COD_WO,IND_DURATION,IND_LOST_GEN,COD_ALARM,COD_CAUSE,COD_INCIDENCE,COD_ORIGIN,COD_STATUS,COD_CODE,DES_DESCRIPTION +0,2022-01-01 00:00:00,2022-01-08 11:07:17,stoppage name 1,description of stoppage 1,12345,7.4642,45678.0,12345,32,987654,6,STOP,ABC,Description 1 +0,2022-03-01 11:12:13,2022-03-01 17:00:13,stoppage name 2,description of stoppage 2,67890,0.2417,123.0,12345,48,123450,23,PAUSE,XYZ,Description 2 diff --git a/notebooks/data/turbines.csv b/notebooks/data/turbines.csv new file mode 100644 index 0000000..dfd4963 --- /dev/null +++ b/notebooks/data/turbines.csv @@ -0,0 +1,2 @@ +COD_ELEMENT,TURBINE_PI_ID,TURBINE_LOCAL_ID,TURBINE_SAP_COD,DES_CORE_ELEMENT,SITE,DES_CORE_PLANT,COD_PLANT_SAP,PI_COLLECTOR_SITE_NAME,PI_LOCAL_SITE_NAME +0,TA00,A0,LOC000,T00,LOCATION,LOC,ABC,LOC0,LOC0 diff --git a/notebooks/data/work_orders.csv b/notebooks/data/work_orders.csv new file mode 100644 index 0000000..e37195a --- /dev/null +++ b/notebooks/data/work_orders.csv @@ -0,0 +1,3 @@ +COD_ELEMENT,COD_ORDER,DAT_BASIC_START,DAT_BASIC_END,COD_EQUIPMENT,COD_MAINT_PLANT,COD_MAINT_ACT_TYPE,COD_CREATED_BY,COD_ORDER_TYPE,DAT_REFERENCE,DAT_CREATED_ON,DAT_VALID_END,DAT_VALID_START,COD_SYSTEM_STAT,DES_LONG,COD_FUNCT_LOC,COD_NOTIF_OBJ,COD_MAINT_ITEM,DES_MEDIUM,DES_FUNCT_LOC +0,12345,2022-01-01,2022-01-09,98765,ABC,XYZ,A1234,A,2022-01-01,2022-03-01,,,ABC XYZ,description of work order,!12345,00112233,,short description,XYZ1234 
+0,67890,2022-03-01,2022-03-02,98765,ABC,XYZ,B6789,B,2022-03-01,2022-04-18,,,LMN OPQ,description of work order,?09876,00998877,019283,short description,ABC9876 diff --git a/notebooks/feature_engineering.ipynb b/notebooks/feature_engineering.ipynb new file mode 100644 index 0000000..dcc744b --- /dev/null +++ b/notebooks/feature_engineering.ipynb @@ -0,0 +1,651 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8ff6ceb0-9b28-45fa-86c4-c2a16df4a4fc", + "metadata": {}, + "source": [ + "# Feature Engineering\n", + "In this tutorial, we will show you how to use zephyr_ml to create EntitySets, generate label times, and do automated feature engineering. This tutorial assumes you have a folder with the mostly pre-processed data in separate CSVs. If necessary, please update the steps and paths below.\n", + "\n", + "## 1) Create EntitySet\n", + "zephyr_ml has strict assumptions about the data passed into its `create_pidata_entityset` and `create_scada_entityset` functions. It's the user's responsibility to apply the necessary pre-processing steps to get data into a format acceptable for zephyr_ml.\n", + "\n", + "For example, the demo PI data needs to be converted to a tabular format instead of a `tag` `value` format. The `turbine` column also needs to be renamed to `COD_ELEMENT` to match the rest of the data." + ] + },
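The entity names are validated strictly; as a quick illustration of what happens when a required entity is missing, consider the following sketch (it assumes the `data` dictionary that is built later in this notebook):

```python
# Dropping a required entity raises a descriptive error (hypothetical run).
incomplete = {k: v for k, v in data.items() if k != 'notifications'}
create_pidata_entityset(incomplete)
# ValueError: Missing dataframes for entities notifications.
```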
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeturbinetagval
02022-01-02 13:21:010T0.val19872.0
12022-01-02 13:21:010T0.val210.0
22022-03-08 13:21:010T0.val1559.0
32022-03-08 13:21:010T0.val2-7.0
\n", + "
" + ], + "text/plain": [ + " time turbine tag val\n", + "0 2022-01-02 13:21:01 0 T0.val1 9872.0\n", + "1 2022-01-02 13:21:01 0 T0.val2 10.0\n", + "2 2022-03-08 13:21:01 0 T0.val1 559.0\n", + "3 2022-03-08 13:21:01 0 T0.val2 -7.0" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from os import path\n", + "\n", + "data_path = 'data'\n", + "\n", + "pidata_df = pd.read_csv(path.join(data_path, 'pidata.csv'))\n", + "pidata_df" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17cd61b9-096b-4359-93eb-b1c63b99e79d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tagtimeCOD_ELEMENTval1val2
02022-01-02 13:21:0109872.010.0
12022-03-08 13:21:010559.0-7.0
\n", + "
" + ], + "text/plain": [ + "tag time COD_ELEMENT val1 val2\n", + "0 2022-01-02 13:21:01 0 9872.0 10.0\n", + "1 2022-03-08 13:21:01 0 559.0 -7.0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pidata_df['tag'] = pidata_df['tag'].apply(lambda x: '.'.join(x.split('.')[1:]))\n", + "pidata_df = pd.pivot_table(pidata_df, index=['time', 'turbine'],\n", + " columns=['tag']).droplevel(0, axis=1).reset_index()\n", + "pidata_df.rename(columns={'turbine': 'COD_ELEMENT'}, inplace=True)\n", + "pidata_df" + ] + }, + { + "cell_type": "markdown", + "id": "d6828251", + "metadata": {}, + "source": [ + "Once the necessary preprocessing steps have been done, the dataframes can be passed to the respective create EntitySet function. The keys used for the data dictionary are significant, and must match the ones used in this example. Default column names and entity keywork arguments can be overwritten by passing in a dictionary mapping entity names to keyword arguments for adding the dataframe to the EntitySet." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7bfd56a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: PI data\n", + " DataFrames:\n", + " turbines [Rows: 1, Columns: 10]\n", + " alarms [Rows: 2, Columns: 10]\n", + " stoppages [Rows: 2, Columns: 16]\n", + " work_orders [Rows: 2, Columns: 20]\n", + " notifications [Rows: 2, Columns: 15]\n", + " pidata [Rows: 2, Columns: 5]\n", + " Relationships:\n", + " alarms.COD_ELEMENT -> turbines.COD_ELEMENT\n", + " stoppages.COD_ELEMENT -> turbines.COD_ELEMENT\n", + " work_orders.COD_ELEMENT -> turbines.COD_ELEMENT\n", + " pidata.COD_ELEMENT -> turbines.COD_ELEMENT\n", + " notifications.COD_ORDER -> work_orders.COD_ORDER" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from zephyr_ml import create_pidata_entityset\n", + "\n", + "data = {\n", + " 'turbines': pd.read_csv(path.join(data_path, 'turbines.csv')),\n", + " 'alarms': pd.read_csv(path.join(data_path, 'alarms.csv')),\n", + " 'stoppages': pd.read_csv(path.join(data_path, 'stoppages.csv')),\n", + " 'work_orders': pd.read_csv(path.join(data_path, 'work_orders.csv')),\n", + " 'notifications': pd.read_csv(path.join(data_path, 'notifications.csv')),\n", + " 'pidata': pidata_df\n", + "}\n", + "\n", + "pidata_es = create_pidata_entityset(data)\n", + "pidata_es" + ] + }, + { + "cell_type": "markdown", + "id": "4746e7e1", + "metadata": {}, + "source": [ + "## 2) Generating Labels and Cutoff Times\n", + "The `DataLabeler` is used to generate labels and label times for an EntitySet. It is instantiated with a labeling function, and labels can be generated by calling the `generate_label_times` method. The list of available labeling functions can be found using `zephyr_ml.labeling.get_labeling_functions()`. Custom labeling functions can also be created, provided they follow the expected format of returning the deserialized dataframe, the actual labeling function to use for the dataslice, and additional metadata." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e0ee16eb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. 
+ { + "cell_type": "code", + "execution_count": 4, + "id": "e0ee16eb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n" + ] + }, + { + "data": { + "text/html": [ + "(HTML rendering of the label times omitted; see the text/plain output below.)" + ], + "text/plain": [ + " COD_ELEMENT time label\n", + "0 0 2022-01-01 45801.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from zephyr_ml import DataLabeler, labeling\n", + "\n", + "data_labeler = DataLabeler(labeling.total_power_loss)\n", + "\n", + "label_times, _ = data_labeler.generate_label_times(pidata_es)\n", + "label_times" + ] + },
" + ], + "text/plain": [ + " COD_ELEMENT time label\n", + "0 0 2022-01-01 45801.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from zephyr_ml import DataLabeler, labeling\n", + "\n", + "data_labeler = DataLabeler(labeling.total_power_loss)\n", + "\n", + "label_times, _ = data_labeler.generate_label_times(pidata_es)\n", + "label_times" + ] + }, + { + "cell_type": "markdown", + "id": "5aacf99b", + "metadata": {}, + "source": [ + "## 3) Feature Engineering with Featuretools\n", + "Using EntitySets and LabelTimes allows us to easily use Featuretools for automatic feature generation. For example, we can set interesting categorical values in our EntitySet and use them to generate aggregation features grouped by those interesting values. We can also set which primitives we want to use and control which columns and entities those primitives can be applied to. Featuretools can also use label times as cutoff times, ensuring that data after the label times is not used in feature generation. \n", + "\n", + "For additonal help using Featuretools, please see the documentation: https://featuretools.alteryx.com/en/stable/index.html" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ee020300", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. 
+ "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n", + "/Users/swampfox/.pyenv/versions/3.8.12/envs/test/lib/python3.8/site-packages/composeml/label_times/object.py:55: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.\n", + " self.target_types = pd.Series(self.target_types)\n" + ] + }, + { + "data": { + "text/plain": [ + "[list of <Feature: ...> objects returned by ft.dfs; the individual feature names were stripped when this notebook was rendered]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import featuretools as ft\n", + "\n", + "interesting_alarms = ['Alarm1', 'Alarm2']\n", + "pidata_es.add_interesting_values(dataframe_name='alarms', values={'DES_NAME': interesting_alarms})\n", + "\n", + "feature_matrix, features = ft.dfs(\n", + " entityset=pidata_es,\n", + " target_dataframe_name='turbines',\n", + " cutoff_time_in_index=True,\n", + " cutoff_time=label_times,\n", + " where_primitives=['count', 'sum'],\n", + " agg_primitives=['count', 'min', 'max', 'sum'],\n", + " trans_primitives=['num_words'],\n", + " ignore_dataframes=['notifications', 'work_orders'] \n", + ")\n", + "\n", + "features" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bdce0acf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TURBINE_PI_IDTURBINE_LOCAL_IDTURBINE_SAP_CODDES_CORE_ELEMENTSITEDES_CORE_PLANTCOD_PLANT_SAPPI_COLLECTOR_SITE_NAMEPI_LOCAL_SITE_NAMECOUNT(alarms)...MAX(stoppages.NUM_WORDS(DES_COMMENTS))MAX(stoppages.NUM_WORDS(DES_DESCRIPTION))MAX(stoppages.NUM_WORDS(DES_WO_NAME))MIN(stoppages.NUM_WORDS(DES_COMMENTS))MIN(stoppages.NUM_WORDS(DES_DESCRIPTION))MIN(stoppages.NUM_WORDS(DES_WO_NAME))SUM(stoppages.NUM_WORDS(DES_COMMENTS))SUM(stoppages.NUM_WORDS(DES_DESCRIPTION))SUM(stoppages.NUM_WORDS(DES_WO_NAME))label
COD_ELEMENTtime
02022-01-01TA00A0LOC000T00LOCATIONLOCABCLOC0LOC01...4.02.03.04.02.03.04.02.03.045801.0
\n", + "

1 rows × 44 columns

\n", + "
" + ], + "text/plain": [ + " TURBINE_PI_ID TURBINE_LOCAL_ID TURBINE_SAP_COD \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 TA00 A0 LOC000 \n", + "\n", + " DES_CORE_ELEMENT SITE DES_CORE_PLANT \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 T00 LOCATION LOC \n", + "\n", + " COD_PLANT_SAP PI_COLLECTOR_SITE_NAME \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 ABC LOC0 \n", + "\n", + " PI_LOCAL_SITE_NAME COUNT(alarms) ... \\\n", + "COD_ELEMENT time ... \n", + "0 2022-01-01 LOC0 1 ... \n", + "\n", + " MAX(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 4.0 \n", + "\n", + " MAX(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 2.0 \n", + "\n", + " MAX(stoppages.NUM_WORDS(DES_WO_NAME)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 3.0 \n", + "\n", + " MIN(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 4.0 \n", + "\n", + " MIN(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 2.0 \n", + "\n", + " MIN(stoppages.NUM_WORDS(DES_WO_NAME)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 3.0 \n", + "\n", + " SUM(stoppages.NUM_WORDS(DES_COMMENTS)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 4.0 \n", + "\n", + " SUM(stoppages.NUM_WORDS(DES_DESCRIPTION)) \\\n", + "COD_ELEMENT time \n", + "0 2022-01-01 2.0 \n", + "\n", + " SUM(stoppages.NUM_WORDS(DES_WO_NAME)) label \n", + "COD_ELEMENT time \n", + "0 2022-01-01 3.0 45801.0 \n", + "\n", + "[1 rows x 44 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b02986d9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "191a123a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "961af0ef", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea94368", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "2d6fabd7bf745a21519616ebdce3b2479184204dadf576aa19f086ff78438203" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..c1e566a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,49 @@ +[bumpversion] +current_version = 0.2.4.dev0 +commit = True +tag = True +parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..c1e566a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,49 @@ +[bumpversion] +current_version = 0.2.4.dev0 +commit = True +tag = True +parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))? +serialize = + {major}.{minor}.{patch}.{release}{candidate} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = release +first_value = dev +values = + dev + release + +[bumpversion:part:candidate] + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:zephyr_ml/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +max-line-length = 99 +exclude = docs, .tox, .git, __pycache__, .ipynb_checkpoints +per-file-ignores = __init__.py:F401 +ignore = # keep empty to prevent default ignores + +[isort] +include_trailing_comment = True +line_length = 99 +lines_between_types = 0 +multi_line_output = 4 +not_skip = __init__.py +use_parentheses = True + +[aliases] +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..776f322 --- /dev/null +++ b/setup.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""The setup script.""" + +from setuptools import setup, find_packages + +try: + with open('README.md') as readme_file: + readme = readme_file.read() +except IOError: + readme = '' + +try: + with open('HISTORY.md') as history_file: + history = history_file.read() +except IOError: + history = '' + +install_requires = [ + 'numpy>=1.16.0,<1.23.0', + 'pandas>=1,<2', + 'composeml>=0.1.6,<1.0', + 'featuretools>=1.0.0,<2.0.0', + 'jupyter==1.0.0', +] + +setup_requires = [ + 'pytest-runner>=2.11.1', +] + +tests_require = [ + 'pytest>=3.4.2', + 'pytest-cov>=2.6.0', + 'jupyter>=1.0.0,<2', + 'rundoc>=0.4.3,<0.5', +] + +development_requires = [ + # general + 'bumpversion>=0.5.3,<0.6', + 'pip>=9.0.1', + 'watchdog>=0.8.3,<0.11', + + # docs + 'm2r>=0.2.0,<0.3', + 'nbsphinx>=0.5.0,<0.7', + 'Sphinx>=1.7.1,<3', + 'sphinx_rtd_theme>=0.2.4,<0.5', + 'autodocsumm>=0.1.10', + 'Jinja2<3.1', + + # style check + 'flake8>=3.7.7,<4', + 'isort>=4.3.4,<5', + + # fix style issues + 'autoflake>=1.1,<2', + 'autopep8>=1.4.3,<2', + + # distribute on PyPI + 'twine>=1.10.0,<4', + 'wheel>=0.30.0', + + # Advanced testing + 'coverage>=4.5.1,<6', + 'tox>=2.9.1,<4', + + # Jupyter + 'jupyter>=1.0.0', +] + +setup( + author='MIT Data To AI Lab', + author_email='dai-lab@mit.edu', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + description='Prediction engineering methods for Draco.', + extras_require={ + 'test': tests_require, + 'dev': development_requires + tests_require, + }, + install_requires=install_requires, + long_description=readme + '\n\n' + history, + long_description_content_type='text/markdown', + include_package_data=True, + keywords='zephyr Draco Prediction Engineering', + name='zephyr-ml', + packages=find_packages(include=['zephyr_ml', 'zephyr_ml.*']), + python_requires='>=3.7,<3.9', + setup_requires=setup_requires, + test_suite='tests', + tests_require=tests_require, + url='https://github.com/sintel-dev/zephyr', + version='0.2.4.dev0', + zip_safe=False, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/labeling/__init__.py b/tests/labeling/__init__.py new file mode 100644 index 0000000..e69de29 diff --git
a/tests/labeling/test_data_labeler.py b/tests/labeling/test_data_labeler.py new file mode 100644 index 0000000..a0176ff --- /dev/null +++ b/tests/labeling/test_data_labeler.py @@ -0,0 +1,21 @@ +from unittest.mock import Mock + +import featuretools as ft + +from zephyr_ml.labeling import DataLabeler + + +class TestDataLabeler: + + @classmethod + def setup_class(cls): + cls.function = lambda x: x + cls.es = Mock(autospec=ft.EntitySet) + cls.subset = None + cls.verbose = False + + def test_data_labeler(self): + def function(x): + return x + + DataLabeler(function) diff --git a/tests/labeling/test_helpers.py b/tests/labeling/test_helpers.py new file mode 100644 index 0000000..080c312 --- /dev/null +++ b/tests/labeling/test_helpers.py @@ -0,0 +1,151 @@ +import featuretools as ft +import numpy as np +import pandas as pd + +from zephyr_ml.labeling.utils import ( + aggregate_by_column, categorical_presence, denormalize, greater_than, keyword_in_text, + merge_binary_labeling_functions, total_duration) + + +def test_aggregate_by_column(): + data = pd.DataFrame({ + 'column': [1, 2, 3] + }) + assert 2 == aggregate_by_column('column', np.mean)(data) + + +def test_merge_labeling_and_true(): + functions = [ + lambda df: True, + lambda df: True + ] + assert 1 == merge_binary_labeling_functions(functions, and_connected=True)(pd.DataFrame()) + + +def test_merge_labeling_and_false(): + functions = [ + lambda df: True, + lambda df: False + ] + assert 0 == merge_binary_labeling_functions(functions, and_connected=True)(pd.DataFrame()) + + +def test_merge_labeling_or_true(): + functions = [ + lambda df: False, + lambda df: True + ] + assert 1 == merge_binary_labeling_functions(functions, and_connected=False)(pd.DataFrame()) + + +def test_merge_labeling_or_false(): + functions = [ + lambda df: False, + lambda df: False + ] + assert 0 == merge_binary_labeling_functions(functions, and_connected=False)(pd.DataFrame()) + + +def test_categorical_presence_true(): + data = pd.DataFrame({ + 'column': ['A', 'B', 'C'] + }) + function = categorical_presence('column', 'A') + assert 1 == function(data) + + +def test_categorical_presence_false(): + data = pd.DataFrame({ + 'column': ['A', 'B', 'C'] + }) + function = categorical_presence('column', 'D') + assert 0 == function(data) + + +def test_keyword_in_text_true(): + data = pd.DataFrame({ + 'A': ['this is a comment'], + 'B': ['this is a description'] + }) + function = keyword_in_text('description', columns=['A', 'B']) + assert 1 == function(data) + + +def test_keyword_in_text_false(): + data = pd.DataFrame({ + 'A': ['this is a comment'], + 'B': ['this is a description'] + }) + function = keyword_in_text('text', columns=['A', 'B']) + assert 0 == function(data) + + +def test_keyword_in_unknown_column(): + data = pd.DataFrame({ + 'A': ['this is a comment'], + 'B': ['this is a description'] + }) + function = keyword_in_text('text', columns=['A', 'B', 'C']) + assert 0 == function(data) + + +def test_greater_than_true(): + data = pd.DataFrame({ + 'A': [1, 2, 3, 4], + }) + function = greater_than('A', 3) + assert 1 == function(data) + + +def test_greater_than_false(): + data = pd.DataFrame({ + 'A': [1, 2, 3, 4], + }) + function = greater_than('A', 5) + assert 0 == function(data) + + +def test_total_duration(): + data = pd.DataFrame({ + 'Start': [pd.to_datetime('2010-01-01T10:00'), pd.to_datetime('2010-01-01T14:00')], + 'End': [pd.to_datetime('2010-01-01T11:00'), pd.to_datetime('2010-01-01T16:00')] + }) + assert 10800 == total_duration('Start', 'End')(data) + + +def 
test_total_duration_nan(): + data = pd.DataFrame({ + 'Start': [pd.to_datetime('2010-01-01T10:00'), np.nan], + 'End': [pd.to_datetime('2010-01-01T11:00'), pd.to_datetime('2010-01-01T18:00')] + }) + assert 3600 == total_duration('Start', 'End')(data) + + +def test_denormalize(): + child_data = pd.DataFrame({ + 'child_id': [0, 1, 2], + 'parent_id': ['a', 'a', 'b'], + 'child_value': [100, -5, 25] + }) + parent_data = pd.DataFrame({ + 'parent_id': ['a', 'b'], + 'parent_val': ['x', 'y'] + }) + + es = ft.EntitySet(dataframes={'parent': (parent_data, 'parent_id'), + 'child': (child_data, 'child_id')}, + relationships=[('parent', 'parent_id', 'child', 'parent_id')]) + + expected = pd.DataFrame({ + 'child_id': [0, 1, 2], + 'parent_id': ['a', 'a', 'b'], + 'parent_val': ['x', 'x', 'y'], + 'child_value': [100, -5, 25] + }) + expected.ww.init() + + actual = denormalize(es, ['child', 'parent']) + pd.testing.assert_frame_equal(expected, actual, check_like=True) + + actual2 = denormalize(es, ['parent', 'child']) + pd.testing.assert_frame_equal(expected, actual2, check_like=True) diff --git a/tests/test___init__.py b/tests/test___init__.py new file mode 100644 index 0000000..72b810e --- /dev/null +++ b/tests/test___init__.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for zephyr_ml package.""" +from datetime import datetime, time + +import numpy as np +import pandas as pd + +STOPPAGES_DATA = { + 'stoppage_id': [0, 1], + 'WTG': ['A001', 'A001'], + 'Start': [pd.Timestamp('2019-08-01 00:00:00.123000'), + pd.Timestamp('2019-09-01 00:00:00.456000')], + 'End': [pd.Timestamp('2019-08-23 23:59:59.123000'), + pd.Timestamp('2019-09-19 01:02:03.456000')], + 'Duration (hh:mi:ss)': [datetime(1900, 1, 30, 3, 4, 5), + time(14, 15, 16)], + 'Status': [np.nan, np.nan], + 'Cause': ['Cause', 'Cause'], + 'Origin': [np.nan, np.nan], + 'Class': [np.nan, np.nan], + 'Alarm': [np.nan, np.nan], + 'Est. Lost Gen (kWh)': [1234.0, 45678.0], + 'Comments': ['Yaw Error Stoppage', 'Comment'], + 'SAP W. Order': [np.nan, np.nan], + 'SAP W. 
Order Name': [np.nan, np.nan], + 'Alarms CORE': [' ', ' '] +} + +WORK_ORDERS_DATA = { + 'Order reference timestamp': [pd.Timestamp('2019-07-27 23:59:59'), + pd.Timestamp('2019-08-02 13:21:01')], + 'Order basic start timestamp': [pd.Timestamp('2019-07-27 12:09:41'), + pd.Timestamp('2019-08-02 03:04:56')], + 'Order basic finish timestamp': [pd.Timestamp('2019-08-04 09:08:13'), + pd.Timestamp('2019-08-28 01:28:23')], + 'Order': [123456789.0, 987654321.0], + 'Order created on date': [pd.Timestamp('2019-07-27 00:00:00'), + pd.Timestamp('2019-08-02 00:00:00')], + 'Order actual release date': [pd.Timestamp('2019-07-27 00:00:00'), + pd.Timestamp('2019-08-02 00:00:00')], + 'Order actual start date': [pd.Timestamp(np.nan), pd.Timestamp('2019-04-21 00:00:00')], + 'Order actual finish date': [pd.Timestamp(np.nan), pd.Timestamp(np.nan)], + 'Maintenance activity type': ['MPI', 'MPI'], + 'Maintenance activity description': ['Yaw Maintenance', 'Description'], + 'Functional location': ['1234A001123', '2345A001ABC'], + 'Functional location description': ['Func Description 1', 'Func Description 2'], + 'Notification': [1000101115.0, 1000201113.0] +} + +NOTIFICATIONS_DATA = { + 'Notification timestamp': [pd.Timestamp('2019-07-28 02:12:34'), + pd.Timestamp('2019-08-02 14:12:34')], + 'Malfunction start timestamp': [pd.Timestamp('2019-07-29 00:01:23'), + pd.Timestamp('2019-08-03 00:04:56')], + 'Malfunction end timestamp': [pd.Timestamp('2019-08-03 11:44:23'), + pd.Timestamp('2019-08-20 10:22:16')], + 'Notification created timestamp': [pd.Timestamp('2019-07-30 03:12:34'), + pd.Timestamp('2019-08-04 15:12:34')], + 'Notification changed timestamp': [pd.Timestamp('2019-09-03 05:06:07'), + pd.Timestamp('2019-10-20 05:06:07')], + 'Required start timestamp': [pd.Timestamp('2019-07-28 02:27:06'), + pd.Timestamp('2019-08-02 14:30:44')], + 'Required end timestamp': [pd.Timestamp('2019-07-30 10:19:12'), + pd.Timestamp('2019-08-04 11:45:14')], + 'Completion timestamp': [pd.Timestamp('2019-09-03 04:05:06'), + pd.Timestamp('2019-10-20 04:05:06')], + 'Order': [123456789.0, 987654321.0], + 'Notification': [1000101115.0, 1000201113.0], + 'Breakdown (X=yes)': ['X', 'nan'], + 'Breakdown duration (hrs)': [12.0, 13.0], + 'Technical object description': ['nan', 'nan'], + 'Material description': ['nan', 'nan'], + 'Assembly description': ['nan', 'nan'], + 'Long text available (X=yes)': ['nan', 'nan'], + 'Subject long text': ['nan', 'nan'], + 'Equipment number': [np.nan, np.nan], + 'Assembly number': [np.nan, np.nan], + 'Priority code': ['P', 'P'], + 'Priority': ['Priority', 'Priority'], + 'Serial number': ['nan', 'nan'], + 'Material number': [np.nan, np.nan], + 'Causing element description': ['nan', 'nan'], + 'Fault mode description': ['nan', 'nan'], + 'Cause of malfunction description': ['nan', 'nan'], + 'Subject description': ['nan', 'Description'], + 'Functional location': ['1234A001123', '2345A001ABC'], + 'Functional location description': ['Func Description 1', 'Func Description 2'], +} + + +def merge_work_orders_notifications_data(): + """Helper function to merge work orders and notifications data.""" + changed_wo_data = WORK_ORDERS_DATA.copy() + changed_wo_data['WTG'] = ['A001', 'A001'] + changed_notif_data = NOTIFICATIONS_DATA.copy() + # matching the output of the merge + changed_notif_data['Functional location_y'] = changed_notif_data.pop('Functional location') + changed_notif_data['Functional location description_y'] = ( + changed_notif_data.pop('Functional location description')) + # matching the notifications update + 
changed_wo_data.update(changed_notif_data) + return changed_wo_data + + +def merge_label_generation_data(): + expected_data = STOPPAGES_DATA.copy() + expected_data['stoppage_id'] = [0, 1] + expected_won = merge_work_orders_notifications_data() + + for key, value in expected_won.items(): + if key not in expected_data: + expected_data[key] = [expected_won[key][1], np.nan] + + return expected_data diff --git a/tests/test_entityset.py b/tests/test_entityset.py new file mode 100644 index 0000000..ae2dd45 --- /dev/null +++ b/tests/test_entityset.py @@ -0,0 +1,219 @@ +import pandas as pd +import pytest + +from zephyr_ml import create_pidata_entityset, create_scada_entityset + + +@pytest.fixture +def base_dfs(): + alarms_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-01 13:00:00'), pd.Timestamp('2022-03-02 11:12:13')], + 'IND_DURATION': [0.5417, 1.0], + 'COD_ALARM': [12345, 98754], + 'COD_ALARM_INT': [12345, 98754], + 'DES_NAME': ['Alarm1', 'Alarm2'], + 'DES_TITLE': ['Description of alarm 1', 'Description of alarm 2'], + }) + stoppages_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'DAT_START': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 11:12:13')], + 'DAT_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'DES_WO_NAME': ['stoppage name 1', 'stoppage name 2'], + 'DES_COMMENTS': ['description of stoppage 1', 'description of stoppage 2'], + 'COD_WO': [12345, 67890], + 'IND_DURATION': [7.4642, 0.2417], + 'IND_LOST_GEN': [45678.0, 123.0], + 'COD_ALARM': [12345, 12345], + 'COD_CAUSE': [32, 48], + 'COD_INCIDENCE': [987654, 123450], + 'COD_ORIGIN': [6, 23], + 'COD_STATUS': ['STOP', 'PAUSE'], + 'COD_CODE': ['ABC', 'XYZ'], + 'DES_DESCRIPTION': ['Description 1', 'Description 2'] + }) + notifications_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'COD_ORDER': [12345, 67890], + 'IND_QUANTITY': [1, -20], + 'COD_MATERIAL_SAP': [36052411, 67890], + 'DAT_POSTING': [pd.Timestamp('2022-01-01 00:00:00'), pd.Timestamp('2022-03-01 00:00:00')], + 'COD_MAT_DOC': [77889900, 12345690], + 'DES_MEDIUM': ['Description of notification 1', 'Description of notification 2'], + 'COD_NOTIF': [567890123, 32109877], + 'DAT_MALF_START': [pd.Timestamp('2021-12-25 18:07:10'), + pd.Timestamp('2022-02-28 06:04:00')], + 'DAT_MALF_END': [pd.Timestamp('2022-01-08 11:07:17'), pd.Timestamp('2022-03-01 17:00:13')], + 'IND_BREAKDOWN_DUR': [14.1378, 2.4792], + 'FUNCT_LOC_DES': ['location description 1', 'location description 2'], + 'COD_ALARM': [12345, 12345], + 'DES_ALARM': ['Alarm description', 'Alarm description'], + }) + work_orders_df = pd.DataFrame({ + 'COD_ELEMENT': [0, 0], + 'COD_ORDER': [12345, 67890], + 'DAT_BASIC_START': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_BASIC_END': [pd.Timestamp('2022-01-09 00:00:00'), + pd.Timestamp('2022-03-02 00:00:00')], + 'COD_EQUIPMENT': [98765, 98765], + 'COD_MAINT_PLANT': ['ABC', 'ABC'], + 'COD_MAINT_ACT_TYPE': ['XYZ', 'XYZ'], + 'COD_CREATED_BY': ['A1234', 'B6789'], + 'COD_ORDER_TYPE': ['A', 'B'], + 'DAT_REFERENCE': [pd.Timestamp('2022-01-01 00:00:00'), + pd.Timestamp('2022-03-01 00:00:00')], + 'DAT_CREATED_ON': [pd.Timestamp('2022-03-01 00:00:00'), + pd.Timestamp('2022-04-18 00:00:00')], + 'DAT_VALID_END': [pd.NaT, pd.NaT], + 'DAT_VALID_START': [pd.NaT, pd.NaT], + 'COD_SYSTEM_STAT': ['ABC XYZ', 'LMN OPQ'], + 'DES_LONG': ['description of work order', 'description of work order'], + 
'COD_FUNCT_LOC': ['!12345', '?09876'], + 'COD_NOTIF_OBJ': ['00112233', '00998877'], + 'COD_MAINT_ITEM': ['', '019283'], + 'DES_MEDIUM': ['short description', 'short description'], + 'DES_FUNCT_LOC': ['XYZ1234', 'ABC9876'], + }) + turbines_df = pd.DataFrame({ + 'COD_ELEMENT': [0], + 'TURBINE_PI_ID': ['TA00'], + 'TURBINE_LOCAL_ID': ['A0'], + 'TURBINE_SAP_COD': ['LOC000'], + 'DES_CORE_ELEMENT': ['T00'], + 'SITE': ['LOCATION'], + 'DES_CORE_PLANT': ['LOC'], + 'COD_PLANT_SAP': ['ABC'], + 'PI_COLLECTOR_SITE_NAME': ['LOC0'], + 'PI_LOCAL_SITE_NAME': ['LOC0'] + }) + return { + 'alarms': alarms_df, + 'stoppages': stoppages_df, + 'notifications': notifications_df, + 'work_orders': work_orders_df, + 'turbines': turbines_df + } + + +@pytest.fixture +def pidata_dfs(base_dfs): + pidata_df = pd.DataFrame({ + 'time': [pd.Timestamp('2022-01-02 13:21:01'), pd.Timestamp('2022-03-08 13:21:01')], + 'COD_ELEMENT': [0, 0], + 'val1': [9872.0, 559.0], + 'val2': [10.0, -7.0] + }) + return {**base_dfs, 'pidata': pidata_df} + + +@pytest.fixture +def scada_dfs(base_dfs): + scada_df = pd.DataFrame({ + 'TIMESTAMP': [pd.Timestamp('2022-01-02 13:21:01'), pd.Timestamp('2022-03-08 13:21:01')], + 'COD_ELEMENT': [0, 0], + 'val1': [1002.0, 56.8], + 'val2': [-98.7, 1004.2] + }) + return {**base_dfs, 'scada': scada_df} + + +def test_create_pidata_missing_entities(pidata_dfs): + error_msg = 'Missing dataframes for entities notifications.' + + pidata_dfs.pop('notifications') + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_create_scada_missing_entities(scada_dfs): + error_msg = 'Missing dataframes for entities notifications.' + + scada_dfs.pop('notifications') + with pytest.raises(ValueError, match=error_msg): + create_scada_entityset(scada_dfs) + + +def test_create_pidata_extra_entities(pidata_dfs): + error_msg = "Unrecognized entities extra included in dfs." + + pidata_dfs['extra'] = pd.DataFrame() + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_create_scada_extra_entities(scada_dfs): + error_msg = "Unrecognized entities extra included in dfs." 
+ + scada_dfs['extra'] = pd.DataFrame() + with pytest.raises(ValueError, match=error_msg): + create_scada_entityset(scada_dfs) + + +def test_missing_wo_index_columns(pidata_dfs): + error_msg = 'Expected column "COD_ORDER" missing from notifications entity' + pidata_dfs['notifications'].drop(columns=['COD_ORDER'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + error_msg = 'Expected index column "COD_ORDER" missing from work_orders entity' + pidata_dfs['work_orders'].drop(columns=['COD_ORDER'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_wo_index_column_nonunique(pidata_dfs): + error_msg = 'Expected index column "COD_ORDER" of work_orders entity is not unique' + + pidata_dfs['work_orders']['COD_ORDER'] = [12345, 12345] + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_missing_turbine_index_columns(pidata_dfs): + error_msg = 'Turbines index column "COD_ELEMENT" missing from data for alarms entity' + + pidata_dfs['alarms'].drop(columns='COD_ELEMENT', inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + error_msg = 'Expected index column "COD_ELEMENT" missing from turbines entity' + + pidata_dfs['turbines'].drop(columns=['COD_ELEMENT'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_missing_time_indices(pidata_dfs): + error_msg = 'Column "IND_LOST_GEN" associated with secondary time index ' + \ + '"DAT_END" missing from stoppages entity' + pidata_dfs['stoppages'].drop(columns=['IND_LOST_GEN'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + error_msg = 'Secondary time index "DAT_END" missing from stoppages entity' + pidata_dfs['stoppages'].drop(columns=['DAT_END'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + error_msg = 'Missing time index column "DAT_START" from stoppages entity' + pidata_dfs['stoppages'].drop(columns=['DAT_START'], inplace=True) + with pytest.raises(ValueError, match=error_msg): + create_pidata_entityset(pidata_dfs) + + +def test_default_create_pidata_entityset(pidata_dfs): + es = create_pidata_entityset(pidata_dfs) + + assert es.id == 'PI data' + assert set(es.dataframe_dict.keys()) == set( + ['alarms', 'turbines', 'stoppages', 'work_orders', 'notifications', 'pidata']) + + +def test_default_create_scada_entityset(scada_dfs): + es = create_scada_entityset(scada_dfs) + + assert es.id == 'SCADA data' + assert set(es.dataframe_dict.keys()) == set( + ['alarms', 'turbines', 'stoppages', 'work_orders', 'notifications', 'scada']) diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 0000000..ddb816a --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,61 @@ +import pytest + +from zephyr_ml.metadata import DEFAULT_ES_KWARGS, DEFAULT_ES_TYPE_KWARGS, get_mapped_kwargs + + +def test_default_scada_mapped_kwargs(): + expected = {**DEFAULT_ES_KWARGS, 'scada': DEFAULT_ES_TYPE_KWARGS['scada']} + actual = get_mapped_kwargs('scada') + assert expected == actual + + +def test_default_pidata_mapped_kwargs(): + expected = {**DEFAULT_ES_KWARGS, 'pidata': DEFAULT_ES_TYPE_KWARGS['pidata']} + actual = get_mapped_kwargs('pidata') + assert expected == actual + + +def test_new_kwargs_bad_es_type(): + error_text = "Unrecognized es_type argument: 
bad_es_type" + with pytest.raises(ValueError, match=error_text): + get_mapped_kwargs('bad_es_type') + + +def test_new_kwargs_bad_format(): + error_text = "new_kwargs must be dictionary mapping entity name to dictionary " + \ + "with updated keyword arguments for EntitySet creation." + bad_kwargs = ['list', 'of', 'args'] + with pytest.raises(ValueError, match=error_text): + get_mapped_kwargs('pidata', bad_kwargs) + + +def test_new_kwargs_unexpected_entity(): + error_text = 'Unrecognized entity "unexpected" found in new keyword argument mapping.' + bad_kwargs = {'unexpected': {}} + with pytest.raises(ValueError, match=error_text): + get_mapped_kwargs('pidata', bad_kwargs) + + +def test_new_kwargs_update_kwargs(): + updated_kwargs = {'turbines': { + 'index': 'new_turbine_index', + 'logical_types': { + 'COD_ELEMENT': 'integer', + 'extra_column': 'double' + } + }} + + expected = { + **DEFAULT_ES_KWARGS, + 'pidata': DEFAULT_ES_TYPE_KWARGS['pidata'], + 'turbines': { + 'index': 'new_turbine_index', + 'logical_types': { + 'COD_ELEMENT': 'integer', + 'extra_column': 'double' + } + } + } + + actual = get_mapped_kwargs('pidata', updated_kwargs) + assert actual == expected diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..95de6af --- /dev/null +++ b/tox.ini @@ -0,0 +1,20 @@ +[tox] +envlist = py3{6,7}, test-devel + +[gh-actions] +python = + 3.7: py37, test-devel + 3.6: py36 + +[testenv] +passenv = CI TRAVIS TRAVIS_* +skipsdist = false +skip_install = false +extras = test +commands = + /usr/bin/env make test + +[testenv:test-devel] +extras = dev +commands = + /usr/bin/env make test-devel diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py new file mode 100644 index 0000000..2f36507 --- /dev/null +++ b/zephyr_ml/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for Zephyr.""" + +__author__ = 'MIT Data To AI Lab' +__email__ = 'dai-lab@mit.edu' +__version__ = '0.2.4.dev0' + +from zephyr_ml.entityset import create_pidata_entityset, create_scada_entityset +from zephyr_ml.labeling import DataLabeler diff --git a/zephyr_ml/entityset.py b/zephyr_ml/entityset.py new file mode 100644 index 0000000..ecf6ef8 --- /dev/null +++ b/zephyr_ml/entityset.py @@ -0,0 +1,135 @@ +import featuretools as ft + +from zephyr_ml.metadata import get_mapped_kwargs + + +def _create_entityset(entities, es_type, es_kwargs): + # filter out stated logical types for missing columns + for entity, df in entities.items(): + es_kwargs[entity]['logical_types'] = { + col: t for col, t in es_kwargs[entity]['logical_types'].items() + if col in df.columns + } + + turbines_index = es_kwargs['turbines']['index'] + work_orders_index = es_kwargs['work_orders']['index'] + + relationships = [ + ('turbines', turbines_index, 'alarms', turbines_index), + ('turbines', turbines_index, 'stoppages', turbines_index), + ('turbines', turbines_index, 'work_orders', turbines_index), + ('turbines', turbines_index, es_type, turbines_index), + ('work_orders', work_orders_index, 'notifications', work_orders_index) + ] + + es = ft.EntitySet() + + for name, df in entities.items(): + es.add_dataframe( + dataframe_name=name, + dataframe=df, + **es_kwargs[name] + ) + + for relationship in relationships: + parent_df, parent_column, child_df, child_column = relationship + es.add_relationship(parent_df, parent_column, child_df, child_column) + + return es + + +def create_pidata_entityset(dfs, new_kwargs_mapping=None): + '''Generate an entityset for PI data datasets + + Args: + data_paths (dict): Dictionary mapping entity names 
('alarms', 'notifications', + 'stoppages', 'work_orders', 'pidata', 'turbines') to the pandas dataframe for + that entity. + **kwargs: Updated keyword arguments to be used during entityset creation + ''' + entity_kwargs = get_mapped_kwargs('pidata', new_kwargs_mapping) + _validate_data(dfs, 'pidata', entity_kwargs) + + es = _create_entityset(dfs, 'pidata', entity_kwargs) + es.id = 'PI data' + + return es + + +def create_scada_entityset(dfs, new_kwargs_mapping=None): + '''Generate an entityset for SCADA data datasets + + Args: + data_paths (dict): Dictionary mapping entity names ('alarms', 'notifications', + 'stoppages', 'work_orders', 'scada', 'turbines') to the pandas dataframe for + that entity. + ''' + entity_kwargs = get_mapped_kwargs('scada', new_kwargs_mapping) + _validate_data(dfs, 'scada', entity_kwargs) + + es = _create_entityset(dfs, 'scada', entity_kwargs) + es.id = 'SCADA data' + + return es + + +def _validate_data(dfs, es_type, es_kwargs): + '''Validate data by checking for required columns in each entity + ''' + entities = set(['alarms', 'stoppages', 'work_orders', 'notifications', 'turbines', es_type]) + if set(dfs.keys()) != entities: + missing = entities.difference(set(dfs.keys())) + extra = set(dfs.keys()).difference(entities) + msg = [] + if missing: + msg.append('Missing dataframes for entities {}.'.format(', '.join(missing))) + if extra: + msg.append('Unrecognized entities {} included in dfs.'.format(', '.join(extra))) + + raise ValueError(' '.join(msg)) + + turbines_index = es_kwargs['turbines']['index'] + work_orders_index = es_kwargs['work_orders']['index'] + + if work_orders_index not in dfs['work_orders'].columns: + raise ValueError( + 'Expected index column "{}" missing from work_orders entity'.format(work_orders_index)) + + if work_orders_index not in dfs['notifications'].columns: + raise ValueError( + 'Expected column "{}" missing from notifications entity'.format(work_orders_index)) + + if not dfs['work_orders'][work_orders_index].is_unique: + raise ValueError('Expected index column "{}" of work_orders entity is not ' + 'unique'.format(work_orders_index)) + + if turbines_index not in dfs['turbines'].columns: + raise ValueError( + 'Expected index column "{}" missing from turbines entity'.format(turbines_index)) + + if not dfs['turbines'][turbines_index].is_unique: + raise ValueError( + 'Expected index column "{}" of turbines entity is not unique.'.format(turbines_index)) + + for entity, df in dfs.items(): + if turbines_index not in df.columns: + raise ValueError( + 'Turbines index column "{}" missing from data for {} entity'.format( + turbines_index, entity)) + + time_index = es_kwargs[entity].get('time_index', False) + if time_index and time_index not in df.columns: + raise ValueError( + 'Missing time index column "{}" from {} entity'.format( + time_index, entity)) + + secondary_time_indices = es_kwargs[entity].get('secondary_time_index', {}) + for time_index, cols in secondary_time_indices.items(): + if time_index not in df.columns: + raise ValueError( + 'Secondary time index "{}" missing from {} entity'.format( + time_index, entity)) + for col in cols: + if col not in df.columns: + raise ValueError(('Column "{}" associated with secondary time index "{}" ' + 'missing from {} entity').format(col, time_index, entity)) diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py new file mode 100644 index 0000000..32aeacc --- /dev/null +++ b/zephyr_ml/labeling/__init__.py @@ -0,0 +1,40 @@ +from zephyr_ml.labeling import utils +from 
diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/labeling/__init__.py new file mode 100644 index 0000000..32aeacc --- /dev/null +++ b/zephyr_ml/labeling/__init__.py @@ -0,0 +1,40 @@ +from zephyr_ml.labeling import utils +from zephyr_ml.labeling.data_labeler import DataLabeler +from zephyr_ml.labeling.labeling_functions import ( + brake_pad_presence, converter_replacement_presence, total_power_loss) + +LABELING_FUNCTIONS = [ + brake_pad_presence, + converter_replacement_presence, + total_power_loss +] +UTIL_FUNCTIONS = [ + utils.aggregate_by_column, + utils.categorical_presence, + utils.greater_than, + utils.keyword_in_text, + utils.merge_binary_labeling_functions, + utils.total_duration, +] + + +def get_labeling_functions(): + functions = {} + for function in LABELING_FUNCTIONS: + name = function.__name__ + functions[name] = function.__doc__.split('\n')[0] + + return functions + + +def get_helper_functions(): + functions = {} + for function in UTIL_FUNCTIONS: + name = function.__name__ + functions[name] = function.__doc__.split('\n')[0] + + return functions + + +def get_util_functions(): + return get_helper_functions() diff --git a/zephyr_ml/labeling/data_labeler.py b/zephyr_ml/labeling/data_labeler.py new file mode 100644 index 0000000..080eee8 --- /dev/null +++ b/zephyr_ml/labeling/data_labeler.py @@ -0,0 +1,66 @@ +from inspect import getfullargspec + +import composeml as cp + + +class DataLabeler: + """Class that defines the prediction problem. + This class supports the generation of `label_times` which + is fundamental to the feature generation phase as well + as specifying the target labels. + Args: + function (LabelingFunction): + function that defines the labeling function; it should return a + tuple of the labeling function, the dataframe to search, and a + dictionary of metadata. + """ + + def __init__(self, function): + self.function = function + + def generate_label_times(self, es, num_samples=-1, subset=None, + column_map={}, verbose=False, **kwargs): + """Searches the data to calculate label times. + Args: + es (featuretools.EntitySet): + Entityset to extract `label_times` from. + num_samples (int): + Number of samples to return for each instance. Defaults to -1, which + returns all possible samples. + subset (float or int): + Portion (if float) or number of rows (if int) of the data to sample + for searching. + column_map (dict): + Dictionary mapping column references in the labeling function to actual + column names. See the labeling function for the columns referenced. + verbose (bool): + An indicator to the verbosity of searching. + Returns: + composeml.LabelTimes: + Calculated labels with cutoff times. + dict: + Metadata about the labeling function.
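+        Example: +            A minimal sketch, assuming a prepared ``featuretools.EntitySet`` named ``es``:: + +                from zephyr_ml import DataLabeler, labeling + +                labeler = DataLabeler(labeling.total_power_loss) +                label_times, metadata = labeler.generate_label_times(es)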
+ """ + labeling_function, df, meta = self.function(es, column_map) + + data = df + if isinstance(subset, float) or isinstance(subset, int): + data = data.sample(subset) + + target_entity_index = meta.get('target_entity_index') + time_index = meta.get('time_index') + thresh = kwargs.get('thresh') or meta.get('thresh') + window_size = kwargs.get('window_size') or meta.get('window_size') + label_maker = cp.LabelMaker(labeling_function=labeling_function, + target_dataframe_name=target_entity_index, + time_index=time_index, + window_size=window_size) + + kwargs = {**meta, **kwargs} + kwargs = { + k: kwargs.get(k) for k in set( + getfullargspec( + label_maker.search)[0]) if kwargs.get(k) is not None} + label_times = label_maker.search(data.sort_values(time_index), num_samples, + verbose=verbose, **kwargs) + if thresh is not None: + label_times = label_times.threshold(thresh) + + return label_times, meta diff --git a/zephyr_ml/labeling/labeling_functions/__init__.py b/zephyr_ml/labeling/labeling_functions/__init__.py new file mode 100644 index 0000000..f85b3f3 --- /dev/null +++ b/zephyr_ml/labeling/labeling_functions/__init__.py @@ -0,0 +1,4 @@ +from zephyr_ml.labeling.labeling_functions.brake_pad_presence import brake_pad_presence +from zephyr_ml.labeling.labeling_functions.converter_replacement_presence import ( + converter_replacement_presence) +from zephyr_ml.labeling.labeling_functions.total_power_loss import total_power_loss diff --git a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py new file mode 100644 index 0000000..bcaf7ee --- /dev/null +++ b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py @@ -0,0 +1,49 @@ +from zephyr_ml.labeling.utils import denormalize + + +def brake_pad_presence(es, column_map={}): + """Determines if brake pad present in stoppages. + + Args: + es (ft.EntitySet): + EntitySet of data to calculate total power loss across. + column_mapping (dict): + Optional dictionary to update default column names to the + actual corresponding column names in the data slice. Can contain the + following keys: + "comments": Column that contains comments about the stoppage. Defaults + to "DES_COMMENTS". + "turbine_id": Column containing the ID of the turbine associated with a + stoppage. Must match the index column of the 'turbines' entity. + Defaults to "COD_ELEMENT". + "time_index": Column to use as the time index for the data slice. + Defaults to "DAT_END". + + Returns: + label: + Labeling function to find brake pad presence over a data slice. 
diff --git a/zephyr_ml/labeling/labeling_functions/__init__.py b/zephyr_ml/labeling/labeling_functions/__init__.py
new file mode 100644
index 0000000..f85b3f3
--- /dev/null
+++ b/zephyr_ml/labeling/labeling_functions/__init__.py
@@ -0,0 +1,4 @@
+from zephyr_ml.labeling.labeling_functions.brake_pad_presence import brake_pad_presence
+from zephyr_ml.labeling.labeling_functions.converter_replacement_presence import (
+    converter_replacement_presence)
+from zephyr_ml.labeling.labeling_functions.total_power_loss import total_power_loss
diff --git a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py
new file mode 100644
index 0000000..bcaf7ee
--- /dev/null
+++ b/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py
@@ -0,0 +1,49 @@
+from zephyr_ml.labeling.utils import denormalize
+
+
+def brake_pad_presence(es, column_map={}):
+    """Determines if a brake pad is present in stoppages.
+
+    Args:
+        es (ft.EntitySet):
+            EntitySet of data to check for brake pad presence.
+        column_map (dict):
+            Optional dictionary to update default column names to the
+            actual corresponding column names in the data slice. Can contain the
+            following keys:
+
+            "comments_column": Column that contains comments about the stoppage.
+                Defaults to "DES_COMMENTS".
+            "turbine_id_column": Column containing the ID of the turbine associated
+                with a stoppage. Must match the index column of the 'turbines'
+                entity. Defaults to "COD_ELEMENT".
+            "time_index_column": Column to use as the time index for the data
+                slice. Defaults to "DAT_END".
+
+    Returns:
+        label:
+            Labeling function to find brake pad presence over a data slice.
+        df:
+            Denormalized dataframe of data to get labels from.
+        meta:
+            Dictionary containing metadata about the labeling function.
+    """
+    comments = column_map.get('comments_column', 'DES_COMMENTS')
+    turbine_id = column_map.get('turbine_id_column', 'COD_ELEMENT')
+    time_index = column_map.get('time_index_column', 'DAT_END')
+
+    def label(ds, **kwargs):
+        # normalize the comments, then look for mentions of brake pads
+        # while excluding the yaw brake
+        comments_series = ds[comments].fillna('').str.lower()
+        return any(comments_series.apply(
+            lambda d: ('brake' in d) and ('pad' in d) and ('yaw' not in d)))
+
+    meta = {
+        "target_entity_index": turbine_id,
+        "time_index": time_index,
+    }
+
+    df = denormalize(es, entities=['stoppages'])
+
+    return label, df, meta
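+
+
+# The (label, df, meta) contract above can be followed to write custom
+# labeling functions. Minimal sketch (illustrative; the duration threshold
+# and the column defaults below are assumptions, not part of the library):
+def example_long_stoppage_presence(es, column_map={}):
+    """Determines if a stoppage longer than one day is present."""
+    duration = column_map.get('duration_column', 'IND_DURATION')
+
+    def label(ds, **kwargs):
+        return int((ds[duration] > 24).any())
+
+    meta = {
+        "target_entity_index": column_map.get('turbine_id_column', 'COD_ELEMENT'),
+        "time_index": column_map.get('time_index_column', 'DAT_START'),
+    }
+
+    df = denormalize(es, entities=['stoppages'])
+
+    return label, df, meta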
diff --git a/zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py b/zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py
new file mode 100644
index 0000000..eef8bd8
--- /dev/null
+++ b/zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py
@@ -0,0 +1,55 @@
+from zephyr_ml.labeling.utils import denormalize
+
+
+def converter_replacement_presence(es, column_map={}):
+    """Determines if a converter replacement is present in notifications.
+
+    Args:
+        es (ft.EntitySet):
+            EntitySet of data to check for converter replacements.
+        column_map (dict):
+            Optional dictionary to update default column names to the
+            actual corresponding column names in the data slice. Can contain the
+            following keys:
+
+            "sap_code": Column that contains the material SAP code. Defaults
+                to "COD_MATERIAL_SAP".
+            "turbine_id_column": Column containing the ID of the turbine associated
+                with a stoppage. Must match the index column of the 'turbines'
+                entity. Defaults to "COD_ELEMENT".
+            "description": Column containing the description for a given
+                notification. Only used by the currently disabled description
+                check. Defaults to "DES_MEDIUM".
+            "time_index": Column to use as the time index for the data slice.
+                Defaults to "DAT_MALF_START".
+
+    Returns:
+        label:
+            Labeling function to find converter replacement presence over a data slice.
+        df:
+            Denormalized dataframe of data to get labels from.
+        meta:
+            Dictionary containing metadata about the labeling function.
+    """
+    sap_code = column_map.get('sap_code', 'COD_MATERIAL_SAP')
+    description = column_map.get('description', 'DES_MEDIUM')  # noqa: F841
+    turbine_id = column_map.get('turbine_id_column', 'COD_ELEMENT')
+    time_index = column_map.get('time_index', 'DAT_MALF_START')
+
+    def label(ds, **kwargs):
+        # check for the SAP material code of a converter replacement
+        logic1 = (ds[sap_code] == 36052411).any()
+        # alternative check on the notification description, currently disabled:
+        # logic2 = ds[description].str.lower().apply(lambda x: 'inu' in x).any()
+        return logic1  # or logic2
+
+    meta = {
+        "target_entity_index": turbine_id,
+        "time_index": time_index,
+        "window_size": "10d"
+    }
+
+    # denormalize(es, entities=['notifications', 'work_orders'])
+    df = denormalize(es, entities=['notifications'])
+    df = df.dropna(subset=[time_index])
+
+    return label, df, meta
diff --git a/zephyr_ml/labeling/labeling_functions/total_power_loss.py b/zephyr_ml/labeling/labeling_functions/total_power_loss.py
new file mode 100644
index 0000000..a866a05
--- /dev/null
+++ b/zephyr_ml/labeling/labeling_functions/total_power_loss.py
@@ -0,0 +1,46 @@
+from zephyr_ml.labeling.utils import denormalize
+
+
+def total_power_loss(es, column_map={}):
+    """Calculates the total power loss over the data slice.
+
+    Args:
+        es (ft.EntitySet):
+            EntitySet of data to calculate total power loss across.
+        column_map (dict):
+            Optional dictionary to update default column names to the
+            actual corresponding column names in the data slice. Can contain the
+            following keys:
+
+            "lost_gen": Column that contains the generation lost due to a
+                stoppage. Defaults to "IND_LOST_GEN".
+            "turbine_id_column": Column containing the ID of the turbine associated
+                with a stoppage. Must match the index column of the 'turbines'
+                entity. Defaults to "COD_ELEMENT".
+            "time_index": Column to use as the time index for the data slice.
+                Defaults to "DAT_START".
+
+    Returns:
+        label:
+            Labeling function to find the total power loss over a data slice.
+        df:
+            Denormalized dataframe of data to get labels from.
+        meta:
+            Dictionary containing metadata about the labeling function.
+    """
+    lost_gen = column_map.get('lost_gen', 'IND_LOST_GEN')
+    turbine_id = column_map.get('turbine_id_column', 'COD_ELEMENT')
+    time_index = column_map.get('time_index', 'DAT_START')
+
+    def label(ds, **kwargs):
+        # pandas sum skips missing values, unlike the builtin sum
+        return ds[lost_gen].sum()
+
+    meta = {
+        "target_entity_index": turbine_id,
+        "time_index": time_index,
+    }
+
+    df = denormalize(es, entities=['stoppages'])
+
+    return label, df, meta
diff --git a/zephyr_ml/labeling/utils.py b/zephyr_ml/labeling/utils.py
new file mode 100644
index 0000000..23bf0c4
--- /dev/null
+++ b/zephyr_ml/labeling/utils.py
@@ -0,0 +1,232 @@
+import numpy as np
+import pandas as pd
+
+
+def _search_relationship(es, left, right):
+    for r in es.relationships:
+        if r.parent_name in left:
+            if right == r.child_name:
+                return r.parent_column.name, r.child_column.name
+
+        elif r.child_name in left:
+            if right == r.parent_name:
+                return r.child_column.name, r.parent_column.name
+
+    raise ValueError('No relationship found between {} and "{}"'.format(left, right))
+
+
+def denormalize(es, entities):
+    """Merge a set of entities into a single dataframe.
+
+    Convert a set of entities from the entityset into a single
+    dataframe by sequentially merging the selected entities.
+
+    Args:
+        es (ft.EntitySet):
+            EntitySet containing the entities to merge.
+        entities (list):
+            List of strings denoting which entities to merge.
+
+    Returns:
+        pandas.DataFrame:
+            A single dataframe containing all the information from the
+            selected entities.
+    """
+    k = len(entities)
+
+    # initial entity to start from (should be the target entity)
+    first = entities[0]
+    previous = [first]
+    df = es[first]
+
+    # merge the dataframes to create a single input
+    for i in range(1, k):
+        right = entities[i]
+
+        left_on, right_on = _search_relationship(es, previous, right)
+        df = pd.merge(df, es[right],
+                      left_on=left_on, right_on=right_on,
+                      how='left', suffixes=('', '_y')).filter(regex='^(?!.*_y)')
+
+        previous.append(right)
+
+    return df
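+
+
+# Minimal usage sketch (illustrative): with an entityset built by
+# zephyr_ml.entityset.create_scada_entityset, and assuming a relationship
+# between the two entities exists in the entityset, stoppages can be merged
+# with their turbine information along that relationship:
+#
+#     df = denormalize(es, entities=['stoppages', 'turbines'])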
+ """ + def merged_function(df): + out = and_connected + for function in labeling_functions: + if and_connected: + out &= function(df) + else: + out |= function(df) + + return int(out) + + return merged_function + + +def aggregate_by_column(numerical_column, aggregation): + """Generates a function for aggregates numerical column values over a data slice. + + Args: + numerical_column (str): + Numerical column to aggregate over. + aggregation (function): + Aggregation function to apply. + + Returns: + function: + The function returns the total numerical column value over the data + slice as a continuous label. + """ + def aggregate_function(df): + """Aggregate function with: + numerical_column={} + aggregation={} + """ + return aggregation(df[numerical_column]) + + aggregate_function.__doc__ = aggregate_function.__doc__.format(numerical_column, + aggregation.__name__) + + return aggregate_function + + +def categorical_presence(categorical_column, value): + """Generates a function that determines if the categorical column has the desired value. + + Args: + categorical_column (str): + Categorical column to use values from. + value (str or int or float): + Value to compare categorical columns values to. + + Returns: + function: + The function returns 1 if categorical column has the desired value, + 0 otherwise. + """ + def categorical_function(df): + """Categorical presence function with: + categorical_column={} + value={} + """ + return int(df[categorical_column].isin([value]).sum() > 0) + + categorical_function.__doc__ = categorical_function.__doc__.format(categorical_column, value) + return categorical_function + + +def keyword_in_text(keyword, columns=None): + """Determines presence of keyword in text field data columns. + + Args: + keyword (str): + Keyword to search the text columns for. + columns (list or None): + List of columns to search through to find keyword. If None, all + columns are tested. Default is None. + + Returns: + function: + The function returns 1 if the keyword is present in any column, + 0 otherwise. + """ + def keyword_function(df): + """Keyword function with: + keyword={} + columns={} + """ + mask = np.full(len(df), False) + for col in columns: + try: + mask |= df[col].str.contains(keyword, case=False, na=False) + except KeyError: + print("Unable to find column for keyword search") + + return int(mask.sum() != 0) + + keyword_function.__doc__ = keyword_function.__doc__.format(keyword, columns) + return keyword_function + + +def greater_than(numerical_column, threshold): + """Generates a function to see if there are numerical values greater than a threshold. + + Args: + numerical_column (str): + Numerical column to use values from. + threshold (float): + Threshold for the numerical values used to define the binary labels. + + Returns: + function: + The function returns 1 if data contains a value is greater than threshold, + 0 otherwise. + """ + def numerical_function(df): + """Numerical presence function with: + numerical_column={} + threshold={} + """ + series = df[numerical_column] + return int(len(series[series > threshold]) > 0) + + numerical_function.__doc__ = numerical_function.__doc__.format(numerical_column, threshold) + return numerical_function + + +def total_duration(start_time, end_time): + """Generates function for calculating the total duration given start/end time indexes. + + Args: + start_time (str): + Name of the start time column. + end_time (str): + Name of the end time column. 
+
+
+def total_duration(start_time, end_time):
+    """Generates a function that calculates the total duration given start/end time columns.
+
+    Args:
+        start_time (str):
+            Name of the start time column.
+        end_time (str):
+            Name of the end time column.
+
+    Returns:
+        function:
+            The function returns the total duration in seconds based on the two
+            given time endpoints for the data slice.
+    """
+    def duration_function(df):
+        """Duration function with:
+        start_time={}
+        end_time={}
+        """
+        return ((df[end_time] - df[start_time]).dt.total_seconds()).sum()
+
+    duration_function.__doc__ = duration_function.__doc__.format(start_time, end_time)
+    return duration_function
diff --git a/zephyr_ml/metadata.py b/zephyr_ml/metadata.py
new file mode 100644
index 0000000..db5b540
--- /dev/null
+++ b/zephyr_ml/metadata.py
@@ -0,0 +1,150 @@
+# Default EntitySet keyword arguments for entities
+DEFAULT_ES_KWARGS = {
+    'alarms': {
+        'index': '_index',
+        'make_index': True,
+        'time_index': 'DAT_START',
+        'secondary_time_index': {'DAT_END': ['IND_DURATION']},
+        'logical_types': {
+            'COD_ELEMENT': 'categorical',  # turbine id
+            'DAT_START': 'datetime',  # start
+            'DAT_END': 'datetime',  # end
+            'IND_DURATION': 'double',  # duration
+            'COD_ALARM': 'categorical',  # alarm code
+            'COD_ALARM_INT': 'categorical',  # international alarm code
+            'DES_NAME': 'categorical',  # alarm name
+            'DES_TITLE': 'categorical',  # alarm description
+            'COD_STATUS': 'categorical'  # status code
+        }
+    },
+    'stoppages': {
+        'index': '_index',
+        'make_index': True,
+        'time_index': 'DAT_START',
+        'secondary_time_index': {'DAT_END': ['IND_DURATION', 'IND_LOST_GEN']},
+        'logical_types': {
+            'COD_ELEMENT': 'categorical',  # turbine id
+            'DAT_START': 'datetime',  # start
+            'DAT_END': 'datetime',  # end
+            'DES_WO_NAME': 'natural_language',  # work order name
+            'DES_COMMENTS': 'natural_language',  # work order comments
+            'COD_WO': 'integer_nullable',  # work order code
+            'IND_DURATION': 'double',  # duration
+            'IND_LOST_GEN': 'double',  # generation loss
+            'COD_ALARM': 'categorical',  # alarm code
+            'COD_CAUSE': 'categorical',  # stoppage cause
+            'COD_INCIDENCE': 'categorical',  # incidence code
+            'COD_ORIGIN': 'categorical',  # origin code
+            'DESC_CLASS': 'categorical',  # ????
+            'COD_STATUS': 'categorical',  # status code
+            'COD_CODE': 'categorical',  # stoppage code
+            'DES_DESCRIPTION': 'natural_language',  # stoppage description
+            'DES_TECH_NAME': 'categorical'  # turbine technology
+        }
+    },
+    'notifications': {
+        'index': '_index',
+        'make_index': True,
+        'time_index': 'DAT_POSTING',
+        'secondary_time_index': {'DAT_MALF_END': ['IND_BREAKDOWN_DUR']},
+        'logical_types': {
+            'COD_ELEMENT': 'categorical',  # turbine id
+            'COD_ORDER': 'categorical',
+            'IND_QUANTITY': 'double',
+            'COD_MATERIAL_SAP': 'categorical',
+            'DAT_POSTING': 'datetime',
+            'COD_MAT_DOC': 'categorical',
+            'DES_MEDIUM': 'categorical',
+            'COD_NOTIF': 'categorical',
+            'DAT_MALF_START': 'datetime',
+            'DAT_MALF_END': 'datetime',
+            'IND_BREAKDOWN_DUR': 'double',
+            'FUNCT_LOC_DES': 'categorical',
+            'COD_ALARM': 'categorical',
+            'DES_ALARM': 'categorical'
+        }
+    },
+    'work_orders': {
+        'index': 'COD_ORDER',
+        'time_index': 'DAT_BASIC_START',
+        'secondary_time_index': {'DAT_VALID_END': []},
+        'logical_types': {
+            'COD_ELEMENT': 'categorical',
+            'COD_ORDER': 'categorical',
+            'DAT_BASIC_START': 'datetime',
+            'DAT_BASIC_END': 'datetime',
+            'COD_EQUIPMENT': 'categorical',
+            'COD_MAINT_PLANT': 'categorical',
+            'COD_MAINT_ACT_TYPE': 'categorical',
+            'COD_CREATED_BY': 'categorical',
+            'COD_ORDER_TYPE': 'categorical',
+            'DAT_REFERENCE': 'datetime',
+            'DAT_CREATED_ON': 'datetime',
+            'DAT_VALID_END': 'datetime',
+            'DAT_VALID_START': 'datetime',
+            'COD_SYSTEM_STAT': 'categorical',
+            'DES_LONG': 'natural_language',
+            'COD_FUNCT_LOC': 'categorical',
+            'COD_NOTIF_OBJ': 'categorical',
+            'COD_MAINT_ITEM': 'categorical',
+            'DES_MEDIUM': 'natural_language',
+            'DES_FUNCT_LOC': 'categorical'
+        }
+    },
+    'turbines': {
+        'index': 'COD_ELEMENT',
+        'logical_types': {
+            'COD_ELEMENT': 'categorical',
+            'TURBINE_PI_ID': 'categorical',
+            'TURBINE_LOCAL_ID': 'categorical',
+            'TURBINE_SAP_COD': 'categorical',
+            'DES_CORE_ELEMENT': 'categorical',
+            'SITE': 'categorical',
+            'DES_CORE_PLANT': 'categorical',
+            'COD_PLANT_SAP': 'categorical',
+            'PI_COLLECTOR_SITE_NAME': 'categorical',
+            'PI_LOCAL_SITE_NAME': 'categorical'
+        }
+    }
+}
+
+DEFAULT_ES_TYPE_KWARGS = {
+    'pidata': {
+        'index': '_index',
+        'make_index': True,
+        'time_index': 'time',
+        'logical_types': {
+            'time': 'datetime',
+            'COD_ELEMENT': 'categorical'
+        }
+    },
+    'scada': {
+        'index': '_index',
+        'make_index': True,
+        'time_index': 'TIMESTAMP',
+        'logical_types': {
+            'TIMESTAMP': 'datetime',
+            'COD_ELEMENT': 'categorical'
+        }
+    }
+}
+
+
+def get_mapped_kwargs(es_type, new_kwargs=None):
+    if es_type not in DEFAULT_ES_TYPE_KWARGS:
+        raise ValueError('Unrecognized es_type argument: {}'.format(es_type))
+
+    # copy each entity's kwargs so that the updates below cannot mutate the
+    # module-level defaults through shared references
+    mapped_kwargs = {entity: kwargs.copy() for entity, kwargs in DEFAULT_ES_KWARGS.items()}
+    mapped_kwargs[es_type] = DEFAULT_ES_TYPE_KWARGS[es_type].copy()
+
+    if new_kwargs is not None:
+        if not isinstance(new_kwargs, dict):
+            raise ValueError('new_kwargs must be a dictionary mapping entity name to a '
+                             'dictionary with updated keyword arguments for EntitySet creation.')
+        for entity in new_kwargs:
+            if entity not in mapped_kwargs:
+                raise ValueError('Unrecognized entity "{}" found in new keyword argument '
+                                 'mapping.'.format(entity))
+
+            mapped_kwargs[entity].update(new_kwargs[entity])
+
+    return mapped_kwargs
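+
+
+if __name__ == '__main__':
+    # Minimal sketch: override the SCADA time index name. The 'TIME' column
+    # name here is illustrative, not one of the defaults defined above.
+    kwargs = get_mapped_kwargs('scada', {'scada': {'time_index': 'TIME'}})
+    print(kwargs['scada']['time_index'])  # TIME
+    print(DEFAULT_ES_TYPE_KWARGS['scada']['time_index'])  # TIMESTAMP, defaults untouched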