Skip to content

Commit

Permalink
v 0.8.9 (#353)
Browse files Browse the repository at this point in the history
* added anvoa to supported pre-deployed models in tabpy (#350)

* added anvoa to supported pre-deployed models in tabpy

* fixed pep8 issue

* fixed md

* Add Ctrl+C handler (#348)

* Add Ctrl+C handler

* Fix unit tests warnings for genson

* Add test to increase code coverage

* Add

* Change default from 10Mb to 100Mb for request size

* Increase code coverage

* Increase code coverage

* Convert buffer size to int

* Add Ctrl+C test

* Delete test added to the wrong folder

* Update CHANGELOG

* Update test_app.py

* Remove dead code

* Don't count coverage for multiline expressions

* Add test case for invalid protocol

* Add test case for _check_endpoint_name

* Remove dead code
  • Loading branch information
0golovatyi committed Oct 22, 2019
1 parent 3055526 commit 00a4d3c
Show file tree
Hide file tree
Showing 31 changed files with 188 additions and 207 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Expand Up @@ -2,6 +2,7 @@
# Exclude lines that match patterns from coverage report.
exclude_lines =
if __name__ == .__main__.:
\\$

# Only show one number after decimal point in report.
precision = 1
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG
@@ -1,5 +1,13 @@
# Changelog

## v0.8.9

### Improvements

- Added Ctrl+C handler
- Added configurable buffer size for HTTP requests
- Added ANOVA to supported pre-deployed models in TabPy

## v0.8.7

### Improvements
Expand Down
12 changes: 12 additions & 0 deletions CONTRIBUTING.md
Expand Up @@ -177,3 +177,15 @@ TabPy package:
python setup.py sdist bdist_wheel
python -m twine upload dist/*
```

To publish a test version of the package, use the following command:

```sh
python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*
```

To install the package from TestPyPI, use the following command:

```sh
pip install -i https://test.pypi.org/simple/ tabpy
```
10 changes: 9 additions & 1 deletion docs/server-config.md
Expand Up @@ -85,6 +85,9 @@ at [`logging.config` documentation page](https://docs.python.org/3.6/library/log
not set.
- `TABPY_LOG_DETAILS` - when set to `true` additional call information
(caller IP, URL, client info, etc.) is logged. Default value - `false`.
- `TABPY_MAX_REQUEST_SIZE_MB` - maximum request size supported by TabPy server
in megabytes. Requests exceeding this size are rejected. Default value -
`100`.
- `TABPY_EVALUATE_TIMEOUT` - script evaluation timeout in seconds. Default
value - `30`.

Expand Down Expand Up @@ -116,10 +119,15 @@ settings._
# end user info if provided.
# TABPY_LOG_DETAILS = true

# Limit request size (in MB) - any request whose size exceeds the
# specified amount will be rejected by TabPy.
# Default value is 100 MB.
# TABPY_MAX_REQUEST_SIZE_MB = 100

# Configure how long a custom script provided to the /evaluate method
# will run before throwing a TimeoutError.
# The value should be a float representing the timeout time in seconds.
#TABPY_EVALUATE_TIMEOUT = 30
# TABPY_EVALUATE_TIMEOUT = 30

[loggers]
keys=root
Expand Down
17 changes: 17 additions & 0 deletions docs/tabpy-tools.md
Expand Up @@ -14,6 +14,7 @@ on TabPy server.
* [Principal Component Analysis (PCA)](#principal-component-analysis-pca)
* [Sentiment Analysis](#sentiment-analysis)
* [T-Test](#t-test)
* [ANOVA](#anova)
- [Providing Schema Metadata](#providing-schema-metadata)
- [Querying an Endpoint](#querying-an-endpoint)
- [Evaluating Arbitrary Python Scripts](#evaluating-arbitrary-python-scripts)
Expand Down Expand Up @@ -318,6 +319,22 @@ The function returns a two-tailed [p-value](https://en.wikipedia.org/wiki/P-valu
you may reject or fail to reject the null hypothesis.
<!-- markdownlint-enable MD029 -->

### ANOVA

[Analysis of variance](https://en.wikipedia.org/wiki/Analysis_of_variance)
helps determine whether the means of two or more groups within a sample
differ. By measuring the variation between and within groups and computing
the resulting F-statistic, we are able to obtain a p-value. While a
statistically significant p-value tells you that at least two of your groups'
means are different from each other, it does not tell you which groups differ.

You can call ANOVA from Tableau in the following way:

```python

tabpy.query('anova', _arg1, _arg2, _arg3)['response']
```

## Providing Schema Metadata

As soon as you share your deployed functions, you also need to share metadata
Expand Down
2 changes: 1 addition & 1 deletion tabpy/VERSION
@@ -1 +1 @@
0.8.7
0.8.9
1 change: 0 additions & 1 deletion tabpy/models/deploy_models.py
Expand Up @@ -2,7 +2,6 @@
import os
import sys
import platform
import runpy
import subprocess
from pathlib import Path
from tabpy.models.utils import setup_utils
Expand Down
25 changes: 25 additions & 0 deletions tabpy/models/scripts/ANOVA.py
@@ -0,0 +1,25 @@
import scipy.stats as stats
from tabpy.models.utils import setup_utils


def anova(_arg1, _arg2, *_argN):
    '''
    ANOVA is a statistical hypothesis test that is used to compare
    two or more group means for equality. For more information on
    the function and how to use it please refer to tabpy-tools.md

    Parameters:
        _arg1, _arg2: the first two groups, each a sized sequence of
            int/float values.
        *_argN: any number of additional groups of the same kind.

    Returns:
        The p-value computed by scipy.stats.f_oneway over all groups.

    Raises:
        ValueError: if any group is empty or contains a value that is
            not an int or a float.
    '''

    cols = [_arg1, _arg2, *_argN]
    for col in cols:
        # Validate every value, not only the first one, so that mixed
        # columns fail here with a clear message instead of deep inside
        # scipy. An empty column is rejected the same way.
        if len(col) == 0 or not all(
                isinstance(value, (int, float)) for value in col):
            raise ValueError('values must be numeric')
    _, p_value = stats.f_oneway(*cols)
    return p_value


if __name__ == '__main__':
    # When run as a script, hand the anova function to the project's
    # deploy helper under the endpoint name 'anova', together with a
    # human-readable description of the model.
    setup_utils.deploy_model(
        'anova',
        anova,
        'Returns the p-value from an ANOVA test')
2 changes: 0 additions & 2 deletions tabpy/models/scripts/PCA.py
Expand Up @@ -4,8 +4,6 @@
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import sys
from pathlib import Path
from tabpy.models.utils import setup_utils


Expand Down
2 changes: 0 additions & 2 deletions tabpy/models/scripts/SentimentAnalysis.py
@@ -1,8 +1,6 @@
from textblob import TextBlob
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import sys
from pathlib import Path
from tabpy.models.utils import setup_utils


Expand Down
2 changes: 0 additions & 2 deletions tabpy/models/scripts/tTest.py
@@ -1,6 +1,4 @@
from scipy import stats
import sys
from pathlib import Path
from tabpy.models.utils import setup_utils


Expand Down
1 change: 1 addition & 0 deletions tabpy/tabpy_server/app/ConfigParameters.py
Expand Up @@ -12,4 +12,5 @@ class ConfigParameters:
TABPY_PWD_FILE = 'TABPY_PWD_FILE'
TABPY_LOG_DETAILS = 'TABPY_LOG_DETAILS'
TABPY_STATIC_PATH = 'TABPY_STATIC_PATH'
TABPY_MAX_REQUEST_SIZE_MB = 'TABPY_MAX_REQUEST_SIZE_MB'
TABPY_EVALUATE_TIMEOUT = 'TABPY_EVALUATE_TIMEOUT'
1 change: 1 addition & 0 deletions tabpy/tabpy_server/app/SettingsParameters.py
Expand Up @@ -12,4 +12,5 @@ class SettingsParameters:
ApiVersions = 'versions'
LogRequestContext = 'log_request_context'
StaticPath = 'static_path'
MaxRequestSizeInMb = 'max_request_size_in_mb'
EvaluateTimeout = 'evaluate_timeout'
47 changes: 37 additions & 10 deletions tabpy/tabpy_server/app/app.py
Expand Up @@ -6,6 +6,7 @@
import multiprocessing
import os
import shutil
import signal
import tabpy.tabpy_server
from tabpy.tabpy import __version__
from tabpy.tabpy_server.app.ConfigParameters import ConfigParameters
Expand Down Expand Up @@ -60,32 +61,52 @@ def __init__(self, config_file=None):

def run(self):
application = self._create_tornado_web_app()
max_request_size =\
int(self.settings[SettingsParameters.MaxRequestSizeInMb]) *\
1024 * 1024
logger.info(f'Setting max request size to {max_request_size} bytes')

init_model_evaluator(
self.settings,
self.tabpy_state,
self.python_service)

protocol = self.settings[SettingsParameters.TransferProtocol]
if protocol == 'http':
application.listen(self.settings[SettingsParameters.Port])
elif protocol == 'https':
application.listen(self.settings[SettingsParameters.Port],
ssl_options={
ssl_options = None
if protocol == 'https':
ssl_options = {
'certfile': self.settings[SettingsParameters.CertificateFile],
'keyfile': self.settings[SettingsParameters.KeyFile]
})
else:
}
elif protocol != 'http':
msg = f'Unsupported transfer protocol {protocol}.'
logger.critical(msg)
raise RuntimeError(msg)

application.listen(
self.settings[SettingsParameters.Port],
ssl_options=ssl_options,
max_buffer_size=max_request_size,
max_body_size=max_request_size)

logger.info(
'Web service listening on port '
f'{str(self.settings[SettingsParameters.Port])}')
tornado.ioloop.IOLoop.instance().start()

def _create_tornado_web_app(self):
class TabPyTornadoApp(tornado.web.Application):
    '''
    Tornado application which can be asked to shut down.

    signal_handler only records that a shutdown was requested;
    try_exit stops the IOLoop once it finds that flag set.
    '''

    # Set to True by signal_handler, read by try_exit.
    is_closing = False

    def signal_handler(self, signal, frame=None):
        '''
        Record that shutdown was requested.

        Python calls handlers installed with signal.signal() with two
        arguments - the signal number and the current stack frame - so
        the handler has to accept both. The frame is not used and
        defaults to None so the method can still be called with the
        signal number only.
        '''
        logger.critical(f'Exiting on signal {signal}...')
        self.is_closing = True

    def try_exit(self):
        '''
        Stop the Tornado IOLoop if shutdown has been requested,
        otherwise do nothing.
        '''
        if self.is_closing:
            tornado.ioloop.IOLoop.instance().stop()
            logger.info('Shutting down TabPy...')

logger.info('Initializing TabPy...')
tornado.ioloop.IOLoop.instance().run_sync(
lambda: init_ps_server(self.settings, self.tabpy_state))
Expand All @@ -95,7 +116,7 @@ def _create_tornado_web_app(self):
max_workers=multiprocessing.cpu_count())

# initialize Tornado application
application = tornado.web.Application([
application = TabPyTornadoApp([
# skip MainHandler to use StaticFileHandler .* page requests and
# default to index.html
# (r"/", MainHandler),
Expand All @@ -121,10 +142,12 @@ def _create_tornado_web_app(self):
default_filename="index.html")),
], debug=False, **self.settings)

signal.signal(signal.SIGINT, application.signal_handler)
tornado.ioloop.PeriodicCallback(application.try_exit, 500).start()

return application

@staticmethod
def _parse_cli_arguments():
def _parse_cli_arguments(self):
'''
Parse command line arguments. Expected arguments:
* --config: string
Expand Down Expand Up @@ -303,6 +326,10 @@ def set_parameter(settings_key,
else 'disabled'
logger.info(f'Call context logging is {call_context_state}')

set_parameter(SettingsParameters.MaxRequestSizeInMb,
ConfigParameters.TABPY_MAX_REQUEST_SIZE_MB,
default_val=100)

def _validate_transfer_protocol_settings(self):
if SettingsParameters.TransferProtocol not in self.settings:
msg = 'Missing transfer protocol information.'
Expand Down
7 changes: 6 additions & 1 deletion tabpy/tabpy_server/common/default.conf
Expand Up @@ -20,10 +20,15 @@
# end user info if provided.
# TABPY_LOG_DETAILS = true

# Limit request size (in MB) - any request whose size exceeds the
# specified amount will be rejected by TabPy.
# Default value is 100 MB.
# TABPY_MAX_REQUEST_SIZE_MB = 100

# Configure how long a custom script provided to the /evaluate method
# will run before throwing a TimeoutError.
# The value should be a float representing the timeout time in seconds.
#TABPY_EVALUATE_TIMEOUT = 30
# TABPY_EVALUATE_TIMEOUT = 30

[loggers]
keys=root
Expand Down
2 changes: 1 addition & 1 deletion tabpy/tabpy_server/handlers/management_handler.py
Expand Up @@ -94,7 +94,7 @@ def _add_or_update_endpoint(self, action, name, version, request_data):
self.settings[SettingsParameters.StateFilePath], name, version)
self.logger.log(logging.DEBUG,
f'Checking source path {src_path}...')
_path_checker = _compile(r'^[\\\:a-zA-Z0-9-_~\s/\.]+$')
_path_checker = _compile(r'^[\\\:a-zA-Z0-9-_~\s/\.\(\)]+$')
# copy from staging
if src_path:
if not isinstance(request_data['src_path'], str):
Expand Down
2 changes: 2 additions & 0 deletions tabpy/tabpy_server/handlers/query_plane_handler.py
Expand Up @@ -135,6 +135,7 @@ def _process_query(self, endpoint_name, start):
# Sanitize input data
data = self._sanitize_request_data(json.loads(request_json))
except Exception as e:
self.logger.log(logging.ERROR, str(e))
err_msg = format_exception(e, "Invalid Input Data")
self.error_out(400, err_msg)
return
Expand Down Expand Up @@ -177,6 +178,7 @@ def _process_query(self, endpoint_name, start):
return

except Exception as e:
self.logger.log(logging.ERROR, str(e))
err_msg = format_exception(e, 'process query')
self.error_out(500, 'Error processing query', info=err_msg)
return
Expand Down
32 changes: 0 additions & 32 deletions tabpy/tabpy_server/management/util.py
Expand Up @@ -46,35 +46,3 @@ def _get_state_from_file(state_path, logger=logging.getLogger(__name__)):

return config


_ZERO = timedelta(0)


class _UTC(tzinfo):
"""
A UTC datetime.tzinfo class modeled after the pytz library. It includes a
__reduce__ method for pickling,
"""

def fromutc(self, dt):
if dt.tzinfo is None:
return self.localize(dt)
return super(_UTC, self).fromutc(dt)

def utcoffset(self, dt):
return _ZERO

def tzname(self, dt):
return "UTC"

def dst(self, dt):
return _ZERO

def __reduce__(self):
return _UTC, ()

def __repr__(self):
return "<UTC>"

def __str__(self):
return "UTC"
4 changes: 4 additions & 0 deletions tabpy/tabpy_server/psws/python_service.py
Expand Up @@ -42,6 +42,7 @@ def manage_request(self, msg):
logger.debug(f'Returning response {response}')
return response
except Exception as e:
logger.exception(e)
msg = e
if hasattr(e, 'message'):
msg = e.message
Expand Down Expand Up @@ -90,6 +91,7 @@ def _load_object(self, object_uri, object_url, object_version, is_update,
'status': 'LoadSuccessful',
'last_error': None}
except Exception as e:
logger.exception(e)
logger.error(f'Unable to load QueryObject: path={object_url}, '
f'error={str(e)}')

Expand Down Expand Up @@ -132,6 +134,7 @@ def load_object(self, object_uri, object_url, object_version, is_update,
object_uri, object_url, object_version, is_update,
object_type)
except Exception as e:
logger.exception(e)
logger.error(f'Unable to load QueryObject: path={object_url}, '
f'error={str(e)}')

Expand Down Expand Up @@ -226,6 +229,7 @@ def query(self, object_uri, params, uid):
else:
return UnknownURI(object_uri)
except Exception as e:
logger.exception(e)
err_msg = format_exception(e, '/query')
logger.error(err_msg)
return QueryFailed(uri=object_uri, error=err_msg)

0 comments on commit 00a4d3c

Please sign in to comment.