v 0.8.9 (#353)

* added anova to supported pre-deployed models in tabpy (#350) * added anova to supported pre-deployed models in tabpy * fixed pep8 issue * fixed md * Add Ctrl+C handler (#348) * Add Ctrl+C handler * Fix unit tests warnings for genson * Add test to increase code coverage * Add * Change default from 10Mb to 100Mb for request size * Increase code coverage * Increase code coverage * Convert buffer size to int * Add Ctrl+C test * Delete test added to the wrong folder * Update CHANGELOG * Update test_app.py * Remove dead code * Don't count coverage for multiline expressions * Add test case for invalid protocol * Add test case for _check_endpoint_name * Remove dead code
tableau · Oct 22, 2019 · 00a4d3c · 00a4d3c
1 parent 3055526
commit 00a4d3c
Show file tree

Hide file tree

Showing 31 changed files with 188 additions and 207 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -2,6 +2,7 @@
 # Exclude lines that match patterns from coverage report.
 exclude_lines = 
     if __name__ == .__main__.:
+    \\$
 
 # Only show one number after decimal point in report.
 precision = 1

diff --git a/CHANGELOG b/CHANGELOG
@@ -1,5 +1,13 @@
 # Changelog
 
+## v0.8.9
+
+### Improvements
+
+- Added Ctrl+C handler
+- Added configurable buffer size for HTTP requests
+- Added ANOVA to supported pre-deployed models in tabpy
+
 ## v0.8.7
 
 ### Improvements

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -177,3 +177,15 @@ TabPy package:
 python setup.py sdist bdist_wheel
 python -m twine upload dist/*
 ```
+
+To publish a test version of the package, use the following command:
+
+```sh
+python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+```
+
+To install the package from TestPyPI, use the command:
+
+```sh
+pip install -i https://test.pypi.org/simple/ tabpy
+```
diff --git a/docs/server-config.md b/docs/server-config.md
@@ -85,6 +85,9 @@ at [`logging.config` documentation page](https://docs.python.org/3.6/library/log
   not set.
 - `TABPY_LOG_DETAILS` - when set to `true` additional call information
   (caller IP, URL, client info, etc.) is logged. Default value - `false`.
+- `TABPY_MAX_REQUEST_SIZE_MB` - maximum request size supported by the TabPy
+  server, in megabytes. Requests exceeding this size are rejected. Default
+  value - `100`.
 - `TABPY_EVALUATE_TIMEOUT` - script evaluation timeout in seconds. Default
   value - `30`.
 
@@ -116,10 +119,15 @@ settings._
 # end user info if provided.
 # TABPY_LOG_DETAILS = true
 
+# Limit request size (in MB) - any request whose size exceeds
+# the specified amount will be rejected by TabPy.
+# Default value is 100 MB.
+# TABPY_MAX_REQUEST_SIZE_MB = 100
+
 # Configure how long a custom script provided to the /evaluate method
 # will run before throwing a TimeoutError.
 # The value should be a float representing the timeout time in seconds.
-#TABPY_EVALUATE_TIMEOUT = 30
+# TABPY_EVALUATE_TIMEOUT = 30
 
 [loggers]
 keys=root

diff --git a/docs/tabpy-tools.md b/docs/tabpy-tools.md
@@ -14,6 +14,7 @@ on TabPy server.
   * [Principal Component Analysis (PCA)](#principal-component-analysis-pca)
   * [Sentiment Analysis](#sentiment-analysis)
   * [T-Test](#t-test)
+  * [ANOVA](#anova)
 - [Providing Schema Metadata](#providing-schema-metadata)
 - [Querying an Endpoint](#querying-an-endpoint)
 - [Evaluating Arbitrary Python Scripts](#evaluating-arbitrary-python-scripts)
@@ -318,6 +319,22 @@ The function returns a two-tailed [p-value](https://en.wikipedia.org/wiki/P-valu
 you may reject or fail to reject the null hypothesis.
 <!-- markdownlint-enable MD029 -->
 
+### ANOVA
+
+[Analysis of variance](https://en.wikipedia.org/wiki/Analysis_of_variance)
+helps inform if two or more group means within a sample differ. By measuring
+the variation between and among groups and computing the resulting F-statistic
+we are able to obtain a p-value. While a statistically significant p-value
+will inform you that at least 2 of your groups’ means are different from each
+other, it will not tell you which of the groups differ.
+
+You can call ANOVA from Tableau in the following way:
+
+```python
+
+tabpy.query('anova', _arg1, _arg2, _arg3)['response']
+```
+
 ## Providing Schema Metadata
 
 As soon as you share your deployed functions, you also need to share metadata

diff --git a/tabpy/VERSION b/tabpy/VERSION
@@ -1 +1 @@
-0.8.7
+0.8.9
diff --git a/tabpy/models/deploy_models.py b/tabpy/models/deploy_models.py
@@ -2,7 +2,6 @@
 import os
 import sys
 import platform
-import runpy
 import subprocess
 from pathlib import Path
 from tabpy.models.utils import setup_utils

diff --git a/tabpy/models/scripts/ANOVA.py b/tabpy/models/scripts/ANOVA.py
@@ -0,0 +1,25 @@
+import scipy.stats as stats
+from tabpy.models.utils import setup_utils
+
+
+def anova(_arg1, _arg2, *_argN):
+    '''
+    ANOVA is a statistical hypothesis test that is used to compare
+    two or more group means for equality. For more information on
+    the function and how to use it please refer to tabpy-tools.md
+    '''
+
+    cols = [_arg1, _arg2] + list(_argN)
+    for col in cols:
+        if not isinstance(col[0], (int, float)):
+            print("values must be numeric")
+            raise ValueError
+    _, p_value = stats.f_oneway(_arg1, _arg2, *_argN)
+    return p_value
+
+
+if __name__ == '__main__':
+    setup_utils.deploy_model(
+        'anova',
+        anova,
+        'Returns the p-value form an ANOVA test')
diff --git a/tabpy/models/scripts/PCA.py b/tabpy/models/scripts/PCA.py
@@ -4,8 +4,6 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.preprocessing import LabelEncoder
 from sklearn.preprocessing import OneHotEncoder
-import sys
-from pathlib import Path
 from tabpy.models.utils import setup_utils
 
 

diff --git a/tabpy/models/scripts/SentimentAnalysis.py b/tabpy/models/scripts/SentimentAnalysis.py
@@ -1,8 +1,6 @@
 from textblob import TextBlob
 import nltk
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
-import sys
-from pathlib import Path
 from tabpy.models.utils import setup_utils
 
 

diff --git a/tabpy/models/scripts/tTest.py b/tabpy/models/scripts/tTest.py
@@ -1,6 +1,4 @@
 from scipy import stats
-import sys
-from pathlib import Path
 from tabpy.models.utils import setup_utils
 
 

diff --git a/tabpy/tabpy_server/app/ConfigParameters.py b/tabpy/tabpy_server/app/ConfigParameters.py
@@ -12,4 +12,5 @@ class ConfigParameters:
     TABPY_PWD_FILE = 'TABPY_PWD_FILE'
     TABPY_LOG_DETAILS = 'TABPY_LOG_DETAILS'
     TABPY_STATIC_PATH = 'TABPY_STATIC_PATH'
+    TABPY_MAX_REQUEST_SIZE_MB = 'TABPY_MAX_REQUEST_SIZE_MB'
     TABPY_EVALUATE_TIMEOUT = 'TABPY_EVALUATE_TIMEOUT'
diff --git a/tabpy/tabpy_server/app/SettingsParameters.py b/tabpy/tabpy_server/app/SettingsParameters.py
@@ -12,4 +12,5 @@ class SettingsParameters:
     ApiVersions = 'versions'
     LogRequestContext = 'log_request_context'
     StaticPath = 'static_path'
+    MaxRequestSizeInMb = 'max_request_size_in_mb'
     EvaluateTimeout = 'evaluate_timeout'
diff --git a/tabpy/tabpy_server/app/app.py b/tabpy/tabpy_server/app/app.py
@@ -6,6 +6,7 @@
 import multiprocessing
 import os
 import shutil
+import signal
 import tabpy.tabpy_server
 from tabpy.tabpy import __version__
 from tabpy.tabpy_server.app.ConfigParameters import ConfigParameters
@@ -60,32 +61,52 @@ def __init__(self, config_file=None):
 
     def run(self):
         application = self._create_tornado_web_app()
+        max_request_size =\
+            int(self.settings[SettingsParameters.MaxRequestSizeInMb]) *\
+            1024 * 1024
+        logger.info(f'Setting max request size to {max_request_size} bytes')
 
         init_model_evaluator(
             self.settings,
             self.tabpy_state,
             self.python_service)
 
         protocol = self.settings[SettingsParameters.TransferProtocol]
-        if protocol == 'http':
-            application.listen(self.settings[SettingsParameters.Port])
-        elif protocol == 'https':
-            application.listen(self.settings[SettingsParameters.Port],
-                               ssl_options={
+        ssl_options = None
+        if protocol == 'https':
+            ssl_options = {
                 'certfile': self.settings[SettingsParameters.CertificateFile],
                 'keyfile': self.settings[SettingsParameters.KeyFile]
-            })
-        else:
+            }
+        elif protocol != 'http':
             msg = f'Unsupported transfer protocol {protocol}.'
             logger.critical(msg)
             raise RuntimeError(msg)
 
+        application.listen(
+            self.settings[SettingsParameters.Port],
+            ssl_options=ssl_options,
+            max_buffer_size=max_request_size,
+            max_body_size=max_request_size)
+
         logger.info(
             'Web service listening on port '
             f'{str(self.settings[SettingsParameters.Port])}')
         tornado.ioloop.IOLoop.instance().start()
 
     def _create_tornado_web_app(self):
+        class TabPyTornadoApp(tornado.web.Application):
+            is_closing = False
+
+            def signal_handler(self, signal):
+                logger.critical(f'Exiting on signal {signal}...')
+                self.is_closing = True
+
+            def try_exit(self):
+                if self.is_closing:
+                    tornado.ioloop.IOLoop.instance().stop()
+                    logger.info('Shutting down TabPy...')
+
         logger.info('Initializing TabPy...')
         tornado.ioloop.IOLoop.instance().run_sync(
             lambda: init_ps_server(self.settings, self.tabpy_state))
@@ -95,7 +116,7 @@ def _create_tornado_web_app(self):
             max_workers=multiprocessing.cpu_count())
 
         # initialize Tornado application
-        application = tornado.web.Application([
+        application = TabPyTornadoApp([
             # skip MainHandler to use StaticFileHandler .* page requests and
             # default to index.html
             # (r"/", MainHandler),
@@ -121,10 +142,12 @@ def _create_tornado_web_app(self):
                   default_filename="index.html")),
         ], debug=False, **self.settings)
 
+        signal.signal(signal.SIGINT, application.signal_handler)
+        tornado.ioloop.PeriodicCallback(application.try_exit, 500).start()
+
         return application
 
-    @staticmethod
-    def _parse_cli_arguments():
+    def _parse_cli_arguments(self):
         '''
         Parse command line arguments. Expected arguments:
         * --config: string
@@ -303,6 +326,10 @@ def set_parameter(settings_key,
             else 'disabled'
         logger.info(f'Call context logging is {call_context_state}')
 
+        set_parameter(SettingsParameters.MaxRequestSizeInMb,
+                      ConfigParameters.TABPY_MAX_REQUEST_SIZE_MB,
+                      default_val=100)
+
     def _validate_transfer_protocol_settings(self):
         if SettingsParameters.TransferProtocol not in self.settings:
             msg = 'Missing transfer protocol information.'

diff --git a/tabpy/tabpy_server/common/default.conf b/tabpy/tabpy_server/common/default.conf
@@ -20,10 +20,15 @@
 # end user info if provided.
 # TABPY_LOG_DETAILS = true
 
+# Limit request size (in MB) - any request whose size exceeds
+# the specified amount will be rejected by TabPy.
+# Default value is 100 MB.
+# TABPY_MAX_REQUEST_SIZE_MB = 100
+
 # Configure how long a custom script provided to the /evaluate method
 # will run before throwing a TimeoutError.
 # The value should be a float representing the timeout time in seconds.
-#TABPY_EVALUATE_TIMEOUT = 30
+# TABPY_EVALUATE_TIMEOUT = 30
 
 [loggers]
 keys=root

diff --git a/tabpy/tabpy_server/handlers/management_handler.py b/tabpy/tabpy_server/handlers/management_handler.py
@@ -94,7 +94,7 @@ def _add_or_update_endpoint(self, action, name, version, request_data):
                 self.settings[SettingsParameters.StateFilePath], name, version)
             self.logger.log(logging.DEBUG,
                             f'Checking source path {src_path}...')
-            _path_checker = _compile(r'^[\\\:a-zA-Z0-9-_~\s/\.]+$')
+            _path_checker = _compile(r'^[\\\:a-zA-Z0-9-_~\s/\.\(\)]+$')
             # copy from staging
             if src_path:
                 if not isinstance(request_data['src_path'], str):

diff --git a/tabpy/tabpy_server/handlers/query_plane_handler.py b/tabpy/tabpy_server/handlers/query_plane_handler.py
@@ -135,6 +135,7 @@ def _process_query(self, endpoint_name, start):
             # Sanitize input data
             data = self._sanitize_request_data(json.loads(request_json))
         except Exception as e:
+            self.logger.log(logging.ERROR, str(e))
             err_msg = format_exception(e, "Invalid Input Data")
             self.error_out(400, err_msg)
             return
@@ -177,6 +178,7 @@ def _process_query(self, endpoint_name, start):
                 return
 
         except Exception as e:
+            self.logger.log(logging.ERROR, str(e))
             err_msg = format_exception(e, 'process query')
             self.error_out(500, 'Error processing query', info=err_msg)
             return

diff --git a/tabpy/tabpy_server/management/util.py b/tabpy/tabpy_server/management/util.py
@@ -46,35 +46,3 @@ def _get_state_from_file(state_path, logger=logging.getLogger(__name__)):
 
     return config
 
-
-_ZERO = timedelta(0)
-
-
-class _UTC(tzinfo):
-    """
-    A UTC datetime.tzinfo class modeled after the pytz library. It includes a
-    __reduce__ method for pickling,
-    """
-
-    def fromutc(self, dt):
-        if dt.tzinfo is None:
-            return self.localize(dt)
-        return super(_UTC, self).fromutc(dt)
-
-    def utcoffset(self, dt):
-        return _ZERO
-
-    def tzname(self, dt):
-        return "UTC"
-
-    def dst(self, dt):
-        return _ZERO
-
-    def __reduce__(self):
-        return _UTC, ()
-
-    def __repr__(self):
-        return "<UTC>"
-
-    def __str__(self):
-        return "UTC"
diff --git a/tabpy/tabpy_server/psws/python_service.py b/tabpy/tabpy_server/psws/python_service.py
@@ -42,6 +42,7 @@ def manage_request(self, msg):
             logger.debug(f'Returning response {response}')
             return response
         except Exception as e:
+            logger.exception(e)
             msg = e
             if hasattr(e, 'message'):
                 msg = e.message
@@ -90,6 +91,7 @@ def _load_object(self, object_uri, object_url, object_version, is_update,
                                               'status': 'LoadSuccessful',
                                               'last_error': None}
         except Exception as e:
+            logger.exception(e)
             logger.error(f'Unable to load QueryObject: path={object_url}, '
                          f'error={str(e)}')
 
@@ -132,6 +134,7 @@ def load_object(self, object_uri, object_url, object_version, is_update,
                     object_uri, object_url, object_version, is_update,
                     object_type)
         except Exception as e:
+            logger.exception(e)
             logger.error(f'Unable to load QueryObject: path={object_url}, '
                          f'error={str(e)}')
 
@@ -226,6 +229,7 @@ def query(self, object_uri, params, uid):
             else:
                 return UnknownURI(object_uri)
         except Exception as e:
+            logger.exception(e)
             err_msg = format_exception(e, '/query')
             logger.error(err_msg)
             return QueryFailed(uri=object_uri, error=err_msg)