Merge e5e0c19 into fb581bf

sbabayan · web-flow · commit c825b365b889 · 2019-06-06T22:50:01.000Z
diff --git a/docs/tabpy-tools.md b/docs/tabpy-tools.md
@@ -8,6 +8,7 @@ on TabPy server.
 - [Connecting to TabPy](#connecting-to-tabpy)
 - [Authentication](#authentication)
 - [Deploying a Function](#deploying-a-function)
+- [Predeployed Functions](#predeployed-functions)
 - [Providing Schema Metadata](#providing-schema-metadata)
 - [Querying an Endpoint](#querying-an-endpoint)
 - [Evaluating Arbitrary Python Scripts](#evaluating-arbitrary-python-scripts)
@@ -265,6 +266,50 @@ tabpy.query('Sentiment Analysis', _arg1, library='textblob')[‘response’]
 
 ```
 
+### T-Test
+
+A [t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) is a statistical
+hypothesis test that is used to compare two sample means or a sample’s mean against
+a known population mean. The t-test should be used when the means of the samples
+follow a normal distribution but the variance may not be known.
+
+TabPy’s pre-deployed t-test implementation can be called using the following syntax,
+
+```python
+
+tabpy.query('ttest', _arg1, _arg2)['response']
+
+```
+
+and is capable of performing two types of t-tests:
+
+<!-- markdownlint-disable MD029 -->
+1\. [A t-test for the means of two independent samples that does not assume equal variance (Welch's t-test)](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html)
+This is a two-sided t-test with the null hypothesis being that the mean of
+sample1 is equal to the mean of sample2.
+_arg1 (list of numeric values): a list of independent observations
+_arg2 (list of numeric values): a list of independent observations equal to
+the length of _arg1
+
+Alternatively, your data may not be split into separate measures. If that is
+the case you can pass the following fields to ttest,
+
+_arg1 (list of numeric values): a list of independent observations
+_arg2 (list of categorical variables with cardinality two): a binary factor
+that maps each observation in _arg1 to either sample1 or sample2 (this list
+should be equal to the length of _arg1)
+
+2\. [A t-test for the mean of one group](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html)
+_arg1 (list of numeric values): a list of independent observations
+_arg2 (a numeric value): the known population mean
+A two-sided t-test with the null hypothesis being that the mean of a sample of
+independent observations is equal to the given population mean.
+
+The function returns a two-tailed [p-value](https://en.wikipedia.org/wiki/P-value)
+(between 0 and 1). Depending on your [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
+you may reject or fail to reject the null hypothesis.
+<!-- markdownlint-enable MD029 -->
+
 ## Providing Schema Metadata
 
 As soon as you share your deployed functions, you also need to share metadata
diff --git a/models/scripts/PCA.py b/models/scripts/PCA.py
@@ -1,4 +1,3 @@
-from tabpy_tools.client import Client
 import pandas as pd
 from numpy import array
 from sklearn.decomposition import PCA as sklearnPCA
@@ -60,25 +59,6 @@ def PCA(component, _arg1, _arg2, *_argN):
 
 
 if __name__ == '__main__':
-    # running from setup.py
-    if len(sys.argv) > 1:
-        config_file_path = sys.argv[1]
-    else:
-        config_file_path = setup_utils.get_default_config_file_path()
-    port, auth_on, prefix = setup_utils.parse_config(config_file_path)
-
-    connection = Client(f'{prefix}://localhost:{port}/')
-
-    if auth_on:
-        # credentials are passed in from setup.py
-        if len(sys.argv) == 4:
-            user, passwd = sys.argv[2], sys.argv[3]
-        # running PCA independently
-        else:
-            user, passwd = setup_utils.get_creds()
-        connection.set_credentials(user, passwd)
-
-    connection.deploy('PCA', PCA,
-                      'Returns the specified principal component.',
-                      override=True)
-    print("Successfully deployed PCA")
+    setup_utils.main('PCA',
+                     PCA,
+                     'Returns the specified principal component')
diff --git a/models/scripts/SentimentAnalysis.py b/models/scripts/SentimentAnalysis.py
@@ -1,4 +1,3 @@
-from tabpy_tools.client import Client
 from textblob import TextBlob
 import nltk
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
@@ -43,25 +42,7 @@ def SentimentAnalysis(_arg1, library='nltk'):
 
 
 if __name__ == '__main__':
-    # running from setup.py
-    if len(sys.argv) > 1:
-        config_file_path = sys.argv[1]
-    else:
-        config_file_path = setup_utils.get_default_config_file_path()
-    port, auth_on, prefix = setup_utils.parse_config(config_file_path)
-
-    connection = Client(f'{prefix}://localhost:{port}/')
-
-    if auth_on:
-        # credentials are passed in from setup.py
-        if len(sys.argv) == 4:
-            user, passwd = sys.argv[2], sys.argv[3]
-        # running Sentiment Analysis independently
-        else:
-            user, passwd = setup_utils.get_creds()
-        connection.set_credentials(user, passwd)
-
-    connection.deploy('Sentiment Analysis', SentimentAnalysis,
-                      'Returns a sentiment score between -1 and '
-                      '1 for a given string.', override=True)
-    print("Successfully deployed SentimentAnalysis")
+    setup_utils.main('Sentiment Analysis',
+                     SentimentAnalysis,
+                     'Returns a sentiment score between -1 and 1 for '
+                     'a given string')
diff --git a/models/scripts/tTest.py b/models/scripts/tTest.py
@@ -0,0 +1,44 @@
+from scipy import stats
+import sys
+from pathlib import Path
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models'))
+from utils import setup_utils
+
+
+def ttest(_arg1, _arg2):
+    '''
+    Return the two-sided p-value of a t-test. If _arg2 has one element it is
+    the population mean (one-sample test); if it has exactly two distinct
+    values it labels each _arg1 observation with its group; otherwise it is
+    a second sample. Two-sample tests use equal_var=False. See tabpy-tools.md
+    '''
+    # one sample test with mean
+    if len(_arg2) == 1:
+        test_stat, p_value = stats.ttest_1samp(_arg1, _arg2)
+        return p_value
+    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
+    elif len(set(_arg2)) == 2:
+        # each sample in _arg1 needs to have a corresponding classification
+        # in _arg2
+        if len(_arg1) != len(_arg2):
+            raise ValueError('_arg1 and _arg2 must have the same length')
+        class1, class2 = set(_arg2)
+        sample1 = []
+        sample2 = []
+        for value, label in zip(_arg1, _arg2):
+            if label == class1:
+                sample1.append(value)
+            else:
+                sample2.append(value)
+        test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
+        return p_value
+    # arg1 is a sample and arg2 is a sample
+    else:
+        test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
+        return p_value
+
+
+if __name__ == '__main__':  # deploy the endpoint when run as a script
+    setup_utils.main('ttest',
+                     ttest,
+                     'Returns the p-value from a t-test')
diff --git a/models/utils/setup_utils.py b/models/utils/setup_utils.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 import getpass
 import sys
+from tabpy_tools.client import Client
 
 
 def get_default_config_file_path():
@@ -31,3 +32,25 @@ def get_creds():
         user = sys.stdin.readline().rstrip()
         passwd = sys.stdin.readline().rstrip()
     return [user, passwd]
+
+def main(funcName, func, funcDescription):
+    '''Deploy func to the TabPy server on localhost under the name funcName.'''
+    if len(sys.argv) > 1:
+        config_file_path = sys.argv[1]  # passed in when run from setup.py
+    else:
+        config_file_path = get_default_config_file_path()
+    port, auth_on, prefix = parse_config(config_file_path)
+
+    connection = Client(f'{prefix}://localhost:{port}/')
+
+    if auth_on:
+        # credentials are passed in from setup.py
+        if len(sys.argv) == 4:
+            user, passwd = sys.argv[2], sys.argv[3]
+        # running the model script independently
+        else:
+            user, passwd = get_creds()
+        connection.set_credentials(user, passwd)
+
+    connection.deploy(funcName, func, funcDescription, override=True)
+    print(f'Successfully deployed {funcName}')
diff --git a/tests/integration/test_deploy_model_ssl_off_auth_off.py b/tests/integration/test_deploy_model_ssl_off_auth_off.py
@@ -5,16 +5,13 @@
 
 class TestDeployModelSSLOffAuthOff(integ_test_base.IntegTestBase):
     def test_deploy_ssl_off_auth_off(self):
+        models = ['PCA', 'Sentiment%20Analysis', "ttest"]
         path = str(Path('models', 'setup.py'))
         subprocess.call([self.py, path, self._get_config_file_name()])
 
         conn = self._get_connection()
-        conn.request("GET", "/endpoints/PCA")
-        PCA_request = conn.getresponse()
-        self.assertEqual(200, PCA_request.status)
-        PCA_request.read()
-
-        conn.request("GET", "/endpoints/Sentiment%20Analysis")
-        SentimentAnalysis_request = conn.getresponse()
-        self.assertEqual(200, SentimentAnalysis_request.status)
-        SentimentAnalysis_request.read()
+        for m in models:
+            conn.request("GET", f'/endpoints/{m}')
+            m_request = conn.getresponse()
+            self.assertEqual(200, m_request.status)
+            m_request.read()
diff --git a/tests/integration/test_deploy_model_ssl_off_auth_on.py b/tests/integration/test_deploy_model_ssl_off_auth_on.py
@@ -9,6 +9,7 @@ def _get_pwd_file(self) -> str:
         return './tests/integration/resources/pwdfile.txt'
 
     def test_deploy_ssl_off_auth_on(self):
+        models = ['PCA', 'Sentiment%20Analysis', "ttest"]
         path = str(Path('models', 'setup.py'))
         p = subprocess.run([self.py, path, self._get_config_file_name()],
                            input=b'user1\nP@ssw0rd\n')
@@ -20,15 +21,11 @@ def test_deploy_ssl_off_auth_on(self):
                 'Basic ' +
                 base64.b64encode('user1:P@ssw0rd'.
                                  encode('utf-8')).decode('utf-8')
-            }
+        }
 
         conn = self._get_connection()
-        conn.request("GET", "/endpoints/PCA", headers=headers)
-        PCA_request = conn.getresponse()
-        self.assertEqual(200, PCA_request.status)
-        PCA_request.read()
-
-        conn.request("GET", "/endpoints/Sentiment%20Analysis", headers=headers)
-        SentimentAnalysis_request = conn.getresponse()
-        self.assertEqual(200, SentimentAnalysis_request.status)
-        SentimentAnalysis_request.read()
+        for m in models:
+            conn.request("GET", f'/endpoints/{m}', headers=headers)
+            m_request = conn.getresponse()
+            self.assertEqual(200, m_request.status)
+            m_request.read()
diff --git a/tests/integration/test_deploy_model_ssl_on_auth_off.py b/tests/integration/test_deploy_model_ssl_on_auth_off.py
@@ -15,6 +15,7 @@ def _get_key_file_name(self) -> str:
         return './tests/integration/resources/2019_04_24_to_3018_08_25.key'
 
     def test_deploy_ssl_on_auth_off(self):
+        models = ['PCA', 'Sentiment%20Analysis', "ttest"]
         path = str(Path('models', 'setup.py'))
         subprocess.call([self.py, path, self._get_config_file_name()])
 
@@ -24,12 +25,7 @@ def test_deploy_ssl_on_auth_off(self):
         # Do not warn about insecure request
         requests.packages.urllib3.disable_warnings()
 
-        PCA_response = session.get(url=f'{self._get_transfer_protocol()}://'
-                                   'localhost:9004/endpoints/PCA')
-        self.assertEqual(200, PCA_response.status_code)
-
-        SentimentAnalysis_response = session.get(
-            url=f'{self._get_transfer_protocol()}://'
-            'localhost:9004/endpoints/'
-            'Sentiment Analysis')
-        self.assertEqual(200, SentimentAnalysis_response.status_code)
+        for m in models:
+            m_response = session.get(url=f'{self._get_transfer_protocol()}://'
+                                     f'localhost:9004/endpoints/{m}')
+            self.assertEqual(200, m_response.status_code)
diff --git a/tests/integration/test_deploy_model_ssl_on_auth_on.py b/tests/integration/test_deploy_model_ssl_on_auth_on.py
@@ -19,6 +19,7 @@ def _get_pwd_file(self) -> str:
         return './tests/integration/resources/pwdfile.txt'
 
     def test_deploy_ssl_on_auth_on(self):
+        models = ['PCA', 'Sentiment%20Analysis', "ttest"]
         path = str(Path('models', 'setup.py'))
         p = subprocess.run([self.py, path, self._get_config_file_name()],
                            input=b'user1\nP@ssw0rd\n')
@@ -36,13 +37,8 @@ def test_deploy_ssl_on_auth_on(self):
         # Do not warn about insecure request
         requests.packages.urllib3.disable_warnings()
 
-        PCA_response = session.get(url=f'{self._get_transfer_protocol()}'
-                                   '://localhost:9004/endpoints/PCA',
-                                   headers=headers)
-        self.assertEqual(200, PCA_response.status_code)
-
-        SentimentAnalysis_response = session.get(
-            url=f'{self._get_transfer_protocol()}'
-            '://localhost:9004/endpoints/'
-            'Sentiment Analysis', headers=headers)
-        self.assertEqual(200, SentimentAnalysis_response.status_code)
+        for m in models:
+            m_response = session.get(url=f'{self._get_transfer_protocol()}://'
+                                     f'localhost:9004/endpoints/{m}',
+                                     headers=headers)
+            self.assertEqual(200, m_response.status_code)