45 changes: 45 additions & 0 deletions docs/tabpy-tools.md
@@ -8,6 +8,7 @@ on TabPy server.
- [Connecting to TabPy](#connecting-to-tabpy)
- [Authentication](#authentication)
- [Deploying a Function](#deploying-a-function)
- [Predeployed Functions](#predeployed-functions)
- [Providing Schema Metadata](#providing-schema-metadata)
- [Querying an Endpoint](#querying-an-endpoint)
- [Evaluating Arbitrary Python Scripts](#evaluating-arbitrary-python-scripts)
@@ -265,6 +266,50 @@ tabpy.query('Sentiment Analysis', _arg1, library='textblob')['response']

```

### T-Test

A [t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) is a statistical
hypothesis test used to compare two sample means or a sample’s mean against
a known population mean. The t-test should be used when the sample means
follow a normal distribution but the population variance is unknown.

TabPy’s pre-deployed t-test implementation can be called using the following syntax,

```python

tabpy.query('ttest', _arg1, _arg2)['response']

```

and is capable of performing two types of t-tests:

<!-- markdownlint-disable MD029 -->
1\. [A t-test for the means of two independent samples](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html)
This is a two-sided t-test with the null hypothesis that the mean of sample1
is equal to the mean of sample2. The pre-deployed implementation calls
`scipy.stats.ttest_ind` with `equal_var=False` (Welch’s t-test), so the two
samples are not assumed to have equal variance.

- `_arg1` (list of numeric values): a list of independent observations
- `_arg2` (list of numeric values): a list of independent observations equal
  in length to `_arg1`

Alternatively, your data may not be split into separate measures. If that is
the case, you can pass the following fields to ttest:

- `_arg1` (list of numeric values): a list of independent observations
- `_arg2` (list of categorical variables with cardinality two): a binary
  factor that maps each observation in `_arg1` to either sample1 or sample2
  (this list must be equal in length to `_arg1`)

2\. [A t-test for the mean of one group](https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.ttest_1samp.html)
This is a two-sided t-test with the null hypothesis that the mean of a sample
of independent observations is equal to the given population mean.

- `_arg1` (list of numeric values): a list of independent observations
- `_arg2` (a numeric value): the known population mean

The function returns a two-tailed [p-value](https://en.wikipedia.org/wiki/P-value)
(between 0 and 1). Depending on your [significance level](https://en.wikipedia.org/wiki/Statistical_significance),
you may reject or fail to reject the null hypothesis. Example calls for each
mode are sketched below.
<!-- markdownlint-enable MD029 -->
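
For illustration, here is how each mode might be invoked from a connected
TabPy client (a sketch with made-up sample values; `tabpy` is the client
object from [Connecting to TabPy](#connecting-to-tabpy)):

```python

# two independent numeric samples (Welch's two-sample t-test)
tabpy.query('ttest', [6.2, 5.1, 4.8, 7.0], [5.9, 4.4, 6.1, 5.2])['response']

# one numeric measure plus a binary factor labeling each observation
tabpy.query('ttest', [6.2, 5.1, 4.8, 7.0], ['a', 'b', 'a', 'b'])['response']

# one sample against a known population mean of 5.5
tabpy.query('ttest', [6.2, 5.1, 4.8, 7.0], [5.5])['response']

```

Note that ttest treats `_arg2` as a binary factor whenever it contains
exactly two distinct values, so a numeric sample with only two distinct
values would be routed to the factor branch.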

## Providing Schema Metadata

As soon as you share your deployed functions, you also need to share metadata
26 changes: 3 additions & 23 deletions models/scripts/PCA.py
@@ -1,4 +1,3 @@
from tabpy_tools.client import Client
import pandas as pd
from numpy import array
from sklearn.decomposition import PCA as sklearnPCA
@@ -60,25 +59,6 @@ def PCA(component, _arg1, _arg2, *_argN):


if __name__ == '__main__':
    # running from setup.py
    if len(sys.argv) > 1:
        config_file_path = sys.argv[1]
    else:
        config_file_path = setup_utils.get_default_config_file_path()
    port, auth_on, prefix = setup_utils.parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        # credentials are passed in from setup.py
        if len(sys.argv) == 4:
            user, passwd = sys.argv[2], sys.argv[3]
        # running PCA independently
        else:
            user, passwd = setup_utils.get_creds()
        connection.set_credentials(user, passwd)

    connection.deploy('PCA', PCA,
                      'Returns the specified principal component.',
                      override=True)
    print("Successfully deployed PCA")
    setup_utils.main('PCA',
                     PCA,
                     'Returns the specified principal component')
27 changes: 4 additions & 23 deletions models/scripts/SentimentAnalysis.py
@@ -1,4 +1,3 @@
from tabpy_tools.client import Client
from textblob import TextBlob
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
@@ -43,25 +42,7 @@ def SentimentAnalysis(_arg1, library='nltk'):


if __name__ == '__main__':
    # running from setup.py
    if len(sys.argv) > 1:
        config_file_path = sys.argv[1]
    else:
        config_file_path = setup_utils.get_default_config_file_path()
    port, auth_on, prefix = setup_utils.parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        # credentials are passed in from setup.py
        if len(sys.argv) == 4:
            user, passwd = sys.argv[2], sys.argv[3]
        # running Sentiment Analysis independently
        else:
            user, passwd = setup_utils.get_creds()
        connection.set_credentials(user, passwd)

    connection.deploy('Sentiment Analysis', SentimentAnalysis,
                      'Returns a sentiment score between -1 and '
                      '1 for a given string.', override=True)
    print("Successfully deployed SentimentAnalysis")
    setup_utils.main('Sentiment Analysis',
                     SentimentAnalysis,
                     'Returns a sentiment score between -1 and 1 for '
                     'a given string')
44 changes: 44 additions & 0 deletions models/scripts/tTest.py
@@ -0,0 +1,44 @@
from scipy import stats
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models'))
from utils import setup_utils


def ttest(_arg1, _arg2):
    '''
    A t-test is a statistical hypothesis test used to compare two sample
    means or a sample’s mean against a known population mean. For more
    information on the function and how to use it please refer to
    tabpy-tools.md
    '''
    # one-sample test against a known population mean
    if len(_arg2) == 1:
        # _arg2 arrives as a single-element list; unpack the scalar so
        # scipy returns a scalar p-value rather than a one-element array
        test_stat, p_value = stats.ttest_1samp(_arg1, _arg2[0])
        return p_value
    # two-sample t-test where _arg1 is numeric and _arg2 is a binary factor
    elif len(set(_arg2)) == 2:
        # each observation in _arg1 needs a corresponding classification
        # in _arg2
        if len(_arg1) != len(_arg2):
            raise ValueError('_arg1 and _arg2 must have the same length')
        class1, class2 = set(_arg2)
        sample1 = []
        sample2 = []
        for i in range(len(_arg1)):
            if _arg2[i] == class1:
                sample1.append(_arg1[i])
            else:
                sample2.append(_arg1[i])
        test_stat, p_value = stats.ttest_ind(sample1, sample2,
                                             equal_var=False)
        return p_value
    # _arg1 and _arg2 are both numeric samples
    else:
        test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
        return p_value


if __name__ == '__main__':
    setup_utils.main('ttest',
                     ttest,
                     'Returns the p-value from a t-test')
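
As a quick local sanity check (a sketch, not part of this change set), the
three dispatch branches can be exercised directly, assuming `scipy` is
installed and the import path below matches your checkout:

```python

# hypothetical import path; run from the repository root
from models.scripts.tTest import ttest

obs = [6.2, 5.1, 4.8, 7.0]

print(ttest(obs, [5.5]))                  # one sample vs. a known mean
print(ttest(obs, ['a', 'b', 'a', 'b']))   # binary factor splits obs in two
print(ttest(obs, [5.9, 4.4, 6.1, 5.2]))   # two independent samples

```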
23 changes: 23 additions & 0 deletions models/utils/setup_utils.py
@@ -2,6 +2,7 @@
from pathlib import Path
import getpass
import sys
from tabpy_tools.client import Client


def get_default_config_file_path():
@@ -31,3 +32,25 @@ def get_creds():
    user = sys.stdin.readline().rstrip()
    passwd = sys.stdin.readline().rstrip()
    return [user, passwd]


def main(funcName, func, funcDescription):
    # running from setup.py
    if len(sys.argv) > 1:
        config_file_path = sys.argv[1]
    else:
        config_file_path = get_default_config_file_path()
    port, auth_on, prefix = parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        # credentials are passed in from setup.py
        if len(sys.argv) == 4:
            user, passwd = sys.argv[2], sys.argv[3]
        # running the model independently
        else:
            user, passwd = get_creds()
        connection.set_credentials(user, passwd)

    connection.deploy(funcName, func, funcDescription, override=True)
    print(f'Successfully deployed {funcName}')
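
With this helper in place, a model script reduces to its function definition
plus one `setup_utils.main` call. A minimal sketch of a hypothetical new
model (`Median` is illustrative only, not part of this change):

```python

from numpy import median

from utils import setup_utils  # assumes models/ is importable, as in tTest.py


def Median(_arg1):
    # return the median of a list of numeric observations as a plain float
    return float(median(_arg1))


if __name__ == '__main__':
    setup_utils.main('Median', Median,
                     'Returns the median of a list of numbers')

```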
15 changes: 6 additions & 9 deletions tests/integration/test_deploy_model_ssl_off_auth_off.py
@@ -5,16 +5,13 @@

class TestDeployModelSSLOffAuthOff(integ_test_base.IntegTestBase):
    def test_deploy_ssl_off_auth_off(self):
        models = ['PCA', 'Sentiment%20Analysis', 'ttest']
        path = str(Path('models', 'setup.py'))
        subprocess.call([self.py, path, self._get_config_file_name()])

        conn = self._get_connection()
        conn.request("GET", "/endpoints/PCA")
        PCA_request = conn.getresponse()
        self.assertEqual(200, PCA_request.status)
        PCA_request.read()

        conn.request("GET", "/endpoints/Sentiment%20Analysis")
        SentimentAnalysis_request = conn.getresponse()
        self.assertEqual(200, SentimentAnalysis_request.status)
        SentimentAnalysis_request.read()
        for m in models:
            conn.request("GET", f'/endpoints/{m}')
            m_request = conn.getresponse()
            self.assertEqual(200, m_request.status)
            m_request.read()
17 changes: 7 additions & 10 deletions tests/integration/test_deploy_model_ssl_off_auth_on.py
@@ -9,6 +9,7 @@ def _get_pwd_file(self) -> str:
        return './tests/integration/resources/pwdfile.txt'

    def test_deploy_ssl_off_auth_on(self):
        models = ['PCA', 'Sentiment%20Analysis', 'ttest']
        path = str(Path('models', 'setup.py'))
        p = subprocess.run([self.py, path, self._get_config_file_name()],
                           input=b'user1\nP@ssw0rd\n')
@@ -20,15 +21,11 @@ def test_deploy_ssl_off_auth_on(self):
                'Basic ' +
                base64.b64encode('user1:P@ssw0rd'.
                                 encode('utf-8')).decode('utf-8')
            }
        }

        conn = self._get_connection()
        conn.request("GET", "/endpoints/PCA", headers=headers)
        PCA_request = conn.getresponse()
        self.assertEqual(200, PCA_request.status)
        PCA_request.read()

        conn.request("GET", "/endpoints/Sentiment%20Analysis", headers=headers)
        SentimentAnalysis_request = conn.getresponse()
        self.assertEqual(200, SentimentAnalysis_request.status)
        SentimentAnalysis_request.read()
        for m in models:
            conn.request("GET", f'/endpoints/{m}', headers=headers)
            m_request = conn.getresponse()
            self.assertEqual(200, m_request.status)
            m_request.read()
14 changes: 5 additions & 9 deletions tests/integration/test_deploy_model_ssl_on_auth_off.py
@@ -15,6 +15,7 @@ def _get_key_file_name(self) -> str:
        return './tests/integration/resources/2019_04_24_to_3018_08_25.key'

    def test_deploy_ssl_on_auth_off(self):
        models = ['PCA', 'Sentiment%20Analysis', 'ttest']
        path = str(Path('models', 'setup.py'))
        subprocess.call([self.py, path, self._get_config_file_name()])

@@ -24,12 +25,7 @@ def test_deploy_ssl_on_auth_off(self):
        # Do not warn about insecure request
        requests.packages.urllib3.disable_warnings()

        PCA_response = session.get(url=f'{self._get_transfer_protocol()}://'
                                       'localhost:9004/endpoints/PCA')
        self.assertEqual(200, PCA_response.status_code)

        SentimentAnalysis_response = session.get(
            url=f'{self._get_transfer_protocol()}://'
                'localhost:9004/endpoints/'
                'Sentiment Analysis')
        self.assertEqual(200, SentimentAnalysis_response.status_code)
        for m in models:
            m_response = session.get(url=f'{self._get_transfer_protocol()}://'
                                         f'localhost:9004/endpoints/{m}')
            self.assertEqual(200, m_response.status_code)
16 changes: 6 additions & 10 deletions tests/integration/test_deploy_model_ssl_on_auth_on.py
@@ -19,6 +19,7 @@ def _get_pwd_file(self) -> str:
        return './tests/integration/resources/pwdfile.txt'

    def test_deploy_ssl_on_auth_on(self):
        models = ['PCA', 'Sentiment%20Analysis', 'ttest']
        path = str(Path('models', 'setup.py'))
        p = subprocess.run([self.py, path, self._get_config_file_name()],
                           input=b'user1\nP@ssw0rd\n')
@@ -36,13 +37,8 @@ def test_deploy_ssl_on_auth_on(self):
        # Do not warn about insecure request
        requests.packages.urllib3.disable_warnings()

        PCA_response = session.get(url=f'{self._get_transfer_protocol()}'
                                       '://localhost:9004/endpoints/PCA',
                                   headers=headers)
        self.assertEqual(200, PCA_response.status_code)

        SentimentAnalysis_response = session.get(
            url=f'{self._get_transfer_protocol()}'
                '://localhost:9004/endpoints/'
                'Sentiment Analysis', headers=headers)
        self.assertEqual(200, SentimentAnalysis_response.status_code)
        for m in models:
            m_response = session.get(url=f'{self._get_transfer_protocol()}://'
                                         f'localhost:9004/endpoints/{m}',
                                     headers=headers)
            self.assertEqual(200, m_response.status_code)