Skip to content

Commit c825b36

Browse files
authored
Merge e5e0c19 into fb581bf
2 parents fb581bf + e5e0c19 commit c825b36

9 files changed

+143
-84
lines changed

docs/tabpy-tools.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on TabPy server.
88
- [Connecting to TabPy](#connecting-to-tabpy)
99
- [Authentication](#authentication)
1010
- [Deploying a Function](#deploying-a-function)
11+
- [Predeployed Functions](#predeployed-functions)
1112
- [Providing Schema Metadata](#providing-schema-metadata)
1213
- [Querying an Endpoint](#querying-an-endpoint)
1314
- [Evaluating Arbitrary Python Scripts](#evaluating-arbitrary-python-scripts)
@@ -265,6 +266,50 @@ tabpy.query('Sentiment Analysis', _arg1, library='textblob')[‘response’]
265266

266267
```
267268

269+
### T-Test
270+
271+
A [t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) is a statistical
272+
hypothesis test that is used to compare two sample means or a sample’s mean against
273+
a known population mean. The t-test should be used when the means of the samples
274+
follow a normal distribution but the variance may not be known.
275+
276+
TabPy’s pre-deployed t-test implementation can be called using the following syntax,
277+
278+
```python
279+
280+
tabpy.query('ttest', _arg1, _arg2)['response']
281+
282+
```
283+
284+
and is capable of performing two types of t-tests:
285+
286+
<!-- markdownlint-disable MD029 -->
287+
1\. [A t-test for the means of two independent samples that does not assume equal variance (Welch's t-test)](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html)
288+
This is a two-sided t test with the null hypothesis being that the mean of
289+
sample1 is equal to the mean of sample2.
290+
_arg1 (list of numeric values): a list of independent observations
291+
_arg2 (list of numeric values): a list of independent observations equal to
292+
the length of _arg1
293+
294+
Alternatively, your data may not be split into separate measures. If that is
295+
the case you can pass the following fields to ttest,
296+
297+
_arg1 (list of numeric values): a list of independent observations
298+
_arg2 (list of categorical variables with cardinality two): a binary factor
299+
that maps each observation in _arg1 to either sample1 or sample2 (this list
300+
should be equal to the length of _arg1)
301+
302+
2\. [A t-test for the mean of one group](https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.ttest_1samp.html)
303+
_arg1 (list of numeric values): a list of independent observations
304+
_arg2 (a numeric value): the known population mean
305+
A two-sided t test with the null hypothesis being that the mean of a sample of
306+
independent observations is equal to the given population mean.
307+
308+
The function returns a two-tailed [p-value](https://en.wikipedia.org/wiki/P-value)
309+
(between 0 and 1). Depending on your [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
310+
you may reject or fail to reject the null hypothesis.
311+
<!-- markdownlint-enable MD029 -->
312+
268313
## Providing Schema Metadata
269314

270315
As soon as you share your deployed functions, you also need to share metadata

models/scripts/PCA.py

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from tabpy_tools.client import Client
21
import pandas as pd
32
from numpy import array
43
from sklearn.decomposition import PCA as sklearnPCA
@@ -60,25 +59,6 @@ def PCA(component, _arg1, _arg2, *_argN):
6059

6160

6261
if __name__ == '__main__':
63-
# running from setup.py
64-
if len(sys.argv) > 1:
65-
config_file_path = sys.argv[1]
66-
else:
67-
config_file_path = setup_utils.get_default_config_file_path()
68-
port, auth_on, prefix = setup_utils.parse_config(config_file_path)
69-
70-
connection = Client(f'{prefix}://localhost:{port}/')
71-
72-
if auth_on:
73-
# credentials are passed in from setup.py
74-
if len(sys.argv) == 4:
75-
user, passwd = sys.argv[2], sys.argv[3]
76-
# running PCA independently
77-
else:
78-
user, passwd = setup_utils.get_creds()
79-
connection.set_credentials(user, passwd)
80-
81-
connection.deploy('PCA', PCA,
82-
'Returns the specified principal component.',
83-
override=True)
84-
print("Successfully deployed PCA")
62+
setup_utils.main('PCA',
63+
PCA,
64+
'Returns the specified principal component')

models/scripts/SentimentAnalysis.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from tabpy_tools.client import Client
21
from textblob import TextBlob
32
import nltk
43
from nltk.sentiment.vader import SentimentIntensityAnalyzer
@@ -43,25 +42,7 @@ def SentimentAnalysis(_arg1, library='nltk'):
4342

4443

4544
if __name__ == '__main__':
46-
# running from setup.py
47-
if len(sys.argv) > 1:
48-
config_file_path = sys.argv[1]
49-
else:
50-
config_file_path = setup_utils.get_default_config_file_path()
51-
port, auth_on, prefix = setup_utils.parse_config(config_file_path)
52-
53-
connection = Client(f'{prefix}://localhost:{port}/')
54-
55-
if auth_on:
56-
# credentials are passed in from setup.py
57-
if len(sys.argv) == 4:
58-
user, passwd = sys.argv[2], sys.argv[3]
59-
# running Sentiment Analysis independently
60-
else:
61-
user, passwd = setup_utils.get_creds()
62-
connection.set_credentials(user, passwd)
63-
64-
connection.deploy('Sentiment Analysis', SentimentAnalysis,
65-
'Returns a sentiment score between -1 and '
66-
'1 for a given string.', override=True)
67-
print("Successfully deployed SentimentAnalysis")
45+
setup_utils.main('Sentiment Analysis',
46+
SentimentAnalysis,
47+
'Returns a sentiment score between -1 and 1 for '
48+
'a given string')

models/scripts/tTest.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from scipy import stats
2+
import sys
3+
from pathlib import Path
4+
sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models'))
5+
from utils import setup_utils
6+
7+
8+
def ttest(_arg1, _arg2):
    '''
    Run a two-sided t-test and return its p-value.

    The kind of test is chosen from the contents of ``_arg2``:

    - exactly one element: one-sample test of ``_arg1`` against that
      population mean (``scipy.stats.ttest_1samp``);
    - exactly two distinct values: ``_arg2`` is treated as a binary factor
      that assigns each observation in ``_arg1`` to one of two samples;
    - anything else: ``_arg1`` and ``_arg2`` are two independent samples.

    Both two-sample forms call ``scipy.stats.ttest_ind`` with
    ``equal_var=False``, i.e. equal population variances are not assumed.

    Note that a numeric second sample containing exactly two distinct
    values cannot be told apart from a binary factor and is handled as
    a factor.

    Raises ``ValueError`` when ``_arg2`` is a binary factor whose length
    differs from the length of ``_arg1``.

    For more information on the function and how to use it please refer
    to tabpy-tools.md
    '''
    # one sample test with mean
    if len(_arg2) == 1:
        # Pass the scalar, not the one-element list, so the p-value comes
        # back as a scalar just like in the two-sample branches below.
        _, p_value = stats.ttest_1samp(_arg1, _arg2[0])
        return p_value

    classes = set(_arg2)

    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    if len(classes) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if len(_arg1) != len(_arg2):
            raise ValueError(
                'ttest: _arg1 and _arg2 must have the same length when '
                f'_arg2 is a binary factor (got {len(_arg1)} and '
                f'{len(_arg2)})')
        # Which label ends up as "class1" does not matter: the test is
        # two-sided, so swapping the samples yields the same p-value.
        class1 = next(iter(classes))
        sample1, sample2 = [], []
        for value, label in zip(_arg1, _arg2):
            if label == class1:
                sample1.append(value)
            else:
                sample2.append(value)
        _, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value

    # arg1 is a sample and arg2 is a sample
    _, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
    return p_value
39+
40+
41+
if __name__ == '__main__':
    # Deploy ttest to the TabPy server under the endpoint name 'ttest'
    # when this file is run as a script.
    setup_utils.main('ttest',
                     ttest,
                     'Returns the p-value from a t-test')

models/utils/setup_utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path
33
import getpass
44
import sys
5+
from tabpy_tools.client import Client
56

67

78
def get_default_config_file_path():
@@ -31,3 +32,25 @@ def get_creds():
3132
user = sys.stdin.readline().rstrip()
3233
passwd = sys.stdin.readline().rstrip()
3334
return [user, passwd]
35+
36+
def main(funcName, func, funcDescription):
    '''
    Deploy ``func`` to the local TabPy server under the name ``funcName``.

    Command-line arguments (``sys.argv``) drive the behaviour:

    - ``argv[1]``, when present, is the config file path; otherwise the
      default config file path is used.
    - when the parsed config reports authentication as enabled,
      credentials are taken from ``argv[2]`` and ``argv[3]`` if exactly
      four arguments were given, and from ``get_creds()`` otherwise.

    An existing endpoint with the same name is overridden, and a
    confirmation line is printed once deployment has finished.
    '''
    cli_args = sys.argv

    # setup.py passes the config file path as the first argument
    if len(cli_args) > 1:
        config_file_path = cli_args[1]
    else:
        config_file_path = get_default_config_file_path()
    port, auth_on, prefix = parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        if len(cli_args) == 4:
            # credentials are passed in from setup.py
            user, passwd = cli_args[2], cli_args[3]
        else:
            # the model script is being run independently
            user, passwd = get_creds()
        connection.set_credentials(user, passwd)

    connection.deploy(funcName, func, funcDescription, override=True)
    print(f'Successfully deployed {funcName}')

tests/integration/test_deploy_model_ssl_off_auth_off.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,13 @@
55

66
class TestDeployModelSSLOffAuthOff(integ_test_base.IntegTestBase):
77
def test_deploy_ssl_off_auth_off(self):
8+
models = ['PCA', 'Sentiment%20Analysis', "ttest"]
89
path = str(Path('models', 'setup.py'))
910
subprocess.call([self.py, path, self._get_config_file_name()])
1011

1112
conn = self._get_connection()
12-
conn.request("GET", "/endpoints/PCA")
13-
PCA_request = conn.getresponse()
14-
self.assertEqual(200, PCA_request.status)
15-
PCA_request.read()
16-
17-
conn.request("GET", "/endpoints/Sentiment%20Analysis")
18-
SentimentAnalysis_request = conn.getresponse()
19-
self.assertEqual(200, SentimentAnalysis_request.status)
20-
SentimentAnalysis_request.read()
13+
for m in models:
14+
conn.request("GET", f'/endpoints/{m}')
15+
m_request = conn.getresponse()
16+
self.assertEqual(200, m_request.status)
17+
m_request.read()

tests/integration/test_deploy_model_ssl_off_auth_on.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def _get_pwd_file(self) -> str:
99
return './tests/integration/resources/pwdfile.txt'
1010

1111
def test_deploy_ssl_off_auth_on(self):
12+
models = ['PCA', 'Sentiment%20Analysis', "ttest"]
1213
path = str(Path('models', 'setup.py'))
1314
p = subprocess.run([self.py, path, self._get_config_file_name()],
1415
input=b'user1\nP@ssw0rd\n')
@@ -20,15 +21,11 @@ def test_deploy_ssl_off_auth_on(self):
2021
'Basic ' +
2122
base64.b64encode('user1:P@ssw0rd'.
2223
encode('utf-8')).decode('utf-8')
23-
}
24+
}
2425

2526
conn = self._get_connection()
26-
conn.request("GET", "/endpoints/PCA", headers=headers)
27-
PCA_request = conn.getresponse()
28-
self.assertEqual(200, PCA_request.status)
29-
PCA_request.read()
30-
31-
conn.request("GET", "/endpoints/Sentiment%20Analysis", headers=headers)
32-
SentimentAnalysis_request = conn.getresponse()
33-
self.assertEqual(200, SentimentAnalysis_request.status)
34-
SentimentAnalysis_request.read()
27+
for m in models:
28+
conn.request("GET", f'/endpoints/{m}', headers=headers)
29+
m_request = conn.getresponse()
30+
self.assertEqual(200, m_request.status)
31+
m_request.read()

tests/integration/test_deploy_model_ssl_on_auth_off.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def _get_key_file_name(self) -> str:
1515
return './tests/integration/resources/2019_04_24_to_3018_08_25.key'
1616

1717
def test_deploy_ssl_on_auth_off(self):
18+
models = ['PCA', 'Sentiment%20Analysis', "ttest"]
1819
path = str(Path('models', 'setup.py'))
1920
subprocess.call([self.py, path, self._get_config_file_name()])
2021

@@ -24,12 +25,7 @@ def test_deploy_ssl_on_auth_off(self):
2425
# Do not warn about insecure request
2526
requests.packages.urllib3.disable_warnings()
2627

27-
PCA_response = session.get(url=f'{self._get_transfer_protocol()}://'
28-
'localhost:9004/endpoints/PCA')
29-
self.assertEqual(200, PCA_response.status_code)
30-
31-
SentimentAnalysis_response = session.get(
32-
url=f'{self._get_transfer_protocol()}://'
33-
'localhost:9004/endpoints/'
34-
'Sentiment Analysis')
35-
self.assertEqual(200, SentimentAnalysis_response.status_code)
28+
for m in models:
29+
m_response = session.get(url=f'{self._get_transfer_protocol()}://'
30+
f'localhost:9004/endpoints/{m}')
31+
self.assertEqual(200, m_response.status_code)

tests/integration/test_deploy_model_ssl_on_auth_on.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def _get_pwd_file(self) -> str:
1919
return './tests/integration/resources/pwdfile.txt'
2020

2121
def test_deploy_ssl_on_auth_on(self):
22+
models = ['PCA', 'Sentiment%20Analysis', "ttest"]
2223
path = str(Path('models', 'setup.py'))
2324
p = subprocess.run([self.py, path, self._get_config_file_name()],
2425
input=b'user1\nP@ssw0rd\n')
@@ -36,13 +37,8 @@ def test_deploy_ssl_on_auth_on(self):
3637
# Do not warn about insecure request
3738
requests.packages.urllib3.disable_warnings()
3839

39-
PCA_response = session.get(url=f'{self._get_transfer_protocol()}'
40-
'://localhost:9004/endpoints/PCA',
41-
headers=headers)
42-
self.assertEqual(200, PCA_response.status_code)
43-
44-
SentimentAnalysis_response = session.get(
45-
url=f'{self._get_transfer_protocol()}'
46-
'://localhost:9004/endpoints/'
47-
'Sentiment Analysis', headers=headers)
48-
self.assertEqual(200, SentimentAnalysis_response.status_code)
40+
for m in models:
41+
m_response = session.get(url=f'{self._get_transfer_protocol()}://'
42+
f'localhost:9004/endpoints/{m}',
43+
headers=headers)
44+
self.assertEqual(200, m_response.status_code)

0 commit comments

Comments
 (0)