Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #290 from tableau/dev
Merging models/small bug fixes into master
- Loading branch information
Showing
52 changed files
with
1,099 additions
and
633 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
[report] | ||
# Exclude lines that match patterns from coverage report. | ||
exclude_lines = | ||
if __name__ == .__main__.: | ||
|
||
# Only show one number after decimal point in report. | ||
precision = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.4.1 | ||
0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from tabpy_tools.client import Client | ||
import pandas as pd | ||
from numpy import array | ||
from sklearn.decomposition import PCA as sklearnPCA | ||
from sklearn.preprocessing import StandardScaler | ||
from sklearn.preprocessing import LabelEncoder | ||
from sklearn.preprocessing import OneHotEncoder | ||
import sys | ||
from pathlib import Path | ||
sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models')) | ||
from utils import setup_utils | ||
|
||
|
||
def PCA(component, _arg1, _arg2, *_argN):
    '''
    Principal Component Analysis is a technique that extracts the key
    distinct components from a high dimensional space while attempting
    to capture as much of the variance as possible. For more information
    on the function and how to use it please refer to tabpy-tools.md

    Arguments:
        component: 1-based index of the principal component to return.
        _arg1, _arg2, *_argN: the input columns. The kind of a column is
            decided from its first element: booleans become 0/1, other
            numbers are used as-is, and anything else is label-encoded
            and then one-hot encoded (one new column per distinct value).
            All columns are placed in a single DataFrame, so they are
            expected to have the same length.

    Returns:
        A list with the values of the requested principal component,
        computed on the standardized (StandardScaler) data.

    Raises:
        ValueError: if a column is empty, if a non-numeric column has
            more than 25 unique values, or if component is not between
            1 and the number of encoded columns.
    '''
    cols = [_arg1, _arg2] + list(_argN)
    encodedCols = []
    # The encoders are only needed for non-numeric columns, so they are
    # created on first use.
    labelEncoder = None
    oneHotEncoder = None

    for col in cols:
        if len(col) == 0:
            message = 'ERROR: Arguments cannot be empty'
            print(message)
            raise ValueError(message)

        first = col[0]
        # bool is a subclass of int, so it must be tested before the
        # generic numeric check or this branch can never be reached.
        if isinstance(first, bool):
            encodedCols.append(array(col).astype(int))
        elif isinstance(first, (int, float)):
            encodedCols.append(col)
        else:
            if len(set(col)) > 25:
                message = ('ERROR: Non-numeric arguments cannot have more '
                           'than 25 unique values')
                print(message)
                raise ValueError(message)
            if labelEncoder is None:
                labelEncoder = LabelEncoder()
                # NOTE(review): newer scikit-learn releases rename `sparse`
                # to `sparse_output` - confirm against the pinned version.
                oneHotEncoder = OneHotEncoder(categories='auto', sparse=False)
            integerEncoded = labelEncoder.fit_transform(array(col))
            integerEncoded = integerEncoded.reshape(len(col), 1)
            oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
            # Transposing turns each one-hot category into its own column.
            encodedCols += list(oneHotEncoded.transpose())

    dataDict = {f'col{i}': list(encoded)
                for i, encoded in enumerate(encodedCols, start=1)}

    if component <= 0 or component > len(dataDict):
        message = ('ERROR: Component specified must be > 0 and '
                   '<= number of encoded columns')
        print(message)
        raise ValueError(message)

    df = pd.DataFrame(data=dataDict, dtype=float)
    scale = StandardScaler()
    scaledData = scale.fit_transform(df)

    pca = sklearnPCA()
    pcaComponents = pca.fit_transform(scaledData)

    # component is 1-based for callers, the result matrix is 0-based.
    return pcaComponents[:, component - 1].tolist()
|
||
|
||
if __name__ == '__main__':
    # A config path on the command line means we were launched from
    # setup.py; without one, use the default config file.
    cli_args = sys.argv
    config_file_path = (cli_args[1] if len(cli_args) > 1
                        else setup_utils.get_default_config_file_path())
    port, auth_on, prefix = setup_utils.parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        if len(cli_args) == 4:
            # setup.py hands the credentials over as arguments 2 and 3
            user, passwd = cli_args[2:4]
        else:
            # running PCA independently: obtain credentials via setup_utils
            user, passwd = setup_utils.get_creds()
        connection.set_credentials(user, passwd)

    # override=True replaces any endpoint already deployed under this name
    connection.deploy('PCA', PCA,
                      'Returns the specified principal component.',
                      override=True)
    print("Successfully deployed PCA")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from tabpy_tools.client import Client | ||
from textblob import TextBlob | ||
from nltk.sentiment import SentimentIntensityAnalyzer | ||
import sys | ||
from pathlib import Path | ||
sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models')) | ||
from utils import setup_utils | ||
|
||
|
||
def SentimentAnalysis(_arg1, library='nltk'):
    '''
    Sentiment Analysis is a procedure that assigns a score from -1 to 1
    for a piece of text with -1 being negative and 1 being positive. For
    more information on the function and how to use it please refer to
    tabpy-tools.md

    Arguments:
        _arg1: list of strings to score.
        library: scoring backend, 'nltk' (default) or 'textblob';
            matched case-insensitively.

    Returns:
        A list with one score per input string, in input order. With
        'nltk' this is the 'compound' value of polarity_scores, with
        'textblob' the sentiment polarity. Empty input yields [].

    Raises:
        TypeError: if any element of _arg1 is not a string.
        ValueError: if library is not one of the supported backends.
    '''
    # Validate every element, not only the first, so bad rows are reported
    # up front instead of failing inside the scoring library.
    if not all(isinstance(text, str) for text in _arg1):
        raise TypeError('ERROR: SentimentAnalysis expects a list of strings')

    library = library.lower()
    supportedLibraries = {'nltk', 'textblob'}

    if library not in supportedLibraries:
        raise ValueError(
            f"ERROR: Unsupported library '{library}'; "
            f"expected one of {sorted(supportedLibraries)}")

    # Nothing to score: skip building an analyzer altogether.
    if not _arg1:
        return []

    if library == 'nltk':
        sid = SentimentIntensityAnalyzer()
        return [sid.polarity_scores(text)['compound'] for text in _arg1]

    return [TextBlob(text).sentiment.polarity for text in _arg1]
|
||
|
||
if __name__ == '__main__':
    # A config path on the command line means we were launched from
    # setup.py; without one, use the default config file.
    cli_args = sys.argv
    config_file_path = (cli_args[1] if len(cli_args) > 1
                        else setup_utils.get_default_config_file_path())
    port, auth_on, prefix = setup_utils.parse_config(config_file_path)

    connection = Client(f'{prefix}://localhost:{port}/')

    if auth_on:
        if len(cli_args) == 4:
            # setup.py hands the credentials over as arguments 2 and 3
            user, passwd = cli_args[2:4]
        else:
            # running Sentiment Analysis independently: obtain credentials
            # via setup_utils
            user, passwd = setup_utils.get_creds()
        connection.set_credentials(user, passwd)

    # override=True replaces any endpoint already deployed under this name
    connection.deploy('Sentiment Analysis', SentimentAnalysis,
                      'Returns a sentiment score between -1 and '
                      '1 for a given string.', override=True)
    print("Successfully deployed SentimentAnalysis")
Oops, something went wrong.