Skip to content

Commit

Permalink
Use the word 'integrity' instead of 'accuracy' to avoid misconceptions
Browse files Browse the repository at this point in the history
  • Loading branch information
nok committed Nov 1, 2017
1 parent 187efac commit 715ec7d
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 38 deletions.
8 changes: 4 additions & 4 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,17 @@ y_java = porter.predict(X[0])
y_java = porter.predict([1., 2., 3., 4.])
```

### Accuracy
### Integrity

Always compute the accuracy between the original and the ported estimator:
Always compute and test the integrity between the original and the transpiled estimator:

```python
# ...
porter = Porter(clf, language='java')

# Accuracy:
accuracy = porter.predict_test(X)
print(accuracy) # 1.0
integrity = porter.integrity_score(X)
print(integrity) # 1.0
```


Expand Down
88 changes: 54 additions & 34 deletions sklearn_porter/Porter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def __init__(self, estimator, language='java', method='predict', **kwargs):
Parameters
----------
language : {'c', 'go', 'java', 'js', 'php', 'ruby'}, default 'java'
language : {'c', 'go', 'java', 'js', 'php', 'ruby'}, default: 'java'
The required target programming language.
method : {'predict', 'predict_proba'}, default 'predict'
method : {'predict', 'predict_proba'}, default: 'predict'
The target prediction method.
"""

Expand Down Expand Up @@ -211,21 +211,25 @@ def export(self, class_name=None, method_name=None,
}
return output

def port(self, class_name='Brain', method_name='predict', details=False):
def port(self, class_name=None, method_name=None,
num_format=lambda x: str(x), details=False, **kwargs):
# pylint: disable=unused-argument
"""
Transpile a trained model to the syntax of a
chosen programming language.
Parameters
----------
:param class_name : string, default 'Brain'
:param class_name : string, default: None
The name for the ported class.
:param method_name : string, default 'predict'
:param method_name : string, default: None
The name for the ported method.
:param details : bool, default False
:param num_format : lambda x, default: lambda x: str(x)
The representation of the floating-point values.
:param details : bool, default: False
Return additional data for the compilation
and execution.
Expand Down Expand Up @@ -294,8 +298,8 @@ def _regressors(self):

return regressors

def predict(self, X, class_name='Brain', method_name='predict',
tnp_dir='tmp', keep_tmp_dir=False, use_repr=True):
def predict(self, X, class_name=None, method_name=None,
tnp_dir='tmp', keep_tmp_dir=False, num_format=lambda x: str(x)):
"""
Predict using the transpiled model.
Expand All @@ -304,29 +308,35 @@ def predict(self, X, class_name='Brain', method_name='predict',
:param X : {array-like}, shape (n_features) or (n_samples, n_features)
The input data.
:param class_name : string, default 'Brain'
:param class_name : string, default: None
The name for the ported class.
:param method_name : string, default 'predict'
:param method_name : string, default: None
The name for the ported method.
:param tnp_dir : string, default 'tmp'
:param tnp_dir : string, default: 'tmp'
The path to the temporary directory for
storing the transpiled (and compiled) model.
:param keep_tmp_dir : bool, default False
:param keep_tmp_dir : bool, default: False
Whether to keep the temporary directory after the
prediction instead of deleting it.
:param use_repr : bool, default: True
Whether to use repr() for floating-point values or not.
:param num_format : lambda x, default: lambda x: str(x)
The representation of the floating-point values.
Returns
-------
y : int or array-like, shape (n_samples,)
The predicted class or classes.
"""

if class_name is None:
class_name = self.estimator_name

if method_name is None:
method_name = self.target_method

# Dependencies:
if not hasattr(self, '_tested_dependencies'):
self._test_dependencies()
Expand All @@ -345,7 +355,8 @@ def predict(self, X, class_name='Brain', method_name='predict',
# Transpiled model:
details = self.export(class_name=class_name,
method_name=method_name,
use_repr=use_repr, details=True)
num_format=num_format,
details=True)
filename = Porter._get_filename(class_name, self.target_language)
target_file = os.path.join(tnp_dir, filename)
with open(target_file, str('w')) as file_:
Expand Down Expand Up @@ -385,7 +396,8 @@ def predict(self, X, class_name='Brain', method_name='predict',

return pred_y

def predict_test(self, X, normalize=True, use_repr=True):
def integrity_score(self, X, method='predict', normalize=True,
num_format=lambda x: str(x)):
"""
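Compute the integrity score of the transpiled estimator, i.e. the share of samples for which the original and the transpiled estimator return the same prediction.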
Expand All @@ -394,12 +406,15 @@ def predict_test(self, X, normalize=True, use_repr=True):
:param X : ndarray, shape (n_samples, n_features)
Input data.
:param normalize : bool, optional (default=True)
:param method : string, default: 'predict'
The method which should be tested.
:param normalize : bool, default: True
If ``False``, return the number of correctly classified samples.
Otherwise, return the fraction of correctly classified samples.
:param use_repr : bool, default: True
Whether to use repr() for floating-point values or not.
:param num_format : lambda x, default: lambda x: str(x)
The representation of the floating-point values.
Returns
-------
Expand All @@ -413,19 +428,23 @@ def predict_test(self, X, normalize=True, use_repr=True):
X = np.array(X)
if not X.ndim > 1:
X = np.array([X])
y_true = self.estimator.predict(X)
y_pred = self.predict(X, use_repr=use_repr)
return accuracy_score(y_true, y_pred, normalize=normalize)

method = str(method).strip().lower()
if method not in ['predict', 'predict_proba']:
error = "The given method '{}' isn't supported.".format(method)
raise AttributeError(error)

if method == 'predict':
y_true = self.estimator.predict(X)
y_pred = self.predict(X, tnp_dir='tmp_integrity_score', keep_tmp_dir=True, num_format=num_format)
return accuracy_score(y_true, y_pred, normalize=normalize)

return False

def _test_dependencies(self):
"""
Check all target programming and operating
system dependencies.
Parameters
----------
:param language : {'c', 'go', 'java', 'js', 'php', 'ruby'}
The target programming language.
"""
lang = self.target_language

Expand All @@ -435,14 +454,15 @@ def _test_dependencies(self):

# Dependencies:
depends = {
'c': ('gcc'),
'java': ('java', 'javac'),
'js': ('node'),
'go': ('go'),
'php': ('php'),
'ruby': ('ruby')
'c': ['gcc'],
'java': ['java', 'javac'],
'js': ['node'],
'go': ['go'],
'php': ['php'],
'ruby': ['ruby']
}
all_depends = depends.get(lang) + ('mkdir', 'rm')
all_depends = depends.get(lang) + ['mkdir', 'rm']
all_depends = [str(e) for e in all_depends]

cmd = 'if hash {} 2/dev/null; then echo 1; else echo 0; fi'
for exe in all_depends:
Expand Down

0 comments on commit 715ec7d

Please sign in to comment.