Use the word 'integrity' instead of 'accuracy' to avoid misconceptions

nok · Nov 1, 2017 · 715ec7d · 715ec7d
1 parent 187efac
commit 715ec7d
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 38 deletions.
diff --git a/readme.md b/readme.md
@@ -211,17 +211,17 @@ y_java = porter.predict(X[0])
 y_java = porter.predict([1., 2., 3., 4.])
 ```
 
-### Accuracy
+### Integrity
 
-Always compute the accuracy between the original and the ported estimator:
+Always compute and test the integrity between the original and the transpiled estimator:
 
 ```python
 # ...
 porter = Porter(clf, language='java')
 
 # Integrity:
-accuracy = porter.predict_test(X)
-print(accuracy) # 1.0
+integrity = porter.integrity_score(X)
+print(integrity)  # 1.0
 ```
 
 

diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py
@@ -40,10 +40,10 @@ def __init__(self, estimator, language='java', method='predict', **kwargs):
 
         Parameters
         ----------
-        language : {'c', 'go', 'java', 'js', 'php', 'ruby'}, default 'java'
+        language : {'c', 'go', 'java', 'js', 'php', 'ruby'}, default: 'java'
             The required target programming language.
 
-        method : {'predict', 'predict_proba'}, default 'predict'
+        method : {'predict', 'predict_proba'}, default: 'predict'
             The target prediction method.
         """
 
@@ -211,21 +211,25 @@ def export(self, class_name=None, method_name=None,
         }
         return output
 
-    def port(self, class_name='Brain', method_name='predict', details=False):
+    def port(self, class_name=None, method_name=None,
+               num_format=lambda x: str(x), details=False, **kwargs):
         # pylint: disable=unused-argument
         """
         Transpile a trained model to the syntax of a
         chosen programming language.
 
         Parameters
         ----------
-        :param class_name : string, default 'Brain'
+        :param class_name : string, default: None
             The name for the ported class.
 
-        :param method_name : string, default 'predict'
+        :param method_name : string, default: None
             The name for the ported method.
 
-        :param details : bool, default False
+        :param num_format : lambda x, default: lambda x: str(x)
+            The representation of the floating-point values.
+
+        :param details : bool, default: False
             Return additional data for the compilation
             and execution.
 
@@ -294,8 +298,8 @@ def _regressors(self):
 
         return regressors
 
-    def predict(self, X, class_name='Brain', method_name='predict',
-                tnp_dir='tmp', keep_tmp_dir=False, use_repr=True):
+    def predict(self, X, class_name=None, method_name=None,
+                tnp_dir='tmp', keep_tmp_dir=False, num_format=lambda x: str(x)):
         """
         Predict using the transpiled model.
 
@@ -304,29 +308,35 @@ def predict(self, X, class_name='Brain', method_name='predict',
         :param X : {array-like}, shape (n_features) or (n_samples, n_features)
             The input data.
 
-        :param class_name : string, default 'Brain'
+        :param class_name : string, default: None
             The name for the ported class.
 
-        :param method_name : string, default 'predict'
+        :param method_name : string, default: None
             The name for the ported method.
 
-        :param tnp_dir : string, default 'tmp'
+        :param tnp_dir : string, default: 'tmp'
             The path to the temporary directory for
             storing the transpiled (and compiled) model.
 
-        :param keep_tmp_dir : bool, default False
+        :param keep_tmp_dir : bool, default: False
             Whether to delete the temporary directory
             or not.
             
-        :param use_repr : bool, default: True
-            Whether to use repr() for floating-point values or not.
+        :param num_format : lambda x, default: lambda x: str(x)
+            The representation of the floating-point values.
 
         Returns
         -------
             y : int or array-like, shape (n_samples,)
             The predicted class or classes.
         """
 
+        if class_name is None:
+            class_name = self.estimator_name
+
+        if method_name is None:
+            method_name = self.target_method
+
         # Dependencies:
         if not hasattr(self, '_tested_dependencies'):
             self._test_dependencies()
@@ -345,7 +355,8 @@ def predict(self, X, class_name='Brain', method_name='predict',
         # Transpiled model:
         details = self.export(class_name=class_name,
                               method_name=method_name,
-                              use_repr=use_repr, details=True)
+                              num_format=num_format,
+                              details=True)
         filename = Porter._get_filename(class_name, self.target_language)
         target_file = os.path.join(tnp_dir, filename)
         with open(target_file, str('w')) as file_:
@@ -385,7 +396,8 @@ def predict(self, X, class_name='Brain', method_name='predict',
 
         return pred_y
 
-    def predict_test(self, X, normalize=True, use_repr=True):
+    def integrity_score(self, X, method='predict', normalize=True,
+                        num_format=lambda x: str(x)):
         """
         Compute the accuracy of the ported classifier.
 
@@ -394,12 +406,15 @@ def predict_test(self, X, normalize=True, use_repr=True):
         :param X : ndarray, shape (n_samples, n_features)
             Input data.
 
-        :param normalize : bool, optional (default=True)
+        :param method : string, default: 'predict'
+            The method which should be tested.
+
+        :param normalize : bool, default: True
             If ``False``, return the number of correctly classified samples.
             Otherwise, return the fraction of correctly classified samples.
             
-        :param use_repr : bool, default: True
-            Whether to use repr() for floating-point values or not.
+        :param num_format : lambda x, default: lambda x: str(x)
+            The representation of the floating-point values.
 
         Returns
         -------
@@ -413,19 +428,23 @@ def predict_test(self, X, normalize=True, use_repr=True):
         X = np.array(X)
         if not X.ndim > 1:
             X = np.array([X])
-        y_true = self.estimator.predict(X)
-        y_pred = self.predict(X, use_repr=use_repr)
-        return accuracy_score(y_true, y_pred, normalize=normalize)
+
+        method = str(method).strip().lower()
+        if method not in ['predict', 'predict_proba']:
+            error = "The given method '{}' isn't supported.".format(method)
+            raise AttributeError(error)
+
+        if method == 'predict':
+            y_true = self.estimator.predict(X)
+            y_pred = self.predict(X, tnp_dir='tmp_integrity_score', keep_tmp_dir=True, num_format=num_format)
+            return accuracy_score(y_true, y_pred, normalize=normalize)
+
+        return False
 
     def _test_dependencies(self):
         """
         Check all target programming and operating
         system dependencies.
-
-        Parameters
-        ----------
-        :param language : {'c', 'go', 'java', 'js', 'php', 'ruby'}
-            The target programming language.
         """
         lang = self.target_language
 
@@ -435,14 +454,15 @@ def _test_dependencies(self):
 
         # Dependencies:
         depends = {
-            'c': ('gcc'),
-            'java': ('java', 'javac'),
-            'js': ('node'),
-            'go': ('go'),
-            'php': ('php'),
-            'ruby': ('ruby')
+            'c': ['gcc'],
+            'java': ['java', 'javac'],
+            'js': ['node'],
+            'go': ['go'],
+            'php': ['php'],
+            'ruby': ['ruby']
         }
-        all_depends = depends.get(lang) + ('mkdir', 'rm')
+        all_depends = depends.get(lang) + ['mkdir', 'rm']
+        all_depends = [str(e) for e in all_depends]
 
         cmd = 'if hash {} 2/dev/null; then echo 1; else echo 0; fi'
         for exe in all_depends: