Merge pull request #43 from serengil/feat-task-split-unit-tests
modular unit tests
serengil committed Dec 25, 2023
2 parents c9d1d1e + 39ca65e commit 0ea6e22
Showing 25 changed files with 550 additions and 427 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -34,13 +34,13 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pandas==1.3.5 numpy==1.22.3 tqdm==4.62.3 psutil==5.9.0
pip install pytest
pip install .
- name: Test with pytest
run: |
cd tests
python global-unit-test.py
python -m pytest . -s --disable-warnings
linting:
needs: unit-tests

2 changes: 1 addition & 1 deletion Makefile
@@ -1,5 +1,5 @@
test:
cd tests && python global-unit-test.py
cd tests && python -m pytest . -s --disable-warnings

lint:
python -m pylint chefboost/ --fail-under=10
81 changes: 58 additions & 23 deletions chefboost/Chefboost.py
@@ -24,6 +24,7 @@ def fit(
config: Optional[dict] = None,
target_label: str = "Decision",
validation_df: Optional[pd.DataFrame] = None,
silent: bool = False,
) -> Dict[str, Any]:
"""
Build (a) decision tree model(s)
@@ -55,6 +56,9 @@ def fit(
if nothing is passed to validation data frame, then the function validates
built trees for training data frame
silent (bool): set this to True if you do not want to see
any informative logs
Returns:
chefboost model
"""
@@ -139,7 +143,8 @@ def fit(

if enableParallelism == True:
num_cores = config["num_cores"]
logger.info(f"[INFO]: {num_cores} CPU cores will be allocated in parallel running")
if silent is False:
logger.info(f"[INFO]: {num_cores} CPU cores will be allocated in parallel running")

from multiprocessing import set_start_method, freeze_support

@@ -169,7 +174,8 @@ def fit(
config["algorithm"] = "Regression"

if enableGBM == True:
logger.info("Gradient Boosting Machines...")
if silent is False:
logger.info("Gradient Boosting Machines...")
algorithm = "Regression"
config["algorithm"] = "Regression"

@@ -184,7 +190,8 @@ def fit(

# -------------------------

logger.info(f"{algorithm} tree is going to be built...")
if silent is False:
logger.info(f"{algorithm} tree is going to be built...")

# initialize a dictionary. this is going to be used to check features numeric or nominal.
# numeric features should be transformed to nominal values based on scales.
@@ -212,7 +219,13 @@ def fit(

if enableAdaboost == True:
trees, alphas = adaboost_clf.apply(
df, config, header, dataset_features, validation_df=validation_df, process_id=process_id
df,
config,
header,
dataset_features,
validation_df=validation_df,
process_id=process_id,
silent=silent,
)

elif enableGBM == True:
@@ -224,6 +237,7 @@ def fit(
dataset_features,
validation_df=validation_df,
process_id=process_id,
silent=silent,
)
# classification = True

@@ -235,12 +249,19 @@ def fit(
dataset_features,
validation_df=validation_df,
process_id=process_id,
silent=silent,
)
# classification = False

elif enableRandomForest == True:
trees = randomforest.apply(
df, config, header, dataset_features, validation_df=validation_df, process_id=process_id
df,
config,
header,
dataset_features,
validation_df=validation_df,
process_id=process_id,
silent=silent,
)
else: # regular decision tree building
root = 1
@@ -264,22 +285,23 @@ def fit(
main_process_id=process_id,
)

logger.info("-------------------------")
logger.info(f"finished in {time.time() - begin} seconds")
if silent is False:
logger.info("-------------------------")
logger.info(f"finished in {time.time() - begin} seconds")

obj = {"trees": trees, "alphas": alphas, "config": config, "nan_values": nan_values}

# -----------------------------------------

# train set accuracy
df = base_df.copy()
evaluate(obj, df, task="train")
trainset_evaluation = evaluate(obj, df, task="train", silent=silent)
obj["evaluation"] = {"train": trainset_evaluation}

# validation set accuracy
if isinstance(validation_df, pd.DataFrame):
evaluate(obj, validation_df, task="validation")

# -----------------------------------------
validationset_evaluation = evaluate(obj, validation_df, task="validation", silent=silent)
obj["evaluation"]["validation"] = validationset_evaluation

return obj

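A minimal sketch of how the extended fit signature might be used once this change lands. The CSV path, column layout, and algorithm choice below are assumptions; only the silent flag and the new "evaluation" key come from the diff above.

```python
from chefboost import Chefboost as chef
import pandas as pd

# hypothetical training frame whose target column is named "Decision"
df = pd.read_csv("dataset.csv")  # assumed path, not part of this PR

config = {"algorithm": "C4.5"}  # any supported algorithm would do here

# silent=True suppresses the informative logs guarded by `if silent is False:`
model = chef.fit(df, config=config, silent=True)

# fit now attaches the train (and, if given, validation) evaluation to the returned object
print(model["evaluation"]["train"])
```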
@@ -455,31 +477,38 @@ def restoreTree(module_name) -> Any:
return functions.restoreTree(module_name)


def feature_importance(rules: Union[str, list]) -> pd.DataFrame:
def feature_importance(rules: Union[str, list], silent: bool = False) -> pd.DataFrame:
"""
Show the feature importance values of a built model
Args:
rules (str or list): e.g. decision_rules = "outputs/rules/rules.py"
rules (str or list): e.g. decision_rules = "outputs/rules/rules.py"
or this could be retrieved from built model as shown below.
decision_rules = []
for tree in model["trees"]:
rule = .__dict__["__spec__"].origin
decision_rules.append(rule)
```python
decision_rules = []
for tree in model["trees"]:
rule = tree.__dict__["__spec__"].origin
decision_rules.append(rule)
```
silent (bool): set this to True if you do not want to see
any informative logs.
Returns:
feature importance (pd.DataFrame)
"""

if not isinstance(rules, list):
rules = [rules]
logger.info(f"rules: {rules}")

if silent is False:
logger.info(f"rules: {rules}")

# -----------------------------

dfs = []

for rule in rules:
logger.info("Decision rule: {rule}")
if silent is False:
logger.info(f"Decision rule: {rule}")

with open(rule, "r", encoding="UTF-8") as file:
lines = file.readlines()
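As a companion to the docstring above, a short sketch of calling the updated feature_importance with the new silent flag; it assumes model and chef are the objects from the fit example earlier in this diff.

```python
# collect the generated rules files from the built trees, as the docstring suggests
decision_rules = []
for tree in model["trees"]:
    rule = tree.__dict__["__spec__"].origin
    decision_rules.append(rule)

# silent=True skips the per-rule log lines added in this PR
fi = chef.feature_importance(decision_rules, silent=True)
print(fi)
```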
@@ -564,17 +593,23 @@ def feature_importance(rules: Union[str, list]) -> pd.DataFrame:


def evaluate(
model: dict, df: pd.DataFrame, target_label: str = "Decision", task: str = "test"
) -> None:
model: dict,
df: pd.DataFrame,
target_label: str = "Decision",
task: str = "test",
silent: bool = False,
) -> dict:
"""
Evaluate the performance of a built model on a data set
Args:
model (dict): built model which is the output of fit function
df (pandas data frame): data frame you would like to evaluate
target_label (str): target label
task (string): set this to train, validation or test
silent (bool): set this to True if you do not want to see
any informative logs
Returns:
None
evaluation results (dict)
"""

# --------------------------
@@ -598,4 +633,4 @@ def evaluate(
df["Decision"] = df["Decision"].astype(str)
df["Prediction"] = df["Prediction"].astype(str)

cb_eval.evaluate(df, task=task)
return cb_eval.evaluate(df, task=task, silent=silent)
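Since evaluate now returns a dictionary rather than None, its results can be consumed programmatically. A sketch assuming model from the earlier example and a hypothetical held-out frame test_df with the same columns as the training data.

```python
results = chef.evaluate(model, test_df, task="test", silent=True)

# for classification tasks the dict carries keys such as "Accuracy",
# "Instances", "Labels" and "Confusion matrix" (see commons/evaluate.py below)
print(results.get("Accuracy"), results.get("Instances"))
```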
90 changes: 67 additions & 23 deletions chefboost/commons/evaluate.py
@@ -1,30 +1,44 @@
import math
import pandas as pd
from chefboost.commons.logger import Logger

# pylint: disable=broad-except

logger = Logger(module="chefboost/commons/evaluate.py")


def evaluate(df, task="train"):
def evaluate(df: pd.DataFrame, task: str = "train", silent: bool = False) -> dict:
"""
Evaluate results
Args:
df (pd.DataFrame): data frame
task (str): train, test
silent (bool): set this to True if you do not want to
see any informative logs
Returns:
evaluation results (dict)
"""
if df["Decision"].dtypes == "object":
problem_type = "classification"
else:
problem_type = "regression"

# -------------------------------------

evaluation_results = {}
instances = df.shape[0]

logger.info("-------------------------")
logger.info(f"Evaluate {task} set")
logger.info("-------------------------")
if silent is False:
logger.info("-------------------------")
logger.info(f"Evaluate {task} set")
logger.info("-------------------------")

if problem_type == "classification":
idx = df[df["Prediction"] == df["Decision"]].index
accuracy = 100 * len(idx) / df.shape[0]
logger.info(f"Accuracy: {accuracy}% on {instances} instances")
if silent is False:
logger.info(f"Accuracy: {accuracy}% on {instances} instances")

evaluation_results["Accuracy"] = accuracy
evaluation_results["Instances"] = instances
# -----------------------------

predictions = df.Prediction.values
@@ -48,8 +62,12 @@ def evaluate(df, task="train"):
confusion_row.append(item)
confusion_matrix.append(confusion_row)

logger.info(f"Labels: {labels}")
logger.info(f"Confusion matrix: {confusion_matrix}")
if silent is False:
logger.info(f"Labels: {labels}")
logger.info(f"Confusion matrix: {confusion_matrix}")

evaluation_results["Labels"] = labels
evaluation_results["Confusion matrix"] = confusion_matrix

# -----------------------------
# precision and recall
@@ -79,11 +97,19 @@ def evaluate(df, task="train"):
accuracy = round(100 * (tp + tn) / (tp + tn + fp + fn + epsilon), 4)

if len(labels) >= 3:
logger.info(f"Decision {decision_class}")
logger.info(f"Accuray: {accuracy}")
if silent is False:
logger.info(f"Decision {decision_class}")
logger.info(f"Accuracy: {accuracy}")

evaluation_results[f"Decision {decision_class}'s Accuracy"] = accuracy

logger.info(f"Precision: {precision}%, Recall: {recall}%, F1: {f1_score}%")
logger.debug(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
if silent is False:
logger.info(f"Precision: {precision}%, Recall: {recall}%, F1: {f1_score}%")
logger.debug(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")

evaluation_results["Precision"] = precision
evaluation_results["Recall"] = recall
evaluation_results["F1"] = f1_score

if len(labels) < 3:
break
@@ -99,13 +125,17 @@ def evaluate(df, task="train"):

if instances > 0:
mae = df["Absolute_Error"].sum() / instances
logger.info(f"MAE: {mae}")

mse = df["Absolute_Error_Squared"].sum() / instances
logger.info(f"MSE: {mse}")

rmse = math.sqrt(mse)
logger.info(f"RMSE: {rmse}")

evaluation_results["MAE"] = mae
evaluation_results["MSE"] = mse
evaluation_results["RMSE"] = rmse

if silent is False:
logger.info(f"MAE: {mae}")
logger.info(f"MSE: {mse}")
logger.info(f"RMSE: {rmse}")

rae = 0
rrse = 0
@@ -122,12 +152,26 @@ def evaluate(df, task="train"):
except Exception as err:
logger.error(str(err))

logger.info(f"RAE: {rae}")
logger.info(f"RRSE {rrse}")
if silent is False:
logger.info(f"RAE: {rae}")
logger.info(f"RRSE {rrse}")

evaluation_results["RAE"] = rae
evaluation_results["RRSE"] = rrse

mean = df["Decision"].mean()
logger.info(f"Mean: {mean}")

if silent is False:
logger.info(f"Mean: {mean}")

evaluation_results["Mean"] = mean

if mean > 0:
logger.info(f"MAE / Mean: {100 * mae / mean}%")
logger.info(f"RMSE / Mean: {100 * rmse / mean}%")
if silent is False:
logger.info(f"MAE / Mean: {100 * mae / mean}%")
logger.info(f"RMSE / Mean: {100 * rmse / mean}%")

evaluation_results["MAE / Mean"] = 100 * mae / mean
evaluation_results["RMSE / Mean"] = 100 * rmse / mean

return evaluation_results
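For a regression run, the dictionary built above would have roughly the shape sketched below; the keys mirror the assignments in this diff, the numbers are purely illustrative.

```python
# illustrative only — values are invented, keys follow the code above
evaluation_results = {
    "MAE": 12.5,
    "MSE": 310.4,
    "RMSE": 17.6,
    "RAE": 0.42,
    "RRSE": 0.48,
    "Mean": 96.0,
    "MAE / Mean": 13.02,
    "RMSE / Mean": 18.33,
}
```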