Merge pull request #518 from subutai/may

Simple but useful analysis script
numenta · May 19, 2021 · b998699 · b998699
2 parents a042d3e + 50ea23b
commit b998699
Show file tree

Hide file tree

Showing 2 changed files with 202 additions and 0 deletions.
diff --git a/nupic/research/support/ray_utils.py b/nupic/research/support/ray_utils.py
@@ -91,6 +91,8 @@ def load_ray_tune_experiment(
     for experiment in all_experiments:
         # Make logs relative to experiment path
         logdir = experiment["logdir"]
+        if logdir is None:
+            logdir = ""
         logpath = os.path.join(experiment_path, os.path.basename(logdir))
         experiment["results"] = None
 

diff --git a/projects/dendrites/permutedMNIST/analyze_results.py b/projects/dendrites/permutedMNIST/analyze_results.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+# ----------------------------------------------------------------------
+# Numenta Platform for Intelligent Computing (NuPIC)
+# Copyright (C) 2021, Numenta, Inc.  Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program.  If not, see http://www.gnu.org/licenses.
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+import argparse
+import copy
+import os
+import re
+from itertools import groupby
+from pathlib import Path
+
+import pandas as pd
+
+from experiments import CONFIGS
+from nupic.research.support import load_ray_tune_experiments
+
+
+def key_func(x):
+    """
+    Build the grouping/sorting key for one trial checkpoint.
+
+    Commas, digit runs followed by an underscore (e.g. ``12_``) and ``seed=<n>``
+    entries are removed from the trial's experiment tag, so that trials which
+    differ only by their random seed end up with the same key.
+
+    :param x: trial checkpoint dict; only ``x["experiment_tag"]`` is read
+
+    :return: a list holding the cleaned tag, or ``[" "]`` when nothing is left
+    """
+    tag_without_seed = re.sub(",|\\d+_|seed=\\d+", "", x["experiment_tag"])
+    pieces = re.split("[,]", tag_without_seed)
+    return pieces if len(pieces[0]) > 0 else [" "]
+
+
+def parse_one_experiment(exp, state, df):
+    """
+    Parse the trials in one experiment and append data to the given dataframe.
+
+    Trials are grouped by parameter combination using ``key_func``. For every
+    trial, the iteration with the highest "mean_accuracy" is taken as its best
+    result. Trials without results, or whose best accuracy is not above 0.0, are
+    skipped.
+
+    :param exp: experiment name
+    :param state: the `state` for the experiment. The state contains a list of runs.
+                  Each run is an invocation from the command line. Each run can
+                  consist of one or more trials.
+    :param df: the dataframe to append to
+
+    :return: a new dataframe with the results (the original one is not modified)
+    """
+    df_entries = []
+    for experiment_state in state:
+        # Go through all checkpoints in the experiment
+        all_trials = experiment_state["checkpoints"]
+
+        # Group trials based on their parameter combinations (represented by tag).
+        # groupby only merges adjacent items, hence the sort with the same key.
+        parameter_groups = {
+            k[0]: list(v)
+            for k, v in groupby(sorted(all_trials, key=key_func), key=key_func)
+        }
+
+        for tag, trial_checkpoints in parameter_groups.items():
+            try:
+                for trial_checkpoint in trial_checkpoints:
+                    results = trial_checkpoint["results"]
+                    if results is None or len(results) == 0:
+                        continue
+
+                    # For each checkpoint select the iteration with the best
+                    # accuracy as the best epoch. Iterations that did not report
+                    # an accuracy count as 0.0 and can never be selected.
+                    best_result = max(
+                        result.get("mean_accuracy", 0.0) for result in results
+                    )
+                    if best_result <= 0.0:
+                        continue
+
+                    # Get the trial parameters we care about
+                    config = trial_checkpoint["config"]
+                    model_args = config["model_args"]
+                    optimizer_args = config["optimizer_args"]
+
+                    # This list must match the column headers in collect_results
+                    df_entries.append([
+                        exp,
+                        model_args["kw_percent_on"],
+                        model_args.get("weight_sparsity", 0.0),
+                        model_args.get("dendrite_weight_sparsity", 0.0),
+                        model_args.get("num_segments"),
+                        model_args["dim_context"],
+                        config["epochs"],
+                        config["num_tasks"],
+                        optimizer_args["lr"],
+                        optimizer_args.get("momentum", 0.0),
+                        config["seed"],
+                        best_result,
+                        "{} {}".format(exp, tag),
+                    ])
+
+            except Exception as err:
+                # Best effort: a malformed trial (e.g. a missing config key) aborts
+                # the rest of its group, but parsing carries on with the next group.
+                print("Problem with checkpoint group {} in {} ({!r}) ...skipping"
+                      .format(tag, exp, err))
+                continue
+
+    # Create new dataframe from the entries with same dimensions as df
+    df2 = pd.DataFrame(df_entries, columns=df.columns)
+
+    # DataFrame.append was removed in pandas 2.0; concat is the replacement and,
+    # like append, keeps the row labels of both frames.
+    return pd.concat([df, df2])
+
+
+def collect_results(configs, basefilename):
+    """
+    Gather the trial results of every given experiment into a single dataframe
+    with one row per parsed trial, then write it to disk.
+
+    Two files are produced: basefilename.csv and basefilename.pkl.
+
+    Each config dict gets a "path" entry holding its resolved "local_dir" (the
+    current directory when "local_dir" is absent). Experiments whose state cannot
+    be loaded (RuntimeError from the loader) are reported and skipped.
+
+    :param configs: dict mapping experiment name to its experiment config
+    :param basefilename: base name for output files
+    """
+
+    # Column headers of the results table; parse_one_experiment fills rows in
+    # exactly this order.
+    column_names = [
+        "Experiment name",
+        "Activation sparsity",
+        "FF weight sparsity",
+        "Dendrite weight sparsity",
+        "Num segments",
+        "Dim context",
+        "Epochs",
+        "Num tasks",
+        "LR",
+        "Momentum",
+        "Seed",
+        "Accuracy",
+        "ID",
+    ]
+    results_table = pd.DataFrame(columns=column_names)
+
+    for exp, config in configs.items():
+        # Expand "~" and resolve relative locations such as ../results
+        local_dir = Path(config.get("local_dir", "."))
+        config["path"] = str(local_dir.expanduser().resolve())
+
+        # Load experiment data
+        experiment_path = os.path.join(config["path"], exp)
+        try:
+            states = load_ray_tune_experiments(
+                experiment_path=experiment_path, load_results=True
+            )
+        except RuntimeError:
+            print("Could not locate experiment state for " + exp + " ...skipping")
+        else:
+            results_table = parse_one_experiment(exp, states, results_table)
+
+    results_table.to_csv(basefilename + ".csv")
+    results_table.to_pickle(basefilename + ".pkl")
+
+
+def analyze_experiment_data(filename_df, output_filename):
+    """
+    Example analysis: summarize the collected trials per configuration.
+
+    Rows sharing the same "ID" are reduced to a trial count, the first recorded
+    FF weight sparsity and activation sparsity, and the mean and standard
+    deviation of "Accuracy". The summary is printed and saved as csv. For anything
+    more involved it is generally easier to work in JupyterLab.
+
+    :param filename_df: pickle filename containing the dataframe
+    :param output_filename: filename to use to save the csv
+    """
+    trials = pd.read_pickle(filename_df)
+
+    # Named aggregations: output column -> (source column, reduction)
+    summary_spec = {
+        "num_trials": ("ID", "count"),
+        "ff_weight_sparsity": ("FF weight sparsity", "first"),
+        "activation_sparsity": ("Activation sparsity", "first"),
+        "mean_accuracy": ("Accuracy", "mean"),
+        "stdev": ("Accuracy", "std"),
+    }
+
+    # One row per configuration
+    per_config = trials.groupby(["ID"]).agg(**summary_spec)
+    print(per_config)
+    per_config.to_csv(output_filename)
+
+
+if __name__ == "__main__":
+    # Command line: one or more experiment names (restricted to the keys of
+    # CONFIGS), an optional table format and an optional base filename.
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "experiments",
+        nargs="+",
+        help="Experiments to run",
+        choices=CONFIGS.keys(),
+    )
+    arg_parser.add_argument(
+        "-f",
+        dest="format",
+        default="grid",
+        help="Table format",
+        choices=["grid", "latex_raw"],
+    )
+    arg_parser.add_argument("-n", dest="name", default="temp", help="Base filename")
+    cli_args = arg_parser.parse_args()
+
+    # Work on private copies so the shared CONFIGS entries are never modified
+    selected_configs = {
+        exp_name: copy.deepcopy(CONFIGS[exp_name])
+        for exp_name in cli_args.experiments
+    }
+
+    # Write <name>.csv and <name>.pkl, then summarize the pickle into
+    # <name>_analysis.csv
+    base_name = cli_args.name
+    collect_results(selected_configs, base_name)
+
+    analyze_experiment_data(base_name + ".pkl", base_name + "_analysis.csv")