Skip to content

Commit

Permalink
Merge pull request #518 from subutai/may
Browse files Browse the repository at this point in the history
Simple but useful analysis script
  • Loading branch information
subutai committed May 19, 2021
2 parents a042d3e + 50ea23b commit b998699
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 0 deletions.
2 changes: 2 additions & 0 deletions nupic/research/support/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def load_ray_tune_experiment(
for experiment in all_experiments:
# Make logs relative to experiment path
logdir = experiment["logdir"]
if logdir is None:
logdir = ""
logpath = os.path.join(experiment_path, os.path.basename(logdir))
experiment["results"] = None

Expand Down
200 changes: 200 additions & 0 deletions projects/dendrites/permutedMNIST/analyze_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2021, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
import argparse
import copy
import os
import re
from itertools import groupby
from pathlib import Path

import pandas as pd

from experiments import CONFIGS
from nupic.research.support import load_ray_tune_experiments


# Select a unique tag for each parameter combination, ignoring seed value
# Used to group multiple random seeds of the same configuration for computing results.
def key_func(x):
s = re.split("[,]", re.sub(",|\\d+_|seed=\\d+", "", x["experiment_tag"]))
if len(s[0]) == 0:
return [" "]
return s


def parse_one_experiment(exp, state, df):
"""
Parse the trials in one experiment and append data to the given dataframe.
:param exp: experiment name
:param state: the `state` for the experiment. The state contains a list of runs.
Each run is an invocation from the command line. Each run can
consist of one or more trials.
:param df: the dataframe to append to
:return: a new dataframe with the results (the original one is not modified)
"""
df_entries = []
for experiment_state in state:
# Go through all checkpoints in the experiment
all_trials = experiment_state["checkpoints"]

# Group trials based on their parameter combinations (represented by tag)
parameter_groups = {
k[0]: list(v)
for k, v in groupby(sorted(all_trials, key=key_func), key=key_func)
}

for tag in parameter_groups:
trial_checkpoints = parameter_groups[tag]

try:
for _, trial_checkpoint in enumerate(trial_checkpoints):
results = trial_checkpoint["results"]
if results is None:
continue

# For each checkpoint select the iteration with the best accuracy as
# the best epoch
best_results = max(results,
key=lambda x: x.get("mean_accuracy", 0.0))
best_result = best_results["mean_accuracy"]
if best_result > 0.0:
# Get the trial parameters we care about
config = trial_checkpoint["config"]
model_args = config["model_args"]
kw_percent_on = model_args["kw_percent_on"]
weight_sparsity = model_args.get("weight_sparsity", 0.0)
dendrite_weight_sparsity = model_args.get(
"dendrite_weight_sparsity", 0.0)
num_segments = model_args.get("num_segments")
dim_context = model_args["dim_context"]
epochs = config["epochs"]
num_tasks = config["num_tasks"]
lr = config["optimizer_args"]["lr"]
momentum = config["optimizer_args"].get("momentum", 0.0)

# This list must match the column headers in collect_results
df_entries.append([
exp, kw_percent_on, weight_sparsity,
dendrite_weight_sparsity, num_segments, dim_context,
epochs, num_tasks, lr, momentum,
config["seed"], best_result,
"{} {}".format(exp, tag)
])

except Exception:
print("Problem with checkpoint group" + tag + " in " + exp
+ " ...skipping")
continue

# Create new dataframe from the entries with same dimensions as df
df2 = pd.DataFrame(df_entries, columns=df.columns)
return df.append(df2)


def collect_results(configs, basefilename):
"""
Parse the results for each specified experiment in each config file. Creates a
dataframe containing one row for every trial for every network configuration in
every experiment.
The dataframe is saved to basefilename.pkl
The raw results are also saved in a csv file named basefilename.csv.
:param configs: list of experiment configs
:param basefilename: base name for output files
"""

# The results table
columns = ["Experiment name",
"Activation sparsity", "FF weight sparsity",
"Dendrite weight sparsity", "Num segments",
"Dim context", "Epochs", "Num tasks", "LR", "Momentum", "Seed",
"Accuracy", "ID"
]
df = pd.DataFrame(columns=columns)

for exp in configs:
config = configs[exp]

# Make sure path and data_dir are relative to the project location,
# handling both ~/nta and ../results style paths.
path = config.get("local_dir", ".")
config["path"] = str(Path(path).expanduser().resolve())

# Load experiment data
experiment_path = os.path.join(config["path"], exp)
try:
states = load_ray_tune_experiments(
experiment_path=experiment_path, load_results=True
)

except RuntimeError:
print("Could not locate experiment state for " + exp + " ...skipping")
continue

df = parse_one_experiment(exp, states, df)

df.to_csv(basefilename + ".csv")
df.to_pickle(basefilename + ".pkl")


def analyze_experiment_data(filename_df, output_filename):
"""
Simple analysis to serve as an example. In general it's best to do the analysis in
JupyterLab.
:param filename_df: pickle filename containing the dataframe
:param output_filename: filename to use to save the csv
"""
df = pd.read_pickle(filename_df)

# Create a dataframe containing one row per configuration. The accuracy
df_id = df.groupby(["ID"]).agg(
num_trials=("ID", "count"),
ff_weight_sparsity=("FF weight sparsity", "first"),
activation_sparsity=("Activation sparsity", "first"),
mean_accuracy=("Accuracy", "mean"),
stdev=("Accuracy", "std"),
)
print(df_id)
df_id.to_csv(output_filename)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("experiments", nargs="+",
help="Experiments to run", choices=CONFIGS.keys())
parser.add_argument("-f", dest="format", default="grid",
help="Table format", choices=["grid", "latex_raw"])
parser.add_argument("-n", dest="name", default="temp", help="Base filename")
args = parser.parse_args()

# Get configuration values
configs = {}
for name in args.experiments:
configs[name] = copy.deepcopy(CONFIGS[name])

collect_results(configs, args.name)

analyze_experiment_data(args.name + ".pkl", args.name + "_analysis.csv")

0 comments on commit b998699

Please sign in to comment.