In [15]:
from nupic.research.frameworks import vernon
from copy import copy, deepcopy
from pprint import pprint


In [16]:
dir(vernon)

['ContinualLearningExperiment',
 'MetaContinualLearningExperiment',
 'RezeroedKWinnersGSCExperiment',
 'SupervisedExperiment',
 'VariedRezeroedKWinnersGSCExperiment',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'cl_experiment',
 'common_experiments',
 'components',
 'experiment_utils',
 'experiments',
 'interfaces',
 'meta_cl_experiment',
 'mixins',
 'network_utils',
 'supervised_experiment']

## Supervised Learning

In [76]:
import torch 
from torchvision import datasets, transforms, models

from nupic.research.frameworks.vernon import SupervisedExperiment

supervised_test = dict(
    # dataset -  using torchvision
    dataset_class=datasets.CIFAR10,
    dataset_args=dict(root="~/nta/datasets", transform=transforms.ToTensor()),       # model - using torchvision
    model_class=models.resnet18,
    model_args=dict(num_classes=10, pretrained=False),
    num_classes=10,
    # experiment
    distributed=False,
    # hyperparameters
    batch_size=32,
    epochs=5,
    optimizer_args=dict(lr=1e-2),
    optimizer_class=torch.optim.Adam,
    # for debugging
    batches_in_epoch=10,
    batches_in_epoch_val=10,
)

def run(experiment_class, config):
    exp = experiment_class()
    exp.setup_experiment(config)

    epoch = 0
    while not exp.should_stop():
        epoch += 1
        results = exp.run_epoch()
        print(f"Epoch: {epoch} Acc: {results['mean_accuracy']:.2f}")
        print(results)

run(SupervisedExperiment, config=supervised_test)

Epoch: 1 Acc: 0.00
{'total_correct': 0, 'total_tested': 0, 'mean_loss': 0.0, 'mean_accuracy': 0.0, 'learning_rate': 0.01}
Epoch: 2 Acc: 0.00
{'total_correct': 0, 'total_tested': 0, 'mean_loss': 0.0, 'mean_accuracy': 0.0, 'learning_rate': 0.01}
Epoch: 3 Acc: 0.12
{'total_correct': 38, 'total_tested': 320, 'mean_loss': 44.676943969726565, 'mean_accuracy': 0.11875, 'learning_rate': 0.01}
Epoch: 4 Acc: 0.20
{'total_correct': 65, 'total_tested': 320, 'mean_loss': 10.410250091552735, 'mean_accuracy': 0.203125, 'learning_rate': 0.01}
Epoch: 5 Acc: 0.12
{'total_correct': 38, 'total_tested': 320, 'mean_loss': 4.339820861816406, 'mean_accuracy': 0.11875, 'learning_rate': 0.01}


In [72]:
from nupic.research.frameworks.vernon import mixins

class CutMixSupervisedExperiment(mixins.CutMix,
                                 SupervisedExperiment):
    pass

supervised_test_v2 = deepcopy(supervised_test)
supervised_test_v2.update(
    experiment_class=CutMixSupervisedExperiment,
    mixup_beta=1.0,
    cutmix_prob=0.8,
)

run(SupervisedExperiment, config=supervised_test_v2)

Epoch: 1 Acc: 0.00
Epoch: 2 Acc: 0.00
Epoch: 3 Acc: 0.12
Epoch: 4 Acc: 0.20
Epoch: 5 Acc: 0.12


In [68]:
dir(mixins)

['CompositeLoss',
 'ConstrainParameters',
 'CutMix',
 'CutMixKnowledgeDistillation',
 'DelayLoadCheckpoint',
 'ElasticWeightConsolidation',
 'ExportModel',
 'ExtraValidationsPerEpoch',
 'KnowledgeDistillation',
 'KnowledgeDistillationCL',
 'LRRangeTest',
 'LegacyImagenetConfig',
 'LoadPreprocessedData',
 'LogBackpropStructure',
 'LogCovariance',
 'LogEveryLearningRate',
 'LogEveryLoss',
 'MaxupPerSample',
 'MaxupStandard',
 'MultiCycleLR',
 'OnlineMetaLearning',
 'Profile',
 'ProfileAutograd',
 'PruneLowMagnitude',
 'PruneLowSNR',
 'QuantizationAware',
 'ReduceLRAfterTask',
 'RegularizeLoss',
 'RezeroWeights',
 'SaveFinalCheckpoint',
 'StepBasedLogging',
 'UpdateBoostStrength',
 'VaryBatchSize',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'composite_loss',
 'constrain_parameters',
 'create_lr_test_experiment',
 'cutmix',
 'delay_load_checkpoint',
 'ewc',
 'export_model',
 'extra_validations_per_epoch',


In [73]:
# models and datasets available - not part of Vernon
from nupic.research.frameworks.pytorch import models as local_models
from nupic.research.frameworks.pytorch import datasets as local_datasets

supervised_test_v3 = deepcopy(supervised_test)
supervised_test_v3.update(
    # dataset - alternative using torchvision factory
    # includes base transforms as transforming to tensor and normalization
    dataset_class=local_datasets.torchvisiondataset,
    dataset_args=dict(root="~/nta/datasets", dataset_name="CIFAR10"),
    # can use local models available
    model_class=local_models.resnet9,
    model_args=dict(num_classes=10),
)

run(SupervisedExperiment, config=supervised_test_v3)

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1 Acc: 0.00
Epoch: 2 Acc: 0.00
Epoch: 3 Acc: 0.12
Epoch: 4 Acc: 0.25
Epoch: 5 Acc: 0.25


In [66]:
dir(local_datasets)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'download_gsc_data',
 'gsc_factory',
 'imagenet',
 'imagenet_factory',
 'omniglot',
 'preprocessed_gsc',
 'torchvision_factory',
 'torchvisiondataset']

In [67]:
dir(local_models)

['DenseNetCIFAR',
 'LeSparseNet',
 'MetaContinualLearningMLP',
 'MobileNetV1',
 'NoSoDenseNetCIFAR',
 'OMLNetwork',
 'OmniglotCNN',
 'ResNet',
 'StandardMLP',
 'VGGSparseNet',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'common_models',
 'le_sparse_net',
 'mobile_net_v1_sparse_depth',
 'mobile_net_v1_sparse_point',
 'mobilenetv1',
 'not_so_densenet',
 'pretrained_models',
 'resnet50_swsl',
 'resnet9',
 'resnet_models',
 'resnets',
 'resnext101_32x16d_wsl',
 'resnext101_32x48d_wsl',
 'resnext50_32x4d_swsl',
 'se_resnet50',
 'se_resnext50_32x4d',
 'separable_convolution2d',
 'vgg19_dense_net',
 'vgg19_sparse_net',
 'vgg_sparse_net',
 'xception']

## Continual Learning

In [74]:
from nupic.research.frameworks.vernon import ContinualLearningExperiment
from nupic.research.frameworks.pytorch.models import StandardMLP

class ReduceLRContinualLearningExperiment(mixins.ReduceLRAfterTask,
                                          ContinualLearningExperiment):
    pass

cl_mnist = dict(
    # specific to continual learning
    distributed=False,
    experiment_class=ReduceLRContinualLearningExperiment,
    num_classes=10,
    num_tasks=5,
    evaluation_metrics=[
        "eval_current_task",
        "eval_all_visited_tasks",
    ],
    # dataset
    dataset_class=torchvisiondataset,
    dataset_args=dict(root="~/nta/datasets", dataset_name="MNIST"),    
    # regular experiments
    model_class=StandardMLP,
    model_args=dict(
        input_size=(28, 28), num_classes=10, hidden_sizes=(50, 50, 50)
    ),
    # hyperparameters
    epochs_to_validate=[],
    epochs=5,
    batch_size=32,
    optimizer_class=torch.optim.SGD,
    optimizer_args=dict(lr=0.01, momentum=0.9, nesterov=False),
    # for debugging
    batches_in_epoch=30,
    batches_in_epoch_val=30
)

from nupic.research.frameworks.vernon.run_with_raytune import run_single_instance
run_single_instance(cl_mnist)


2021-01-21 10:36:51,469	INFO resource_spec.py:212 -- Starting Ray with 8.69 GiB memory available for workers and up to 4.36 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
{'checkpoint_at_end': False,
 'checkpoint_freq': 0,
 'checkpoint_score_attr': None,
 'config': {'batch_size': 32,
            'batches_in_epoch': 30,
            'batches_in_epoch_val': 30,
            'dataset_args': {'dataset_name': 'MNIST', 'root': '~/nta/datasets'},
            'dataset_class': <function torchvisiondataset at 0x7fe4a278d158>,
            'dist_port': 56312,
            'distributed': False,
            'epochs': 5,
            'epochs_to_validate': [],
            'evaluation_metrics': ['eval_current_task',
                                   'eval_all_visited_tasks'],
            'experiment_class': <class '__main__.ReduceLRContinualLearningExperiment'>,
            'log_level': 'INFO',
            'model_args': {'hidden_sizes': (50, 50, 

Trial name,status,loc
RemoteProcessTrainable_00000,RUNNING,


[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Training...
[2m[36m(pid=85443)[0m 2021-01-21 10:37:12,352	INFO trainable.py:180 -- _setup took 14.472 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=85443)[0m INFO:RemoteProcessTrainable:Pre-Experiment Result: None
Result for RemoteProcessTrainable_00000:
  date: 2021-01-21_10-37-13
  done: false
  eval_all_visited_tasks__mean_accuracy: 0.9989583333333333
  eval_all_visited_tasks__mean_loss: 0.0030511913200219473
  eval_all_visited_tasks__total_correct: 959
  eval_all_visited_tasks__total_tested: 960
  eval_current_task__mean_accuracy: 1.0
  eval_current_task__mean_loss: 0.002297080059846242
  eval_current_task__total_correct: 960
  eval_current_task__total_tested: 960
  experiment_id: a5469d764fc74d1e8ed983172aecacac
  experiment_tag: '0'
  hostname: Lucas-MacBook-Pro.local
  iterations_since_restore: 1
  learning_rate: 0.01
  no

Trial name,status,loc,iter,total time (s)
RemoteProcessTrainable_00000,RUNNING,192.168.0.10:85443,1,0.851337


[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Training...
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Setting learning rate to 0.00
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Training...
[2m[36m(pid=85443)[0m INFO:RemoteProcessTrainable:End Iteration Result: {'learning_rate': 0.001}
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Setting learning rate to 0.00
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Training...
[2m[36m(pid=85443)[0m INFO:RemoteProcessTrainable:End Iteration Result: {'learning_rate': 0.001}
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Setting learning rate to 0.00
[2m[36m(pid=85446)[0m INFO:ReduceLRContinualLearningExperiment:Training...
[2m[36m(pid=85443)[0m INFO:RemoteProcessTrainable:End Iteration Result: {'learning_rate': 0.001}
Result for RemoteProcessTrainable_00000:
  date: 2021-01-21_10-37-16
  done: true
  eval_all_visited

Trial name,status,loc,iter,total time (s)
RemoteProcessTrainable_00000,TERMINATED,,5,3.7205


**** Trial ended


## Meta-Continual Learning

In [75]:
# ray running in local, if bug, needs to manually shutdown # FIXME
import ray
ray.shutdown()

# networks and datasets under pytorch
from nupic.research.frameworks.vernon import MetaContinualLearningExperiment
from nupic.research.frameworks.pytorch.datasets import omniglot
from nupic.research.frameworks.pytorch.models import OMLNetwork

meta_cl_omniglot = dict(
    # experiment
    experiment_class=MetaContinualLearningExperiment,
    distributed=False,
    # dataset
    dataset_class=omniglot,
    dataset_args=dict(root="~/nta/datasets"),
    # model
    model_class=OMLNetwork,
    model_args=dict(num_classes=50),
    fast_params=["adaptation.*"],
    test_train_params=["adaptation.*"],
    # hyperparameters
    batch_size=5,
    num_batches_train=1,
    epochs=2,
    num_tasks_per_epoch=10,
    num_classes=50,
    optimizer_args=dict(lr=1e-4),
    optimizer_class=torch.optim.Adam,
)

# Use one of the existing run functions
run_single_instance(meta_cl_omniglot)


2021-01-21 10:41:45,617	INFO resource_spec.py:212 -- Starting Ray with 9.47 GiB memory available for workers and up to 4.74 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
{'checkpoint_at_end': False,
 'checkpoint_freq': 0,
 'checkpoint_score_attr': None,
 'config': {'batch_size': 5,
            'dataset_args': {'root': '~/nta/datasets'},
            'dataset_class': <function omniglot at 0x7fe4a278d1e0>,
            'dist_port': 56649,
            'distributed': False,
            'epochs': 2,
            'experiment_class': <class 'nupic.research.frameworks.vernon.experiments.meta_cl_experiment.MetaContinualLearningExperiment'>,
            'fast_params': ['adaptation.*'],
            'log_level': 'INFO',
            'model_args': {'num_classes': 50},
            'model_class': <class 'nupic.research.frameworks.pytorch.models.common_models.OMLNetwork'>,
            'num_batches_train': 1,
            'num_classes': 50,
      

Trial name,status,loc
RemoteProcessTrainable_00000,RUNNING,


[2m[36m(pid=85800)[0m Files already downloaded and verified
[2m[36m(pid=85794)[0m 2021-01-21 10:42:31,484	INFO trainable.py:180 -- _setup took 40.720 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=85794)[0m INFO:RemoteProcessTrainable:Pre-Experiment Result: None
[2m[36m(pid=85800)[0m INFO:MetaContinualLearningExperiment:Setup: fast_param_names=['adaptation.0.weight', 'adaptation.0.bias']
Result for RemoteProcessTrainable_00000:
  date: 2021-01-21_10-43-11
  done: false
  experiment_id: 9e023309f1dc4c0eaa5a73df4c92f6af
  experiment_tag: '0'
  hostname: Lucas-MacBook-Pro.local
  iterations_since_restore: 1
  learning_rate: 0.0001
  mean_accuracy: 0.10144927536231885
  mean_loss: 70.39163970947266
  neg_mean_loss: -70.39163970947266
  node_ip: 192.168.0.10
  pid: 85794
  time_since_restore: 39.8740611076355
  time_this_iter_s: 39.8740611076355
  time_total_s: 39.8740611076355
  timestamp: 161

Trial name,status,loc,acc,loss,iter,total time (s)
RemoteProcessTrainable_00000,RUNNING,192.168.0.10:85794,0.101449,70.3916,1,39.8741


[2m[36m(pid=85794)[0m INFO:RemoteProcessTrainable:End Iteration Result: {'total_correct': 7, 'total_tested': 69, 'learning_rate': 0.0001, 'validation_loss': 70.39163970947266, 'validation_accuracy': 0.10144927536231885}
Result for RemoteProcessTrainable_00000:
  date: 2021-01-21_10-43-20
  done: true
  experiment_id: 9e023309f1dc4c0eaa5a73df4c92f6af
  experiment_tag: '0'
  hostname: Lucas-MacBook-Pro.local
  iterations_since_restore: 2
  learning_rate: 0.0001
  mean_accuracy: 0.08695652173913043
  mean_loss: 28.518835067749023
  neg_mean_loss: -28.518835067749023
  node_ip: 192.168.0.10
  pid: 85794
  time_since_restore: 49.49658703804016
  time_this_iter_s: 9.622525930404663
  time_total_s: 49.49658703804016
  timestamp: 1611254600
  timesteps_since_restore: 0
  total_correct: 6
  total_tested: 69
  training_iteration: 2
  trial_id: '00000'
  


Trial name,status,loc,acc,loss,iter,total time (s)
RemoteProcessTrainable_00000,TERMINATED,,0.0869565,28.5188,2,49.4966


[2m[36m(pid=85794)[0m INFO:RemoteProcessTrainable:End Iteration Result: {'total_correct': 6, 'total_tested': 69, 'learning_rate': 0.0001, 'validation_loss': 28.518835067749023, 'validation_accuracy': 0.08695652173913043}


Trial name,status,loc,acc,loss,iter,total time (s)
RemoteProcessTrainable_00000,TERMINATED,,0.0869565,28.5188,2,49.4966


**** Trial ended
