In [1]:
%load_ext autoreload
%autoreload 2
import sys
import torch
import itertools
import numpy as np
from torch import nn
import torchmetrics
import pandas as pd
sys.path.append('..')
# sys.path.append('/system/user/beck/pwbeck/projects/regularization/ml_utilities')
from pathlib import Path
from typing import Union
from pprint import pprint
from ml_utilities.torch_models.base_model import BaseModel
from ml_utilities.torch_models.fc import FC
from ml_utilities.torch_models import get_model_class
from ml_utilities.output_loader.repo import Repo
from ml_utilities.output_loader.job_output import JobResult, SweepResult
from ml_utilities.torch_utils.metrics import SimpleAccuracy, SimpleError
from ml_utilities.utils import match_number_list_to_interval
from omegaconf import OmegaConf

from erank.data.datasetgenerator import DatasetGenerator
from erank.mode_connectivity import interpolate_linear, interpolate_linear_runs, interpolation_result2series

import matplotlib.pyplot as plt
gpu_id = 0
# Repo handle pointing at '../../erank'; its hydra defaults are loaded from the YAML file given below.
REPO = Repo(
    dir=Path('../../erank'),
    hydra_defaults=OmegaConf.load('../configs/hydra/jobname_outputdir_format.yaml'),
)

# Instability analysis debug notebook
This notebook is used to implement linear interpolation of models.

Do linear interpolation on MNIST. Use data from Experiment 11.7.4.

Start from pretrained model with 100 steps.

In [2]:
# Metric instance that is passed to interpolate_linear_runs as the score function in the cells below.
score_fn = SimpleAccuracy()

## Instability Analysis on Experiment 11.7.4

**PROBLEM**: This experiment cannot be used for instability analysis, because it uses only **50** samples, which is less than the batch size of 128. Training is therefore full-batch, so there is no noise from different batch orders between seeds.

In [3]:
# Sweep configuration of experiment 11.7, start_num 4 (experiment name mnist-11.7.4-lenet_rottasks_ft).
# Grid axes: trainer.init_model_step, data.dataset_kwargs.rotation_angle and
# data.dataset_split.restrict_n_samples_train_task (single value 50); seeds 1 and 2.
# The parsed config is passed to REPO.get_output_loader in the next cell.
# NOTE(review): the `XXX` entries look like placeholders for the swept parameters — confirm.
config_yaml = """
run_config:
  exec_type: parallel # sequential
  hostname: wasp
  gpu_ids: [0,1,2,3,4]
  runs_per_gpu: 5

  wandb: # wandb config for run_handler, if "wandb: null" then logging to wandb is disabled for run_handler
    init:
      tags:
        - ${config.experiment_data.experiment_tag}_exps
        - run_handler
      notes: #
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [1,2]

sweep:
  type: grid
  axes:
    - parameter: trainer.init_model_step
      vals: [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
    - parameter: data.dataset_kwargs.rotation_angle
      vals: linspace(0,180,30,endpoint=True)
    - parameter: data.dataset_split.restrict_n_samples_train_task
      vals: [50] #[5, 20, 50, 100, 500, 1000, 10000, 48000]

start_num: 4 # use this to count how often this config is run
###
config:
  experiment_data:
    entity: jkuiml-fsl
    project_name: sparsity
    experiment_tag: "11.7"
    experiment_type: startnum_${start_num}
    experiment_name: mnist-${config.experiment_data.experiment_tag}.${start_num}-lenet_rottasks_ft
    experiment_dir: null
    experiment_notes: Hyperparameter search.
    job_name: null
    seed: 0
    hostname: null # the server on which the run is run, will be filled by run_handler
    gpu_id: 0

  # wandb:
  #   init:
  #     tags: # list(), used to tag wandblogger
  #       - ${config.experiment_data.experiment_tag}_exps
  #     notes: ${config.experiment_data.experiment_notes} # str, used to make notes to wandblogger
  #     group: ${config.experiment_data.experiment_tag} # null
  #     job_type: ${config.experiment_data.experiment_type} # examples: hypsearch, pretrain, eval, etc.

  #   watch:
  #     log: null #parameters #null #all
  #     log_freq: 5000

  model:
    name: fc
    model_kwargs:
      input_size: 784
      hidden_sizes:
        - 300
        - 100
      output_size: 10
      flatten_input: True
      dropout: null
      act_fn: relu

  trainer:
    training_setup: supervised
    n_steps: 2000
    log_train_step_every: 1
    log_additional_train_step_every_multiplier: 1
    log_additional_logs: True
    val_every: 5
    save_every: 5 #500
    early_stopping_patience: 200 #500
    batch_size: 128
    optimizer_scheduler:
      optimizer_name: adamw #sgd #adamw
      optimizer_kwargs:
        lr: 0.001
        weight_decay: 0.0
    
    init_model_step: XXX
    init_model: /system/user/beck/pwbeck/projects/regularization/erank/outputs/mnist-11.5.0-lenet--221015_122552/model_step_${config.trainer.init_model_step}.p

    loss: crossentropy

    metrics:
      - Accuracy
    num_workers: 4
    verbose: False

  data:
    dataset: rotatedvision
    dataset_kwargs:
      data_root_path: /system/user/beck/pwbeck/data
      dataset: mnist
      rotation_angle: XXX
    dataset_split:
      train_val_split: 0.8
      restrict_n_samples_train_task: XXX
"""
# Parse the YAML text into an OmegaConf config object.
cfg = OmegaConf.create(config_yaml)

In [4]:
# Ask the repo for the output loader that belongs to cfg and print its summary.
sweepr = REPO.get_output_loader(cfg)
print(sweepr)

Exp. Tag(start_num): 11.7(4)
Exp. Name: mnist-11.7.4-lenet_rottasks_ft
Training setup: supervised
Model name: fc
Dataset name: rotatedvision
Sweep type: grid
  trainer.init_model_step: [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
  data.dataset_kwargs.rotation_angle: linspace(0,180,30,endpoint=True)
  data.dataset_split.restrict_n_samples_train_task: [50]
Seeds: [1, 2]
Num. jobs: 2150
Config updated: 2022-12-12 17:43:28
Sweep started:  2022-12-12 17:42:56



In [5]:
# Concrete values behind the sweep's `linspace(0,180,30,endpoint=True)` spec for rotation_angle.
np.linspace(0, 180, num=30, endpoint=True)

array([  0.        ,   6.20689655,  12.4137931 ,  18.62068966,
        24.82758621,  31.03448276,  37.24137931,  43.44827586,
        49.65517241,  55.86206897,  62.06896552,  68.27586207,
        74.48275862,  80.68965517,  86.89655172,  93.10344828,
        99.31034483, 105.51724138, 111.72413793, 117.93103448,
       124.13793103, 130.34482759, 136.55172414, 142.75862069,
       148.96551724, 155.17241379, 161.37931034, 167.5862069 ,
       173.79310345, 180.        ])

### Single linear interpolation for reference

In [6]:
# Fetch the jobs matching this parameter string; the output below lists one job per seed.
jobs_pretrainsteps100 = sweepr.get_jobs('init_model_step-100-rotation_angle-37')
jobs_pretrainsteps100

[JobResult(mnist-11.7.4-lenet_rottasks_ft--init_model_step-100-rotation_angle-37.2414-restrict_n_samples_train_task-50-seed-2--221214_005243),
 JobResult(mnist-11.7.4-lenet_rottasks_ft--init_model_step-100-rotation_angle-37.2414-restrict_n_samples_train_task-50-seed-1--221213_143055)]

In [7]:
# Run the linear-interpolation analysis between the first two jobs in the list, scored with score_fn.
# Returns two tables, which are inspected in the following cells.
dataset_df, distance_df = interpolate_linear_runs(jobs_pretrainsteps100[0], jobs_pretrainsteps100[1], score_fn)

Interp. factors: 100%|██████████| 5/5 [00:20<00:00,  4.17s/it]


In [8]:
# First return value of interpolate_linear_runs: interpolation scores and instability per dataset split.
dataset_df

Unnamed: 0_level_0,Unnamed: 1_level_0,datasets,val,val,val,val,val,val,train,train,train,train,train,train
Unnamed: 0_level_1,Unnamed: 1_level_1,score,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability
Unnamed: 0_level_2,Unnamed: 1_level_2,alpha,0.00,0.25,0.50,0.75,1.00,NaN,0.00,0.25,0.50,0.75,1.00,NaN
job,seeds,model_idxes,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
mnist-11.7.4-lenet_rottasks_ft--init_model_step-100-rotation_angle-37.2414-restrict_n_samples_train_task-50,"(2, 1)","(1885, 1815)",0.711341,0.711341,0.711341,0.711341,0.711341,0.0,1.0,1.0,1.0,1.0,1.0,0.0


In [9]:
# Second return value of interpolate_linear_runs: distance table (l2distance, cosinesimilarity).
distance_df

Unnamed: 0_level_0,Unnamed: 1_level_0,distances,l2distance,cosinesimilarity
job,seeds,model_idxes,Unnamed: 3_level_1,Unnamed: 4_level_1
mnist-11.7.4-lenet_rottasks_ft--init_model_step-100-rotation_angle-37.2414-restrict_n_samples_train_task-50,"(2, 1)","(1885, 1815)",0.016702,0.999999


In [10]:
# best_model_idx of the first job; compare with the model_idxes column of the tables above.
jobs_pretrainsteps100[0].best_model_idx

1885

In [11]:
# best_model_idx of the second job; compare with the model_idxes column of the tables above.
jobs_pretrainsteps100[1].best_model_idx

1815

## Instability Analysis on Experiment 11.7.3

In [12]:
# Sweep configuration of experiment 11.7, start_num 3 (experiment name mnist-11.7.3-lenet_rottasks_ft).
# Grid axes: trainer.init_model_step, data.dataset_kwargs.rotation_angle (50 linspace values) and
# data.dataset_split.restrict_n_samples_train_task (single value 300); seeds 1, 2 and 3.
# The parsed config is passed to REPO.get_output_loader in the next cell.
# NOTE(review): the `XXX` entries look like placeholders for the swept parameters — confirm.
config_yaml = """
run_config:
  exec_type: parallel # sequential
  hostname: gorilla
  gpu_ids: [0,1,2,3,4,5,6,7]
  runs_per_gpu: 3

  wandb: # wandb config for run_handler, if "wandb: null" then logging to wandb is disabled for run_handler
    init:
      tags:
        - ${config.experiment_data.experiment_tag}_exps
        - run_handler
      notes: #
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [1,2,3]

sweep:
  type: grid
  axes:
    - parameter: trainer.init_model_step
      vals: [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
    - parameter: data.dataset_kwargs.rotation_angle
      vals: linspace(0,180,50,endpoint=True)
    - parameter: data.dataset_split.restrict_n_samples_train_task
      vals: [300] #[5, 20, 50, 100, 500, 1000, 10000, 48000]

start_num: 3 # use this to count how often this config is run
###
config:
  experiment_data:
    entity: jkuiml-fsl
    project_name: sparsity
    experiment_tag: "11.7"
    experiment_type: startnum_${start_num}
    experiment_name: mnist-${config.experiment_data.experiment_tag}.${start_num}-lenet_rottasks_ft
    experiment_dir: null
    experiment_notes: Hyperparameter search.
    job_name: null
    seed: 0
    hostname: null # the server on which the run is run, will be filled by run_handler
    gpu_id: 0

  # wandb:
  #   init:
  #     tags: # list(), used to tag wandblogger
  #       - ${config.experiment_data.experiment_tag}_exps
  #     notes: ${config.experiment_data.experiment_notes} # str, used to make notes to wandblogger
  #     group: ${config.experiment_data.experiment_tag} # null
  #     job_type: ${config.experiment_data.experiment_type} # examples: hypsearch, pretrain, eval, etc.

  #   watch:
  #     log: null #parameters #null #all
  #     log_freq: 5000

  model:
    name: fc
    model_kwargs:
      input_size: 784
      hidden_sizes:
        - 300
        - 100
      output_size: 10
      flatten_input: True
      dropout: null
      act_fn: relu

  trainer:
    training_setup: supervised
    n_steps: 2000
    log_train_step_every: 1
    log_additional_train_step_every_multiplier: 1
    log_additional_logs: True
    val_every: 5
    save_every: 5 #500
    early_stopping_patience: 200 #500
    batch_size: 128
    optimizer_scheduler:
      optimizer_name: adamw #sgd #adamw
      optimizer_kwargs:
        lr: 0.001
        weight_decay: 0.0
    
    init_model_step: XXX
    init_model: /system/user/beck/pwbeck/projects/regularization/erank/outputs/mnist-11.5.0-lenet--221015_122552/model_step_${config.trainer.init_model_step}.p

    loss: crossentropy

    metrics:
      - Accuracy
    num_workers: 4
    verbose: False

  data:
    dataset: rotatedvision
    dataset_kwargs:
      data_root_path: /system/user/beck/pwbeck/data
      dataset: mnist
      rotation_angle: XXX
    dataset_split:
      train_val_split: 0.8
      restrict_n_samples_train_task: XXX

"""
# Parse the YAML text into an OmegaConf config object.
cfg = OmegaConf.create(config_yaml)

In [13]:
# Ask the repo for the output loader that belongs to cfg (now experiment 11.7.3) and print its summary.
sweepr = REPO.get_output_loader(cfg)
print(sweepr)

Exp. Tag(start_num): 11.7(3)
Exp. Name: mnist-11.7.3-lenet_rottasks_ft
Training setup: supervised
Model name: fc
Dataset name: rotatedvision
Sweep type: grid
  trainer.init_model_step: [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
  data.dataset_kwargs.rotation_angle: linspace(0,180,50,endpoint=True)
  data.dataset_split.restrict_n_samples_train_task: [300]
Seeds: [1, 2, 3]
Num. jobs: 5400
Config updated: 2022-11-25 12:34:14
Sweep started:  2022-11-25 12:36:51



In [14]:
# Concrete values behind the sweep's `linspace(0,180,50,endpoint=True)` spec for rotation_angle.
np.linspace(0, 180, num=50, endpoint=True)

array([  0.        ,   3.67346939,   7.34693878,  11.02040816,
        14.69387755,  18.36734694,  22.04081633,  25.71428571,
        29.3877551 ,  33.06122449,  36.73469388,  40.40816327,
        44.08163265,  47.75510204,  51.42857143,  55.10204082,
        58.7755102 ,  62.44897959,  66.12244898,  69.79591837,
        73.46938776,  77.14285714,  80.81632653,  84.48979592,
        88.16326531,  91.83673469,  95.51020408,  99.18367347,
       102.85714286, 106.53061224, 110.20408163, 113.87755102,
       117.55102041, 121.2244898 , 124.89795918, 128.57142857,
       132.24489796, 135.91836735, 139.59183673, 143.26530612,
       146.93877551, 150.6122449 , 154.28571429, 157.95918367,
       161.63265306, 165.30612245, 168.97959184, 172.65306122,
       176.32653061, 180.        ])

### Single linear interpolation for reference

In [15]:
# Fetch the jobs matching this parameter string; the output below lists one job per seed.
jobs_pretrainsteps100 = sweepr.get_jobs('init_model_step-100-rotation_angle-25')
jobs_pretrainsteps100

[JobResult(mnist-11.7.3-lenet_rottasks_ft--init_model_step-100-rotation_angle-25.7143-restrict_n_samples_train_task-300-seed-2--221127_232024),
 JobResult(mnist-11.7.3-lenet_rottasks_ft--init_model_step-100-rotation_angle-25.7143-restrict_n_samples_train_task-300-seed-3--221126_021736),
 JobResult(mnist-11.7.3-lenet_rottasks_ft--init_model_step-100-rotation_angle-25.7143-restrict_n_samples_train_task-300-seed-1--221125_232309)]

In [16]:
# Run the linear-interpolation analysis between the first two jobs in the list, scored with score_fn.
# Returns two tables, which are inspected in the following cells.
dataset_df, distance_df = interpolate_linear_runs(jobs_pretrainsteps100[0], jobs_pretrainsteps100[1], score_fn)

Interp. factors: 100%|██████████| 5/5 [00:21<00:00,  4.30s/it]


In [17]:
# First return value of interpolate_linear_runs: interpolation scores and instability per dataset split.
dataset_df

Unnamed: 0_level_0,Unnamed: 1_level_0,datasets,val,val,val,val,val,val,train,train,train,train,train,train
Unnamed: 0_level_1,Unnamed: 1_level_1,score,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability
Unnamed: 0_level_2,Unnamed: 1_level_2,alpha,0.00,0.25,0.50,0.75,1.00,NaN,0.00,0.25,0.50,0.75,1.00,NaN
job,seeds,model_idxes,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
mnist-11.7.3-lenet_rottasks_ft--init_model_step-100-rotation_angle-25.7143-restrict_n_samples_train_task-300,"(2, 3)","(515, 380)",0.873551,0.873457,0.873884,0.873896,0.874573,-0.000606,1.0,1.0,1.0,1.0,1.0,0.0


In [18]:
# Second return value of interpolate_linear_runs: distance table (l2distance, cosinesimilarity).
distance_df

Unnamed: 0_level_0,Unnamed: 1_level_0,distances,l2distance,cosinesimilarity
job,seeds,model_idxes,Unnamed: 3_level_1,Unnamed: 4_level_1
mnist-11.7.3-lenet_rottasks_ft--init_model_step-100-rotation_angle-25.7143-restrict_n_samples_train_task-300,"(2, 3)","(515, 380)",2.375957,0.989239


### Instability analysis

In [19]:
# Print the summary of the sweep that the instability analysis below operates on.
print(sweepr)

Exp. Tag(start_num): 11.7(3)
Exp. Name: mnist-11.7.3-lenet_rottasks_ft
Training setup: supervised
Model name: fc
Dataset name: rotatedvision
Sweep type: grid
  trainer.init_model_step: [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475]
  data.dataset_kwargs.rotation_angle: linspace(0,180,50,endpoint=True)
  data.dataset_split.restrict_n_samples_train_task: [300]
Seeds: [1, 2, 3]
Num. jobs: 5400
Config updated: 2022-11-25 12:34:14
Sweep started:  2022-11-25 12:36:51



In [20]:
# Dotted name of the sweep axis that selects the step of the initial model checkpoint
# (it is interpolated into `trainer.init_model` in the config).
init_model_step_param = 'trainer.init_model_step'

In [21]:
# Dump the config held by the sweep object as YAML text.
print(OmegaConf.to_yaml(sweepr.config))

run_config:
  exec_type: parallel
  hostname: gorilla
  gpu_ids:
  - 0
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  runs_per_gpu: 3
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler
seeds:
- 1
- 2
- 3
sweep:
  type: grid
  axes:
  - parameter: trainer.init_model_step
    vals:
    - 0
    - 5
    - 10
    - 15
    - 20
    - 25
    - 30
    - 35
    - 40
    - 45
    - 50
    - 55
    - 60
    - 65
    - 70
    - 75
    - 80
    - 85
    - 90
    - 95
    - 100
    - 125
    - 150
    - 175
    - 200
    - 225
    - 250
    - 275
    - 300
    - 325
    - 350
    - 375
    - 400
    - 425
    - 450
    - 475
  - parameter: data.dataset_kwargs.rotation_angle
    vals: linspace(0,180,50,endpoint=True)
  - parameter: data.dataset_split.restrict_n_samples_train_task
    vals:
    - 300
start_num: 3
config:
  experiment_data:
    entity: j

In [22]:
# Collect the failed jobs of the sweep and display them.
# The progress bar in the saved output counts 5400 items, the sweep's number of jobs, so this takes about a minute.
fh = sweepr.get_failed_jobs()
fh

100%|██████████| 5400/5400 [01:06<00:00, 80.95it/s] 


In [26]:
# Names of the parameters that are varied by the sweep.
sweepr.sweep_params

['trainer.init_model_step',
 'data.dataset_kwargs.rotation_angle',
 'data.dataset_split.restrict_n_samples_train_task']

In [31]:
# Values of a single sweep axis, selected by its dotted parameter name.
sweepr.get_sweep_param_values('trainer.init_model_step')

[0,
 5,
 10,
 15,
 20,
 25,
 30,
 35,
 40,
 45,
 50,
 55,
 60,
 65,
 70,
 75,
 80,
 85,
 90,
 95,
 100,
 125,
 150,
 175,
 200,
 225,
 250,
 275,
 300,
 325,
 350,
 375,
 400,
 425,
 450,
 475]

In [48]:
# 'seed' can be queried like a sweep parameter; it is one of the keys returned by get_sweep_param_values().
seeds = sweepr.get_sweep_param_values('seed')
seeds


[1, 2, 3]

In [49]:
# Number of unordered seed pairs, counted lazily without building the list of pairs.
sum(1 for _ in itertools.combinations(seeds, 2))

3

In [51]:
# Show at most the first five unordered seed pairs.
list(itertools.islice(itertools.combinations(seeds, 2), 5))

[(1, 2), (1, 3), (2, 3)]

In [40]:
# Called without an argument, get_sweep_param_values returns a mapping; list the parameter names it is keyed by.
sweepr.get_sweep_param_values().keys()


dict_keys(['seed', 'data.dataset_split.restrict_n_samples_train_task', 'trainer.init_model_step', 'data.dataset_kwargs.rotation_angle'])

In [19]:
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the call was interrupted by hand —
# presumably it did not return in reasonable time. TODO: check match_number_list_to_interval with these
# arguments before relying on Restart & Run All.
match_number_list_to_interval(list(range(10)), 4)

KeyboardInterrupt: 