In [19]:
%load_ext autoreload
%autoreload 2
import torch
import numpy as np
import pandas as pd
from omegaconf import OmegaConf
from pathlib import Path
from pprint import pprint
# plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
# mycode
from repo import REPO
from ml_utilities.output_loader.job_output import SweepResult, JobResult
from ml_utilities.output_loader import create_job_output_loader
from ml_utilities.output_loader.plot import plot_sweep_summary, plot_data_log_values
from erank.mode_connectivity.instability_analysis import InstabilityAnalyzer

[autoreload of ml_utilities.logger failed: Traceback (most recent call last):
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 257, in check
    superreload(m, reload, self.old_objects)
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 480, in superreload
    update_generic(old_obj, new_obj)
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, b)
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 329, in update_class
    if update_generic(old_obj, new_obj):
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, b)
  File "/system/apps/userenv/beck/subspaces/lib/python3.10/site-packages/IPython/extensions/autorel

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 13.1 CIFAR 10 ResNet Pretraining

With this notebook I start the pretraining runs on CIFAR10, which are then used for later finetuning for the stability analysis. 
I also start the finetuning runs here

In [20]:
config_yaml = """
run_config:
  exec_type: parallel
  hostname: dragonfly
  gpu_ids: [0,1]
  runs_per_gpu: 2
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [11,17,31]

sweep:
  type: grid
  axes: 
  - parameter: data.dataset_transforms.enable_transforms
    vals: [True, False]
  # - parameter: trainer.optimizer_scheduler.optimizer_kwargs.lr
  #   vals: [0.1, 0.075, 0.05, 0.025]
  # - parameter: trainer.optimizer_scheduler.optimizer_kwargs.weight_decay
  #   vals: [0.0, 0.001, 0.005]
  # - parameter: trainer.batch_size
  #   vals: [64, 256, 512]
    
start_num: 1

config:
  experiment_data:
    entity: jkuiml-fsl
    project_name: tflearning
    experiment_tag: '13.1'
    experiment_type: startnum_${start_num}
    experiment_name: cifar10-${config.experiment_data.experiment_tag}.${start_num}-resnet-B-pretrain
    experiment_dir: null
    experiment_notes: 
    job_name: null
    seed: 0
    hostname: null
    gpu_id: 0
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      notes: ${config.experiment_data.experiment_notes}
      group: ${config.experiment_data.experiment_tag}
      job_type: ${config.experiment_data.experiment_type}
    watch:
      log: null
      log_freq:

  model:
    model_cfg: resnet20-cifar10-B
    # name: resnet
    # model_kwargs:
    #   in_channels: 3
    #   act_fn: relu
    #   residual_option: B
    #   input_layer_config:
    #     kernel_size: 3
    #     out_channels: 16
    #     bias: false
    #     batch_norm: true
    #     stride: 1
    #     padding: 1
    #   resnet_blocks_config:
    #     - out_channels: 16
    #       num_residual_blocks: 3
    #     - out_channels: 32
    #       num_residual_blocks: 3
    #     - out_channels: 64
    #       num_residual_blocks: 3
    #   linear_output_units:
    #     - 10
    init_model: null

  trainer:
    training_setup: supervised
    n_steps: 64e3
    log_train_step_every: 1
    log_additional_train_step_every_multiplier: 1
    log_additional_logs: true
    val_every: 500
    save_every: 1000
    early_stopping_patience: 64e3
    batch_size: 128
    optimizer_scheduler:
      optimizer_name: SGD
      optimizer_kwargs:
        lr: 0.01 #0.1
        momentum: 0.9
        weight_decay: 0.0001
      lr_scheduler_name: MultiStepLR
      lr_scheduler_kwargs:
        milestones: [32e3, 48e3]
        gamma: 0.1

    loss: crossentropy
    metrics:
    - Accuracy
    num_workers: 4
    verbose: false
  data:
    dataset: cifar10
    dataset_kwargs:
      data_root_path: /system/user/beck/pwbeck/data
    dataset_split:
      train_val_split: 0.9
      # restrict_n_samples_train_task: 100
    dataset_transforms:
      image_transforms:
      - RandomHorizontalFlip
      - RandomCrop:
          size: 32
          padding: 4
      tensor_transforms: 
      joint_tensor_transforms: 
      enable_transforms: True
"""
cfg = OmegaConf.create(config_yaml)

In [21]:
# run_command = REPO.create_experiment(cfg, override=False)
# print(run_command)

### Sweep result

In [22]:
sweepr = REPO.get_output_loader(cfg)
print(sweepr)

Exp. Tag(start_num): 13.1(1)
Exp. Name: cifar10-13.1.1-resnet-B-pretrain
Training setup: supervised
Model name: resnet20-cifar10-B
Dataset name: cifar10
Sweep type: grid
  data.dataset_transforms.enable_transforms: [True, False]
Seeds: [11, 17, 31]
Num. jobs: 6
Config updated: 2023-01-12 16:36:01
Sweep started:  2023-01-12 16:39:14



In [23]:
failed_jobs = sweepr.get_failed_jobs()
failed_jobs # No failed jobs

Collecting failed jobs: 100%|██████████| 6/6 [00:00<00:00, 55.20it/s]


([], {})

In [24]:
pprint(sweepr.available_log_columns)

{'_common_cols': ['epoch', 'train_step', 'log_step']}


In [25]:
sw_summary = sweepr.get_summary()
sw_summary

Collecting summaries: 100%|██████████| 6/6 [00:00<00:00, 130.47it/s]


BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.

# 13.5 CIFAR10 Resnet enable_transforms=True finetuning

In [None]:
# get a single pretraining job
df, jobs = sweepr.query_jobs({'seed':31, 'data.dataset_transforms.enable_transforms': True})
df

Collecting summaries: 100%|██████████| 6/6 [00:00<00:00, 4799.89it/s]


Unnamed: 0,best_step,best_val_score,data.dataset_transforms.enable_transforms,seed
cifar10-13.1.1-resnet-B-pretrain--enable_transforms-1-seed-31--230112_163938,62000,0.8822,True,31


In [None]:
pretrain_job_et = jobs[0]
pretrain_job_et

JobResult(cifar10-13.1.1-resnet-B-pretrain--enable_transforms-1-seed-31--230112_163938)

In [None]:
print(pretrain_job_et.directory)

/system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.1.1-resnet-B-pretrain--230112_163914/outputs/cifar10-13.1.1-resnet-B-pretrain--enable_transforms-1-seed-31--230112_163938


In [None]:
# determine pretrain indices, remaining training time and lr schedule milestones
pretrain_idxes = np.array(pretrain_job_et.available_model_checkpoint_indices[::6])
pretrain_idxes.tolist()

[0, 6000, 12000, 18000, 24000, 30000, 36000, 42000, 48000, 54000, 60000]

In [None]:
total_steps = cfg.config.trainer.n_steps
remaining_training_steps = total_steps - np.array(pretrain_idxes)
remaining_training_steps.astype(int).tolist()

[64000, 58000, 52000, 46000, 40000, 34000, 28000, 22000, 16000, 10000, 4000]

In [None]:
config_yaml = """
run_config:
  exec_type: parallel
  hostname: dragonfly
  gpu_ids: [0,1]
  runs_per_gpu: 2
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [21,22]

sweep:
  type: line
  axes: 
  - parameter: trainer.n_steps # remaining steps to train
    vals: [64000, 58000, 52000, 46000, 40000, 34000, 28000, 22000, 16000, 10000, 4000]
  - parameter: model.pretrain_idx
    vals: [0, 6000, 12000, 18000, 24000, 30000, 36000, 42000, 48000, 54000, 60000]

    
start_num: 0

config:
  experiment_data:
    entity: jkuiml-fsl
    project_name: tflearning
    experiment_tag: '13.5'
    experiment_type: startnum_${start_num}
    experiment_name: cifar10-${config.experiment_data.experiment_tag}.${start_num}-resnet-B-finetune-enableT
    experiment_dir: null
    experiment_notes: 
    job_name: null
    seed: 0
    hostname: null
    gpu_id: 0
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      notes: ${config.experiment_data.experiment_notes}
      group: ${config.experiment_data.experiment_tag}
      job_type: ${config.experiment_data.experiment_type}
    watch:
      log: null
      log_freq:

  model:
    model_cfg: resnet20-cifar10-B
    # name: resnet
    # model_kwargs:
    #   in_channels: 3
    #   act_fn: relu
    #   residual_option: B
    #   input_layer_config:
    #     kernel_size: 3
    #     out_channels: 16
    #     bias: false
    #     batch_norm: true
    #     stride: 1
    #     padding: 1
    #   resnet_blocks_config:
    #     - out_channels: 16
    #       num_residual_blocks: 3
    #     - out_channels: 32
    #       num_residual_blocks: 3
    #     - out_channels: 64
    #       num_residual_blocks: 3
    #   linear_output_units:
    #     - 10
    pretrain_idx: 0
    init_model: /system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.1.1-resnet-B-pretrain--230112_163914/outputs/cifar10-13.1.1-resnet-B-pretrain--enable_transforms-1-seed-31--230112_163938/model_step_${config.model.pretrain_idx}.p


  trainer:
    training_setup: supervised
    n_steps: 64e3
    log_train_step_every: 1
    log_additional_train_step_every_multiplier: 1
    log_additional_logs: true
    val_every: 500
    save_every: 1000
    early_stopping_patience: 64e3
    batch_size: 128
    optimizer_scheduler:
      optimizer_name: SGD
      optimizer_kwargs:
        lr: 0.01 #0.1
        momentum: 0.9
        weight_decay: 0.0001
      lr_scheduler_name: MultiStepLR
      lr_scheduler_kwargs:
        milestones: [32e3, 48e3]
        gamma: 0.1

    loss: crossentropy
    metrics:
    - Accuracy
    num_workers: 4
    verbose: false
  data:
    dataset: cifar10
    dataset_kwargs:
      data_root_path: /system/user/beck/pwbeck/data
    dataset_split:
      train_val_split: 0.9
      # restrict_n_samples_train_task: 100
    dataset_transforms:
      image_transforms:
      - RandomHorizontalFlip
      - RandomCrop:
          size: 32
          padding: 4
      tensor_transforms: 
      joint_tensor_transforms: 
      enable_transforms: True
"""
cfg = OmegaConf.create(config_yaml)

In [None]:
# run_command = REPO.create_experiment(cfg, override=False)
# print(run_command)

In [None]:
finetunesw5 = REPO.get_output_loader(cfg)
print(finetunesw5)

Exp. Tag(start_num): 13.5(0)
Exp. Name: cifar10-13.5.0-resnet-B-finetune-enableT
Training setup: supervised
Model name: resnet20-cifar10-B
Dataset name: cifar10
Sweep type: line
  trainer.n_steps: [64000, 58000, 52000, 46000, 40000, 34000, 28000, 22000, 16000, 10000, 4000]
  model.pretrain_idx: [0, 6000, 12000, 18000, 24000, 30000, 36000, 42000, 48000, 54000, 60000]
Seeds: [21, 22]
Num. jobs: 22
Config updated: 2023-01-13 10:33:01
Sweep started:  2023-01-13 10:33:37



In [None]:
fdir = finetunesw5.directory
print(fdir)

/system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.5.0-resnet-B-finetune-enableT--230113_103337


In [None]:
insta5 = InstabilityAnalyzer.reload(finetunesw5.directory, instability_folder_suffix='1')
insta5.combined_results_dfs['datasets']

Collecting failed jobs: 100%|██████████| 22/22 [00:00<00:00, 858.39it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,datasets,val,val,val,val,val,val,train,train,train,train,train,train
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,alpha,0.00,0.25,0.50,0.75,1.00,NaN,0.00,0.25,0.50,0.75,1.00,NaN
default_params,init_model_idx_k,job,seeds,model_idxes,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
default_params,0,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(54000, 35500)",0.861328,0.708008,0.263477,0.595703,0.871484,-0.60293,0.945569,0.748481,0.263156,0.62937,0.963167,-0.691211
default_params,0,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(64000, 64000)",0.861914,0.709766,0.264258,0.596875,0.871875,-0.602637,0.946706,0.749421,0.265282,0.631242,0.963527,-0.689834
default_params,6000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(34000, 48500)",0.873828,0.874609,0.871875,0.874023,0.873047,-0.001563,0.970353,0.965226,0.96092,0.963677,0.969616,-0.009064
default_params,6000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(58000, 58000)",0.873047,0.874023,0.872852,0.874219,0.873242,-0.000293,0.970851,0.964304,0.959421,0.964198,0.971085,-0.011547
default_params,12000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(46000, 33500)",0.879102,0.881641,0.876758,0.874805,0.873438,-0.002832,0.978437,0.97599,0.973549,0.976306,0.977206,-0.004273
default_params,12000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(52000, 52000)",0.878711,0.881641,0.876758,0.874805,0.875,-0.002051,0.978099,0.976417,0.974614,0.97564,0.978437,-0.003653
default_params,18000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(33500, 40500)",0.875,0.873438,0.874414,0.876367,0.874805,-0.001465,0.981278,0.981477,0.980296,0.980784,0.981211,-0.000948
default_params,18000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(46000, 46000)",0.871484,0.873828,0.873633,0.876953,0.871094,-0.000195,0.9818,0.98205,0.981006,0.982432,0.982089,-0.000938
default_params,24000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(34500, 40000)",0.880469,0.880078,0.875586,0.878711,0.877539,-0.003418,0.984807,0.985196,0.984252,0.984042,0.984957,-0.00084
default_params,24000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(40000, 40000)",0.880664,0.878516,0.876758,0.87832,0.877344,-0.002246,0.984713,0.984535,0.983847,0.984397,0.984225,-0.000621


In [None]:
insta5 = InstabilityAnalyzer.reload(finetunesw5.directory, instability_folder_suffix='2')
insta5.combined_results_dfs['datasets']

Collecting failed jobs: 100%|██████████| 22/22 [00:00<00:00, 960.14it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,datasets,val,val,val,val,val,val,train,train,train,train,train,train
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,alpha,0.00,0.25,0.50,0.75,1.00,NaN,0.00,0.25,0.50,0.75,1.00,NaN
default_params,init_model_idx_k,job,seeds,model_idxes,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
default_params,0,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(54000, 35500)",0.862109,0.708594,0.26582,0.595508,0.871875,-0.601172,0.944612,0.748607,0.266922,0.627483,0.963855,-0.687311
default_params,0,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(64000, 64000)",0.861328,0.710938,0.264062,0.598047,0.871484,-0.602344,0.945825,0.750012,0.264496,0.630522,0.963428,-0.690131
default_params,6000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(34000, 48500)",0.873633,0.874023,0.87168,0.874023,0.872852,-0.001562,0.970407,0.964521,0.961808,0.96436,0.968683,-0.007737
default_params,6000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(58000, 58000)",0.873633,0.874805,0.872852,0.873633,0.873633,-0.000781,0.970969,0.965431,0.961956,0.964881,0.970641,-0.008849
default_params,12000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(46000, 33500)",0.882422,0.882031,0.875977,0.875,0.871094,-0.005664,0.977549,0.975719,0.975596,0.975773,0.978654,-0.002506
default_params,12000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(52000, 52000)",0.881836,0.882031,0.876172,0.875,0.870313,-0.005762,0.978664,0.975591,0.97457,0.976518,0.978316,-0.00392
default_params,18000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(33500, 40500)",0.87207,0.874219,0.873242,0.876172,0.871484,-0.000293,0.982244,0.980851,0.982311,0.981495,0.98094,-0.000741
default_params,18000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(46000, 46000)",0.87168,0.873633,0.873438,0.875977,0.873438,-0.000879,0.981406,0.981867,0.981889,0.980691,0.980607,-0.0004
default_params,24000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(34500, 40000)",0.880273,0.878516,0.876172,0.877734,0.877539,-0.002734,0.984752,0.983936,0.984397,0.983776,0.983998,-0.000599
default_params,24000,cifar10-13.5.0-resnet-B-finetune-enableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(40000, 40000)",0.879883,0.878711,0.875977,0.877344,0.878125,-0.003027,0.98513,0.984775,0.983865,0.983887,0.984291,-0.000846


In [None]:
# finetunesw5.get_summary()

In [None]:
idx0job = finetunesw5.get_jobs('_idx-0-seed-21')[0]
idx0job.get_data_log('val')

Unnamed: 0,log_step,epoch,train_step,loss_CrossEntropyLoss,Accuracy,time_last_val_epoch_in_s
0,0,0,0,2.369769,0.0902,0.000000
1,501,2,500,1.479348,0.4630,2.620911
2,1003,3,1000,1.488813,0.4312,0.439410
3,1506,5,1500,1.420181,0.4884,0.526924
4,2008,6,2000,1.409338,0.4902,0.519385
...,...,...,...,...,...,...
124,62299,177,62000,0.435284,0.8678,0.553723
125,62801,178,62500,0.438549,0.8672,0.527406
126,63303,179,63000,0.442000,0.8658,0.505147
127,63806,181,63500,0.464248,0.8670,0.532207


In [None]:
idx0job = finetunesw5.get_jobs('_idx-0-seed-22')[0]
idx0job.get_data_log('val')

Unnamed: 0,log_step,epoch,train_step,loss_CrossEntropyLoss,Accuracy,time_last_val_epoch_in_s
0,0,0,0,2.366740,0.0902,0.000000
1,501,2,500,1.338886,0.5220,2.748828
2,1003,3,1000,1.391599,0.5030,0.542789
3,1506,5,1500,1.156231,0.5908,0.534415
4,2008,6,2000,1.055278,0.6218,0.566908
...,...,...,...,...,...,...
124,62299,177,62000,0.437943,0.8754,0.512799
125,62801,178,62500,0.465370,0.8764,0.532349
126,63303,179,63000,0.437721,0.8748,0.541527
127,63806,181,63500,0.442293,0.8764,0.549921


# 13.6 CIFAR10 Resnet enable_transforms=False finetuning

In [29]:
# get a single pretraining job
df, jobs = sweepr.query_jobs({'seed':31, 'data.dataset_transforms.enable_transforms': False})
df

Unnamed: 0,best_step,best_val_score,data.dataset_transforms.enable_transforms,seed
cifar10-13.1.1-resnet-B-pretrain--enable_transforms-0-seed-31--230112_163941,25500,0.798,False,31


In [30]:
pretrain_job_dt = jobs[0]
print(pretrain_job_dt.directory)

/system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.1.1-resnet-B-pretrain--230112_163914/outputs/cifar10-13.1.1-resnet-B-pretrain--enable_transforms-0-seed-31--230112_163941


In [49]:
config_yaml = """
run_config:
  exec_type: parallel
  hostname: dragonfly
  gpu_ids: [2,3]
  runs_per_gpu: 2
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [21,22]

sweep:
  type: line
  axes: 
  - parameter: trainer.n_steps # remaining steps to train
    vals: [64000, 58000, 52000, 46000, 40000, 34000, 28000, 22000, 16000, 10000, 4000]
  - parameter: model.pretrain_idx
    vals: [0, 6000, 12000, 18000, 24000, 30000, 36000, 42000, 48000, 54000, 60000]

    
start_num: 0

config:
  experiment_data:
    entity: jkuiml-fsl
    project_name: tflearning
    experiment_tag: '13.6'
    experiment_type: startnum_${start_num}
    experiment_name: cifar10-${config.experiment_data.experiment_tag}.${start_num}-resnet-B-finetune-disableT
    experiment_dir: null
    experiment_notes: 
    job_name: null
    seed: 0
    hostname: null
    gpu_id: 0
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      notes: ${config.experiment_data.experiment_notes}
      group: ${config.experiment_data.experiment_tag}
      job_type: ${config.experiment_data.experiment_type}
    watch:
      log: null
      log_freq:

  model:
    model_cfg: resnet20-cifar10-B
    # name: resnet
    # model_kwargs:
    #   in_channels: 3
    #   act_fn: relu
    #   residual_option: B
    #   input_layer_config:
    #     kernel_size: 3
    #     out_channels: 16
    #     bias: false
    #     batch_norm: true
    #     stride: 1
    #     padding: 1
    #   resnet_blocks_config:
    #     - out_channels: 16
    #       num_residual_blocks: 3
    #     - out_channels: 32
    #       num_residual_blocks: 3
    #     - out_channels: 64
    #       num_residual_blocks: 3
    #   linear_output_units:
    #     - 10
    pretrain_idx: 0
    init_model: /system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.1.1-resnet-B-pretrain--230112_163914/outputs/cifar10-13.1.1-resnet-B-pretrain--enable_transforms-0-seed-31--230112_163941/model_step_${config.model.pretrain_idx}.p


  trainer:
    training_setup: supervised
    n_steps: 64e3
    log_train_step_every: 1
    log_additional_train_step_every_multiplier: 1
    log_additional_logs: true
    val_every: 500
    save_every: 1000
    early_stopping_patience: 64e3
    batch_size: 128
    optimizer_scheduler:
      optimizer_name: SGD
      optimizer_kwargs:
        lr: 0.01 #0.1
        momentum: 0.9
        weight_decay: 0.0001
      lr_scheduler_name: MultiStepLR
      lr_scheduler_kwargs:
        milestones: [32e3, 48e3]
        gamma: 0.1

    loss: crossentropy
    metrics:
    - Accuracy
    num_workers: 4
    verbose: false
  data:
    dataset: cifar10
    dataset_kwargs:
      data_root_path: /system/user/beck/pwbeck/data
    dataset_split:
      train_val_split: 0.9
      # restrict_n_samples_train_task: 100
    dataset_transforms:
      image_transforms:
      - RandomHorizontalFlip
      - RandomCrop:
          size: 32
          padding: 4
      tensor_transforms: 
      joint_tensor_transforms: 
      enable_transforms: True
"""
cfg = OmegaConf.create(config_yaml)

In [50]:
# run_command = REPO.create_experiment(cfg, override=False)
# print(run_command)

In [54]:
finetunesw6 = REPO.get_output_loader(cfg)

In [55]:
print(finetunesw6.directory)

/system/user/publicwork/beck/projects/regularization/erank/outputs/cifar10-13.6.0-resnet-B-finetune-disableT--230113_103818


In [57]:
insta6 = InstabilityAnalyzer.reload(finetunesw6.directory, instability_folder_suffix='0')

Collecting failed jobs: 100%|██████████| 22/22 [00:13<00:00,  1.58it/s]


In [61]:
insta6.combined_results_dfs['datasets']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,datasets,val,val,val,val,val,val,train,train,train,train,train,train
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,interpolation_scores,instability
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,alpha,0.00,0.25,0.50,0.75,1.00,NaN,0.00,0.25,0.50,0.75,1.00,NaN
default_params,init_model_idx_k,job,seeds,model_idxes,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
default_params,0,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(40000, 38500)",0.0984,0.1308,0.1058,0.1602,0.2214,-0.0615,0.09502,0.12134,0.10438,0.1401,0.20902,-0.057
default_params,0,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-64000-pretrain_idx-0,"(21, 22)","(64000, 64000)",0.0984,0.1308,0.1058,0.16,0.221,-0.0613,0.09564,0.12078,0.1056,0.13982,0.20764,-0.056
default_params,6000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(56500, 33000)",0.1152,0.118,0.1168,0.1138,0.113,-0.0011,0.1641,0.16806,0.1614,0.15492,0.14628,-0.00891
default_params,6000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-58000-pretrain_idx-6000,"(21, 22)","(58000, 58000)",0.1152,0.1182,0.1164,0.1136,0.113,-0.0011,0.16534,0.16894,0.1641,0.15402,0.14396,-0.01069
default_params,12000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(35000, 40500)",0.1244,0.1222,0.1244,0.128,0.1376,-0.0088,0.16292,0.16328,0.16744,0.17702,0.18898,-0.01303
default_params,12000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-52000-pretrain_idx-12000,"(21, 22)","(52000, 52000)",0.1246,0.1224,0.1244,0.1282,0.1374,-0.0086,0.16056,0.16068,0.16834,0.17814,0.18842,-0.01393
default_params,18000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(34500, 44500)",0.1804,0.1784,0.181,0.191,0.2128,-0.0182,0.2318,0.2379,0.24798,0.25688,0.26048,-0.01434
default_params,18000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-46000-pretrain_idx-18000,"(21, 22)","(46000, 46000)",0.1802,0.1784,0.1806,0.1912,0.2126,-0.018,0.2323,0.23898,0.24764,0.258,0.26002,-0.01386
default_params,24000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(32500, 36500)",0.2032,0.2076,0.203,0.2076,0.2106,-0.0039,0.2314,0.2466,0.25114,0.25356,0.25094,-0.00977
default_params,24000,cifar10-13.6.0-resnet-B-finetune-disableT--n_steps-40000-pretrain_idx-24000,"(21, 22)","(40000, 40000)",0.203,0.2082,0.2026,0.2084,0.2108,-0.0043,0.23218,0.24536,0.25392,0.254,0.2521,-0.00996
