In [1]:
%load_ext autoreload
%autoreload 2
import torch
import numpy as np
import pandas as pd
from omegaconf import OmegaConf
from pathlib import Path
from pprint import pprint
# plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
# mycode
from repo import REPO
from ml_utilities.output_loader.result_loader import SweepResult, JobResult
from ml_utilities.output_loader import create_job_output_loader
from ml_utilities.output_loader.plot import plot_sweep_summary, plot_data_log_values

# 26 Finetune resnet18 on sun397

Steps:
- [x] run the hyperparameter search on the full dataset
- [ ] select random samples (prune randomly)
- [ ] select samples according to the prediction depth metric

## A. Experiment config

### Config file

In [8]:
# Sweep configuration for the hyperparameter search, written as a YAML string.
# Top-level keys: run_config (execution host / GPUs / wandb init), seeds, sweep,
# start_num and config (experiment_data, model, trainer, data).
# The sweep is a grid over trainer.optimizer.kwargs.lr (5 values) with
# trainer.optimizer.kwargs.weight_decay fixed to the single value 0.0.
# `${...}` entries are OmegaConf interpolations that refer to other keys of this config.
# NOTE(review): the recorded sweep summary further down reports start_num 0,
# six lr values and two weight_decay values — i.e. it does not match this
# config (start_num 1). Confirm which version of the config produced it.
# NOTE(review): run_config.gpu_ids is [0] while config.experiment_data.gpu_id is 1;
# presumably the run handler overrides gpu_id per job — TODO confirm.
config_yaml = """
run_config:
  exec_type: parallel
  hostname: ocelot
  gpu_ids: [0]
  runs_per_gpu: 1
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [0]

sweep:
  type: grid
  axes: 
  - parameter: trainer.optimizer.kwargs.lr
    vals: [1e-3, 5e-4, 1e-4, 5e-5, 1e-5]
  - parameter: trainer.optimizer.kwargs.weight_decay
    vals: [0.0]
    
start_num: 1

config:
  experiment_data:
    entity: fslgroup
    project_name: tflearning
    experiment_tag: '27-sun397-hyps'
    experiment_type: startnum_${start_num}
    experiment_name: sun397-${config.experiment_data.experiment_tag}.${start_num}-resnet18-ft-hypsearch
    experiment_dir: null
    experiment_notes: 
    seed: 0
    gpu_id: 1
  
  model:
    name: timmmodel
    kwargs:
      name: resnet18
      timm_kwargs:
        pretrained: True
  
  trainer:
    training_setup: finetune
    n_epochs: 30
    val_every: 1
    early_stopping_patience: 15
    save_every: 100e3
    batch_size: 256
    optimizer:
      name: AdamW
      kwargs:
        lr: 1e-3
        weight_decay: 0.0
    loss: crossentropy
    num_workers: 6
  
  data:
    name: sun397
    kwargs:
      data_root_path: /system/user/beck/pwbeck/data
      n_px: 224
"""
# Parse the YAML text into an OmegaConf config object used by the cells below.
cfg = OmegaConf.create(config_yaml)

In [9]:
# Create the experiment for `cfg` through the project REPO helper and print the
# returned value; per the recorded output this is the
# `python run_sweep.py --config-name ...` command line that launches the sweep.
# override=False is passed explicitly — presumably so an already existing
# experiment config is not replaced (TODO confirm against REPO.create_experiment).
run_command = REPO.create_experiment(cfg, override=False)
print(run_command)

python run_sweep.py --config-name sun397-27-sun397-hyps.1-resnet18-ft-hypsearch.yaml


### Sweep result

In [3]:
# Obtain the output loader for the sweep described by `cfg` and print its text
# summary (experiment tag/name, sweep axes, seeds, number of jobs, timestamps —
# see the recorded output).
# NOTE(review): the recorded summary shows start_num 0 and a 6 x 2 grid, which
# differs from the config cell above (start_num 1, 5 x 1 grid) — this cell was
# presumably run before the config was edited; re-run to refresh.
sweepr = REPO.get_output_loader(cfg)
print(sweepr)

Exp. Tag(start_num): 27-sun397-hyps(0)
Exp. Name: sun397-27-sun397-hyps.0-resnet18-ft-hypsearch
Training setup: finetune
Model name: timmmodel
Dataset name: sun397
Sweep type: grid
  trainer.optimizer.kwargs.lr: [0.005, 0.001, 0.0005, 0.0001, 5e-05, 1e-05]
  trainer.optimizer.kwargs.weight_decay: [0.0, 0.001]
Seeds: [0]
Num. jobs: 12
Config updated: 2023-03-18 13:55:28
Sweep started:  2023-03-18 17:50:20



In [6]:
# Ask the sweep loader for the jobs it considers failed and display the result.
failed_jobs = sweepr.get_failed_jobs()
failed_jobs # NOTE(review): the recorded output lists several JobResult entries, so the earlier "No failed jobs" remark looks stale — re-run and confirm

Collecting failed jobs: 100%|██████████| 12/12 [00:00<00:00, 26.41it/s]


([JobResult(/system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-5e-05-weight_decay-0-seed-0--230320_181622),
  JobResult(/system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-1e-05-weight_decay-0-seed-0--230320_181654),
  JobResult(/system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0005-weight_decay-0.001-seed-0--230320_181533),
  JobResult(/system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0005-weight_decay-0-seed-0--230320_181516),
  

In [7]:
# Per-job summary table of the sweep, ordered by best validation score
# with the smallest value first.
(
    sweepr
    .get_summary()
    .sort_values(by="best_val_score", ascending=True)
)

Collecting summaries: 100%|██████████| 12/12 [00:00<00:00, 114.70it/s]

Could not find job /system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0001-weight_decay-0-seed-0--230320_181549
Could not find job /system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0001-weight_decay-0.001-seed-0--230320_181606
Could not find job /system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0005-weight_decay-0-seed-0--230320_181516
Could not find job /system/user/publicwork/beck/projects/regularization/tflearning/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--230318_175020/outputs/sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.0005-weight_decay-0.00




Unnamed: 0,best_epoch,best_val_score,trainer.optimizer.kwargs.lr,trainer.optimizer.kwargs.weight_decay,seed
sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.001-weight_decay-0-seed-0--230320_035758,5,1.66102,0.001,0.0,0
sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.005-weight_decay-0.001-seed-0--230319_113328,8,2.129354,0.005,0.001,0
sun397-27-sun397-hyps.0-resnet18-ft-hypsearch--lr-0.005-weight_decay-0-seed-0--230318_175036,11,2.202979,0.005,0.0,0


## B. Random subsample sweep

In [3]:
# Sweep configuration for the random-subsample experiment, written as a YAML string.
# The sweep is a grid over data.sample_selector.kwargs.fraction (8 values) with
# the `random` sample selector; learning rate and weight decay are fixed.
# `${...}` entries are OmegaConf interpolations that refer to other keys of this config.
# NOTE(review): this config targets data.name cifar10 with experiment_tag '25.1'
# although the notebook is titled as a sun397 experiment — it looks copied from
# another notebook; confirm dataset, tag and experiment_name before launching.
# NOTE(review): this cell overwrites the `config_yaml` / `cfg` globals defined by
# the hyperparameter-search cell above.
config_yaml = """
run_config:
  exec_type: parallel
  hostname: wasp
  gpu_ids: [0,1,2,3]
  runs_per_gpu: 1
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [0]

sweep:
  type: grid
  axes: 
  - parameter: data.sample_selector.kwargs.fraction
    vals: [0.1, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
    
start_num: 1

config:
  experiment_data:
    entity: fslgroup
    project_name: tflearning
    experiment_tag: '25.1'
    experiment_type: startnum_${start_num}
    experiment_name: cifar10-${config.experiment_data.experiment_tag}.${start_num}-resnet18-ft-randomsubsample
    experiment_dir: null
    experiment_notes: 
    seed: 0
    gpu_id: 1
  
  model:
    name: timmmodel
    kwargs:
      name: resnet18
      timm_kwargs:
        pretrained: True
  
  trainer:
    training_setup: finetune
    n_epochs: 100
    val_every: 1
    early_stopping_patience: 30
    save_every: 100e3
    batch_size: 256
    optimizer:
      name: AdamW
      kwargs:
        lr: 0.00005
        weight_decay: 0.0
    loss: crossentropy
    num_workers: 6
  
  data:
    name: cifar10
    kwargs:
      data_root_path: /system/user/beck/pwbeck/data
      n_px: 224
    sample_selector:
      name: random
      kwargs:
        fraction: 0.1

"""
# Parse the YAML text into an OmegaConf config object.
cfg = OmegaConf.create(config_yaml)

In [1]:
# NOTE(review): experiment creation is disabled for the random-subsample config.
# The NameError recorded as this cell's output presumably stems from an earlier
# run on a kernel where REPO had not been imported — re-enable and re-run this
# cell, or delete it together with its stale output.
# run_command = REPO.create_experiment(cfg, override=False)
# print(run_command)

NameError: name 'REPO' is not defined

## C. Prediction depth subsample sweep

In [4]:
# Sweep configuration for the prediction-depth subsample experiment, written as
# a YAML string. The sweep is a grid over two axes of the `prediction_depth`
# sample selector: kwargs.fraction (8 values) and kwargs.keep_highest (True/False).
# `${...}` entries are OmegaConf interpolations that refer to other keys of this config.
# The selector reads its scores from the absolute path given in pred_results_file.
# NOTE(review): this config targets data.name cifar10 (tag '25.2-cf10-pd_subsample')
# although the notebook is titled as a sun397 experiment — it looks copied from
# another notebook; confirm dataset, tag and pred_results_file before launching.
# NOTE(review): this cell overwrites the `config_yaml` / `cfg` globals defined by
# the earlier config cells.
config_yaml = """
run_config:
  exec_type: parallel
  hostname: ocelot
  gpu_ids: [0]
  runs_per_gpu: 2
  wandb:
    init:
      tags:
      - ${config.experiment_data.experiment_tag}_exps
      - run_handler
      notes: null
      group: ${config.experiment_data.experiment_tag}
      job_type: run_handler

seeds: [0]

sweep:
  type: grid
  axes: 
  - parameter: data.sample_selector.kwargs.fraction
    vals: [0.1, 0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
  - parameter: data.sample_selector.kwargs.keep_highest
    vals: [True, False]
    
start_num: 0

config:
  experiment_data:
    entity: fslgroup
    project_name: tflearning
    experiment_tag: '25.2-cf10-pd_subsample'
    experiment_type: startnum_${start_num}
    experiment_name: cifar10-${config.experiment_data.experiment_tag}.${start_num}-resnet18-ft-pdepthsubsample
    experiment_dir: null
    experiment_notes: 
    seed: 0
    gpu_id: 1
  
  model:
    name: timmmodel
    kwargs:
      name: resnet18
      timm_kwargs:
        pretrained: True
  
  trainer:
    training_setup: finetune
    n_epochs: 50
    val_every: 1
    early_stopping_patience: 20
    save_every: 100e3
    batch_size: 256
    optimizer:
      name: AdamW
      kwargs:
        lr: 0.00005
        weight_decay: 0.0
    loss: crossentropy
    num_workers: 6
  
  data:
    name: cifar10
    kwargs:
      data_root_path: /system/user/beck/pwbeck/data
      n_px: 224
    sample_selector:
      name: prediction_depth
      kwargs:
        fraction: 0.1
        pred_results_file: /system/user/beck/pwbeck/projects/regularization/tflearning/outputs_scripts/prediction_depth/PD-cifar10_resnet18-pretrained--230317_124927/prediction_depth_results.p
        keep_highest: True
"""
# Parse the YAML text into an OmegaConf config object.
cfg = OmegaConf.create(config_yaml)

In [5]:
# Create the prediction-depth subsample experiment for `cfg` through the project
# REPO helper and print the returned value; per the recorded output this is the
# `python run_sweep.py --config-name ...` command line that launches the sweep.
# override=False is passed explicitly — presumably so an already existing
# experiment config is not replaced (TODO confirm against REPO.create_experiment).
run_command = REPO.create_experiment(cfg, override=False)
print(run_command)

python run_sweep.py --config-name cifar10-25.2-cf10-pd_subsample.0-resnet18-ft-pdepthsubsample.yaml
