# Define sweep

In [1]:
import wandb

In [2]:
# Weights & Biases project name used by this notebook's sweep.
project_name = 'splitnet_lsuv_sweep_3'

In [3]:
# Sampling range shared (as the very same dict object) by all four LSUV multipliers.
lsuv_m_range = dict(min=0.1, max=2.)

# Random-search sweep definition: the goal is to maximize the 'last_psnr' metric.
sweep_configuration = dict(
    method='random',
    metric=dict(goal='maximize', name='last_psnr'),
    parameters={
        # lsuv_m_0 ... lsuv_m_3, in that order, followed by 'm'.
        **{f'lsuv_m_{idx}': lsuv_m_range for idx in range(4)},
        'm': dict(min=12., max=25.),
    },
)


In [4]:
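# Sweep creation is left disabled: the worker cells below attach to an
# already-created sweep through a hard-coded `sweep_id`.
# sweep_id = wandb.sweep(
#     sweep=sweep_configuration,
#     project=project_name,
# )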

# Define worker

In [5]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import lovely_tensors as lt
lt.monkey_patch()

# Orthonormal init code is taken from Lasagne
# https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py
def svd_orthonormal(w):
    """Return a random orthonormal float32 array with the same shape as ``w``.

    Only ``w.shape`` is used; the values of ``w`` are ignored. A Gaussian
    matrix of shape ``(shape[0], prod(shape[1:]))`` is drawn from the global
    NumPy RNG, decomposed with a reduced SVD, and whichever SVD factor has
    that flattened shape is reshaped back to ``w.shape``.

    Args:
        w: Array-like object exposing ``.shape`` with at least two dimensions.

    Returns:
        ``np.ndarray`` of dtype float32 and shape ``w.shape``.

    Raises:
        RuntimeError: If ``w`` has fewer than two dimensions.
    """
    target_shape = w.shape
    if len(target_shape) < 2:
        raise RuntimeError("Only shapes of length 2 or more are supported.")

    # Collapse all trailing dimensions into a single "fan-in" axis.
    rows, cols = target_shape[0], np.prod(target_shape[1:])
    gaussian = np.random.normal(0.0, 1.0, (rows, cols))

    left, _, right = np.linalg.svd(gaussian, full_matrices=False)
    # With the reduced SVD exactly one factor matches (rows, cols), unless the
    # matrix is square, in which case the left factor is used.
    if left.shape == (rows, cols):
        basis = left
    else:
        basis = right

    return basis.reshape(target_shape).astype(np.float32)


def orthogonal_weights_init(m):
    """Give a Conv2d/Linear module an orthonormal weight and a zero bias.

    Written to be passed to ``nn.Module.apply``: modules of any other type are
    left untouched.

    The new weight is written in place with ``copy_``, so the parameter keeps
    its device and dtype. (Assigning a fresh ``torch.from_numpy`` tensor to
    ``weight.data`` would silently move the weight to CPU/float32.)

    Args:
        m: Module visited by ``apply``.

    Returns:
        None. The module is modified in place.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return

    # Only the shape of the current weight matters to svd_orthonormal.
    w_ortho = svd_orthonormal(m.weight.data.cpu().numpy())
    with torch.no_grad():
        m.weight.copy_(torch.from_numpy(w_ortho))

    # Layers built with bias=False have `bias is None`; check for that
    # explicitly instead of hiding every possible error behind a bare except.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0)
    return


def splitnet_lsuv_init(self, batch_x, multipliers=(1,1.,1.,1), verbose=True):
    """Data-dependent (LSUV-style) initialization of a split-activation network.

    Steps, as implemented below:
      1. Every Conv2d/Linear module is re-initialized through
         ``orthogonal_weights_init`` and the model is moved to ``batch_x``'s
         device.
      2. ``batch_x`` is passed through ``self.encoding``.
      3. For each layer of ``self.networks[0].net``, in order, the rows of
         ``layer.linear`` are treated as four equal consecutive groups, paired
         with the tanh / sigmoid / sin / cos pre-activations returned by
         ``layer.forward_with_activations``. For group ``i`` the bias slice is
         shifted by minus the measured pre-activation mean, and the weight
         slice is divided by the measured std and multiplied by
         ``multipliers[i]``.
      4. The layer is evaluated again with the updated parameters and that
         output feeds the next layer.

    Only the first sub-network (``self.networks[0]``) is calibrated.

    NOTE(review): the code assumes row group ``i`` of ``layer.linear`` produces
    the ``i``-th pre-activation in the order above; confirm against the layer
    implementation. A measured std of 0 is not guarded against.

    Args:
        self: Model exposing ``encoding`` and ``networks[0].net``.
        batch_x: Input batch used to measure the pre-activation statistics.
        multipliers: Four scale factors, one per activation group.
        verbose: If true, print the pre-activations before and after rescaling.

    Returns:
        The same model, re-initialized in place.
    """
    device = batch_x.device
    self = self.apply(orthogonal_weights_init)
    self = self.to(device)

    # Pure initialization: parameters are edited through `.data`, so no
    # autograd graph is needed for any of the forward passes below.
    with torch.no_grad():
        h = self.encoding(batch_x)

        for layer in self.networks[0].net:
            _, acts = layer.forward_with_activations(h)

            if verbose:
                print('*'*40)
                print('Working with layer', layer)
            (_x, _preact, preact_tanh, preact_sigmoid, preact_sin, preact_cos,
             _act_tanh, _act_sigmoid, _act_sin, _act_cos) = acts

            # Number of output rows owned by each of the four activation groups.
            group_size = layer.linear.weight.data.shape[0]//4
            group_preacts = [preact_tanh, preact_sigmoid, preact_sin, preact_cos]
            for i, group_preact in enumerate(group_preacts):
                if verbose:
                    print('Initial preact:', group_preact)
                from_i, to_i = i*group_size, (i+1)*group_size

                # Statistics come from the forward pass done *before* any
                # group of this layer was modified.
                mean = group_preact.mean().item()
                std = group_preact.std().item()

                layer.linear.bias.data[from_i:to_i] -= mean
                layer.linear.weight.data[from_i:to_i] /= std

                layer.linear.weight.data[from_i:to_i] *= multipliers[i]

            # Re-evaluate with the calibrated parameters; this output is what
            # the next layer is calibrated on.
            out, acts = layer.forward_with_activations(h)
            (_x, _preact, preact_tanh, preact_sigmoid, preact_sin, preact_cos,
             _act_tanh, _act_sigmoid, _act_sin, _act_cos) = acts
            if verbose:
                for group_preact in (preact_tanh, preact_sigmoid, preact_sin, preact_cos):
                    print('After   preact:', group_preact)

            h = out

    return self

In [6]:
from train_pipeline import *

In [7]:
def _custom_train_seed(model, cfg, random_seed=0):
    """Train ``model`` for ``cfg["total_steps"]`` optimizer steps and track the best PSNR.

    Relies on the notebook-level globals ``model_input`` and ``ground_truth``,
    which must be defined before this function is called. MSE is the training
    loss; ``step``, ``mse`` and ``psnr`` are sent to the logger on every step.

    Args:
        model: Module to optimize; it is called on the (optionally sub-sampled)
            ``model_input``.
        cfg: Config providing ``logging.logger``, ``logging.experiment_name``,
            ``total_steps``, ``optimizer`` and, optionally, ``batch_size``.
        random_seed: Seed passed to ``seed_all``; also used as the run name
            (``rs<seed>``).

    Returns:
        ``(model, best_psnr)``. ``best_psnr`` is the highest PSNR observed on
        any step, measured before that step's optimizer update. It is detached
        from the autograd graph, and stays the integer ``0`` if no step
        exceeded 0.
    """
    seed_all(random_seed)
    print("Setting seed to", random_seed)

    # NOTE(review): the project name below looks like a placeholder. When this
    # is called from the sweep worker a run has already been started with
    # wandb.init, so the value may be ignored; confirm.
    logger = instantiate(
        cfg.logging.logger,
        project='эє',
        group=cfg.logging.experiment_name,
        name=f"rs{random_seed}",
    )
    print("*" * 80)
    print("\n")
    print(OmegaConf.to_yaml(cfg))
    print()
    print("*" * 80)

    total_steps = cfg["total_steps"]
    batch_size = cfg.get('batch_size', None)

    best_psnr = 0
    optimizer = instantiate(cfg.optimizer, params=model.parameters())

    for step in range(total_steps):
        if batch_size:
            # Random mini-batch sampled (with replacement) along dimension 1.
            idxs = torch.randint(0, model_input.shape[1], (batch_size,))
            model_input_batch = model_input[:, idxs]
            ground_truth_batch = ground_truth[:, idxs]
        else:
            model_input_batch = model_input
            ground_truth_batch = ground_truth

        model_output_batch = model(model_input_batch)
        mse, psnr = mse_and_psnr(model_output_batch, ground_truth_batch)
        loss = mse

        # Track the metric without keeping a handle on this step's autograd
        # graph, so the returned value is a plain detached tensor.
        if isinstance(psnr, torch.Tensor):
            psnr = psnr.detach()

        if best_psnr < psnr:
            best_psnr = psnr
        log_dic = {"step": step, "mse": mse.item(), "psnr": psnr.item()}
        logger.log_dict(log_dic)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return model, best_psnr

In [8]:
import os
import torch
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from hydra.utils import instantiate
from omegaconf import OmegaConf


def load_cfg(overrides=()):
    """Compose the Hydra config named ``'config'`` with the given overrides.

    Hydra is initialized with ``config_path="./conf"`` only for the duration
    of the call.

    Args:
        overrides: Iterable of Hydra override strings; it is converted to a
            list before being handed to ``compose``.

    Returns:
        The composed config object.
    """
    hydra_context = initialize(version_base=None, config_path="./conf")
    with hydra_context:
        composed = compose(config_name='config', overrides=list(overrides))
    return composed

from IPython.display import clear_output

import lovely_tensors as lt
lt.monkey_patch()

In [9]:
import wandb

# W&B project name handed to wandb.init by the sweep worker.
project_name = 'splitnet_lsuv_sweep_3'
# Hard-coded identifier of an existing sweep, handed to wandb.agent.
# Presumably in "entity/project/sweep" form — confirm.
sweep_id = 'kilianovski/splitnet_lsuv_sweep_3/3hhbbojv'

In [10]:
# Notebook-level state shared by the training helpers: the composed config,
# the target device, and the data tensors (already moved to that device).
cfg = load_cfg(
    overrides=[
        "+exp=07_splitnet_lsuv",
        "random_seed=[0]",
        'logging.logger._target_=spellbook.logging.wandb.WandbLogger',
        # Optional override, currently disabled:
        # "model.model_configs=[{'hidden_layers': [32, 32]}]",
        "image=cameraman",
        "+device=cuda:0",
    ]
)

device = cfg["device"]

model_input, ground_truth, H, W = load_data(cfg)
model_input = model_input.to(device)
ground_truth = ground_truth.to(device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [11]:
from IPython.display import clear_output

In [12]:
def _train_for_sweep():
    """Run one sweep trial: read hyperparameters, build, initialize and train a model.

    Uses the notebook-level globals ``cfg``, ``device``, ``model_input``,
    ``ground_truth`` and ``project_name``. The global ``cfg`` is modified in
    place: the ``'m'`` entry of the first model config is overwritten on every
    call.

    The value logged under ``'last_psnr'`` is the *best* PSNR returned by
    ``_custom_train_seed``; the key matches the metric name used by the sweep.
    """
    clear_output()

    # Hyperparameters chosen by the sweep for this run.
    wandb.init(project=project_name)
    sweep_params = wandb.config
    m_value = sweep_params.m
    lsuv_multipliers = (
        sweep_params.lsuv_m_0,
        sweep_params.lsuv_m_1,
        sweep_params.lsuv_m_2,
        sweep_params.lsuv_m_3,
    )

    # One 'm' entry per hidden layer: the sampled value everywhere except the
    # last entry, which is fixed to 1.
    n_hidden = len(cfg.model.model_configs[0]['hidden_layers'])
    cfg.model.model_configs[0]['m'] = [m_value] * (n_hidden - 1) + [1.]
    print(OmegaConf.to_yaml(cfg))

    model = instantiate(cfg["model"], out_features=ground_truth.shape[-1])
    model.to(device)

    model = splitnet_lsuv_init(
        model,
        model_input,
        multipliers=lsuv_multipliers,
        verbose=False,
    )
    seed = cfg.random_seed[0]
    model, best_psnr = _custom_train_seed(model, cfg, random_seed=seed)

    print('best_psnr', best_psnr)
    wandb.log({'last_psnr': best_psnr})

In [None]:
# Start a sweep agent for `sweep_id` that calls `_train_for_sweep` for each
# trial, with `count` capped at 1,000.
wandb.agent(sweep_id, function=_train_for_sweep, count=1_000)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
