In [3]:
import torch
from argparse import Namespace
from regressionFlow.models.networks_regression_SDD import HyperRegression
# Hyper-parameters handed to HyperRegression. Inside this notebook the
# optimizer-related fields (optimizer, lr, beta1, beta2, momentum,
# weight_decay) are read by _get_opt_ and `seed` is used just below; which of
# the remaining fields HyperRegression itself consumes is not visible here.
flow_args = Namespace(model_type='PointNet', logprob_type='Laplace', input_dim=1, dims='3-4-2',
                                   latent_dims='256', hyper_dims='128-32', num_blocks=1, latent_num_blocks=1,
                                   layer_type='concatsquash', time_length=0.5, train_T=True, nonlinearity='tanh',
                                   use_adjoint=True, solver='dopri5', atol=1e-05, rtol=1e-05, batch_norm=True,
                                   sync_bn=False, bn_lag=0, root_dir=None, use_latent_flow=False,
                                   use_deterministic_encoder=False,
                                   zdim=1, optimizer='adam', batch_size=1000, lr=0.001, beta1=0.9,
                                   beta2=0.999, momentum=0.9, weight_decay=1e-05, epochs=1000, seed=694754,
                                   recon_weight=1.0, prior_weight=1.0, entropy_weight=1.0, scheduler='linear',
                                   exp_decay=1.0, exp_decay_freq=1, image_size='28x28', data_dir='data/SDD/',
                                   dataset_type='shapenet15k', cates=['airplane'],
                                   mn40_data_dir='data/ModelNet40.PC15k',
                                   mn10_data_dir='data/ModelNet10.PC15k', dataset_scale=1.0, random_rotate=False,
                                   normalize_per_shape=False, normalize_std_per_axis=False, tr_max_sample_points=2048,
                                   te_max_sample_points=2048, num_workers=4, use_all_data=False,
                                   log_name='experiment_regression_flow_toy', viz_freq=1, val_freq=10, log_freq=1,
                                   save_freq=5, no_validation=False, save_val_results=False, eval_classification=False,
                                   no_eval_sampling=False, max_validate_shapes=None, resume_checkpoint=None,
                                   resume_optimizer=False, resume_non_strict=False, resume_dataset_mean=None,
                                   resume_dataset_std=None, world_size=1, dist_url='tcp://127.0.0.1:9991',
                                   dist_backend='nccl', distributed=False, rank=0, gpu=0, evaluate_recon=False,
                                   num_sample_shapes=10, num_sample_points=2048, use_sphere_dist=False,
                                   use_div_approx_train=False, use_div_approx_test=False)

# Apply the configured seed to torch's global RNG before any weights are
# initialised, so that a "Restart & Run All" reproduces the same model
# initialisation and the same torch.randn draws in the experiment cells.
torch.manual_seed(flow_args.seed)

flow = HyperRegression(flow_args)

def _get_opt_(params):
    """Build the torch optimizer selected by ``flow_args.optimizer``.

    Args:
        params: Iterable of parameters (or parameter groups) passed straight
            to the torch optimizer constructor.

    Returns:
        ``torch.optim.Adam`` configured with ``lr``, ``(beta1, beta2)`` and
        ``weight_decay`` from ``flow_args`` when the optimizer is ``'adam'``;
        ``torch.optim.SGD`` configured with ``lr`` and ``momentum`` when it
        is ``'sgd'``.

    Raises:
        ValueError: If ``flow_args.optimizer`` is neither ``'adam'`` nor
            ``'sgd'``.
    """
    name = flow_args.optimizer
    if name == 'adam':
        return torch.optim.Adam(params, lr=flow_args.lr, betas=(flow_args.beta1, flow_args.beta2),
                                weight_decay=flow_args.weight_decay)
    if name == 'sgd':
        # NOTE(review): weight_decay is only applied on the Adam path; confirm
        # that leaving it out for SGD is intentional.
        return torch.optim.SGD(params, lr=flow_args.lr, momentum=flow_args.momentum)
    # Raise explicitly instead of `assert 0`: assertions are removed when
    # Python runs with -O, which would turn a bad setting into an unrelated
    # UnboundLocalError on the return statement.
    raise ValueError(f"flow_args.optimizer should be either 'adam' or 'sgd', got {name!r}")

# One optimizer over both trainable parts of the flow model: the hypernetwork
# (flow.hyper) and the conditional flow (flow.point_cnf).
opt = _get_opt_([*flow.hyper.parameters(), *flow.point_cnf.parameters()])

In [4]:
import torch
from regressionFlow.utils import truncated_normal, standard_normal_logprob, standard_laplace_logprob
import plotly.express as px
from sklearn.decomposition import PCA

def plotTheta(theta,loss):
    """Show a 2-D PCA scatter plot of the values held in ``theta``.

    Args:
        theta: Tensor-like object supporting ``detach()``, ``cpu()`` and
            ``numpy()``. It must hold exactly 65 * 5 = 325 values because the
            data is reshaped to ``(65, 5)`` before the projection.
        loss: Value interpolated into the figure title as ``f'loss {loss}'``.
    """
    # Tensor.numpy() is only defined for host (CPU) tensors, so copy the data
    # to the host first. cpu() hands the tensor back unchanged when it already
    # lives there, so CPU callers are unaffected.
    X = theta.detach().cpu().numpy().reshape(65,5)
    # NOTE(review): the experiment functions appear to pass a (5, 65, 1)
    # tensor. A plain reshape to (65, 5) regroups consecutive values rather
    # than transposing, so each row mixes entries - confirm this layout is the
    # intended one for the PCA.

    # Project the 65 rows onto two PCA components and scatter them.
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)

    fig = px.scatter(components, x=0, y=1,title=f'loss {loss}')
    fig.show()

# flow hypernetwork with kld loss
def experiment_flow1(flow, opt,n):
    """Run ``n`` optimisation steps of the flow hypernetwork on a KL-style loss.

    A random support tensor of shape (5, 65) is drawn once. On every step
    noise ``y`` is drawn with ``flow.sample_gaussian``, ``flow.hyper`` maps
    the support to the conditioning input and ``flow.point_cnf(...,
    reverse=True)`` maps the noise to ``target_networks_weights``. The loss is
    ``0.01 * (log_px.mean() - log N(weights | 0, I))``.

    The loss is printed and ``plotTheta`` is called on every 100th step and
    on the last step.

    Args:
        flow: Model exposing ``hyper``, ``point_cnf``, ``sample_gaussian``,
            ``input_dim`` and ``gpu`` (a HyperRegression instance in this
            notebook).
        opt: Optimizer whose ``zero_grad()`` / ``step()`` are called each step.
        n: Number of optimisation steps.
    """
    support = torch.randn(5,65)
    # The standard-normal prior over the flattened weights is identical on
    # every step (same size, dtype and device), so it is built once on first
    # use instead of re-creating a 325x325 covariance every iteration.
    prior = None
    for i in range(n):
        x = support
        y = flow.sample_gaussian((*x.shape, flow.input_dim), None, flow.gpu)
        opt.zero_grad()
        batch_size = x.size(0)
        x = flow.hyper(x)
        target_networks_weights = flow.point_cnf(y, x, reverse=True).view(*y.size())

        # Flow density: log p_0(F^{-1}_theta(w_i)) + J
        _, delta_log_py = flow.point_cnf(target_networks_weights, x, torch.zeros(batch_size, y.size(1), 1).to(y))
        log_py = standard_normal_logprob(y).view(batch_size, -1).sum(1, keepdim=True)
        delta_log_py = delta_log_py.view(batch_size, y.size(1), 1).sum(1)
        log_px = log_py - delta_log_py
        loss = log_px.mean()

        # Prior density: log N(w_i | 0, I), evaluated jointly over all weights.
        flat_weights = target_networks_weights.flatten()
        if prior is None:
            prior = torch.distributions.MultivariateNormal(
                torch.zeros_like(flat_weights).to(loss), torch.eye(flat_weights.numel()).to(loss))
        loss_density = prior.log_prob(flat_weights)
        # NOTE(review): log_px is averaged over the batch while loss_density
        # is a joint log-probability over every weight of the batch - confirm
        # that mixing a mean with a sum is intended.
        loss = 0.01 * (loss - loss_density)

        if i % 100 == 0 or i == n-1:
            print(loss)
            plotTheta(target_networks_weights,loss)
        loss.backward()
        opt.step()

# Train the flow hypernetwork on the KL-style loss for 10000 steps.
experiment_flow1(flow, opt, n=10000)

tensor(3.6889, grad_fn=<MulBackward0>)


tensor(3.2898, grad_fn=<MulBackward0>)


tensor(2.9592, grad_fn=<MulBackward0>)


tensor(2.9242, grad_fn=<MulBackward0>)


tensor(2.9206, grad_fn=<MulBackward0>)


tensor(2.9295, grad_fn=<MulBackward0>)


tensor(3.0609, grad_fn=<MulBackward0>)


tensor(3.0782, grad_fn=<MulBackward0>)


tensor(3.1456, grad_fn=<MulBackward0>)


tensor(4.4335, grad_fn=<MulBackward0>)


In [5]:
import torch
from regressionFlow.utils import truncated_normal, standard_normal_logprob, standard_laplace_logprob
import plotly.express as px
from sklearn.decomposition import PCA

def plotTheta(theta,loss):
    """Show a 2-D PCA scatter plot of the values held in ``theta``.

    Args:
        theta: Tensor-like object supporting ``detach()``, ``cpu()`` and
            ``numpy()``. It must hold exactly 65 * 5 = 325 values because the
            data is reshaped to ``(65, 5)`` before the projection.
        loss: Value interpolated into the figure title as ``f'loss {loss}'``.
    """
    # Tensor.numpy() is only defined for host (CPU) tensors, so copy the data
    # to the host first. cpu() hands the tensor back unchanged when it already
    # lives there, so CPU callers are unaffected.
    X = theta.detach().cpu().numpy().reshape(65,5)
    # NOTE(review): the experiment functions appear to pass a (5, 65, 1)
    # tensor. A plain reshape to (65, 5) regroups consecutive values rather
    # than transposing, so each row mixes entries - confirm this layout is the
    # intended one for the PCA.

    # Project the 65 rows onto two PCA components and scatter them.
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)

    fig = px.scatter(components, x=0, y=1,title=f'loss {loss}')
    fig.show()

# flow hypernetwork with norm loss
def experiment_flow2(flow, opt,n):
    """Run ``n`` optimisation steps that shrink the norm of the generated weights.

    A random (5, 65) support tensor is drawn once. Each step samples noise
    with ``flow.sample_gaussian``, conditions ``flow.point_cnf`` (in reverse
    mode) on ``flow.hyper(support)`` and minimises the norm of the flattened
    result. Every 100th step the loss is printed and ``plotTheta`` is called.

    Args:
        flow: Model exposing ``hyper``, ``point_cnf``, ``sample_gaussian``,
            ``input_dim`` and ``gpu``.
        opt: Optimizer whose ``zero_grad()`` / ``step()`` are called each step.
        n: Number of optimisation steps.
    """
    support = torch.randn(5,65)
    for step in range(n):
        noise = flow.sample_gaussian((*support.shape, flow.input_dim), None, flow.gpu)
        opt.zero_grad()

        conditioning = flow.hyper(support)
        generated = flow.point_cnf(noise, conditioning, reverse=True)
        target_networks_weights = generated.view(*noise.size())

        loss = torch.norm(target_networks_weights.flatten())

        report_now = step % 100 == 0
        if report_now:
            print(loss)
            plotTheta(target_networks_weights,loss)

        loss.backward()
        opt.step()

# Norm-loss experiment for 10000 steps. `flow` and `opt` are the same objects
# used by the earlier experiment cell, so this run starts from whatever state
# they are currently in.
experiment_flow2(flow, opt, n=10000)

tensor(0.0259, grad_fn=<MulBackward0>)


tensor(0.0113, grad_fn=<MulBackward0>)


tensor(0.0025, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


tensor(0.0026, grad_fn=<MulBackward0>)


tensor(0.0024, grad_fn=<MulBackward0>)


In [15]:
from methods.hypernets.hypermaml import HyperNet
# Argument namespace handed to HyperNet as `params`. `Namespace` is imported
# in an earlier cell of this notebook. Which of these fields HyperNet actually
# reads is not visible from this notebook.
hyper_params = Namespace(seed=0, dataset='cross_char', model='Conv4', method='fhyper_maml', train_n_way=5, test_n_way=5, n_shot=1, train_aug=False, checkpoint_suffix='smallflow_w_nowarmup', lr=0.0001, optim='adam', n_val_perms=1, lr_scheduler='multisteplr', milestones=[51, 550], maml_save_feature_network=False, maml_adapt_classifier=False, evaluate_model=False, num_classes=4112, save_freq=50, start_epoch=0, stop_epoch=64, resume=False, warmup=False, es_epoch=250, es_threshold=70.0, eval_freq=1, hn_adaptation_strategy=None, hn_alpha_step=0, hn_hidden_size=512, hn_tn_hidden_size=120, hn_taskset_size=1, hn_neck_len=0, hn_head_len=3, hn_taskset_repeats='10:10-20:5-30:2', hn_taskset_print_every=20, hn_detach_ft_in_hn=10000, hn_detach_ft_in_tn=10000, hn_tn_depth=1, hn_dropout=0, hn_sup_aggregation='concat', hn_transformer_layers_no=1, hn_transformer_heads_no=1, hn_transformer_feedforward_dim=512, hn_attention_embedding=False, hn_kernel_layers_no=2, hn_kernel_hidden_dim=128, kernel_transformer_layers_no=1, kernel_transformer_heads_no=1, kernel_transformer_feedforward_dim=512, hn_kernel_out_size=1600, hn_kernel_invariance=False, hn_kernel_invariance_type='attention', hn_kernel_convolution_output_dim=256, hn_kernel_invariance_pooling='mean', hn_use_support_embeddings=False, hn_no_self_relations=False, hn_use_cosine_distance=False, hn_use_scalar_product=False, hn_use_cosine_nn_kernel=False, hn_val_epochs=0, hn_val_lr=0.0001, hn_val_optim='adam', hm_use_class_batch_input=True, hm_enhance_embeddings=True, hm_update_operator='minus', hm_lambda=0.0, hm_save_delta_params=False, hm_maml_warmup=True, hm_maml_update_feature_net=False, hm_maml_warmup_epochs=50, hm_maml_warmup_switch_epochs=500, hm_load_feature_net=False, hm_feature_net_path='', hm_detach_feature_net=False, hm_detach_before_hyper_net=False, hm_support_set_loss=False, hm_set_forward_with_adaptation=False, hm_weight_set_num_train=1, hm_weight_set_num_test=20, kl_stop_val=0.001, kl_scale=1e-24, do_scale=False, 
flow_w=1e-20, flow_stop_val=0.001, flow_scale=1e-24, flow_warmup=False)


# Hypernetwork used by the "hm" experiments in place of flow.hyper.
# NOTE(review): embedding_size/out_neurons of 65 and n_way of 5 presumably
# mirror the (5, 65) support tensors drawn in the experiment functions -
# confirm against the HyperNet signature.
hyper_hm = HyperNet(embedding_size=65,feat_dim=64,hn_hidden_size=512,n_way=5,out_neurons=65,params=hyper_params)
# Second optimizer: trains hyper_hm together with flow.point_cnf. The
# point_cnf parameters are the same ones already registered with `opt`.
# _get_opt_ still takes its settings from flow_args, not from hyper_params.
opt2 = _get_opt_(list(hyper_hm.parameters()) + list(flow.point_cnf.parameters()))

In [17]:
import torch
from regressionFlow.utils import truncated_normal, standard_normal_logprob, standard_laplace_logprob
import plotly.express as px
from sklearn.decomposition import PCA

def plotTheta(theta,loss):
    """Show a 2-D PCA scatter plot of the values held in ``theta``.

    Args:
        theta: Tensor-like object supporting ``detach()``, ``cpu()`` and
            ``numpy()``. It must hold exactly 65 * 5 = 325 values because the
            data is reshaped to ``(65, 5)`` before the projection.
        loss: Value interpolated into the figure title as ``f'loss {loss}'``.
    """
    # Tensor.numpy() is only defined for host (CPU) tensors, so copy the data
    # to the host first. cpu() hands the tensor back unchanged when it already
    # lives there, so CPU callers are unaffected.
    X = theta.detach().cpu().numpy().reshape(65,5)
    # NOTE(review): the experiment functions appear to pass a (5, 65, 1)
    # tensor. A plain reshape to (65, 5) regroups consecutive values rather
    # than transposing, so each row mixes entries - confirm this layout is the
    # intended one for the PCA.

    # Project the 65 rows onto two PCA components and scatter them.
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)

    fig = px.scatter(components, x=0, y=1,title=f'loss {loss}')
    fig.show()

# hm hypernetwork with kld loss
def experiment_flow3(flow, opt,n):
    """Run ``n`` steps of the KL-style loss with ``hyper_hm`` as the hypernetwork.

    A random support tensor of shape (5, 65) is drawn once. On every step
    noise ``y`` is drawn with ``flow.sample_gaussian``, the module-level
    ``hyper_hm`` maps the support to the conditioning input and
    ``flow.point_cnf(..., reverse=True)`` maps the noise to
    ``target_networks_weights``. The loss is
    ``0.01 * (log_px.mean() - log N(weights | 0, I))``.

    ``plotTheta`` is called on every 100th step and on the last step; the
    loss itself is not printed.

    Args:
        flow: Model exposing ``point_cnf``, ``sample_gaussian``,
            ``input_dim`` and ``gpu``.
        opt: Optimizer whose ``zero_grad()`` / ``step()`` are called each step.
        n: Number of optimisation steps.
    """
    support = torch.randn(5,65)
    # The standard-normal prior over the flattened weights is identical on
    # every step (same size, dtype and device), so it is built once on first
    # use instead of re-creating a 325x325 covariance every iteration.
    prior = None
    for i in range(n):
        x = support
        y = flow.sample_gaussian((*x.shape, flow.input_dim), None, flow.gpu)
        opt.zero_grad()
        batch_size = x.size(0)
        x = hyper_hm(x)
        target_networks_weights = flow.point_cnf(y, x, reverse=True).view(*y.size())

        # Flow density: log p_0(F^{-1}_theta(w_i)) + J
        _, delta_log_py = flow.point_cnf(target_networks_weights, x, torch.zeros(batch_size, y.size(1), 1).to(y))
        log_py = standard_normal_logprob(y).view(batch_size, -1).sum(1, keepdim=True)
        delta_log_py = delta_log_py.view(batch_size, y.size(1), 1).sum(1)
        log_px = log_py - delta_log_py
        loss = log_px.mean()

        # Prior density: log N(w_i | 0, I), evaluated jointly over all weights.
        flat_weights = target_networks_weights.flatten()
        if prior is None:
            prior = torch.distributions.MultivariateNormal(
                torch.zeros_like(flat_weights).to(loss), torch.eye(flat_weights.numel()).to(loss))
        loss_density = prior.log_prob(flat_weights)
        # NOTE(review): log_px is averaged over the batch while loss_density
        # is a joint log-probability over every weight of the batch - confirm
        # that mixing a mean with a sum is intended.
        loss = 0.01 * (loss - loss_density)

        if i % 100 == 0 or i == n-1:
            plotTheta(target_networks_weights,loss)
        loss.backward()
        opt.step()

# KL-style loss with the hyper_hm hypernetwork for 10000 steps, driven by opt2.
experiment_flow3(flow, opt2, n=10000)

KeyboardInterrupt: 

In [None]:
import torch
from regressionFlow.utils import truncated_normal, standard_normal_logprob, standard_laplace_logprob
import plotly.express as px
from sklearn.decomposition import PCA

def plotTheta(theta,loss):
    """Show a 2-D PCA scatter plot of the values held in ``theta``.

    Args:
        theta: Tensor-like object supporting ``detach()``, ``cpu()`` and
            ``numpy()``. It must hold exactly 65 * 5 = 325 values because the
            data is reshaped to ``(65, 5)`` before the projection.
        loss: Value interpolated into the figure title as ``f'loss {loss}'``.
    """
    # Tensor.numpy() is only defined for host (CPU) tensors, so copy the data
    # to the host first. cpu() hands the tensor back unchanged when it already
    # lives there, so CPU callers are unaffected.
    X = theta.detach().cpu().numpy().reshape(65,5)
    # NOTE(review): the experiment functions appear to pass a (5, 65, 1)
    # tensor. A plain reshape to (65, 5) regroups consecutive values rather
    # than transposing, so each row mixes entries - confirm this layout is the
    # intended one for the PCA.

    # Project the 65 rows onto two PCA components and scatter them.
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)

    fig = px.scatter(components, x=0, y=1,title=f'loss {loss}')
    fig.show()

# hm hypernetwork with norm loss
def experiment_flow4(flow,opt,n):
    """Run ``n`` norm-minimisation steps with ``hyper_hm`` as the hypernetwork.

    A random (5, 65) support tensor is drawn once. Each step samples noise
    with ``flow.sample_gaussian``, conditions ``flow.point_cnf`` (in reverse
    mode) on the output of the module-level ``hyper_hm`` and minimises the
    norm of the flattened result. Every 100th step the loss is printed and
    ``plotTheta`` is called.

    Args:
        flow: Model exposing ``point_cnf``, ``sample_gaussian``,
            ``input_dim`` and ``gpu``.
        opt: Optimizer whose ``zero_grad()`` / ``step()`` are called each step.
        n: Number of optimisation steps.
    """
    support = torch.randn(5,65)
    for step in range(n):
        latent = flow.sample_gaussian((*support.shape, flow.input_dim), None, flow.gpu)
        opt.zero_grad()

        context = hyper_hm(support)
        target_networks_weights = flow.point_cnf(latent, context, reverse=True).view(*latent.size())

        loss = torch.norm(target_networks_weights.flatten())

        if not step % 100:
            print(loss)
            plotTheta(target_networks_weights,loss)

        loss.backward()
        opt.step()

# Norm-loss experiment with the hyper_hm hypernetwork for 10000 steps, driven by opt2.
experiment_flow4(flow, opt2, n=10000)

tensor(13.8625, grad_fn=<CopyBackwards>)


tensor(0.4201, grad_fn=<CopyBackwards>)
