In [1]:
import torch
import torch.nn as nn
from tqdm import tqdm
import pdb
import matplotlib.pyplot as plt
import sys
import numpy as np
sys.path.insert(0, './src/')
from target import NN_bernoulli
from kernels import HMC_our, Reverse_kernel
%matplotlib inline

In [2]:
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

In [3]:
# 'Encoder' - simple matrix
class Encoder_h(nn.Module):
    def __init__(self, L, z_dim, device='cpu'):
        super(Encoder_h, self).__init__()
        self.L = L
        self.z_dim = z_dim
        self.mu = nn.Linear(in_features=self.L, out_features=self.z_dim, bias=False)
        self.h = nn.Linear(in_features=self.L, out_features=self.z_dim)
    def forward(self, x):
        return self.mu(x), self.h(x)
    
# 'Encoder' - simple matrix
class Encoder(nn.Module):
    def __init__(self, L, z_dim, device='cpu'):
        super(Encoder, self).__init__()
        self.L = L
        self.z_dim = z_dim
        self.mu = nn.Linear(in_features=self.L, out_features=self.z_dim, bias=False)
    def forward(self, x):
        return self.mu(x)
    
# 'Decoder' - simple matrix, return logits
class Decoder(nn.Module):
    def __init__(self, L, z_dim, device='cpu'):
        super(Decoder, self).__init__()
        self.L = L
        self.z_dim = z_dim
        self.W = nn.Linear(in_features=self.z_dim, out_features=self.L, bias=False)
    def forward(self, z):
        return [self.W(z)]

In [4]:
L = 2
z_dim = 2
N = 100
device = "cpu"#"cuda:0" if torch.cuda.is_available() else "cpu"

args = dotdict({})
args.K = 2
args.N = 2
args.z_dim = z_dim
args.torchType = torch.float32
args.device = device
args.learnable_reverse = True
args.num_epoches = 5000
args.train_batch_size = 50
args.amortize = False
args.gamma = 0.1 ## Stepsize
args.alpha = 0.5  ## For partial momentum refresh
args.train_only_inference_period = 10
args.train_only_inference_cutoff = 5
args.hoffman_idea = True
args.separate_params = True
args.use_barker = True

In [5]:
if args.learnable_reverse:
    enc = Encoder_h(L=L, z_dim=z_dim, device=device).to(device)
else:
    enc = Encoder(L=L, z_dim=z_dim, device=device).to(device)
dec = Decoder(L=L, z_dim=z_dim, device=device).to(device)
reverse_kernel = Reverse_kernel(args).to(device)
target = NN_bernoulli({}, dec, device).to(device)
transitions = nn.ModuleList([HMC_our(kwargs=args).to(args.device) for _ in range(args['K'])])

std_normal = torch.distributions.Normal(loc=torch.tensor(0., device=device),
                                                scale=torch.tensor(1., device=device))
args.std_normal = std_normal

In [6]:
true_theta = std_normal.sample((z_dim, L))# + 5
print('True decoder matrix')
print(true_theta)
print('-' * 75)
data_probs = torch.sigmoid(std_normal.sample((N, z_dim)) @ true_theta)
# data_probs = torch.sigmoid(torch.ones((N, z_dim), device=device) @ true_theta)
data = torch.distributions.Bernoulli(probs=data_probs).sample()
print('Generated data example:')
print(data[:10])

True decoder matrix
tensor([[-0.9196,  1.2161],
        [ 1.1737, -0.9498]])
---------------------------------------------------------------------------
Generated data example:
tensor([[1., 1.],
        [1., 0.],
        [1., 0.],
        [1., 1.],
        [0., 1.],
        [0., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.]])


In [7]:
dataloader = torch.utils.data.DataLoader(data, batch_size=args.train_batch_size, shuffle=True)

reverse_params = list([])
if args.learnable_reverse:
    reverse_params = list(reverse_kernel.parameters())

if args.separate_params:
    params = list(enc.parameters()) + list(reverse_kernel.parameters()) + list(transitions.parameters())
    optimizer = torch.optim.Adam(params=target.parameters())
    optimizer_inference = torch.optim.Adam(params=params)
else:
    params = list(enc.parameters()) + list(target.parameters()) + list(transitions.parameters()) + reverse_params
    optimizer = torch.optim.Adam(params=params)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 300, 400], gamma=0.5) #torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0, last_epoch=-1)

In [8]:
enc.mu.weight

Parameter containing:
tensor([[-0.2717, -0.6099],
        [-0.5089, -0.2191]], requires_grad=True)

In [9]:
def compute_loss(z_new, p_new, u, p_old, x, sum_log_alpha, sum_log_jac, sum_log_sigma=0., mu=None, all_directions=None, h=None):
    if args.learnable_reverse:
        log_r = reverse_kernel(z_fin=z_new.detach(), h=h.detach(), a=all_directions)
        log_m = args.std_normal.log_prob(u).sum(1) + args.std_normal.log_prob(p_old).sum(1) - sum_log_jac - sum_log_sigma + sum_log_alpha
    else:
        log_r = -args.K * torch.tensor(np.log(2.), device=device)
        log_m = args.std_normal.log_prob(u).sum(1) + args.std_normal.log_prob(p_old).sum(1) - sum_log_jac - sum_log_sigma + sum_log_alpha
        
    log_p = target.get_logdensity(z=z_new, x=x, args=args) + args.std_normal.log_prob(p_new.sum(1))
    elbo_full = log_p + log_r - log_m
    grad_elbo = torch.mean(elbo_full + elbo_full.detach() * sum_log_alpha)
    return elbo_full, grad_elbo 

In [10]:
print_info_ = 500

for ep in tqdm(range(args.num_epoches)): # cycle over epoches
    for b_num, batch_train in enumerate(dataloader): # cycle over batches
        if args.learnable_reverse:
            mu, h = enc(batch_train) # sample mu and sigma from encoder
        else:
            mu = enc(batch_train) # sample mu and sigma from encoder
            h = None
        u = args.std_normal.sample(mu.shape) # sample random tensor for reparametrization trick
        z = mu + u # reperametrization trick

        p_old = args.std_normal.sample(z.shape)
        cond_vectors = [args.std_normal.sample(p_old.shape) for _ in range(args.K)]

        sum_log_alpha = torch.zeros(mu.shape[0], dtype=args.torchType, device=args.device) # for grad log alpha accumulation
        sum_log_jacobian = torch.zeros(mu.shape[0], dtype=args.torchType, device=args.device) # for log_jacobian accumulation
        p = p_old
        if args.learnable_reverse:
            all_directions = torch.tensor([], device=args.device)
        else:
            all_directions = None
        for k in range(args.K):
            # sample alpha - transition probabilities 
            if args.amortize:
#                     pdb.set_trace()
                z, p, log_jac, current_log_alphas, directions, _ = transitions.make_transition(q_old=z, x=batch_train,
                                                    p_old=p, k=cond_vectors[k], target_distr=target, args=args)
            else:
                z, p, log_jac, current_log_alphas, directions, _ = transitions[k].make_transition(q_old=z, x=batch_train,
                                                                    p_old=p, k=cond_vectors[k], target_distr=target, args=args) # sample a_i -- directions
            if ep  % print_info_ == 0 and b_num % (100 * print_info_) == 0:
                print('On batch number {}/{} and on k = {} we have for  0: {} and for +1: {}'.format(b_num + 1,
                                                                        data.shape[0] // args['train_batch_size'],
                                                                           k + 1,
                                                    (directions==0.).to(float).mean(),
                                                                    (directions==1.).to(float).mean()))
                if args.amortize:
                    print('Stepsize {}'.format(np.exp(transitions.gamma.cpu().detach().item())))
                    print('Autoregression coeff {}'.format(torch.sigmoid(transitions.alpha_logit).cpu().detach().item()))
            if args.learnable_reverse:
                all_directions = torch.cat([all_directions, directions.view(-1, 1)], dim=1)
            # Accumulate alphas
            sum_log_alpha = sum_log_alpha + current_log_alphas
            sum_log_jacobian = sum_log_jacobian + log_jac  # refresh log jacobian
        ##############################################
        if args.hoffman_idea:
            if args.learnable_reverse:
                log_r = reverse_kernel(z_fin=z.detach(), h=h.detach(), a=all_directions)
                log_m = args.std_normal.log_prob(u).sum(1) + args.std_normal.log_prob(p_old).sum(1) - sum_log_jacobian + sum_log_alpha
            else:
                log_r = -args.K * torch_log_2
                log_m = args.std_normal.log_prob(u).sum(1) + args.std_normal.log_prob(p_old).sum(1) - sum_log_jacobian + sum_log_alpha
            log_p = target.get_logdensity(z=z, x=batch_train, args=args) + args.std_normal.log_prob(p.sum(1))
            elbo_full = log_p + log_r - log_m
    #                 pdb.set_trace()
            ### Gradient of the first objective:
#             target.eval()
            obj_1 = torch.mean(elbo_full + elbo_full.detach() * sum_log_alpha)
            (-obj_1).backward(retain_graph=True)
            optimizer_inference.step()
            optimizer_inference.zero_grad()
            optimizer.zero_grad() 

            ### Gradient of the second objective:
#             target.train()
            log_p = target.get_logdensity(z=z.detach(), x=batch_train, args=args) + args.std_normal.log_prob(p.detach()).sum(1)
            obj_2 = torch.mean(log_p)
            (-obj_2).backward()
            optimizer.step()
            optimizer_inference.zero_grad()
            optimizer.zero_grad()
            ###########################################################
        else:
            elbo_full, grad_elbo = compute_loss(z_new=z, p_new=p, u=u, p_old=p_old, x=batch_train, sum_log_alpha=sum_log_alpha,
                                                sum_log_jac=sum_log_jacobian, mu=mu,
                                                all_directions=all_directions, h=h)
            (-grad_elbo).backward()

                
        if args.separate_params: # if we separate params of inference part and generation part
            optimizer_inference.step() # we always perform step for inference part
            if ep % args.train_only_inference_period > args.train_only_inference_cutoff: # but sometimes for gen
                optimizer.step()
            optimizer.zero_grad()
            optimizer_inference.zero_grad()
        else:
            optimizer.step()
            optimizer.zero_grad()
        scheduler.step()
            
        if ep  % print_info_ == 0 and b_num % (100 * print_info_) == 0:
            if args.hoffman_idea:
                print('obj_1:', obj_1)
                print('obj_2:', obj_2)
            else:
                print('elbo:', elbo_full.mean().cpu().detach().item())

  0%|          | 13/5000 [00:00<01:22, 60.14it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.56 and for +1: 0.44
On batch number 1/2 and on k = 2 we have for  0: 0.48 and for +1: 0.52
obj_1: tensor(1.3110, grad_fn=<MeanBackward0>)
obj_2: tensor(-7.4289, grad_fn=<MeanBackward0>)


 10%|█         | 512/5000 [00:07<01:03, 70.84it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.32 and for +1: 0.68
On batch number 1/2 and on k = 2 we have for  0: 0.56 and for +1: 0.44
obj_1: tensor(0.4931, grad_fn=<MeanBackward0>)
obj_2: tensor(-7.1133, grad_fn=<MeanBackward0>)


 20%|██        | 1007/5000 [00:14<00:56, 70.80it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.48 and for +1: 0.52
On batch number 1/2 and on k = 2 we have for  0: 0.52 and for +1: 0.48
obj_1: tensor(0.3981, grad_fn=<MeanBackward0>)
obj_2: tensor(-7.4428, grad_fn=<MeanBackward0>)


 30%|███       | 1513/5000 [00:21<00:49, 70.29it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.52 and for +1: 0.48
On batch number 1/2 and on k = 2 we have for  0: 0.54 and for +1: 0.46
obj_1: tensor(0.2315, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.9668, grad_fn=<MeanBackward0>)


 40%|████      | 2012/5000 [00:28<00:39, 74.87it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.5 and for +1: 0.5
On batch number 1/2 and on k = 2 we have for  0: 0.54 and for +1: 0.46
obj_1: tensor(0.2556, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.9195, grad_fn=<MeanBackward0>)


 50%|█████     | 2508/5000 [00:34<00:33, 75.16it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.64 and for +1: 0.36
On batch number 1/2 and on k = 2 we have for  0: 0.42 and for +1: 0.58
obj_1: tensor(0.2367, grad_fn=<MeanBackward0>)
obj_2: tensor(-7.2249, grad_fn=<MeanBackward0>)


 60%|██████    | 3012/5000 [00:41<00:26, 75.05it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.5 and for +1: 0.5
On batch number 1/2 and on k = 2 we have for  0: 0.6 and for +1: 0.4
obj_1: tensor(0.2640, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.9882, grad_fn=<MeanBackward0>)


 70%|███████   | 3508/5000 [00:48<00:19, 75.18it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.5 and for +1: 0.5
On batch number 1/2 and on k = 2 we have for  0: 0.52 and for +1: 0.48
obj_1: tensor(0.2268, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.8993, grad_fn=<MeanBackward0>)


 80%|████████  | 4012/5000 [00:55<00:13, 74.65it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.56 and for +1: 0.44
On batch number 1/2 and on k = 2 we have for  0: 0.42 and for +1: 0.58
obj_1: tensor(0.1106, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.8914, grad_fn=<MeanBackward0>)


 90%|█████████ | 4508/5000 [01:01<00:06, 70.66it/s]

On batch number 1/2 and on k = 1 we have for  0: 0.56 and for +1: 0.44
On batch number 1/2 and on k = 2 we have for  0: 0.42 and for +1: 0.58
obj_1: tensor(0.1705, grad_fn=<MeanBackward0>)
obj_2: tensor(-6.8818, grad_fn=<MeanBackward0>)


100%|██████████| 5000/5000 [01:08<00:00, 72.80it/s]


In [11]:
target.decoder.W.weight.T

tensor([[-0.0561,  0.0124],
        [-0.4709,  0.4122]], grad_fn=<PermuteBackward>)

In [12]:
target.decoder.W.bias

In [13]:
print(true_theta)

tensor([[-0.9196,  1.2161],
        [ 1.1737, -0.9498]])


In [14]:
enc.mu.weight

Parameter containing:
tensor([[-0.0286,  0.0149],
        [-0.4285,  0.3998]], requires_grad=True)

## VAE

In [15]:
# 'Encoder' - simple matrix
class Encoder_vae(nn.Module):
    def __init__(self, L, z_dim, device='cpu'):
        super(Encoder_vae, self).__init__()
        self.L = L
        self.z_dim = z_dim
        self.mu = nn.Linear(in_features=self.L, out_features=self.z_dim, bias=False)
    def forward(self, x):
        return self.mu(x)
    
class Decoder_vae(nn.Module):
    def __init__(self, L, z_dim, device='cpu'):
        super(Decoder_vae, self).__init__()
        self.L = L
        self.z_dim = z_dim
        self.d = nn.Linear(in_features=self.z_dim, out_features=self.L, bias=False)
    def forward(self, x):
        return self.d(x)

In [16]:
enc = Encoder_vae(L=L, z_dim=z_dim, device=device).to(device)
dec = Decoder_vae(L=L, z_dim=z_dim, device=device).to(device)

std_normal = torch.distributions.Normal(loc=torch.tensor(0., device=device),
                                                scale=torch.tensor(1., device=device))
args.std_normal = std_normal

In [17]:
params = list(enc.parameters()) + list(dec.parameters())
optimizer = torch.optim.Adam(params=params)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 300, 400], gamma=0.75) #torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0, last_epoch=-1)

In [18]:
print_info_ = 100
# with torch.autograd.detect_anomaly():
for ep in tqdm(range(args.num_epoches)): # cycle over epoches
    for b_num, batch_train in enumerate(dataloader): # cycle over batches
#         pdb.set_trace()
        mu = enc(batch_train) # sample mu and sigma from encoder
        u = args.std_normal.sample(mu.shape) # sample random tensor for reparametrization trick
        z = mu + u # reperametrization trick
        batch_recovered = dec(z)
        probs = torch.sigmoid(batch_recovered)
        
        log_likelihood = torch.distributions.Bernoulli(probs=probs).log_prob(batch_train).sum(1)
        log_prior = std_normal.log_prob(z).sum(1)
        log_q = std_normal.log_prob(u).sum(1)
        
        elbo = torch.mean(log_likelihood + log_prior - log_q)
        (-elbo).backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()
    if ep % print_info_ == 0:
        print('elbo:', elbo.cpu().detach().item())

  0%|          | 2/5000 [00:00<05:57, 13.99it/s]

elbo: -1.7558897733688354


  2%|▏         | 104/5000 [00:05<03:57, 20.59it/s]

elbo: -1.3855681419372559


  4%|▍         | 203/5000 [00:09<03:53, 20.54it/s]

elbo: -1.38618803024292


  6%|▌         | 305/5000 [00:14<03:48, 20.55it/s]

elbo: -1.3864556550979614


  8%|▊         | 404/5000 [00:19<03:44, 20.49it/s]

elbo: -1.386893391609192


 10%|█         | 503/5000 [00:24<03:38, 20.56it/s]

elbo: -1.386488676071167


 12%|█▏        | 605/5000 [00:29<03:33, 20.55it/s]

elbo: -1.3867928981781006


 14%|█▍        | 704/5000 [00:34<03:28, 20.64it/s]

elbo: -1.3858445882797241


 16%|█▌        | 803/5000 [00:39<03:23, 20.59it/s]

elbo: -1.3866851329803467


 18%|█▊        | 905/5000 [00:44<03:18, 20.59it/s]

elbo: -1.3862485885620117


 20%|██        | 1004/5000 [00:48<03:14, 20.51it/s]

elbo: -1.3864082098007202


 22%|██▏       | 1103/5000 [00:53<03:08, 20.62it/s]

elbo: -1.385992169380188


 24%|██▍       | 1205/5000 [00:58<03:04, 20.58it/s]

elbo: -1.3865265846252441


 26%|██▌       | 1304/5000 [01:03<03:00, 20.45it/s]

elbo: -1.3859598636627197


 28%|██▊       | 1403/5000 [01:08<02:55, 20.47it/s]

elbo: -1.3872487545013428


 30%|███       | 1505/5000 [01:13<02:50, 20.47it/s]

elbo: -1.3858733177185059


 32%|███▏      | 1604/5000 [01:18<02:45, 20.49it/s]

elbo: -1.3857648372650146


 34%|███▍      | 1703/5000 [01:22<02:41, 20.42it/s]

elbo: -1.387223243713379


 36%|███▌      | 1805/5000 [01:27<02:36, 20.48it/s]

elbo: -1.386448621749878


 38%|███▊      | 1904/5000 [01:32<02:31, 20.50it/s]

elbo: -1.3855845928192139


 40%|████      | 2003/5000 [01:37<02:26, 20.46it/s]

elbo: -1.3861188888549805


 42%|████▏     | 2105/5000 [01:42<02:20, 20.54it/s]

elbo: -1.3872483968734741


 44%|████▍     | 2204/5000 [01:47<02:16, 20.49it/s]

elbo: -1.3860719203948975


 46%|████▌     | 2303/5000 [01:52<02:11, 20.49it/s]

elbo: -1.3861651420593262


 48%|████▊     | 2405/5000 [01:57<02:06, 20.50it/s]

elbo: -1.3858051300048828


 50%|█████     | 2504/5000 [02:02<02:01, 20.52it/s]

elbo: -1.3854498863220215


 52%|█████▏    | 2603/5000 [02:06<01:57, 20.48it/s]

elbo: -1.3863214254379272


 54%|█████▍    | 2705/5000 [02:11<01:52, 20.49it/s]

elbo: -1.3867474794387817


 56%|█████▌    | 2804/5000 [02:16<01:47, 20.52it/s]

elbo: -1.3869109153747559


 58%|█████▊    | 2903/5000 [02:21<01:42, 20.50it/s]

elbo: -1.3860423564910889


 60%|██████    | 3005/5000 [02:26<01:37, 20.43it/s]

elbo: -1.3863308429718018


 62%|██████▏   | 3104/5000 [02:31<01:32, 20.44it/s]

elbo: -1.3863017559051514


 64%|██████▍   | 3203/5000 [02:36<01:27, 20.46it/s]

elbo: -1.3859273195266724


 66%|██████▌   | 3305/5000 [02:41<01:22, 20.43it/s]

elbo: -1.386637806892395


 68%|██████▊   | 3404/5000 [02:46<01:18, 20.46it/s]

elbo: -1.3862502574920654


 70%|███████   | 3503/5000 [02:50<01:13, 20.42it/s]

elbo: -1.3869497776031494


 72%|███████▏  | 3605/5000 [02:55<01:08, 20.47it/s]

elbo: -1.386596441268921


 74%|███████▍  | 3704/5000 [03:00<01:02, 20.79it/s]

elbo: -1.3868342638015747


 76%|███████▌  | 3803/5000 [03:05<00:57, 20.71it/s]

elbo: -1.3865556716918945


 78%|███████▊  | 3905/5000 [03:10<00:52, 20.76it/s]

elbo: -1.3861229419708252


 80%|████████  | 4004/5000 [03:15<00:48, 20.72it/s]

elbo: -1.3862134218215942


 82%|████████▏ | 4103/5000 [03:19<00:43, 20.63it/s]

elbo: -1.3863948583602905


 84%|████████▍ | 4205/5000 [03:24<00:38, 20.69it/s]

elbo: -1.386583685874939


 86%|████████▌ | 4304/5000 [03:29<00:33, 20.68it/s]

elbo: -1.3863840103149414


 88%|████████▊ | 4403/5000 [03:34<00:28, 20.65it/s]

elbo: -1.387058973312378


 90%|█████████ | 4505/5000 [03:39<00:23, 20.76it/s]

elbo: -1.3871320486068726


 92%|█████████▏| 4604/5000 [03:44<00:19, 20.73it/s]

elbo: -1.3855648040771484


 94%|█████████▍| 4703/5000 [03:48<00:14, 20.71it/s]

elbo: -1.3863588571548462


 96%|█████████▌| 4805/5000 [03:53<00:09, 20.70it/s]

elbo: -1.3861969709396362


 98%|█████████▊| 4904/5000 [03:58<00:04, 20.68it/s]

elbo: -1.387112021446228


100%|██████████| 5000/5000 [04:03<00:00, 20.55it/s]


In [19]:
dec.d.weight.T

tensor([[-0.0039, -0.0011],
        [-0.0083, -0.0068]], grad_fn=<PermuteBackward>)

In [20]:
dec.d.bias

In [21]:
print(true_theta)

tensor([[0.6969, 0.2562],
        [0.7179, 0.3912]])


In [22]:
for b_num, batch_train in enumerate(dataloader): # cycle over batches
    mu = enc(batch_train) # sample mu and sigma from encoder
    u = args.std_normal.sample(mu.shape) # sample random tensor for reparametrization trick
    z = mu + u # reperametrization trick
    break

In [23]:
batch_train[:10]

tensor([[1., 0.],
        [1., 1.],
        [1., 1.],
        [0., 0.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.],
        [0., 0.]])

In [24]:
torch.distributions.Bernoulli(probs=torch.sigmoid(dec(z[:10]))).sample()

tensor([[1., 1.],
        [0., 1.],
        [0., 0.],
        [1., 1.],
        [1., 1.],
        [0., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.]])