In [1]:
import pickle
import numpy as np
import pandas as pd
import xarray as xr
import torch
import torch.nn as nn
from itertools import product
import matplotlib.pyplot as plt
from collections import defaultdict
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

In [3]:
with open('data_train_dict_torch.pkl', 'rb') as f:
    data_train = pickle.load(f)

with open('data_test_dict_torch.pkl', 'rb') as f:
    data_test = pickle.load(f)

In [4]:
min_channels = []
max_channels = []
train_dims = [100, 120, 140, 160, 180]
for we, sn in np.random.permutation([*product(train_dims, train_dims)]):
    min_channels.append(data_train[f'{we}x{sn}'].transpose(0, 1).reshape(15, -1).min(dim=1)[0])
    max_channels.append(data_train[f'{we}x{sn}'].transpose(0, 1).reshape(15, -1).max(dim=1)[0])
min_scale = torch.stack(min_channels).min(dim=0)[0]
max_scale = torch.stack(max_channels).max(dim=0)[0]

In [5]:
def scale_batch(batch, min_scale, max_scale):
    min = min_scale[None,...,None, None].to(batch.device)
    max = max_scale[None,...,None, None].to(batch.device)
    return (batch - min) / (max - min)

In [6]:
def iterate_minibatches(*tensors, batch_size, shuffle=True, epochs=1,
                        allow_incomplete=True, callback=lambda x:x):
    indices = np.arange(len(tensors[0]))
    upper_bound = int((np.ceil if allow_incomplete else np.floor) (len(indices) / batch_size)) * batch_size
    epoch = 0
    while True:
        if shuffle:
            np.random.shuffle(indices)
        for batch_start in callback(range(0, upper_bound, batch_size)):
            batch_ix = indices[batch_start: batch_start + batch_size]
            batch = [tensor[batch_ix] for tensor in tensors]
            yield batch if len(tensors) > 1 else batch[0]
        epoch += 1
        if epoch >= epochs:
            break

# bayesian

In [7]:
import math

In [14]:
class Autoencoder(nn.Module):
    def __init__(self, input_channels=15, 
                    conv_kernel_0 = 3, adaptive_size = 90, 
                    conv_kernel_1 = 3, pool_kernel_1 = 3, pool_stride_1 = 2, 
                    conv_kernel_2 = 3, pool_kernel_2 = 3, pool_stride_2 = 2,
                    conv_kernel_3 = 3, pool_kernel_3 = 3, pool_stride_3 = 2,
                    upsample_1 = 16,
                    upsample_2 = 24,
                    upsample_3 = 52,
                    first_layer = 10, second_layer = 6, third_layer = 1):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=input_channels, out_channels=15, kernel_size=conv_kernel_0),
            nn.BatchNorm2d(15),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(adaptive_size, adaptive_size)), 
            
            nn.Conv2d(in_channels=15, out_channels=first_layer, kernel_size=conv_kernel_1),
            nn.BatchNorm2d(first_layer),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_kernel_1, stride=pool_stride_1), # a = math.ceil((adaptive_size - (conv_kernel_1 - 1)) / pool_stride_1) - 1

            nn.Conv2d(in_channels=first_layer, out_channels=second_layer, kernel_size=conv_kernel_2),
            nn.BatchNorm2d(second_layer),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_kernel_2, stride=pool_stride_2), # b = math.ceil((a - (conv_kernel_2 - 1)) / pool_stride_2) - 1

            nn.Conv2d(in_channels=second_layer, out_channels=third_layer, kernel_size=conv_kernel_3),
            nn.BatchNorm2d(third_layer),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=pool_kernel_3, stride=pool_stride_3), # c = math.ceil((b - (conv_kernel_3 - 1)) / pool_stride_3) - 1

            nn.Flatten(),
        )
        self.encoder = self.encoder.float() # c = math.ceil((math.ceil((math.ceil((adaptive_size - (conv_kernel_1 - 1)) / pool_stride_1) - 1 - (conv_kernel_2 - 1)) / pool_stride_2) - 1 - (conv_kernel_3 - 1)) / pool_stride_3) - 1

        self.decoder = nn.Sequential(
            nn.Unflatten(1, (third_layer, int(math.ceil((math.ceil((math.ceil((adaptive_size - (conv_kernel_1 - 1)) / pool_stride_1) - 1 - (conv_kernel_2 - 1)) / pool_stride_2) - 1 - (conv_kernel_3 - 1)) / pool_stride_3) - 1), 
                            int(math.ceil((math.ceil((math.ceil((adaptive_size - (conv_kernel_1 - 1)) / pool_stride_1) - 1 - (conv_kernel_2 - 1)) / pool_stride_2) - 1 - (conv_kernel_3 - 1)) / pool_stride_3) - 1))),
            
            nn.ConvTranspose2d(in_channels=third_layer, out_channels=second_layer, kernel_size=conv_kernel_3),
            nn.Upsample(size=(upsample_1, upsample_1)),
            nn.ConvTranspose2d(in_channels=second_layer, out_channels=first_layer, kernel_size=conv_kernel_2),
            nn.Upsample(size=(upsample_2, upsample_2)),
            nn.ConvTranspose2d(in_channels=first_layer, out_channels=15, kernel_size=conv_kernel_1),
            nn.Upsample(size=(upsample_3, upsample_3)),
            nn.ConvTranspose2d(in_channels=15, out_channels=15, kernel_size=conv_kernel_0),
        )
        self.decoder = self.decoder.float()

    def forward(self, features):
        emb = self.encoder(features)
        reconstructed = self.decoder(emb)
    
        _, _, w, h = features.shape
        reconstructed = nn.Upsample(size=(w, h))(reconstructed)
    
        if reconstructed.shape != features.shape:
            print(reconstructed.shape)
            print(features.shape)
            assert False
    
        return reconstructed

In [15]:
def objective(params):
    device = 'cuda'
    print(params)

    model = Autoencoder(first_layer = int(params['first_layer']),
                        second_layer = int(params['second_layer']),
                        third_layer = int(params['third_layer']),
                        adaptive_size = int(params['adaptive_size']), 
                        upsample_1 = int(params['upsample_1']),
                        upsample_2 = int(params['upsample_2']),
                        upsample_3 = int(params['upsample_3'])).to(device)

    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    test_loss = defaultdict(list)
    
    for epoch in range(25):
        for we, sn in np.random.permutation([*product(train_dims, train_dims)]):
            for batch in iterate_minibatches(data_train[f'{we}x{sn}'], batch_size=40):
                opt.zero_grad()
                batch = scale_batch(batch.to(device), min_scale, max_scale)
                out = model(batch)
                loss = criterion(out, batch)
                loss.backward()
                opt.step()
        for we, sn in [*product(train_dims, train_dims)]:
            test_batch = scale_batch(data_test[f'{we}x{sn}'].to(device), min_scale, max_scale)
            out = model(test_batch)
            test_loss[f'{we}x{sn}'].append(criterion(out, test_batch).item())
        mse_test = np.array([np.array(test_loss[f'{we}x{sn}']) for we, sn in [*product(train_dims, train_dims)]]).mean(0)
        # return mse_test[-1]
    return {'loss': mse_test[-1], 'params': params, 'status': STATUS_OK}
        

In [16]:
# possible values of parameters
space={'first_layer' : hp.choice('first_layer', [10, 11, 12, 13]), 
      'second_layer' : hp.choice('second_layer', [6, 7, 8, 9]), 
      'third_layer' : hp.choice('third_layer', [2, 3, 4, 5]), 
      'adaptive_size' : hp.choice('adaptive_size', [60, 70, 80, 90, 100]), 
      'upsample_1' : hp.choice('upsample_1', [8, 12, 16, 24]),
      'upsample_2' : hp.choice('upsample_2', [28, 32, 36, 40]),
      'upsample_3' : hp.choice('upsample_3', [44, 48, 52, 56, 60])
      }

trials = Trials()

max_evals = 10
for i in range(1, max_evals + 1, 1):
    best=fmin(fn=objective,
          space=space, 
          algo=tpe.suggest,
          max_evals=i,
          trials=trials,
          show_progressbar=True
         )

    print(best)
   
    pickle.dump(trials, open("bayesian_channels_results.pkl", "wb"))

{'adaptive_size': 100, 'first_layer': 13, 'second_layer': 7, 'third_layer': 3, 'upsample_1': 12, 'upsample_2': 40, 'upsample_3': 56}
  0%|          | 0/1 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 1/1 [03:11<00:00, 191.05s/trial, best loss: 0.024075031057000162]
{'adaptive_size': 4, 'first_layer': 3, 'second_layer': 1, 'third_layer': 1, 'upsample_1': 1, 'upsample_2': 3, 'upsample_3': 3}
{'adaptive_size': 70, 'first_layer': 13, 'second_layer': 6, 'third_layer': 2, 'upsample_1': 16, 'upsample_2': 36, 'upsample_3': 48}
100%|██████████| 2/2 [03:31<00:00, 211.92s/trial, best loss: 0.024075031057000162]
{'adaptive_size': 4, 'first_layer': 3, 'second_layer': 1, 'third_layer': 1, 'upsample_1': 1, 'upsample_2': 3, 'upsample_3': 3}
{'adaptive_size': 90, 'first_layer': 11, 'second_layer': 9, 'third_layer': 3, 'upsample_1': 12, 'upsample_2': 36, 'upsample_3': 60}
100%|██████████| 3/3 [03:47<00:00, 227.98s/trial, best loss: 0.024075031057000162]
{'adaptive_size': 4, 'first_layer': 3, 'second_layer': 1, 'third_layer': 1, 'upsample_1': 1, 'upsample_2': 3, 'upsample_3': 3}
{'adaptive_size': 60, 'first_layer': 11, 'second_layer': 9, 'third_layer': 2, 'upsample_1': 16, 'upsample_

In [110]:
iter_done = len(trials.results)
max_evals = 90
iter_done

10

In [None]:
for i in range(1 + iter_done, max_evals + 1 + iter_done, 1):
    best=fmin(fn=objective,
          space=space, 
          algo=tpe.suggest,
          max_evals=i,
          trials=trials, 
          show_progressbar=True
         )
    print(best)

    # pickle.dump(trials, open("bayesian_channels_results.pkl", "wb"))

In [None]:
trials.results

In [47]:
# import pickle

# with open('trials.pkl', 'wb') as f:
#    pickle.dump(trials, f)

# with open('best_params.pkl', 'wb') as f:
#    pickle.dump(best, f)

Загрузка результатов

In [None]:
file = []
with (open('bayesian_channels_results.pkl','rb')) as openfile:
    while True:
        try:
            file.append(pickle.load(openfile))
        except EOFError:
            break
df = pd.DataFrame(file[0])