In [685]:
%load_ext autoreload
%autoreload 2

# pip install easydict
import easydict
import torch
import torch.nn as nn
import torch.nn.functional as F

from data import MetaLearningSystemDataLoader
from data import FewShotLearningDatasetParallel
from utils.parser_utils import get_args


from meta_neural_network_architectures import VGGReLUNormNetwork, ResNet12, extract_top_level_dict, MetaBatchNormLayer, MetaLinearLayer
from inner_loop_optimizers import LSLRGradientDescentLearningRule

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [686]:
args = easydict.EasyDict(
{
  "batch_size":2,
  "image_height":84,
  "image_width":84,
  "image_channels":3,
  "gpu_to_use":0,
  "num_dataprovider_workers":4,
  "max_models_to_save":5,
  "dataset_name":"mini_imagenet_full_size",
  "dataset_path":"mini_imagenet_full_size",
  "reset_stored_paths":False,
  "experiment_name":"alfa+maml",
  "train_seed": 0, "val_seed": 0,
  "indexes_of_folders_indicating_class": [-3, -2],
  "sets_are_pre_split": True,
  "train_val_test_split": [0.64, 0.16, 0.20],
  "evaluate_on_test_set_only": False,

  "total_epochs": 100,
  "total_iter_per_epoch":500, "continue_from_epoch": -2,
  "num_evaluation_tasks":600,
  "multi_step_loss_num_epochs": 15,
  "minimum_per_task_contribution": 0.01,
  "learnable_per_layer_per_step_inner_loop_learning_rate": False,
  "enable_inner_loop_optimizable_bn_params": False,
  "evalute_on_test_set_only": False,

  "max_pooling": True,
  "per_step_bn_statistics": False,
  "learnable_batch_norm_momentum": False,
  "load_into_memory": False,
  "init_inner_loop_learning_rate": 0.01,
  "init_inner_loop_weight_decay": 0.0005,
  "learnable_bn_gamma": True,
  "learnable_bn_beta": True,

  "dropout_rate_value":0.0,
  "min_learning_rate":0.001,
  "meta_learning_rate":0.001,   "total_epochs_before_pause": 100,
  "first_order_to_second_order_epoch":-1,
  "weight_decay": 0.0,

  "norm_layer":"batch_norm",
  "cnn_num_filters":48,
  "num_stages":4,
  "conv_padding": True,
  "number_of_training_steps_per_iter":5,
  "number_of_evaluation_steps_per_iter":5,
  "cnn_blocks_per_stage":1,
  "num_classes_per_set":5,
  "num_samples_per_class":5,
  "num_target_samples": 15,

  "second_order": True,
  "use_multi_step_loss_optimization":False,
  "attenuate": False,
  "alfa": True,
  "random_init": False,
  "backbone": "4-CONV"
}
)

device = torch.cuda.current_device()
im_shape = (2, 3, args.image_height, args.image_width)

args.use_cuda = torch.cuda.is_available()
args.seed = 104
args.reverse_channels=False
args.labels_as_int=False
args.reset_stored_filepaths=False
args.num_of_gpus=1

In [687]:
data = MetaLearningSystemDataLoader

# 1. Linear Regression으로 meta_adpative_curriculum을 구현하기 위한 시도
### - 사용하지 않는다
### - Linear Regression의 차원으로 인해 적용 불가하다
### - output의 차원을 통제하기 어렵다

In [688]:
input_dim = 10
input_shape = (1, input_dim)

meta_linear = MetaLinearLayer(input_shape=input_shape, num_filters=input_dim, use_bias=True)

In [689]:
for name, param in meta_linear.named_parameters():
    print(name, param.shape)

weights torch.Size([10, 10])
bias torch.Size([10])


In [690]:
x1 = torch.tensor([1, 2, 3, 4])
x2 = torch.tensor([5, 6, 7, 8])
x3 = torch.tensor([9, 10, 11 ,12])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))

# inputs = torch.stack(a)
inputs = torch.stack(a)
inputs


a len ==  3


tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

In [691]:
input_dim = 10
input_shape = (1, input_dim)
num_filters=input_dim
b, c = input_shape #c=10

weights1 = nn.Parameter(torch.ones(num_filters, c))
# weight = nn.Parameter(torch.FloatTensor(out_features, in_features))

nn.init.xavier_uniform_(weights1)
#weights

Parameter containing:
tensor([[ 0.0364, -0.0539,  0.1909, -0.2905, -0.3656, -0.0267, -0.3022, -0.2922,
          0.1275, -0.2587],
        [-0.4672,  0.0126,  0.5321,  0.3353, -0.2960,  0.4450, -0.3140,  0.2775,
         -0.2970,  0.0274],
        [-0.3607, -0.5318,  0.3688, -0.5186, -0.5275,  0.2632, -0.4411,  0.0230,
         -0.2560,  0.3316],
        [ 0.3577, -0.1972, -0.3845,  0.4221,  0.4524, -0.2807, -0.2833, -0.1152,
          0.0409, -0.1963],
        [-0.0517, -0.4088, -0.2471,  0.3102, -0.5179,  0.3807,  0.1080,  0.4794,
         -0.5057, -0.2270],
        [-0.5412,  0.4166, -0.2408, -0.5284, -0.1872, -0.0149,  0.3841,  0.2630,
         -0.4650,  0.4323],
        [ 0.1436, -0.5149, -0.4313,  0.0863,  0.2253, -0.1602, -0.4749, -0.2412,
          0.4612, -0.5343],
        [-0.5359, -0.3466, -0.5199,  0.4558, -0.4525, -0.0040, -0.1630, -0.3868,
         -0.4991,  0.4467],
        [ 0.4961, -0.2447, -0.2916,  0.1642, -0.4376, -0.4701, -0.3763,  0.3604,
          0.5452,  0.2580

In [692]:
bias1 = nn.Parameter(torch.zeros(num_filters))
bias1

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [693]:
x = torch.rand(3, 10)

out1 = F.linear(input=x, weight=weights1, bias=bias1)
# y = xW^T + b

out1

tensor([[-0.6641,  0.3276, -1.0046,  0.2556, -0.0299, -0.9472, -0.0665, -0.8861,
         -0.2933,  0.0408],
        [-0.3910,  0.0851, -0.7857,  0.2538, -0.4955, -0.4419, -0.1354, -0.8921,
         -0.4803,  0.1269],
        [-0.7700,  0.2488, -1.1447,  0.0998, -0.1821, -0.3008, -0.7266, -0.6806,
          0.1875,  0.2132]], grad_fn=<AddmmBackward0>)

In [694]:
weights2 = nn.Parameter(torch.ones(2, 10))
nn.init.xavier_uniform_(weights2)
out2 = F.linear(input=out1, weight=weights2)
# y = x*W^T + b

out2

tensor([[1.2852, 0.9372],
        [0.8929, 0.5722],
        [0.6433, 0.8925]], grad_fn=<MmBackward0>)

In [695]:
out3 = F.relu_(out2)
out3

tensor([[1.2852, 0.9372],
        [0.8929, 0.5722],
        [0.6433, 0.8925]], grad_fn=<ReluBackward0>)

# 2. LSTM으로 시도
### 안되면 conv1d
### https://sanghyu.tistory.com/52

In [696]:
x1 = torch.tensor([1, 2, 3, 4,5,6,7,8,9,10])
x2 = torch.tensor([5, 6, 7, 8,11,12,13,14,15,16])
x3 = torch.tensor([9, 10, 11 ,12,16,17,18,19,20,21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

inputs

a len ==  3


tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 5.,  6.,  7.,  8., 11., 12., 13., 14., 15., 16.],
        [ 9., 10., 11., 12., 16., 17., 18., 19., 20., 21.]])

In [697]:
D_in = 10 # input_size: input의 feature dimension을 넣어주어야 한다. time step이 아니라 input feature dimension!
H= 2     # 내부에서 어떤 feature dimension으로 바꿔주고 싶은지를 넣어주면 된다.
D_out = 2

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H),
             torch.nn.ReLU(),
             torch.nn.LSTM(H, D_out))

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H))

meta_adaptive_curriculum(inputs)

(tensor([[ 7.4655e-05, -7.2966e-01],
         [ 1.6548e-07, -9.5746e-01],
         [ 7.1889e-10, -9.9282e-01]], grad_fn=<SqueezeBackward1>),
 (tensor([[ 7.1889e-10, -9.9282e-01]], grad_fn=<SqueezeBackward1>),
  tensor([[ 0.0054, -2.9961]], grad_fn=<SqueezeBackward1>)))

In [698]:
batch_size = 1
sequence_length = 3
input_features = 10
output_features = 5

# produce random data
x = torch.randn(batch_size, sequence_length, input_features)
x

tensor([[[ 0.9235, -1.5190, -0.0254,  1.8340, -0.4024,  0.0698, -0.7960,
          -0.3041, -0.4847,  1.3663],
         [ 0.0069,  0.7056, -1.4296,  1.1079,  0.0187, -0.3459, -1.0573,
           1.4781, -1.3182,  0.6059],
         [ 0.3683, -2.0391,  0.5262, -0.0336, -1.0296,  0.7878, -2.7096,
          -0.7749, -0.6737, -0.7976]]])

In [699]:
lstm_layer = nn.LSTM(
    input_size=input_features,
    hidden_size=output_features,
    batch_first=True
)

expected_output_shape = (batch_size, sequence_length, output_features)

x_out, _ = lstm_layer(x)

print(x_out.shape == expected_output_shape)

True


# 3. Conv1D
### https://kaya-dev.tistory.com/6

In [700]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

inputs

a len ==  3


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [821]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1))
meta_adaptive_curriculum(inputs)

tensor([[ 0.7934,  0.3721,  0.1253,  0.3634,  0.0706,  0.0316, -0.0074, -0.0463,
         -0.0853,  0.3074]], grad_fn=<SqueezeBackward1>)

In [845]:
adaptive_curriculum = nn.Sequential(
                    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1),
                    nn.Linear(10,4),
                    nn.Sigmoid())

a = meta_adaptive_curriculum(inputs)
#int(torch.argmax(a))
print(a)

tensor([[ 0.7934,  0.3721,  0.1253,  0.3634,  0.0706,  0.0316, -0.0074, -0.0463,
         -0.0853,  0.3074]], grad_fn=<SqueezeBackward1>)


In [819]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1),
    nn.Linear(9,4),
    nn.Linear(4,1),
    nn.Sigmoid())

#int(meta_adaptive_curriculum(inputs) * 5)
a = meta_adaptive_curriculum(inputs)
a = int(a * 5)
print(a)
#int(torch.argmax(a))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x10 and 9x4)

# 4. Linear

In [704]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)
inputs

a len ==  3


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [705]:
adaptive_curriculum = nn.Sequential(
                nn.Linear(3, 10))


a = meta_adaptive_curriculum(inputs)
print(a)
int(torch.argmax(a))

tensor([[0.6672]], grad_fn=<SigmoidBackward0>)


0