In [108]:
%load_ext autoreload
%autoreload 2

# pip install easydict
import easydict
import torch
import torch.nn as nn
import torch.nn.functional as F

from data import MetaLearningSystemDataLoader
from data import FewShotLearningDatasetParallel
from utils.parser_utils import get_args


from meta_neural_network_architectures import VGGReLUNormNetwork, ResNet12, extract_top_level_dict, MetaBatchNormLayer, MetaLinearLayer
from inner_loop_optimizers import LSLRGradientDescentLearningRule

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [109]:
args = easydict.EasyDict(
{
  "batch_size":2,
  "image_height":84,
  "image_width":84,
  "image_channels":3,
  "gpu_to_use":0,
  "num_dataprovider_workers":4,
  "max_models_to_save":5,
  "dataset_name":"mini_imagenet_full_size",
  "dataset_path":"mini_imagenet_full_size",
  "reset_stored_paths":False,
  "experiment_name":"alfa+maml",
  "train_seed": 0, "val_seed": 0,
  "indexes_of_folders_indicating_class": [-3, -2],
  "sets_are_pre_split": True,
  "train_val_test_split": [0.64, 0.16, 0.20],
  "evaluate_on_test_set_only": False,

  "total_epochs": 100,
  "total_iter_per_epoch":500, "continue_from_epoch": -2,
  "num_evaluation_tasks":600,
  "multi_step_loss_num_epochs": 15,
  "minimum_per_task_contribution": 0.01,
  "learnable_per_layer_per_step_inner_loop_learning_rate": False,
  "enable_inner_loop_optimizable_bn_params": False,
  "evalute_on_test_set_only": False,

  "max_pooling": True,
  "per_step_bn_statistics": False,
  "learnable_batch_norm_momentum": False,
  "load_into_memory": False,
  "init_inner_loop_learning_rate": 0.01,
  "init_inner_loop_weight_decay": 0.0005,
  "learnable_bn_gamma": True,
  "learnable_bn_beta": True,

  "dropout_rate_value":0.0,
  "min_learning_rate":0.001,
  "meta_learning_rate":0.001,   "total_epochs_before_pause": 100,
  "first_order_to_second_order_epoch":-1,
  "weight_decay": 0.0,

  "norm_layer":"batch_norm",
  "cnn_num_filters":48,
  "num_stages":4,
  "conv_padding": True,
  "number_of_training_steps_per_iter":5,
  "number_of_evaluation_steps_per_iter":5,
  "cnn_blocks_per_stage":1,
  "num_classes_per_set":5,
  "num_samples_per_class":5,
  "num_target_samples": 15,

  "second_order": True,
  "use_multi_step_loss_optimization":False,
  "attenuate": False,
  "alfa": True,
  "random_init": False,
  "backbone": "4-CONV"
}
)

device = torch.cuda.current_device()
im_shape = (2, 3, args.image_height, args.image_width)

args.use_cuda = torch.cuda.is_available()
args.seed = 104
args.reverse_channels=False
args.labels_as_int=False
args.reset_stored_filepaths=False
args.num_of_gpus=1

In [110]:
data = MetaLearningSystemDataLoader

# 1. Linear Regression으로 meta_adpative_curriculum을 구현하기 위한 시도
### - 사용하지 않는다
### - Linear Regression의 차원으로 인해 적용 불가하다
### - output의 차원을 통제하기 어렵다

In [111]:
input_dim = 10
input_shape = (1, input_dim)

meta_linear = MetaLinearLayer(input_shape=input_shape, num_filters=input_dim, use_bias=True)

In [112]:
for name, param in meta_linear.named_parameters():
    print(name, param.shape)

weights torch.Size([10, 10])
bias torch.Size([10])


In [113]:
x1 = torch.tensor([1, 2, 3, 4])
x2 = torch.tensor([5, 6, 7, 8])
x3 = torch.tensor([9, 10, 11 ,12])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))

# inputs = torch.stack(a)
inputs = torch.stack(a)
inputs


a len ==  3


tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

In [114]:
input_dim = 10
input_shape = (1, input_dim)
num_filters=input_dim
b, c = input_shape #c=10

weights1 = nn.Parameter(torch.ones(num_filters, c))
# weight = nn.Parameter(torch.FloatTensor(out_features, in_features))

nn.init.xavier_uniform_(weights1)
#weights

Parameter containing:
tensor([[ 0.0548, -0.3760, -0.0344, -0.2852, -0.0900,  0.3292, -0.4537,  0.1552,
          0.5075,  0.5171],
        [-0.3942,  0.1571,  0.1007,  0.2596,  0.2473,  0.4311, -0.2338,  0.4221,
          0.4029, -0.4490],
        [ 0.0721,  0.2862, -0.5308, -0.0199, -0.5455, -0.0503, -0.1801, -0.1884,
          0.2964,  0.4391],
        [-0.4541, -0.1494, -0.0903,  0.2008,  0.5134,  0.1731,  0.3638,  0.4682,
         -0.2441,  0.4088],
        [-0.5431, -0.2243, -0.3429,  0.2121,  0.3825, -0.1407,  0.2971,  0.5402,
          0.4813, -0.2902],
        [-0.2602, -0.4393, -0.0885, -0.4255,  0.4029, -0.0476,  0.1118,  0.2320,
         -0.3739,  0.2959],
        [-0.4520,  0.4446,  0.5188, -0.1288, -0.0282,  0.2377, -0.5285,  0.3231,
          0.3172, -0.2486],
        [ 0.0925,  0.4461, -0.2178,  0.5254, -0.2857,  0.5168,  0.3996, -0.4742,
          0.4068,  0.4955],
        [-0.5432, -0.1709, -0.1184,  0.4868, -0.0915,  0.1797,  0.0129,  0.4036,
         -0.4648, -0.4477

In [115]:
bias1 = nn.Parameter(torch.zeros(num_filters))
bias1

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [116]:
x = torch.rand(3, 10)

out1 = F.linear(input=x, weight=weights1, bias=bias1)
# y = xW^T + b

out1

tensor([[ 0.0342,  0.5365, -0.5674,  1.2229,  0.6175, -0.0138, -0.0882,  0.9999,
         -0.1395,  0.3651],
        [-0.2281,  0.3739, -0.5908,  0.2960,  0.3007, -0.5900,  0.0173,  0.9789,
         -0.7192,  0.5969],
        [ 0.0985,  0.4329, -0.1373,  1.0437,  0.1756, -0.1758, -0.2531,  1.1990,
          0.1610,  0.1259]], grad_fn=<AddmmBackward0>)

In [117]:
weights2 = nn.Parameter(torch.ones(2, 10))
nn.init.xavier_uniform_(weights2)
out2 = F.linear(input=out1, weight=weights2)
# y = x*W^T + b

out2

tensor([[-0.5279, -1.2201],
        [-0.3305, -1.5321],
        [-0.5346, -0.6170]], grad_fn=<MmBackward0>)

In [118]:
out3 = F.relu_(out2)
out3

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]], grad_fn=<ReluBackward0>)

# 2. LSTM으로 시도
### 안되면 conv1d
### https://sanghyu.tistory.com/52

In [119]:
x1 = torch.tensor([1, 2, 3, 4,5,6,7,8,9,10])
x2 = torch.tensor([5, 6, 7, 8,11,12,13,14,15,16])
x3 = torch.tensor([9, 10, 11 ,12,16,17,18,19,20,21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

inputs

a len ==  3


tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 5.,  6.,  7.,  8., 11., 12., 13., 14., 15., 16.],
        [ 9., 10., 11., 12., 16., 17., 18., 19., 20., 21.]])

In [120]:
D_in = 10 # input_size: input의 feature dimension을 넣어주어야 한다. time step이 아니라 input feature dimension!
H= 2     # 내부에서 어떤 feature dimension으로 바꿔주고 싶은지를 넣어주면 된다.
D_out = 2

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H),
             torch.nn.ReLU(),
             torch.nn.LSTM(H, D_out))

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H))

meta_adaptive_curriculum(inputs)

(tensor([[ 3.3837e-06,  4.7783e-02],
         [ 4.2505e-08,  9.0387e-03],
         [-1.3919e-09,  2.9464e-03]], grad_fn=<SqueezeBackward1>),
 (tensor([[-1.3919e-09,  2.9464e-03]], grad_fn=<SqueezeBackward1>),
  tensor([[-1.3919e-09,  1.0000e+00]], grad_fn=<SqueezeBackward1>)))

In [121]:
batch_size = 1
sequence_length = 3
input_features = 10
output_features = 5

# produce random data
x = torch.randn(batch_size, sequence_length, input_features)
x

tensor([[[-1.3122,  0.5331, -0.1796, -0.8394, -0.3112, -0.0492, -0.1339,
           0.1658,  0.3101,  1.1329],
         [-0.1352, -0.4880,  0.5118, -0.0088,  0.2206, -0.9102, -0.5511,
           0.4316, -0.8683, -1.0640],
         [-0.6711, -0.7300,  1.1797,  1.0489,  1.7177, -1.0403, -0.3063,
          -1.8115,  0.4117,  1.1906]]])

In [123]:
lstm_layer = nn.LSTM(
    input_size=input_features,
    hidden_size=output_features,
    batch_first=True
)

expected_output_shape = (batch_size, sequence_length, output_features)

x_out, _ = lstm_layer(x)

print(x_out.shape == expected_output_shape)

True


tensor([[-0.0940, -0.1628, -0.0330,  0.0506, -0.1559],
        [-0.0071, -0.3010, -0.1873,  0.0588, -0.0903],
        [-0.0691, -0.0922,  0.1313,  0.1493,  0.0610]],
       grad_fn=<SelectBackward0>)

# 3. Conv1D
### https://kaya-dev.tistory.com/6

In [225]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

inputs

a len ==  3


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [457]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=2))
meta_adaptive_curriculum(inputs)

tensor([[-0.0178, -0.1619,  0.0021, -0.1357,  0.0399,  0.0731,  0.1062,  0.1394,
         -0.0681]], grad_fn=<SqueezeBackward1>)

In [607]:
adaptive_curriculum = nn.Sequential(
                    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=2),
                    nn.Linear(9,4),
                    nn.Sigmoid())

a = meta_adaptive_curriculum(inputs)
#int(torch.argmax(a))
print(a)

tensor([[0.4132]], grad_fn=<SigmoidBackward0>)


In [684]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=2),
    nn.Linear(9,4),
    nn.Linear(4,1),
    nn.Sigmoid())

#int(meta_adaptive_curriculum(inputs) * 5)
a = meta_adaptive_curriculum(inputs)
a = int(a * 5)
print(a)
#int(torch.argmax(a))

2


# 4. Linear

In [549]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)
inputs

a len ==  3


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [550]:
adaptive_curriculum = nn.Sequential(
                nn.Linear(3, 10))


a = meta_adaptive_curriculum(inputs)
print(a)
int(torch.argmax(a))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x8 and 9x1)