In [1]:
%load_ext autoreload
%autoreload 2

# pip install easydict
import easydict
import torch
import torch.nn as nn
import torch.nn.functional as F

from data import MetaLearningSystemDataLoader
from data import FewShotLearningDatasetParallel
from utils.parser_utils import get_args


from meta_neural_network_architectures import VGGReLUNormNetwork, ResNet12, extract_top_level_dict, MetaBatchNormLayer, MetaLinearLayer
from inner_loop_optimizers import LSLRGradientDescentLearningRule

In [2]:
args = easydict.EasyDict(
{
  "batch_size":2,
  "image_height":84,
  "image_width":84,
  "image_channels":3,
  "gpu_to_use":0,
  "num_dataprovider_workers":4,
  "max_models_to_save":5,
  "dataset_name":"mini_imagenet_full_size",
  "dataset_path":"mini_imagenet_full_size",
  "reset_stored_paths":False,
  "experiment_name":"alfa+maml",
  "train_seed": 0, "val_seed": 0,
  "indexes_of_folders_indicating_class": [-3, -2],
  "sets_are_pre_split": True,
  "train_val_test_split": [0.64, 0.16, 0.20],
  "evaluate_on_test_set_only": False,

  "total_epochs": 100,
  "total_iter_per_epoch":500, "continue_from_epoch": -2,
  "num_evaluation_tasks":600,
  "multi_step_loss_num_epochs": 15,
  "minimum_per_task_contribution": 0.01,
  "learnable_per_layer_per_step_inner_loop_learning_rate": False,
  "enable_inner_loop_optimizable_bn_params": False,
  "evalute_on_test_set_only": False,

  "max_pooling": True,
  "per_step_bn_statistics": False,
  "learnable_batch_norm_momentum": False,
  "load_into_memory": False,
  "init_inner_loop_learning_rate": 0.01,
  "init_inner_loop_weight_decay": 0.0005,
  "learnable_bn_gamma": True,
  "learnable_bn_beta": True,

  "dropout_rate_value":0.0,
  "min_learning_rate":0.001,
  "meta_learning_rate":0.001,   "total_epochs_before_pause": 100,
  "first_order_to_second_order_epoch":-1,
  "weight_decay": 0.0,

  "norm_layer":"batch_norm",
  "cnn_num_filters":48,
  "num_stages":4,
  "conv_padding": True,
  "number_of_training_steps_per_iter":5,
  "number_of_evaluation_steps_per_iter":5,
  "cnn_blocks_per_stage":1,
  "num_classes_per_set":5,
  "num_samples_per_class":5,
  "num_target_samples": 15,

  "second_order": True,
  "use_multi_step_loss_optimization":False,
  "attenuate": False,
  "alfa": True,
  "random_init": False,
  "backbone": "4-CONV"
}
)

device = torch.cuda.current_device()
im_shape = (2, 3, args.image_height, args.image_width)

args.use_cuda = torch.cuda.is_available()
args.seed = 104
args.reverse_channels=False
args.labels_as_int=False
args.reset_stored_filepaths=False
args.num_of_gpus=1

In [3]:
data = MetaLearningSystemDataLoader

# 1. Linear Regression으로 meta_adpative_curriculum을 구현하기 위한 시도
### - 사용하지 않는다
### - Linear Regression의 차원으로 인해 적용 불가하다
### - output의 차원을 통제하기 어렵다

In [4]:
input_dim = 10
input_shape = (1, input_dim)

meta_linear = MetaLinearLayer(input_shape=input_shape, num_filters=input_dim, use_bias=True)

In [5]:
for name, param in meta_linear.named_parameters():
    print(name, param.shape)

weights torch.Size([10, 10])
bias torch.Size([10])


In [6]:
x1 = torch.tensor([1, 2, 3, 4])
x2 = torch.tensor([5, 6, 7, 8])
x3 = torch.tensor([9, 10, 11 ,12])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))

# inputs = torch.stack(a)
inputs = torch.stack(a)
inputs


a len ==  3


tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

In [7]:
input_dim = 10
input_shape = (1, input_dim)
num_filters=input_dim
b, c = input_shape #c=10

weights1 = nn.Parameter(torch.ones(num_filters, c))
# weight = nn.Parameter(torch.FloatTensor(out_features, in_features))

nn.init.xavier_uniform_(weights1)
#weights

Parameter containing:
tensor([[-0.1910, -0.0849,  0.0639,  0.2329,  0.3826, -0.5114, -0.1461,  0.1251,
         -0.1042, -0.2784],
        [-0.0110, -0.2903,  0.2639,  0.5262, -0.3030,  0.0328,  0.1335,  0.4269,
          0.3459,  0.0594],
        [-0.0231,  0.0374, -0.3457,  0.1960,  0.4374, -0.2969,  0.0480, -0.2695,
          0.0344,  0.0450],
        [ 0.4178,  0.3607,  0.3666, -0.3318,  0.4949, -0.2304,  0.0934,  0.1559,
         -0.0487, -0.2307],
        [ 0.1899, -0.5290,  0.0372, -0.4089, -0.2889, -0.1496, -0.2090, -0.0521,
          0.0983, -0.5199],
        [-0.1662,  0.0031, -0.0961,  0.3747, -0.1801,  0.1267,  0.4116,  0.2156,
         -0.1768,  0.1759],
        [-0.3891, -0.1062,  0.2070,  0.4332, -0.1609,  0.0660,  0.2310,  0.5121,
         -0.4660, -0.0081],
        [-0.3735, -0.1285, -0.1264, -0.0880,  0.1838,  0.3666, -0.2326,  0.1802,
          0.3001, -0.3834],
        [ 0.1761, -0.2643,  0.2797,  0.0938,  0.2325,  0.1360,  0.2962, -0.3737,
          0.2300, -0.4150

In [8]:
bias1 = nn.Parameter(torch.zeros(num_filters))
bias1

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [9]:
x = torch.rand(3, 10)

out1 = F.linear(input=x, weight=weights1, bias=bias1)
# y = xW^T + b

out1

tensor([[-0.2167,  0.8676, -0.2396,  0.5526, -0.6963,  0.5132,  0.4324, -0.6328,
          0.1286, -0.2916],
        [-0.1041,  0.0660,  0.1271,  0.9163, -1.1435,  0.0642, -0.0631, -0.3692,
         -0.2266,  0.2151],
        [-0.0560,  0.7158,  0.1555,  0.4402, -0.6985,  0.2765, -0.0326, -0.1622,
          0.3543,  0.0678]], grad_fn=<AddmmBackward>)

In [10]:
weights2 = nn.Parameter(torch.ones(2, 10))
nn.init.xavier_uniform_(weights2)
out2 = F.linear(input=out1, weight=weights2)
# y = x*W^T + b

out2

tensor([[ 0.0426, -0.0874],
        [-0.1027,  0.2940],
        [ 0.1430, -0.0512]], grad_fn=<MmBackward>)

In [11]:
out3 = F.relu_(out2)
out3

tensor([[0.0426, 0.0000],
        [0.0000, 0.2940],
        [0.1430, 0.0000]], grad_fn=<ReluBackward1>)

# 2. LSTM으로 시도
### 안되면 conv1d
### https://sanghyu.tistory.com/52

In [14]:
x1 = torch.tensor([1, 2, 3, 4,5,6,7,8,9,10])
x2 = torch.tensor([5, 6, 7, 8,11,12,13,14,15,16])
x3 = torch.tensor([9, 10, 11 ,12,16,17,18,19,20,21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

inputs

a len ==  3


tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  5.,  6.,  7.,  8.,
        11., 12., 13., 14., 15., 16.,  9., 10., 11., 12., 16., 17., 18., 19.,
        20., 21.])

In [15]:
D_in = 10 # input_size: input의 feature dimension을 넣어주어야 한다. time step이 아니라 input feature dimension!
H= 2     # 내부에서 어떤 feature dimension으로 바꿔주고 싶은지를 넣어주면 된다.
D_out = 2

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H),
             torch.nn.ReLU(),
             torch.nn.LSTM(H, D_out))

meta_adaptive_curriculum = nn.Sequential(
            torch.nn.LSTM(D_in, H))

meta_adaptive_curriculum(inputs)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [None]:
batch_size = 1
sequence_length = 3
input_features = 10
output_features = 5

# produce random data
x = torch.randn(batch_size, sequence_length, input_features)
x

In [None]:
lstm_layer = nn.LSTM(
    input_size=input_features,
    hidden_size=output_features,
    batch_first=True
)

expected_output_shape = (batch_size, sequence_length, output_features)

x_out, _ = lstm_layer(x)

print(x_out.shape == expected_output_shape)

# 3. Conv1D
### https://kaya-dev.tistory.com/6

In [24]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)

print("input shape == ", inputs.shape)

inputs

a len ==  3
input shape ==  torch.Size([3, 10])


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [30]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=2, kernel_size=2))
meta_adaptive_curriculum(inputs)

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [2, 3, 2], but got 2-dimensional input of size [3, 10] instead

In [31]:
meta_adaptive_curriculum = nn.Sequential(
                    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1),
                    nn.Linear(10,4),
                    nn.Sigmoid())

a = meta_adaptive_curriculum(inputs)
#int(torch.argmax(a))
print(a)

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [1, 3, 1], but got 2-dimensional input of size [3, 10] instead

In [26]:
meta_adaptive_curriculum = nn.Sequential(
    nn.Conv1d(in_channels=3, out_channels=1, kernel_size=1),
    nn.Linear(9,4),
    nn.Linear(4,1),
    nn.Sigmoid())

#int(meta_adaptive_curriculum(inputs) * 5)
a = meta_adaptive_curriculum(inputs)
a = int(a * 5)
print(a)
#int(torch.argmax(a))

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [1, 3, 1], but got 2-dimensional input of size [3, 10] instead

# 4. Linear

In [27]:
x1 = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1])
x2 = torch.tensor([0.5, 0.6, 0.07, 0.8,0.11,0.12,0.13,0.14,0.15,0.16])
x3 = torch.tensor([0.9, 0.10, 0.11 , 0.12, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21])

a = []
a.append(x1)
a.append(x2)
a.append(x3)

print("a len == ",len(a))
inputs = torch.stack(a)
inputs = inputs.to(torch.float32)
inputs

a len ==  3


tensor([[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
         0.1000],
        [0.5000, 0.6000, 0.0700, 0.8000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500,
         0.1600],
        [0.9000, 0.1000, 0.1100, 0.1200, 0.1600, 0.1700, 0.1800, 0.1900, 0.2000,
         0.2100]])

In [28]:
adaptive_curriculum = nn.Sequential(
                nn.Linear(3, 10))


a = meta_adaptive_curriculum(inputs)
print(a)
int(torch.argmax(a))

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [1, 3, 1], but got 2-dimensional input of size [3, 10] instead