In [15]:
import os
import nnmnkwii.datasets.jvs
from nnmnkwii.io import hts
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchaudio
import pytorch_lightning as pl
import itertools
from matplotlib import pyplot as plt

In [19]:
from pytorch_lightning.utilities.parsing import AttributeDict

In [20]:
class SingleArgModel(pl.LightningModule):
    """Minimal LightningModule that records one config object as its hyperparameters."""

    def __init__(self, params):
        super().__init__()
        # Hand the config object over explicitly instead of letting Lightning
        # collect the __init__ arguments on its own.
        self.save_hyperparameters(params)

    def forward(self, *args, **kwargs):
        """Placeholder forward pass: accepts any arguments and returns None."""
        return None

In [54]:
# Small nested config used to try out attribute-style access.
test = AttributeDict(key1=2)

In [95]:
# Top-level config; `key1` holds the nested AttributeDict defined in `test`.
attr = AttributeDict(number_of_layers=2, key1=test, key2='abc')

In [56]:
# Nested lookup; item access here is equivalent to `attr.key1.key1`.
attr["key1"]["key1"]

2

In [96]:
class GLU(nn.Module):
    """Gated linear unit over two separately computed tensors.

    Unlike ``torch.nn.GLU`` (which splits a single tensor in half), this
    module takes the value branch and the gate branch as two inputs.
    """

    def forward(self, x1, x2):
        """Return ``x1`` gated element-wise by ``sigmoid(x2)``.

        Args:
            x1: value tensor.
            x2: gate tensor, broadcastable against ``x1``.

        Returns:
            Tensor ``x1 * sigmoid(x2)``.
        """
        # `self` was missing from the original signature, which made every
        # call through nn.Module.__call__ fail with a TypeError.
        return x1 * torch.sigmoid(x2)

In [148]:
# Template for one block's settings: every slot carries a type `name`
# (None = unset) and the keyword `params` for that component.
# The original literal listed 'bn' twice, so the second entry silently
# overwrote the first; the fourth slot is taken to be 'drop', matching the
# layer / bn / lr / drop layout described in the Block docstring.
block_params = AttributeDict({
    slot: AttributeDict({
        'name': None,
        'params': AttributeDict(),
    })
    for slot in ('layer', 'bn', 'lr', 'drop')
})

In [162]:
# UttrEnc setting

NUM_LAYERS = 4

# Slots configured per block. The original literal repeated 'bn', so the
# second entry overwrote the first; the fourth slot is taken to be 'drop',
# matching the layer / bn / lr / drop layout in the Block docstring.
BLOCK_SLOTS = ('layer', 'bn', 'lr', 'drop')

# CHANNELS[k] is the input width of block k+1 and CHANNELS[k+1] its output
# width, so consecutive blocks always agree on the channel count.
CHANNELS = [1, 16, 32, 32, 16]
assert len(CHANNELS) == NUM_LAYERS + 1, "need one channel width per block boundary"

params = AttributeDict({
    f"block{i+1}": AttributeDict({
        slot: AttributeDict({
            'name': None,
            'params': AttributeDict(),
        })
        for slot in BLOCK_SLOTS
    })
    for i in range(NUM_LAYERS)
})

for i in range(NUM_LAYERS):
    layer_cfg = params[f"block{i+1}"].layer
    layer_cfg.name = "conv"
    layer_cfg.params.in_channels = CHANNELS[i]
    layer_cfg.params.out_channels = CHANNELS[i + 1]

# NOTE(review): nn.Conv2d also requires `kernel_size`, which is not set in
# this cell — confirm it is supplied before the blocks are built.

In [None]:
class Block(pl.LightningModule):
    """One network stage: layer -> optional batch norm -> activation.

    Expected ``params`` layout::

        layer
            name    "conv" (the only type implemented; "tconv" and
                    "linear" are planned but not built here)
            params  keyword arguments forwarded to ``nn.Conv2d``
        bn          None / {"name": None} disables batch norm,
                    anything else enables ``nn.BatchNorm2d``
        lr          None / {"name": None} -> identity activation,
                    "glu" / {"name": "glu"} -> gated linear unit
        drop        reserved, currently unused

    ``bn`` and ``lr`` may be given either as a plain value or as a mapping
    with a ``name`` entry.

    Raises:
        ValueError: if the layer type or the activation is not supported.
    """

    def __init__(self, params):
        super().__init__()

        layer_kwargs = params["layer"]["params"]
        layer_name = params["layer"]["name"]
        bn_name = self._spec_name(params.get("bn"))
        lr_name = self._spec_name(params.get("lr"))

        # Fail at construction time instead of leaving attributes undefined
        # and crashing later inside forward().
        if layer_name != "conv":
            raise ValueError(
                f"unsupported layer type {layer_name!r}; only 'conv' is implemented"
            )
        if lr_name not in (None, "glu"):
            raise ValueError(
                f"unsupported activation {lr_name!r}; expected None or 'glu'"
            )

        def make_bn():
            # Identity keeps forward() uniform when batch norm is disabled.
            if bn_name is None:
                return nn.Identity()
            return nn.BatchNorm2d(layer_kwargs["out_channels"])

        # Stored on the instance so forward() does not depend on any
        # notebook-level global.
        self.use_glu = lr_name == "glu"

        if self.use_glu:
            # GLU needs two parallel branches: values (a) and gates (b).
            self.layer_a = nn.Conv2d(**layer_kwargs)
            self.layer_b = nn.Conv2d(**layer_kwargs)
            self.bn_a = make_bn()
            self.bn_b = make_bn()
            self.lr = GLU()
        else:
            self.layer = nn.Conv2d(**layer_kwargs)
            self.bn = make_bn()
            self.lr = nn.Identity()

    @staticmethod
    def _spec_name(spec):
        """Return the type name from a spec given as a plain value or a ``{"name": ...}`` mapping."""
        if isinstance(spec, dict):
            return spec.get("name")
        return spec

    def forward(self, x):
        """Apply layer, batch norm and activation to ``x`` and return the result."""
        if self.use_glu:
            values = self.bn_a(self.layer_a(x))
            gates = self.bn_b(self.layer_b(x))
            return self.lr(values, gates)

        return self.lr(self.bn(self.layer(x)))

In [None]:
class Model(pl.LightningModule):
    """Top-level model; at present only the speech encoder is instantiated.

    Args:
        speechencoder: mapping of per-block settings; one ``Block`` is built
            for each entry, in insertion order.
        faceencoder: settings kept in ``hparams`` only (no module built yet).
        voicedencoder: settings kept in ``hparams`` only (no module built yet).
        speechdecoder: settings kept in ``hparams`` only (no module built yet).
        facedecoder: settings kept in ``hparams`` only (no module built yet).
    """

    def __init__(self, speechencoder, faceencoder=None, voicedencoder=None,
                 speechdecoder=None, facedecoder=None):
        super().__init__()
        self.save_hyperparameters()
        # Build from the constructor argument rather than a notebook global,
        # and take the block count from the config instead of a fixed 4.
        self.speechencoder = nn.ModuleDict({
            f"Block{i+1}": Block(block_cfg)
            for i, block_cfg in enumerate(speechencoder.values())
        })

In [None]:
# Instantiate the model, passing the UttrEnc block settings as the first argument.
a = Model(params)

In [None]:
# Display the module tree of the model built above.
a

Model(
  (Encoder): ModuleDict(
    (BLock1): Block(
      (layer_a): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))
      (layer_b): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))
      (bn_a): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn_b): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (lr): GLU()
    )
    (BLock2): Block(
      (layer_a): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))
      (layer_b): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))
      (bn_a): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn_b): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (lr): GLU()
    )
    (BLock3): Block(
      (layer_a): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))
      (layer_b): Conv2d(1, 8, kernel_size=(3, 9), stride=(2, 2), padding=(1, 3))

In [48]:
# Wrap the demo config so its entries are reachable through `a.hparams`.
a = SingleArgModel(attr)

In [49]:
# Print the hyperparameters stored under key1 .. keyN, N = number_of_layers.
for i in range(a.hparams.number_of_layers):
    hparam_key = f"key{i + 1}"
    print(a.hparams[hparam_key])

1
abc
