In [1]:
import os
import sys
import torch
from torch import nn
from pathlib import Path
from torch.nn import functional as F

# Append the directory two levels above the current working directory to the
# import search path (presumably the repository root that holds `pactis`).
path = Path.cwd().parent.parent
sys.path.append(str(path))

from dataclasses import dataclass, asdict

from pactis.model.core.adapter import InputAdapter
from pactis.model.core.config import LatentQueryConfig, CrossAttentionLayerConfig, SelfAttentionBlockConfig, PerceiverEncoderConfig, PerceiverDecoderConfig
from pactis.model.core.encoder import PerceiverIO, PerceiverEncoder, PerceiverDecoder
from pactis.model.core.decoder import AttentionalCopula

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Encoder configuration: 20 latents of width 64, one cross-attention layer and
# eight self-attention blocks of four layers each.
# NOTE(review): InputAdapter(128) presumably fixes the input feature width - confirm.
EncoderConfig = PerceiverEncoderConfig.create_from_config(
    InputAdapter(128),
    LatentQueryConfig(num_latents=20, num_latent_dim=64),
    CrossAttentionLayerConfig(
        num_heads=8,
        num_q_input_dim=64,
        num_kv_input_dim=128,
        num_qk_dim=64,
        num_v_dim=64,
    ),
    SelfAttentionBlockConfig(num_layers=4, num_heads=8, num_dim=64),
    num_cross_attn_layers=1,
    num_self_attn_blocks=8,
)

# Decoder configuration, built from a latent-query config and a cross-attention config.
DecoderConfig = PerceiverDecoderConfig.create_from_config(
    LatentQueryConfig(100, 128),
    CrossAttentionLayerConfig(8, 128, 64),
)

In [3]:
# Build the encoder and the decoder from their configs and move both to the GPU.
# NOTE(review): the device is hard-coded to "cuda", so this cell needs a CUDA device.
Encoder = PerceiverEncoder.from_config(EncoderConfig).to("cuda")
Decoder = PerceiverDecoder.from_config(DecoderConfig).to("cuda")

In [7]:
from einops import rearrange

# A single Adam optimizer over the parameters of both modules.
optimizer = torch.optim.Adam(
    [*Encoder.parameters(), *Decoder.parameters()],
    lr=1e-3,
)
x = torch.randn(200, 100, 128, device='cuda')
# with torch.no_grad():
for i in range(50):
    # Tile the encoder output 5x along dim 1 and 2x along dim 2 before feeding it
    # back in; the recorded final shape equals the input shape (200, 100, 128).
    x = Encoder(x).repeat(1, 5, 2)
x.shape

torch.Size([200, 100, 128])

In [7]:
# Lower-triangular block of +inf (zeros elsewhere), mirrored along dims 1 and 2.
torch.full((2, 4, 10), float("inf")).tril().flip(1, 2)

tensor([[[0., 0., 0., 0., 0., 0., inf, inf, inf, inf],
         [0., 0., 0., 0., 0., 0., 0., inf, inf, inf],
         [0., 0., 0., 0., 0., 0., 0., 0., inf, inf],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., inf]],

        [[0., 0., 0., 0., 0., 0., inf, inf, inf, inf],
         [0., 0., 0., 0., 0., 0., 0., inf, inf, inf],
         [0., 0., 0., 0., 0., 0., 0., 0., inf, inf],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., inf]]])

In [6]:
# NOTE(review): the output recorded under this cell is a tensor of infs, not
# "Hello World", so it appears to be stale from an earlier version of the cell.
print("Hello World")

tensor([[[inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf]],

        [[inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf],
         [inf, inf, inf, inf, inf, inf, inf, inf, inf, inf]]])

In [None]:
@dataclass
class AttentionalCopulaConfig:
    """Hyper-parameters that are unpacked as keyword arguments into ``AttentionalCopula``."""

    input_dim: int = 15
    attn_heads: int = 5
    attn_dim: int = 16
    attn_layers: int = 3
    mlp_dim: int = 32
    mlp_layers: int = 3
    resolution: int = 10

    @property
    def dict(self):
        """Return all fields as a plain ``{field name: value}`` dictionary."""
        field_values = asdict(self)
        return field_values
# Use the GPU when one is available and fall back to the CPU otherwise.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Instantiate the copula with the default hyper-parameters on the chosen device.
config = AttentionalCopulaConfig()
model = AttentionalCopula(**config.dict).to(device)

In [None]:
encoded = torch.randn(100, 5, 20, 15, device=device)
true_u = torch.rand(100, 5, 20, device=device, dtype=torch.float)
# 20-entry mask: ones everywhere except three zeros at positions 8-10.
# NOTE(review): unlike the two tensors above, the mask is created on the CPU -
# confirm that model.sample moves it to `device` itself.
mask = torch.Tensor([1] * 8 + [0] * 3 + [1] * 9)

# Print the same slice of the input and of the sampled result for a side-by-side look.
print(true_u[0, :, 7:])
print(model.sample(encoded, true_u, mask, device=device)[0, :, 7:])

In [None]:
# Attention module whose key and value inputs have different widths than the query.
model = nn.MultiheadAttention(
    embed_dim=15,
    num_heads=5,
    kdim=32,
    vdim=64,
    batch_first=True,
)
# List every parameter together with its shape.
for name, parameter in model.named_parameters():
    print(name, parameter.shape)
# Shape of the attention output (first element of the returned tuple).
model(
    torch.randn(10, 100, 15),
    torch.randn(10, 100, 32),
    torch.randn(10, 100, 64),
)[0].shape

In [None]:
# Join a `[:, :0]` slice and a `[:, 0:]` slice, each restricted to feature index 9.
# The two pieces have sizes 0 and 5 along dim 1 and agree on every other dim, so
# dim 1 is the only axis they can be concatenated on; the original `axis=3` raised
# a size-mismatch error.
# NOTE(review): assumes the goal was to stitch the dim-1 split back together - confirm.
torch.cat(
    (
        torch.randn((9, 5, 20, 15))[:, :0, :, 9:9 + 1],
        torch.randn((9, 5, 20, 15))[:, 0:, :, 9:9 + 1],
    ),
    dim=1,
).shape

In [None]:
A = torch.randn(10, 50)
# masked_fill_ takes a boolean mask as its first argument; the original call left
# that argument blank, which is a SyntaxError. Zeroing the negative entries is a
# stand-in. TODO(review): substitute the mask that was actually intended.
A.masked_fill_(A < 0, 0)

In [8]:
# One-element float tensor holding the negated largest finite float32 value.
A = torch.Tensor([-torch.finfo(torch.float32).max])

In [16]:
# Two ones followed by the lowest finite float32 value (equal to -finfo.max).
torch.Tensor([1, 1, torch.finfo(torch.float32).min])

tensor([ 1.0000e+00,  1.0000e+00, -3.4028e+38])

In [None]:
# One-element float tensor holding negative infinity; shown as the cell output.
B = torch.Tensor([-float("inf")])
B

In [None]:
from pactis.model.core.modules import MultiHeadAttention

model = MultiHeadAttention(5, 15, 20)
model(
    torch.randn(1, 50, 15),
    torch.randn(1, 100, 20),
    # Boolean mask of shape (1, 100): True for the first 99 positions, False for the last.
    # NOTE(review): whether True marks padding or valid positions is not visible here.
    pad_mask=torch.Tensor([[1] * 99 + [0]]).bool(),
)

In [None]:
# Standard deviation of the first batch element after layer-normalising the last dim.
nn.LayerNorm(50)(torch.rand(2, 3, 50))[0].std()

In [None]:
class Sequential(nn.Sequential):
    """``nn.Sequential`` variant whose layers may take and return several values.

    The first layer is called with every positional and keyword argument given to
    ``forward``. From then on, a result that is exactly a ``tuple`` is unpacked into
    the next layer's positional arguments, while any other result is passed on as a
    single argument. Keyword arguments are only ever given to the first layer.
    """

    def forward(self, *x, **kwargs):
        for position, layer in enumerate(self):
            if type(x) != tuple:
                # A single value came out of the previous layer: pass it on as is.
                x = layer(x)
            elif position == 0:
                x = layer(*x, **kwargs)
            else:
                x = layer(*x)
        return x
class mdl(Sequential):
    """Two stacked linear layers (5 -> 10 -> 15) built on the custom ``Sequential``."""

    def __init__(self):
        super().__init__(nn.Linear(5, 10), nn.Linear(10, 15))


# The cell originally ended with a dangling `model.` (an unfinished attribute
# access). That is a SyntaxError, which prevents the whole cell - including the
# class definitions above - from running, so it is kept only as a comment.
# model.


In [None]:
import os
import sys
import torch
from torch import nn
from pathlib import Path
from torch.nn import functional as F

# Append the directory two levels above the current working directory to the
# import search path (presumably the repository root that holds `pactis`).
path = Path.cwd().parent.parent
sys.path.append(str(path))

from pactis.model.core.decoder import AttentionalCopula
from pactis.model.core.modules import CrossAttentionLayer, Sequential, Residual
# Run the project's cross-attention layer on random inputs and show the output shape.
model = CrossAttentionLayer(5, 15, 20)
model(
    torch.randn(1, 50, 15),
    torch.randn(1, 100, 20),
    torch.randn(1, 100, 20),
).shape

In [None]:
# NOTE(review): this notebook defines `Sequential` in two cells and also imports a
# `Sequential` from pactis.model.core.modules; whichever ran last wins.
class Sequential(nn.Sequential):
    """``nn.Sequential`` variant whose layers may take and return several values.

    The first layer is called with every positional and keyword argument given to
    ``forward``. From then on, a result that is exactly a ``tuple`` is unpacked into
    the next layer's positional arguments, while any other result is passed on as a
    single argument. Keyword arguments are only ever given to the first layer.
    """

    def forward(self, *x, **kwargs):
        for position, layer in enumerate(self):
            if type(x) != tuple:
                # A single value came out of the previous layer: pass it on as is.
                x = layer(x)
            elif position == 0:
                x = layer(*x, **kwargs)
            else:
                x = layer(*x)
        return x

In [None]:
from torch.nn import MultiheadAttention
# Attention with 15-d queries, 10-d keys and 20-d values, chained into a linear layer.
model = MultiheadAttention(15, 5, kdim=10, vdim=20)
MODEL = Sequential(model, nn.Linear(15, 64))
# NOTE(review): nn.MultiheadAttention returns an (output, weights) tuple. If
# `Sequential` is the class defined in this notebook, that tuple is unpacked into
# nn.Linear as two positional arguments, which should raise a TypeError - confirm.
MODEL(torch.randn(1, 100, 15), torch.randn(1, 100, 10), torch.randn(1, 100, 20))[0].shape

In [None]:
from typing import Optional
from dataclasses import dataclass, asdict, KW_ONLY, field

@dataclass
class Config:
    """Base for config dataclasses; exposes the fields as a dictionary via ``dict``."""

    @property
    def dict(self):
        """Return every dataclass field of this instance as a ``{name: value}`` dict."""
        field_values = asdict(self)
        return field_values

@dataclass
class AConfig(Config):
    """Example config mixing positional, keyword-only and derived fields."""
    # `a` and `b` may be passed positionally.
    a: int = 10
    b: int = 20
    # Every field declared after this sentinel is keyword-only in __init__.
    _: KW_ONLY
    c: int = 30
    # Not an __init__ parameter; filled in by __post_init__.
    d: int = field(init=False)

    def __post_init__(self):
        """Derive ``d`` as the sum of ``a`` and ``b`` once the other fields are set."""
        self.d = self.a + self.b


# NOTE(review): this redefines the CrossAttentionLayerConfig name imported from
# pactis.model.core.config earlier in the notebook; the first field here is
# `n_heads`, while the earlier call passes `num_heads` - confirm which is intended.
@dataclass
class CrossAttentionLayerConfig(Config):
    """Settings for a cross-attention layer; the first three fields are required."""
    n_heads: int
    num_q_input_dim: int
    num_kv_input_dim: int
    num_qk_dim: Optional[int] = None
    num_v_dim: Optional[int] = None
    qkv_bias: bool = True
    out_bias: bool = True
    mlp_bias: bool = True
    widening_factor: int = 1
    dropout: float = 0.1
    batch_first: bool = True
    norm_first: bool = True
    # The default is the string "cuda" or "cpu" (not a torch.device object), chosen
    # once when the class body is executed.
    device: Optional[torch.device] = "cuda" if torch.cuda.is_available() else "cpu"
    dtype: Optional[torch.dtype] = None

# Fill in only the three required fields and display the resulting dictionary.
config = CrossAttentionLayerConfig(
    n_heads=5,
    num_q_input_dim=15,
    num_kv_input_dim=20,
)
config.dict

In [None]:
@dataclass
class CLASS:
    """Toy dataclass whose ``d`` field is derived from ``a`` and ``b``."""

    a: int = 10
    b: int = 20
    c: int = 30
    d: int = field(init=False)

    def __post_init__(self):
        # `d` is not an __init__ parameter; it is computed here instead.
        self.d = self.a + self.b

    @classmethod
    def from_else(cls, a, b, c):
        """Alternate constructor that builds an instance from ``a``, ``b`` and ``c``."""
        return cls(a=a, b=b, c=c)

# MODEL = CLASS()
# Build through the alternate constructor and display the instance.
MODEL = CLASS.from_else(a=1, b=2, c=3)
MODEL

In [None]:
@dataclass
class Config:
    _: KW_ONLY
    device: Optional[torch.device] = None
    dtype: Optional[torch.dtype] = None
    @property
    def dict(self):
        return asdict(self)
@dataclass
class batch_norm_order:
    """Mixin contributing two keyword-only boolean flags, both defaulting to True."""
    # Every field declared after this sentinel is keyword-only in __init__.
    _: KW_ONLY
    norm_first: bool = True
    batch_first: bool = True

# NOTE(review): this redefines the LatentQueryConfig name imported from
# pactis.model.core.config earlier in the notebook.
@dataclass
class LatentQueryConfig(Config, batch_norm_order):
    """Latent-query settings combined with the fields inherited from both bases."""
    # Required fields. These may follow the defaulted fields of the base classes
    # because those inherited fields are all keyword-only.
    num_latents: int
    num_latent_dim: int
    init_scale: float = 0.02
# Supply only the two required fields; everything else keeps its default.
config = LatentQueryConfig(num_latents=5, num_latent_dim=10)

In [2]:
from pactis.model.core.modules import SelfAttentionBlock
# from pactis.model.core.encoder import PerceiverEncoder, PerceiverDecoder
from pactis.model.core.config import PerceiverEncoderConfig, PerceiverDecoderConfig
from pactis.model.core.adapter import InputAdapter
# config = SelfAttentionBlockConfig(5, 15, dropout=0.1, device="cuda")
# model = SelfAttentionBlock(**config.dict)

In [5]:
import torch

# Replace the +inf entry with 0 in place, using an equality mask built from an identical tensor.
torch.Tensor([1, 1, float("inf")]).masked_fill_(
    torch.Tensor([1, 1, float("inf")]).eq(float("inf")),
    0,
)

  from .autonotebook import tqdm as notebook_tqdm


tensor([1., 1., 0.])

In [7]:
# A -inf logit receives zero probability; the two equal logits share the rest.
torch.softmax(torch.Tensor([1, 1, -float("inf")]), dim=0)

tensor([0.5000, 0.5000, 0.0000])

In [6]:
# Build an encoder directly from a positionally-constructed config and display its
# module tree (the recorded output below is the module repr).
# NOTE(review): the first cell of the notebook builds this config through
# PerceiverEncoderConfig.create_from_config with sub-config objects, whereas this
# cell passes (InputAdapter, 5, 16) positionally - confirm which signature is current.
config = PerceiverEncoderConfig(InputAdapter(15, 20), 5, 16)
PerceiverEncoder(**config.dict)

PerceiverEncoder(
  (latent_provider): LatentQuery()
  (input_adapter): InputAdapter()
  (cross_attn_1): CrossAttentionLayer(
    (cross_attn): CrossAttention(
      (attention): MultiHeadAttention(
        (q_proj): Linear(in_features=16, out_features=16, bias=True)
        (k_proj): Linear(in_features=15, out_features=16, bias=True)
        (v_proj): Linear(in_features=15, out_features=16, bias=True)
        (o_proj): Linear(in_features=16, out_features=16, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
      )
    )
    (mlp): Sequential(
      (0): Linear(in_features=16, out_features=16, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=16, out_features=16, bias=True)
    )
    (q_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (kv_norm): LayerNorm((15,), eps=1e-05, elementwise_affine=True)
    (attn_out_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (_ca_layer): Sequential(
      (0): Residual(
        (modul