In [26]:
import torch
import torch.nn as nn

class SimpleTransformerConcat(nn.Module):
    def __init__(self, d1, d2, nhead, num_layers):
        super(SimpleTransformerConcat, self).__init__()
        self.d1 = d1
        self.d2 = d2
        self.concat_dim = d1 + d2

        # Transformerのエンコーダ層
        encoder_layer = nn.TransformerEncoderLayer(d_model=self.concat_dim, nhead=nhead)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    def forward(self, tensor1, tensor2):
        """
        Args:
            tensor1: [batch_size, seq_len1, d1] のテンソル
            tensor2: [batch_size, seq_len2, d2] のテンソル
        Returns:
            [batch_size, seq_len1 + seq_len2, d1 + d2] のテンソル
        """
        # テンソルを結合
        tensor1 = torch.nn.functional.pad(tensor1, (0, self.d2))  # tensor1にd2次元のゼロを追加
        tensor2 = torch.nn.functional.pad(tensor2, (0, self.d1))  # tensor2にd1次元のゼロを追加
        combined = torch.cat((tensor1, tensor2), dim=1)  # シーケンス長方向で結合

        # Transformerに通す
        output = self.transformer(combined)
        return output

# ダミーデータを使ったテスト
batch_size = 2
seq_len1 = 3
seq_len2 = 2
d1 = 256
d2 = 384

# モデル定義
model = SimpleTransformerConcat(d1=d1, d2=d2, nhead=8, num_layers=2)

# 入力テンソル
tensor1 = torch.rand(batch_size, seq_len1, d1)  # [batch_size, seq_len1, d1]
tensor2 = torch.rand(batch_size, seq_len2, d2)  # [batch_size, seq_len2, d2]

# モデル実行
output = model(tensor1, tensor2)

print(f"Input tensor1 shape: {tensor1.shape}")
print(f"Input tensor2 shape: {tensor2.shape}")
print(f"Output shape: {output.shape}")


Input tensor1 shape: torch.Size([2, 3, 256])
Input tensor2 shape: torch.Size([2, 2, 384])
Output shape: torch.Size([2, 5, 640])


In [None]:
fs = torch.load('/home/kfujii/vitruvion/outputs/2024-10-21/14-40-41/features_labels.pth')

In [24]:
from icecream import ic
ic(fs.keys())
ic(fs['features'][0].shape)
ic(fs['features'][0].shape)

ic| fs.keys():

 dict_keys(['features', 'labels'])
ic| fs['features'][0].shape: torch.Size([146, 256])
ic| fs['features'][0].shape: torch.Size([146, 256])


torch.Size([146, 256])

In [None]:
# ダミーデータを使ったテスト
batch_size = 1
seq_len1 = 3
seq_len2 = 2
d1 = 256
d2 = 256

# モデル定義
model = SimpleTransformerConcat(d1=d1, d2=d2, nhead=8, num_layers=2)

# 入力テンソル
tensor1 = torch.rand(batch_size, seq_len1, d1)  # [batch_size, seq_len1, d1]
tensor2 = torch.rand(batch_size, seq_len2, d2)  # [batch_size, seq_len2, d2]

# モデル実行
output = model(tensor1, tensor2)

print(f"Input tensor1 shape: {tensor1.shape}")
print(f"Input tensor2 shape: {tensor2.shape}")
print(f"Output shape: {output.shape}")

AssertionError: was expecting embedding dimension of 512, but got 1024