In [1]:
import numpy as np
import torch

## 构造特征

In [2]:
feature1 = np.array([[24.,  1., 27.,  2.],
                    [25.,  2., 32.,  1.]], dtype=np.float32)
print(feature1)

[[24.  1. 27.  2.]
 [25.  2. 32.  1.]]


In [3]:
feature2 = np.random.randn(2, 30).astype(np.float32)
print(feature2)

[[-0.17544515  0.7042147  -0.3920079  -0.15669443 -0.32453987 -0.11883102
   0.21864876 -0.45101166 -0.01959862  0.33695012 -0.56109595 -0.59572214
   0.8882939  -0.10159255 -0.08837802  1.5327622   1.2553723  -0.88887525
  -0.8803338   0.214064   -0.49087727 -0.7555549   0.08235431 -0.66871864
  -1.6715586  -1.5135074  -0.12584263  0.43865064  0.68406886 -1.04486   ]
 [-0.5781698  -1.023867    1.2425443   0.6189289  -0.57411844 -1.3952923
  -0.7495694   0.8037199   0.9653607  -0.74449235  0.3668502   0.7429077
  -0.96255165 -0.4425461  -0.24425691 -0.2545424  -1.6711973  -1.3323901
  -0.09830271  0.09843035 -0.89755195 -0.1382832  -0.584208   -0.39677384
   0.20301136  0.03540502 -1.4764647  -1.4337215  -0.10273612  0.06851932]]


In [4]:
feature3 = np.array([[1., 0., 0., 1., 0., 1., 0., 1.],
                     [0., 1., 0., 1., 0., 1., 0., 1.]], dtype=np.float32)
print(feature3)

[[1. 0. 0. 1. 0. 1. 0. 1.]
 [0. 1. 0. 1. 0. 1. 0. 1.]]


In [5]:
feature4 = np.array([[0., 3., 2.]], dtype=np.float32)
print(feature4)

[[0. 3. 2.]]


In [6]:
obs = {'feature1': feature1,
       'feature2': feature2,
       'feature3': feature3,
       'feature4': feature4}
print(obs)

{'feature1': array([[24.,  1., 27.,  2.],
       [25.,  2., 32.,  1.]], dtype=float32), 'feature2': array([[-0.17544515,  0.7042147 , -0.3920079 , -0.15669443, -0.32453987,
        -0.11883102,  0.21864876, -0.45101166, -0.01959862,  0.33695012,
        -0.56109595, -0.59572214,  0.8882939 , -0.10159255, -0.08837802,
         1.5327622 ,  1.2553723 , -0.88887525, -0.8803338 ,  0.214064  ,
        -0.49087727, -0.7555549 ,  0.08235431, -0.66871864, -1.6715586 ,
        -1.5135074 , -0.12584263,  0.43865064,  0.68406886, -1.04486   ],
       [-0.5781698 , -1.023867  ,  1.2425443 ,  0.6189289 , -0.57411844,
        -1.3952923 , -0.7495694 ,  0.8037199 ,  0.9653607 , -0.74449235,
         0.3668502 ,  0.7429077 , -0.96255165, -0.4425461 , -0.24425691,
        -0.2545424 , -1.6711973 , -1.3323901 , -0.09830271,  0.09843035,
        -0.89755195, -0.1382832 , -0.584208  , -0.39677384,  0.20301136,
         0.03540502, -1.4764647 , -1.4337215 , -0.10273612,  0.06851932]],
      dtype=float32),

## 对字典数据进行升维与降维

通常，对于单个样本，我们期望其`batch size`为`1`，所以需要升高一个维度。

In [7]:
def unsqueeze(obs):
    if isinstance(obs, dict):
        return {k: unsqueeze(v) for k, v in obs.items()}
    return obs[np.newaxis, ...]

In [8]:
print(obs['feature1'].shape)
print(unsqueeze(obs)['feature1'].shape)

(2, 4)
(1, 2, 4)


In [9]:
def squeeze(action):
    if isinstance(action, dict):
        return {k: squeeze(v) for k, v in action.items()}
    return np.squeeze(action, axis=0)

In [10]:
print(unsqueeze(obs)['feature1'].shape)
print(squeeze(unsqueeze(obs))['feature1'].shape)

(1, 2, 4)
(2, 4)


## 将数据转成Tensor

在数据经过网络之前，我们期望其能够将数据转换成`Tensor`的格式，所以对于复杂的，像字典包含的数据，我们可以采用如下函数的方式对其进行升维与降维。

In [11]:
def to_tensor(obj, cuda=True):
    if isinstance(obj, list) or isinstance(obj, tuple):
        return type(obj)(to_tensor(o, cuda=cuda) for o in obj)
    elif isinstance(obj, dict):
        return {k : to_tensor(v, cuda=cuda) for k, v in obj.items()}
    elif isinstance(obj, np.ndarray):
        return to_tensor(torch.as_tensor(obj), cuda=cuda)
    else:
        assert isinstance(obj, torch.Tensor) or isinstance(obj, torch.nn.Module)
        return obj.cuda() if torch.cuda.is_available() and cuda else obj

In [12]:
test_data1 = [[np.array([1, 2]), np.array([3, 4])]]
print(to_tensor(test_data1))

[[tensor([1, 2], dtype=torch.int32), tensor([3, 4], dtype=torch.int32)]]


In [13]:
test_data2 = obs
print(to_tensor(obs))

{'feature1': tensor([[24.,  1., 27.,  2.],
        [25.,  2., 32.,  1.]]), 'feature2': tensor([[-0.1754,  0.7042, -0.3920, -0.1567, -0.3245, -0.1188,  0.2186, -0.4510,
         -0.0196,  0.3370, -0.5611, -0.5957,  0.8883, -0.1016, -0.0884,  1.5328,
          1.2554, -0.8889, -0.8803,  0.2141, -0.4909, -0.7556,  0.0824, -0.6687,
         -1.6716, -1.5135, -0.1258,  0.4387,  0.6841, -1.0449],
        [-0.5782, -1.0239,  1.2425,  0.6189, -0.5741, -1.3953, -0.7496,  0.8037,
          0.9654, -0.7445,  0.3669,  0.7429, -0.9626, -0.4425, -0.2443, -0.2545,
         -1.6712, -1.3324, -0.0983,  0.0984, -0.8976, -0.1383, -0.5842, -0.3968,
          0.2030,  0.0354, -1.4765, -1.4337, -0.1027,  0.0685]]), 'feature3': tensor([[1., 0., 0., 1., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 0., 1.]]), 'feature4': tensor([[0., 3., 2.]])}


## 字典类型的obs编码

先定义一个基础的编码类，能够将不同长度的特征进行编码。

In [18]:
batch_obs = unsqueeze(obs)
batch_obs_tensor = to_tensor(batch_obs)
for k, v in batch_obs_tensor.items():
    print(k, v.size())

feature1 torch.Size([1, 2, 4])
feature2 torch.Size([1, 2, 30])
feature3 torch.Size([1, 2, 8])
feature4 torch.Size([1, 1, 3])


In [15]:
import torch.nn as nn
class VectorEncoder(nn.Module):
    def __init__(self, name, input_size, output_size, max_len=None):
        super(VectorEncoder, self).__init__()
        self.name = name
        self.max_len = max_len
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(input_size, output_size),
            torch.nn.ReLU(),
            torch.nn.Linear(output_size, output_size),
            torch.nn.ReLU(),
        )
    
    def forward(self, x):
        return self.encoder(x)

In [16]:
feature_size = 128
encoders = {
    'feature1' : VectorEncoder(name = 'feature1', input_size = 4, output_size = feature_size, max_len = 2),
    'feature2' : VectorEncoder(name = 'feature2', input_size = 30, output_size = feature_size, max_len = 2),
    'feature3' : VectorEncoder(name = 'feature3', input_size = 8, output_size = feature_size, max_len = 2),
    'feature4' : VectorEncoder(name = 'feature4', input_size = 3, output_size = feature_size, max_len = 1)
}

In [17]:
features = {feature_name : encoder(batch_obs_tensor[feature_name]) for feature_name, encoder in encoders.items()}
for k, v in features.items():
    print(k, v.size())

feature1 torch.Size([1, 2, 128])
feature2 torch.Size([1, 2, 128])
feature3 torch.Size([1, 2, 128])
feature4 torch.Size([1, 1, 128])


In [20]:
print(list(features.values()))

[tensor([[[ 3.3870,  0.0000,  2.1932,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           2.3653,  1.4721,  0.0000,  6.1238,  8.7693,  0.0000,  7.3527,
           0.0000,  0.0000,  0.3398,  0.6431,  4.4999,  0.0000,  7.5788,
           0.0000,  7.9202,  3.0125,  1.8759,  0.0000,  1.9627,  0.7405,
           0.0000,  0.0000, 12.3629,  0.0000,  0.0000,  0.0000,  0.0000,
           2.7324,  0.0000,  6.7563,  0.0000,  0.7843,  7.4080,  0.4097,
           6.0832,  0.0000,  0.0000,  0.6730,  0.0000,  0.0000,  0.0000,
           0.0000,  5.4909,  0.0000,  0.0000,  2.6584,  0.0000,  0.9764,
           0.0000,  5.0096,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.2131,  3.3992,  4.3637,  0.0000,  0.0000,  0.0000,  1.0389,
           5.6287,  4.2520,  0.0000,  3.5529,  1.0182,  0.0000,  5.2904,
           2.8096,  4.3295,  0.0000,  0.0000,  0.0000,  0.0000,  2.2601,
           3.2063,  0.8467,  0.0000,  0.9818,  4.7

特征独立编码到同一维度之后可以考虑将其`cat`在一起：

In [21]:
feature_seq = torch.cat(list(features.values()), dim=1)
print(feature_seq.size())

torch.Size([1, 7, 128])


## Transformer

`Transformer`为主体网络结构：

In [22]:
class MultiheadAttentionImp(torch.nn.MultiheadAttention):
    def forward(self, query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None):
        attn_output, self.attn_output_weights = super().forward(query, key, value, key_padding_mask, need_weights, attn_mask)
        return attn_output, self.attn_output_weights

In [25]:
import torch.nn.functional as F
class TransformerEncoderLayerImp(torch.nn.TransformerEncoderLayer):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=F.relu, layer_norm_eps=1e-5, batch_first=False, norm_first=False, device=None, dtype=None) -> None:
        super().__init__(d_model, nhead, dim_feedforward, dropout, activation, layer_norm_eps, batch_first, norm_first, device, dtype)
        factory_kwargs = {'device': device, 'dtype': dtype}
        self.self_attn = MultiheadAttentionImp(d_model, nhead, dropout=dropout, batch_first=batch_first, **factory_kwargs)

    # self-attention block
    def _sa_block(self, x, attn_mask, key_padding_mask):
        x = self.self_attn(x, x, x, attn_mask=attn_mask, key_padding_mask=key_padding_mask, need_weights=True)[0]
        return self.dropout1(x)

In [27]:
transformer_layer = torch.nn.TransformerEncoder(
                    TransformerEncoderLayerImp(feature_size, nhead=4, dim_feedforward=feature_size, batch_first=True, dropout=0),
                    num_layers = 3)

In [29]:
feature_seq = transformer_layer(feature_seq)
print(feature_seq.size())

torch.Size([1, 7, 128])


同样可以获取到`Transformer`最后一层输出的`attention weight`：

In [30]:
print(transformer_layer.layers[-1].self_attn.attn_output_weights)

tensor([[[0.1891, 0.1906, 0.1218, 0.1229, 0.1257, 0.1205, 0.1294],
         [0.1903, 0.1917, 0.1210, 0.1224, 0.1251, 0.1200, 0.1294],
         [0.1682, 0.1707, 0.1285, 0.1365, 0.1335, 0.1228, 0.1398],
         [0.1706, 0.1734, 0.1253, 0.1333, 0.1330, 0.1217, 0.1426],
         [0.1782, 0.1809, 0.1230, 0.1293, 0.1297, 0.1201, 0.1389],
         [0.1746, 0.1773, 0.1253, 0.1313, 0.1310, 0.1214, 0.1391],
         [0.1730, 0.1769, 0.1276, 0.1332, 0.1300, 0.1202, 0.1391]]],
       grad_fn=<DivBackward0>)


## enhance_fragment

## learn