In [1]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import time
import math
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import os
import tensorflow as tf

# 位置编码

In [3]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=50):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        return x + self.pe[:x.size(0), :]

# 模型结构

In [4]:
#### model stracture ####
class TransAm(nn.Module):
    def __init__(self, feature_size=512, num_layers=1, dropout=0):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        
        self.pos_encoder = PositionalEncoding(feature_size)
        
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=8, dropout=dropout)
        
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        
        self.decoder = nn.Linear(feature_size, 1)
        
        self.init_weights() # nn.Linear初始化
        
        self.src_key_padding_mask = None #解码器的mask是空 可以直接删除

    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src,src_key_padding_mask):
        src_key_padding_mask = src_key_padding_mask.bool()
        
        src = self.pos_encoder(src) # 位置编码 
        
        output = self.transformer_encoder(src,self.src_mask,src_key_padding_mask) # encode部分
        
        output = self.decoder(output) # decoder
        
        return output

# 这个是计算训练的损失

In [5]:
def train(data,targets,padd_mask):
    model.train()  #需要在训练时添加model.train()，在测试时添加model.eval()。其中model.train()是保证BN层用每一批数据的均值和方差
    total_loss = 0.
    start_time = time.time()

#     for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
#         data, targets, key_padding_mask = get_batch(train_data, i, batch_size)
# 每循环一次进来得矩阵结构：data：torch.Size([45, 50, 1])   targets：torch.Size([45, 50, 1])   key_padding_mask：torch.Size([50, 45])
    optimizer.zero_grad()
    
    data1 = torch.tensor(data,dtype=torch.float32).to(device)
    key_padding_mask1 = torch.tensor(padd_mask,dtype=torch.float32).to(device)
#     print(data.dtype)
#     print(key_padding_mask.dtype)
    
    output = model(data1,key_padding_mask1) # 传入模型得是 原始得数据 和 key_padding_mask

#     if calculate_loss_over_all_values: # 前边定义了 ：calculate_loss_over_all_values = False
#         loss = criterion(output, targets)
#     else:
    loss = criterion(output[-output_window:], targets[-output_window:])
#     print("train",loss)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()
    total_loss += loss.item()

# 计算在测试集上计算损失的函数

In [6]:
def evaluate(model,data,target,padd_mask):
    model.eval()  # 在对模型进行评估时，应该配合使用with torch.no_grad() 与 model.eval()：
    total_loss = 0.
    eval_batch_size = 50
    with torch.no_grad():
            output = model(data,padd_mask)
#             if calculate_loss_over_all_values: #原本就是设置的flase
#                 total_loss += len(data[0]) * criterion(output, target).cpu().item()
#             else:
            total_loss += len(data[0]) * criterion(output[-output_window:],targets[-output_window:]).cpu().item()
    return total_loss #/ len(data_source)


In [7]:
# batch_size = 5  # batch size
torch.set_default_tensor_type(torch.DoubleTensor) # 直接设置创建的tensor类型默认为Double，如果不设置的话自动默认为float类型。
input_window = 50
output_window = 5

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = TransAm().to(device)
criterion = nn.MSELoss().to(device)
lr = 0.00001
# optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.96)

best_val_loss = float("inf")
epochs = 10 # The number of epochs
best_model = None

# 造数据：

# 划分训练集和测试集 然后各自构造掩码 然后在进行训练的时候一边测试一边训练 

# 输入数据的维度是 句子长度 batch大小  每个单词的维度 ， padding矩阵 是1的部分是mask掉的 ，不是0的部分是要训练可见的

In [8]:
data=np.random.randn(2250)
data=torch.tensor(data.reshape([50, 45, 1])).to(device)
print(data.shape)

targets=np.random.randn(2250)
targets=torch.tensor(targets.reshape([50, 45, 1])).to(device)
print(targets.shape)

# numpy.zeros(3)
key_padding_mask=np.zeros(2250)
key_padding_mask=torch.tensor(key_padding_mask.reshape([45,50])).to(device)
print(key_padding_mask.shape)



test_data=np.random.randn(2250)
test_data=torch.tensor(test_data.reshape([50, 45, 1])).to(device)
print(test_data.shape)

test_targets=np.random.randn(2250)
test_targets=torch.tensor(test_targets.reshape([50, 45, 1])).to(device)
print(test_targets.shape)



test_key_padding_mask=np.zeros(2250)
test_key_padding_mask=torch.tensor(test_key_padding_mask.reshape([45,50])).to(device)
print(test_key_padding_mask.shape)

torch.Size([50, 45, 1])
torch.Size([50, 45, 1])
torch.Size([45, 50])
torch.Size([50, 45, 1])
torch.Size([50, 45, 1])
torch.Size([45, 50])


In [9]:
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(data,targets,key_padding_mask)
    train_loss = evaluate(model,data,targets,key_padding_mask)
    val_loss = evaluate(model,test_data,test_targets,test_key_padding_mask) 
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | train loss {:5.5f} '.format(
        epoch, (time.time() - epoch_start_time),
        val_loss, train_loss))  # , math.exp(val_loss) | valid ppl {:8.2f}
    print('-' * 89)
    scheduler.step()

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


-----------------------------------------------------------------------------------------
| end of epoch   1 | time:  0.96s | valid loss 149.33289 | train loss 152.49291 
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------
| end of epoch   2 | time:  0.48s | valid loss 108.80559 | train loss 111.87309 
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------
| end of epoch   3 | time:  0.47s | valid loss 79.94246 | train loss 82.45022 
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------
| end of epoch   4 | time:  0.46s | valid loss 63.24713 | train loss 64.80456 
--------------------------------------------------