In [1]:
import pandas as pd 
import numpy as np 
import time 

In [2]:
import collections
import random
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

In [3]:
from sklearn.preprocessing import MinMaxScaler

In [113]:
class SeqDataLoader:
    """Iterable over minibatches cut from a sequence by sequential partitioning.

    Every call to ``iter()`` starts a fresh pass with a new random offset.
    """

    def __init__(self, corpus, batch_size, num_steps, vocab_size):
        self.corpus = corpus
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.vocab_size = vocab_size

    def __iter__(self):
        return self.seq_data_iter_sequential(
            self.corpus, self.batch_size, self.num_steps, self.vocab_size)

    def seq_data_iter_sequential(self, corpus, batch_size, num_steps, vocab_size):
        """Yield ``(X, Y)`` minibatches of consecutive subsequences.

        ``X`` has shape ``(batch_size, num_steps, vocab_size)``. ``Y`` has
        shape ``(batch_size, num_steps)`` and holds feature 0 of the same
        window shifted one position later in ``corpus``.
        """
        # Begin at a random offset (0..num_steps inclusive) so that windows
        # are cut at different positions on every pass.
        start = random.randint(0, num_steps)
        # Largest token count that splits evenly into batch_size rows, keeping
        # one trailing element in reserve for the shifted targets.
        usable = (len(corpus) - start - 1) // batch_size * batch_size
        features = torch.tensor(corpus[start:start + usable], dtype=torch.float32)
        targets = torch.tensor(corpus[start + 1:start + 1 + usable], dtype=torch.float32)
        features = features.reshape(batch_size, -1, vocab_size)
        targets = targets.reshape(batch_size, -1, vocab_size)
        for k in range(features.shape[1] // num_steps):
            lo, hi = k * num_steps, (k + 1) * num_steps
            yield features[:, lo:hi], targets[:, lo:hi, 0]

# model

In [151]:
class RNNModel(nn.Module):
    """Recurrent model whose per-step inputs are extended with item/place embeddings.

    The embedding of a single item id and a single place id is appended to
    every time step of the input before it is fed to ``rnn_layer``; a linear
    layer then maps each RNN output vector to ``vocab_size`` values.

    Args:
        rnn_layer: recurrent layer (e.g. ``nn.LSTM`` / ``nn.GRU``) whose input
            width equals ``features + item_embed_size + place_embed_size``.
        vocab_size: width of the linear output layer.
        num_item: number of rows in the item embedding table.
        num_place: number of rows in the place embedding table.
        item_embed_size: dimension of each item embedding.
        place_embed_size: dimension of each place embedding.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """
    def __init__(self, rnn_layer, vocab_size, num_item, num_place, item_embed_size=5, place_embed_size=5, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # A bidirectional RNN emits both directions' hidden states side by
        # side, so the linear layer's input is twice as wide.
        self.num_directions = 2 if self.rnn.bidirectional else 1
        self.linear = nn.Linear(self.num_hiddens * self.num_directions, self.vocab_size)
        # Embedding tables for the item id and the place id.
        self.num_item = num_item
        self.num_place = num_place
        self.item_embed_size = item_embed_size
        self.place_embed_size = place_embed_size
        self.item_embed = nn.Embedding(num_embeddings=num_item, embedding_dim=item_embed_size)
        self.place_embed = nn.Embedding(num_embeddings=num_place, embedding_dim=place_embed_size)

    def forward(self, inputs, iid, pid, state):
        """Run the RNN over ``inputs`` extended with the item/place embeddings.

        Args:
            inputs: tensor indexed as ``(batch, time step, feature)``; it is
                not modified.
            iid: integer row index into the item embedding table.
            pid: integer row index into the place embedding table.
            state: recurrent state, e.g. as returned by ``begin_state``.

        Returns:
            ``(output, state)``. ``output`` has shape
            ``(num_steps * batch_size, vocab_size)`` with rows in time-major
            order (row ``t * batch_size + b``); ``state`` is the RNN's new state.
        """
        batch_size, num_steps = inputs.shape[0], inputs.shape[1]
        id_shape = (batch_size, num_steps)
        # Build the index tensors explicitly as int64 on the inputs' device:
        # nn.Embedding requires integer indices located with its weights, and
        # relying on torch.full's defaults breaks once inputs live off-CPU.
        iid_tensor = torch.full(id_shape, iid, dtype=torch.long, device=inputs.device)
        pid_tensor = torch.full(id_shape, pid, dtype=torch.long, device=inputs.device)
        iid_embed = self.item_embed(iid_tensor)    # (batch, steps, item_embed_size)
        pid_embed = self.place_embed(pid_tensor)   # (batch, steps, place_embed_size)
        # Cast first so the concatenation never depends on dtype promotion
        # (the notebook feeds float64 prefixes).
        X = torch.cat((inputs.to(iid_embed.dtype), iid_embed, pid_embed), dim=-1)
        # The RNN layer is used time-major: (steps, batch, features).
        X = X.transpose(0, 1)
        Y, state = self.rnn(X, state)
        # Flatten Y to (steps * batch, hidden width) for the linear layer,
        # whose output is then (steps * batch, vocab_size).
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self, device, batch_size=1):
        """Return an all-zero initial state on ``device``.

        The state is a single tensor of shape
        ``(num_directions * num_layers, batch_size, num_hiddens)``, or a pair
        of such tensors when the wrapped layer is an ``nn.LSTM``.
        """
        shape = (self.num_directions * self.rnn.num_layers,
                 batch_size, self.num_hiddens)
        if isinstance(self.rnn, nn.LSTM):
            # nn.LSTM takes a tuple (hidden state, cell state).
            return (torch.zeros(shape, device=device),
                    torch.zeros(shape, device=device))
        # Other layers such as nn.GRU take one tensor.
        return torch.zeros(shape, device=device)

In [146]:
# LSTM input width: 9 features per time step plus the 3-dim item embedding and
# the 3-dim place embedding that RNNModel.forward concatenates onto each step.
num_inputs = 9 + 3 + 3
num_hiddens = 32

# vocab_size=1: the linear head emits one value per time step.
model = RNNModel(nn.LSTM(num_inputs, num_hiddens), vocab_size=1, num_item=100, num_place=10, item_embed_size=3, place_embed_size=3)

In [147]:
x.shape 

torch.Size([64, 14, 9])

In [149]:
out, s = model(x, iid=0, pid=0, state=model.begin_state(device='cpu', batch_size=64))

In [150]:
out 

tensor([[-2.0703e-02],
        [ 2.1787e-02],
        [-4.0293e-03],
        [-5.5603e-02],
        [-1.0819e-01],
        [-1.5432e-01],
        [-1.9241e-01],
        [-2.2291e-01],
        [-2.4697e-01],
        [-2.6586e-01],
        [-2.8073e-01],
        [-2.9249e-01],
        [-3.0187e-01],
        [-3.0940e-01],
        [-3.1549e-01],
        [-3.2046e-01],
        [-3.2454e-01],
        [-3.2791e-01],
        [-3.3071e-01],
        [-3.3306e-01],
        [-3.3503e-01],
        [-3.3670e-01],
        [-3.3811e-01],
        [-3.3933e-01],
        [-3.4038e-01],
        [-3.4128e-01],
        [-3.4208e-01],
        [-3.4277e-01],
        [-3.4339e-01],
        [-3.4395e-01],
        [-3.4445e-01],
        [-3.4490e-01],
        [-3.4531e-01],
        [-3.4570e-01],
        [-3.4606e-01],
        [-3.4639e-01],
        [-3.4671e-01],
        [-3.4701e-01],
        [-3.4730e-01],
        [-3.4758e-01],
        [-3.4784e-01],
        [-3.4811e-01],
        [-3.4836e-01],
        [-3

In [None]:
batch_size, num_steps = 32, 35

# predict

In [6]:
def predict_rnn(inputs, iid, pid, num_preds, net, device):
    """Forecast ``num_preds`` steps past the end of ``inputs``.

    The network is first warmed up on the whole prefix; each prediction is
    then fed back as the single-step input for the next one.

    Args:
        inputs: prefix tensor passed to ``net`` as-is (moved to ``device``);
            the step-by-step loop feeds ``(1, 1, 1)`` tensors, so one feature
            per step is assumed.
        iid: item id forwarded to ``net``.
        pid: place id forwarded to ``net``.
        num_preds: number of future steps to predict.
        net: model exposing ``begin_state(batch_size, device)`` and callable
            as ``net(inputs, iid=..., pid=..., state=...) -> (Y, state)``.
        device: device for the state and the input tensors.

    Returns:
        list of floats: the flattened prefix followed by ``num_preds``
        predictions (only the prefix when ``num_preds <= 0``).
    """
    outputs = inputs.flatten().tolist()
    if num_preds <= 0:
        # Nothing to forecast; previously one prediction was still appended.
        return outputs
    # Inference only: do not record an autograd graph for every step.
    with torch.no_grad():
        state = net.begin_state(batch_size=1, device=device)
        # Warm-up on the prefix.
        Y, state = net(inputs.to(device), iid=iid, pid=pid, state=state)
        # First step ahead is the output at the last prefix position.
        outputs.append(Y.flatten()[-1].item())
        for _ in range(num_preds - 1):
            step_input = torch.tensor(outputs[-1], device=device).view(1, 1, 1)
            Y, state = net(step_input, iid=iid, pid=pid, state=state)
            outputs.append(Y.item())
    return outputs

# train

In [7]:
def grad_clipping(net, theta):  #@save
    """Rescale gradients in place so their global L2 norm is at most ``theta``.

    Args:
        net: an ``nn.Module`` (its trainable parameters are used) or any
            object exposing a ``params`` iterable of tensors.
        theta: maximum allowed L2 norm over all gradients taken together.

    Parameters whose ``grad`` is ``None`` (e.g. not reached by the last
    backward pass) are ignored; if no gradient exists nothing happens.
    """
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    # Skipping missing gradients avoids a TypeError on `None ** 2`.
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return
    norm = torch.sqrt(sum(torch.sum(g ** 2) for g in grads))
    if norm > theta:
        scale = theta / norm
        for g in grads:
            # mul_ also works for 0-dim gradients, which `grad[:]` cannot slice.
            g.mul_(scale)

In [152]:
def train_epoch(net, train_iter, iid, pid, loss, updater, device, use_random_iter):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: model exposing ``begin_state(batch_size, device)`` and callable
            as ``net(X, iid, pid, state) -> (y_hat, state)``.
        train_iter: iterable of ``(X, Y)`` minibatches, with ``Y`` indexed as
            ``(batch, time step)``.
        iid: item id forwarded to ``net``.
        pid: place id forwarded to ``net``.
        loss: callable ``loss(y_hat, y)`` returning a scalar tensor.
        updater: a ``torch.optim.Optimizer`` or a callable invoked as
            ``updater(batch_size=1)``.
        device: device the batches and the state are placed on.
        use_random_iter: when true the state is re-initialised for every batch
            instead of being carried over.

    Returns:
        ``(mean loss weighted by element count, elapsed seconds)``.

    Raises:
        ZeroDivisionError: if ``train_iter`` yields no batches.
    """
    state = None
    start_time = time.time()
    mse = 0
    size = 0
    for X, Y in train_iter:
        if state is None or use_random_iter:
            # Initialise the state on the first batch or when batches are not
            # consecutive (random sampling).
            state = net.begin_state(batch_size=X.shape[0], device=device)
        else:
            # Carry the state over but cut it from the previous batch's graph.
            if isinstance(net, nn.Module) and not isinstance(state, tuple):
                # A single tensor state (e.g. nn.GRU).
                state.detach_()
            else:
                for s in state:
                    s.detach_()

        # The model returns its predictions time-major (row t * batch + b), so
        # the (batch, step) targets must be transposed before flattening;
        # a plain reshape misaligns them whenever both dimensions exceed 1.
        y = Y.transpose(0, 1).reshape(-1)
        X, y = X.to(device), y.to(device)
        y_hat, state = net(X, iid, pid, state)
        l = loss(y_hat.reshape(-1), y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            grad_clipping(net, 1)
            updater.step()
        else:
            l.backward()
            grad_clipping(net, 1)
            # The loss is already a mean, hence batch_size=1.
            updater(batch_size=1)
        # Detach so the running total does not keep every batch's graph alive.
        mse += l.detach() * y.numel()
        size += y.numel()

    return mse / size, time.time() - start_time

In [153]:
def train(net, train_iter, iid, pid, lr, num_epochs, device, use_random_iter=False):
    """Fit ``net`` with Adam on an MSE objective for ``num_epochs`` epochs.

    A progress line (epoch number, epoch loss, epoch duration in seconds) is
    printed for the first epoch and every 50th epoch after it.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr)
    for epoch in range(num_epochs):
        epochmse, speed = train_epoch(
            net, train_iter, iid, pid, criterion, optimizer, device, use_random_iter)
        if epoch % 50:
            # Not a reporting epoch.
            continue
        print(f'epoch: {epoch + 1}, mse: {epochmse}, time: {speed}')

In [93]:
tsdata

Unnamed: 0,sale,pre_1,pre_2,pre_3,pre_4,pre_5,pre_6,pre_7,pre_8,pre_9,...,pre_21,pre_22,pre_23,pre_24,pre_25,pre_26,pre_27,pre_28,pre_29,pre_30
0,0,,,,,,,,,,...,,,,,,,,,,
1,1,0.0,,,,,,,,,...,,,,,,,,,,
2,2,1.0,0.0,,,,,,,,...,,,,,,,,,,
3,3,2.0,1.0,0.0,,,,,,,...,,,,,,,,,,
4,4,3.0,2.0,1.0,0.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,9994.0,9993.0,9992.0,9991.0,9990.0,9989.0,9988.0,9987.0,9986.0,...,9974.0,9973.0,9972.0,9971.0,9970.0,9969.0,9968.0,9967.0,9966.0,9965.0
9996,9996,9995.0,9994.0,9993.0,9992.0,9991.0,9990.0,9989.0,9988.0,9987.0,...,9975.0,9974.0,9973.0,9972.0,9971.0,9970.0,9969.0,9968.0,9967.0,9966.0
9997,9997,9996.0,9995.0,9994.0,9993.0,9992.0,9991.0,9990.0,9989.0,9988.0,...,9976.0,9975.0,9974.0,9973.0,9972.0,9971.0,9970.0,9969.0,9968.0,9967.0
9998,9998,9997.0,9996.0,9995.0,9994.0,9993.0,9992.0,9991.0,9990.0,9989.0,...,9977.0,9976.0,9975.0,9974.0,9973.0,9972.0,9971.0,9970.0,9969.0,9968.0


In [237]:
# days = 365 * 5

# Toy series used for training: a single 'sale' column holding 0, 1, ..., 999.
# tsdata = pd.DataFrame({'sale': np.array([1, 3, 5, 7, 9, 7, 5, 3, 1, -1] * 1000)})
tsdata = pd.DataFrame({'sale': range(1000)})
# for i in range(1, 9):
#     tsdata['pre_{}'.format(i)] = tsdata['sale'].shift(i)
# tsdata.dropna(inplace=True)
# A single item id and place id are used for the whole series.
iid = 0
pid = 0

# Fixed scaling by 1/100; model outputs must be multiplied by 100 to get back
# to the original units (the MinMaxScaler alternative below is disabled).
data = tsdata.to_numpy() / 100
# scaler = MinMaxScaler() 
# data = scaler.fit_transform(data)

In [238]:
data.shape

(1000, 1)

In [239]:
# LSTM input width: 1 feature per time step plus the 3-dim item embedding and
# the 3-dim place embedding that RNNModel.forward concatenates onto each step.
num_inputs = 1 + 3 + 3
num_hiddens = 32
# num_item / num_place size the embedding tables; iid and pid must index into them.
net = RNNModel(nn.LSTM(num_inputs, num_hiddens), vocab_size=1, num_item=2, num_place=2, item_embed_size=3, place_embed_size=3)

In [240]:
for name, param in net.named_parameters():
    print(name, param.size())

rnn.weight_ih_l0 torch.Size([128, 7])
rnn.weight_hh_l0 torch.Size([128, 32])
rnn.bias_ih_l0 torch.Size([128])
rnn.bias_hh_l0 torch.Size([128])
linear.weight torch.Size([1, 32])
linear.bias torch.Size([1])
item_embed.weight torch.Size([2, 3])
place_embed.weight torch.Size([2, 3])


In [241]:
net.state_dict() 

OrderedDict([('rnn.weight_ih_l0',
              tensor([[ 1.3279e-01,  5.5129e-02,  1.0689e-01,  1.6370e-02,  5.7336e-02,
                       -1.0306e-01,  1.1669e-01],
                      [ 2.1842e-02, -1.5499e-01, -1.3783e-01, -1.5914e-01, -1.6705e-01,
                        7.6211e-02,  1.0567e-01],
                      [ 2.0697e-02,  8.0705e-02, -1.1997e-02,  2.8080e-02, -1.5677e-01,
                       -1.7174e-01,  5.3371e-02],
                      [-4.9831e-02,  1.3302e-02, -5.4015e-02, -1.1276e-01,  1.5479e-02,
                        5.3492e-03,  3.6677e-02],
                      [ 4.0697e-02,  2.1063e-02,  2.5974e-02,  1.0895e-01, -3.8742e-02,
                        1.0740e-01, -1.7658e-01],
                      [ 1.6507e-01,  1.4495e-01, -1.1953e-01, -1.6611e-01, -7.5069e-03,
                        1.1296e-02,  1.5656e-01],
                      [ 9.5326e-02, -7.5979e-02, -1.2545e-01, -3.8579e-03, -2.4358e-02,
                        6.1704e-03, -7.6666e-02],


In [None]:
net.state_dict() 

In [185]:
net.linear.state_dict() 

OrderedDict([('weight',
              tensor([[-0.0586,  0.1549, -0.1508,  0.0709, -0.0343, -0.1502,  0.0314, -0.1331,
                       -0.0015,  0.0218, -0.1280,  0.1348,  0.0657, -0.1431, -0.1048,  0.1255,
                        0.0743,  0.0640, -0.1502, -0.0274,  0.0878,  0.0204,  0.1213,  0.0265,
                        0.0247,  0.1549, -0.1439,  0.0366,  0.1106,  0.1307, -0.0087, -0.0008]])),
             ('bias', tensor([0.1611]))])

In [104]:
net.linear.state_dict() 

OrderedDict([('weight',
              tensor([[-0.0930, -0.0290,  0.0931, -0.0283, -0.0540,  0.0087, -0.0248, -0.1001,
                       -0.0540, -0.0080,  0.0374,  0.0825, -0.1465, -0.0846,  0.0706, -0.1383,
                        0.1404, -0.0918,  0.0170,  0.0838, -0.1195, -0.0074,  0.0687,  0.1334,
                        0.0814, -0.0113, -0.0277,  0.1489, -0.0498, -0.0689,  0.1381, -0.1109]])),
             ('bias', tensor([-0.1733]))])

In [242]:
# num_steps=1: every training window is a single time step.
# vocab_size=1: SeqDataLoader uses it as the feature count per step when reshaping.
train_iter = SeqDataLoader(data, batch_size=256, num_steps=1, vocab_size=1)
num_epochs, lr = 800, 0.01
device = 'cpu'

In [243]:
train(net, train_iter, iid, pid, lr, num_epochs, device)

epoch: 1, mse: 18.315536499023438, time: 0.0156097412109375
epoch: 51, mse: 0.024247661232948303, time: 0.01004171371459961
epoch: 101, mse: 0.009333171881735325, time: 0.00201416015625
epoch: 151, mse: 0.009411613456904888, time: 0.010079622268676758
epoch: 201, mse: 0.02152615785598755, time: 0.0
epoch: 251, mse: 0.0151206711307168, time: 0.010032415390014648
epoch: 301, mse: 0.004858304280787706, time: 0.010030984878540039
epoch: 351, mse: 0.020061960443854332, time: 0.0020384788513183594
epoch: 401, mse: 0.013741184026002884, time: 0.0020263195037841797
epoch: 451, mse: 0.002203725976869464, time: 0.002016305923461914
epoch: 501, mse: 0.014759056270122528, time: 0.008023977279663086
epoch: 551, mse: 0.010606653988361359, time: 0.008011341094970703
epoch: 601, mse: 0.006655978038907051, time: 0.01009678840637207
epoch: 651, mse: 0.015625419095158577, time: 0.010052680969238281
epoch: 701, mse: 0.00401706388220191, time: 0.010021448135375977
epoch: 751, mse: 0.013029289431869984, tim

In [259]:
# One-step prefix: the scaled value at index 50, shaped (batch=1, steps, features=1).
prefix = torch.tensor(data[50]).reshape(1, -1, 1)
# Returns the prefix value followed by 5 predictions, all in scaled (/100) units.
predict = predict_rnn(inputs=prefix, iid=iid, pid=pid, num_preds=5, net=net, device='cpu')

In [260]:
np.array(predict) * 100

array([50.        , 52.2426486 , 54.6469152 , 57.26788044, 57.42036104,
       54.7268033 ])

In [223]:
state = net.begin_state(batch_size=1, device=device)

In [167]:
prefix

tensor([[[0.1300, 0.1200, 0.1100, 0.1000, 0.0900, 0.0800, 0.0700, 0.0600,
          0.0500]]], dtype=torch.float64)

In [226]:
prefix

tensor([[[0.0000],
         [0.0100],
         [0.0200],
         [0.0300],
         [0.0400],
         [0.0500],
         [0.0600],
         [0.0700],
         [0.0800],
         [0.0900],
         [0.1000],
         [0.1100],
         [0.1200],
         [0.1300],
         [0.1400],
         [0.1500],
         [0.1600],
         [0.1700],
         [0.1800],
         [0.1900]]], dtype=torch.float64)

In [225]:
prefix = torch.tensor(data[:20]).reshape(1, -1, 1)

net(prefix, iid, pid, state)

(tensor([[0.0381],
         [2.5731],
         [5.1342],
         [6.6531],
         [7.2177],
         [7.4545],
         [7.5659],
         [7.6180],
         [7.6429],
         [7.6554],
         [7.6622],
         [7.6666],
         [7.6698],
         [7.6725],
         [7.6750],
         [7.6774],
         [7.6797],
         [7.6820],
         [7.6843],
         [7.6866]], grad_fn=<AddmmBackward>),
 (tensor([[[ 1.6607e-03,  9.9963e-01,  4.2697e-01,  9.9999e-01, -9.9987e-01,
             9.5105e-01, -9.9994e-01, -5.1971e-01,  9.9961e-01,  4.2324e-01,
            -9.9997e-01, -1.3191e-05,  1.0000e+00, -9.9994e-01,  9.9999e-01,
             1.4608e-03,  7.2114e-01,  1.6778e-14, -1.9352e-08,  1.0000e+00,
             5.7262e-10,  9.9462e-01,  1.0000e+00,  9.9997e-01,  4.8186e-05,
             1.0000e+00, -9.9988e-01, -2.3140e-08, -9.9995e-01,  9.9982e-01,
            -8.8307e-02, -9.9967e-01]]], grad_fn=<StackBackward>),
  tensor([[[ 2.1297e-01,  1.8905e+01,  1.5022e+01,  1.8026e+01, 

In [68]:
predict

[0.0,
 0.01,
 0.02,
 0.03,
 0.04,
 0.05,
 0.06,
 0.07,
 0.08,
 0.09,
 0.1,
 0.11,
 0.12,
 0.13,
 0.14,
 0.15,
 0.16,
 0.17,
 0.18,
 0.19,
 0.2,
 0.21,
 0.22,
 0.23,
 0.24,
 0.25,
 0.26,
 0.27,
 0.28,
 0.29,
 0.3,
 0.31,
 0.32,
 0.33,
 0.34,
 0.35,
 0.36,
 0.37,
 0.38,
 0.39,
 0.4,
 0.41,
 0.42,
 0.43,
 0.44,
 0.45,
 0.46,
 0.47,
 0.48,
 0.49,
 0.5,
 0.51,
 0.52,
 0.53,
 0.54,
 0.55,
 0.56,
 0.57,
 0.58,
 0.59,
 0.6,
 0.61,
 0.62,
 0.63,
 0.64,
 0.65,
 0.66,
 0.67,
 0.68,
 0.69,
 0.7,
 0.71,
 0.72,
 0.73,
 0.74,
 0.75,
 0.76,
 0.77,
 0.78,
 0.79,
 0.8,
 0.81,
 0.82,
 0.83,
 0.84,
 0.85,
 0.86,
 0.87,
 0.88,
 0.89,
 0.9,
 0.91,
 0.92,
 0.93,
 0.94,
 0.95,
 0.96,
 0.97,
 0.98,
 0.99,
 46.3619384765625,
 46.940086364746094,
 46.944618225097656,
 46.94465637207031,
 46.94465637207031,
 46.94465637207031,
 46.94465637207031,
 46.94465637207031,
 46.94465637207031,
 46.94465637207031]

In [None]:
# `data` was scaled by dividing by 100; the MinMaxScaler path is commented out,
# so `scaler` is never defined and calling it raises NameError on a fresh
# kernel. Undo the fixed scaling instead.
np.array(predict).reshape(-1, 1) * 100