<a href="https://colab.research.google.com/github/scdctlt/course_work/blob/master/mcm_c.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
!nvidia-smi

Fri Feb 17 16:18:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P0    31W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
#%%
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
!pip install torchkeras==1.5 
import torchkeras
from plotly import graph_objects as go
from sklearn.preprocessing import MinMaxScaler


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
df=pd.read_excel('./drive/MyDrive/test.xlsx')
df.head

<bound method NDFrame.head of                     Date  Number of  reported results
0    2022-01-07 00:00:00                        80630
1    2022-01-08 00:00:00                       101503
2    2022-01-09 00:00:00                        91477
3    2022-01-10 00:00:00                       107134
4    2022-01-11 00:00:00                       153880
..                   ...                          ...
354  2022-12-27 00:00:00                        20879
355  2022-12-28 00:00:00                        20160
356  2022-12-29 00:00:00                        20001
357  2022-12-30 00:00:00                        21204
358  2022-12-31 00:00:00                        20380

[359 rows x 2 columns]>

In [5]:
#%%
# 数据预览
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Number of  reported results'], name='Sales'))
fig.show()


In [6]:
#%%
# 数据处理
# 归一化 [0, 1]
scaler = MinMaxScaler()
predict_field = 'Numbers'
df[predict_field] = scaler.fit_transform(df['Number of  reported results'].values.reshape(-1, 1))
df.head()


Unnamed: 0,Date,Number of reported results,Numbers
0,2022-01-07 00:00:00,80630,0.217235
1,2022-01-08 00:00:00,101503,0.275322
2,2022-01-09 00:00:00,91477,0.247421
3,2022-01-10 00:00:00,107134,0.290993
4,2022-01-11 00:00:00,153880,0.421081


In [7]:
#%%
def create_dataset(data:list, time_step: int):
    arr_x, arr_y = [], []
    for i in range(len(data) - time_step - 1):
        x = data[i: i + time_step]
        y = data[i + time_step]
        arr_x.append(x)
        arr_y.append(y)
    return np.array(arr_x), np.array(arr_y)

time_step = 8
X, Y = create_dataset(df[predict_field].values, time_step)


In [8]:
#%%
# cuda
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 转化成 tensor->(batch_size, seq_len, feature_size)
X = torch.tensor(X.reshape(-1, time_step, 1), dtype=torch.float).to(device)
Y = torch.tensor(Y.reshape(-1, 1, 1), dtype=torch.float).to(device)
print('Total datasets: ', X.shape, '-->', Y.shape)

# 划分数据
split_ratio = 0.8
len_train = int(X.shape[0] * split_ratio)
X_train, Y_train = X[:len_train, :, :], Y[:len_train, :, :]
print('Train datasets: ', X_train.shape, '-->', Y_train.shape)


Total datasets:  torch.Size([350, 8, 1]) --> torch.Size([350, 1, 1])
Train datasets:  torch.Size([280, 8, 1]) --> torch.Size([280, 1, 1])


In [9]:
#%%
# 构建迭代器
batch_size = 10
ds = TensorDataset(X, Y)
dl = DataLoader(ds, batch_size=batch_size, num_workers=0)
ds_train = TensorDataset(X_train, Y_train)
dl_train = DataLoader(ds_train, batch_size=batch_size, num_workers=0)
# 查看第一个batch
x, y = next(iter(dl_train))
print(x.shape)
print(y.shape)


torch.Size([10, 8, 1])
torch.Size([10, 1, 1])


In [10]:
#%%
# 定义模型
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=6, num_layers=3, batch_first=True)
        self.fc = nn.Linear(in_features=6, out_features=1)

    def forward(self, x):
        # x is input, size (batch_size, seq_len, input_size)
        x, _ = self.lstm(x)
        # x is output, size (batch_size, seq_len, hidden_size)
        x = x[:, -1, :]
        x = self.fc(x)
        x = x.view(-1, 1, 1)
        return x


In [11]:
#%%
# torchkeras API 训练方式
model = torchkeras.Model(Net())
model.summary(input_shape=(time_step, 1))
model.compile(loss_func=F.mse_loss, optimizer=torch.optim.Adam(model.parameters(), lr=1e-2), device=device)
dfhistory = model.fit(epochs=50, dl_train=dl_train, log_step_freq=20)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
              LSTM-1                 [-1, 8, 6]             888
            Linear-2                    [-1, 1]               7
Total params: 895
Trainable params: 895
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.000031
Forward/backward pass size (MB): 0.000374
Params size (MB): 0.003414
Estimated Total Size (MB): 0.003819
----------------------------------------------------------------
Start Training ...

{'step': 20, 'loss': 0.112}

 +-------+-------+
| epoch |  loss |
+-------+-------+
|   1   | 0.086 |
+-------+-------+

{'step': 20, 'loss': 0.118}

 +-------+-------+
| epoch |  loss |
+-------+-------+
|   2   | 0.092 |
+-------+-------+

{'step': 20, 'loss': 0.096}

 +-------+------+
| epoch | loss |
+-------+------+
|   3   | 0.08 |
+-------+------+

{'step': 20, 'loss': 0.09}

 +-------+--

In [12]:
#%%
# 模型评估
fig = go.Figure()
fig.add_trace(go.Scatter(x=dfhistory.index, y=dfhistory['loss'], name='loss'))
fig.show()


In [13]:
#%%
# 预测验证预览
y_true = Y.cpu().numpy().squeeze()
y_pred = model.predict(dl).detach().cpu().numpy().squeeze()
fig = go.Figure()
fig.add_trace(go.Scatter(y=y_true, name='y_true'))
fig.add_trace(go.Scatter(y=y_pred, name='y_pred'))
fig.show()


In [14]:
#%%
# 自定义训练方式
model = Net().to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

def train_step(model,features, labels):
    # 正向传播求损失
    predictions = model.forward(features)
    loss = loss_function(predictions, labels)
    # 反向传播求梯度
    loss.backward()
    # 参数更新
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()


# 测试一个batch
features, labels = next(iter(dl_train))
loss = train_step(model, features, labels)
loss


0.1774754375219345

In [15]:
#%%
# 训练模型
def train_model(model, epochs):
    for epoch  in range(1, epochs+1):
        list_loss = []
        for features, labels in dl_train:
            lossi = train_step(model,features, labels)
            list_loss.append(lossi)
        loss = np.mean(list_loss)
        if epoch % 10 == 0:
            print('epoch={} | loss={} '.format(epoch,loss))

train_model(model, 50)


epoch=10 | loss=0.0752393594219549 
epoch=20 | loss=0.015768610523731956 
epoch=30 | loss=0.008641077453505983 
epoch=40 | loss=0.0037501826973311836 
epoch=50 | loss=0.003152137214004012 


In [16]:
#%%
# 预测验证预览
y_pred = model.forward(X).detach().cpu().numpy().squeeze()
fig = go.Figure()
fig.add_trace(go.Scatter(y=y_true, name='y_true'))
fig.add_trace(go.Scatter(y=y_pred, name='y_pred'))
fig.show()
