In [1]:
import sys
import warnings
import os
if not sys.warnoptions:
    warnings.simplefilter('ignore')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
from copy import deepcopy as copy
from torch.utils.data import DataLoader, TensorDataset
sns.set()
def seed_torch(seed=1122):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to its deterministic,
    non-benchmarking mode.

    Args:
        seed: Integer seed handed to every generator (default 1122).
    """
    # Disable hash randomization so that experiments are reproducible.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # needed when several GPUs are in use
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_torch()

In [2]:
# Read the CSV, drop the listed columns, and index the remaining frame by 'date'.
sh02 = (
    pd.read_csv('history_sh.000002_stock_k_data.csv')
    .drop(columns=['code', 'preclose', 'amount', 'adjustflag', 'turn',
                   'tradestatus', 'pctChg', 'isST'])
    .set_index('date')
)
sh02.head()

date,open,high,low,close,volume
2016-01-04,3701.584,3703.778,3449.499,3450.031,18281590784
2016-01-05,3345.625,3483.121,3338.1,3440.785,26481264384
2016-01-06,3444.441,3519.729,3442.2,3518.541,23725938688
2016-01-07,3463.891,3463.891,3261.164,3270.706,7008106496
2016-01-08,3343.442,3386.281,3199.253,3334.934,28444573440


In [3]:
class GetData:
    """Turn a price DataFrame into min-max-scaled sliding windows.

    Every column is rescaled with ``(x - min) / (max - min)``. A sample is
    ``n`` consecutive rows (all columns) and its label is the scaled ``close``
    value of the row immediately after the window.

    Attributes set by ``getData``:
        close_min, close_max: min / max of the *unscaled* ``close`` column.
    """

    def __init__(self, batch_size=20, n=5, data=None):
        """
        Args:
            batch_size: Batch size used for the DataLoaders built in
                ``get_dataloader``.
            n: Number of consecutive rows per window.
            data: DataFrame with a ``close`` column. Defaults to the
                module-level ``sh02`` frame when omitted.
        """
        self.data = sh02 if data is None else data
        self.batch_size = batch_size
        self.n = n

    def getData(self):
        """Min-max scale every column and return the scaled frame.

        The scaled frame is stored in ``self.data``. The method is idempotent:
        the unscaled frame is remembered, so calling it again (directly or via
        ``process_data`` / ``get_dataloader``) does not overwrite
        ``close_min`` / ``close_max`` with the statistics of already-scaled
        data. A column whose values are all equal becomes NaN (0 / 0).
        """
        # Treat self.data as the raw source unless it is the very frame this
        # method produced on an earlier call.
        if getattr(self, "_normalized", None) is not self.data:
            self._raw = self.data
        raw = self._raw

        self.close_min = raw["close"].min()
        self.close_max = raw["close"].max()

        self._normalized = raw.apply(
            lambda col: (col - col.min()) / (col.max() - col.min())
        )
        self.data = self._normalized
        return self.data

    def process_data(self):
        """Build the sliding windows and split them 90 % / 10 % in order.

        Returns:
            ``(train_x, test_x, train_y, test_y)`` where the ``*_x`` values are
            lists of ``n x n_columns`` nested lists and the ``*_y`` values are
            lists of scaled ``close`` values. No shuffling is applied, so the
            test part is the tail of the series.
        """
        n = self.n
        scaled = self.getData()
        rows = scaled.values
        closes = scaled["close"].values

        # Window i covers rows [i, i + n) and is labelled by row i + n, so the
        # last usable start index is len - n - 1.
        n_windows = len(scaled) - n
        feature = [rows[i: i + n].tolist() for i in range(n_windows)]
        label = [closes[i + n] for i in range(n_windows)]

        num = int(len(feature) * 0.9)
        return feature[:num], feature[num:], label[:num], label[num:]

    def get_dataloader(self):
        """Wrap the train / test split in ``TensorDataset`` objects.

        Batched, unshuffled ``DataLoader`` objects over the two datasets are
        stored as ``self.train_loader`` and ``self.test_loader``.

        NOTE(review): despite the method name, the return value has always
        been the pair of datasets, not the loaders; that is kept unchanged so
        existing callers keep working. Confirm which one callers really want.

        Returns:
            ``(train_data, test_data)`` as ``TensorDataset`` instances holding
            float32 tensors.
        """
        train_x, test_x, train_y, test_y = self.process_data()

        train_data = TensorDataset(
            torch.tensor(train_x).float(), torch.tensor(train_y).float()
        )
        test_data = TensorDataset(
            torch.tensor(test_x).float(), torch.tensor(test_y).float()
        )

        self.train_loader = DataLoader(train_data, batch_size=self.batch_size)
        self.test_loader = DataLoader(test_data, batch_size=self.batch_size)
        return train_data, test_data

In [4]:
# Build the windowed samples once and keep the ordered train / test split.
GD = GetData()
(train_x, test_x, train_y, test_y) = GD.process_data()

In [None]:
class CNN_LSTM(nn.Module):
    """Two conv + max-pool stages followed by a three-layer MLP regressor.

    ``forward`` maps a batch of ``(n, n_features)`` windows to one value per
    sample.

    NOTE(review): ``lstm_layer`` and ``linear_layer`` are constructed but are
    not called in ``forward``; they are kept so that the parameter set and
    state_dict keys stay unchanged. Confirm whether the LSTM branch is meant
    to be wired in.
    """

    def __init__(self, n=5, n_features=5):
        """
        Args:
            n: Number of rows (time steps) per input window; also used as the
                LSTM ``input_size``.
            n_features: Number of columns per row of the window.

        Raises:
            ValueError: If either dimension is too small to survive the two
                2x2 max-pool stages.
        """
        super().__init__()

        # The 3x3 convolutions use padding=1 and keep the spatial size; each
        # 2x2 max-pool floors it to half, hence the two integer halvings.
        pooled_h = n // 2 // 2
        pooled_w = n_features // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                "n and n_features must both be >= 4, got n=%r, n_features=%r"
                % (n, n_features)
            )

        # Conv2d arguments: in_channels, out_channels, kernel_size.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 16, 3, padding=1)
        # Flattened size after the second pool; 16 for the default 5x5 window.
        self.fc1 = nn.Linear(16 * pooled_h * pooled_w, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.lstm_layer = nn.LSTM(input_size=n, hidden_size=128, batch_first=True)
        self.linear_layer = nn.Linear(in_features=128, out_features=1, bias=True)

    def forward(self, x):
        """Run the CNN + MLP stack.

        Args:
            x: Tensor of shape ``(batch, n, n_features)``.

        Returns:
            Tensor of shape ``(batch, 1)``. Note the trailing singleton axis
            when comparing against 1-D targets.
        """
        # Shapes in the comments are for the default 5x5 window, B = batch.
        x = torch.unsqueeze(x, dim=1)               # (B, 1, 5, 5) add channel axis
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # (B, 32, 5, 5) -> (B, 32, 2, 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # (B, 16, 2, 2) -> (B, 16, 1, 1)
        # Collapse everything but the batch axis for the fully connected layers.
        x = torch.flatten(x, start_dim=1)           # (B, 16)
        x = F.relu(self.fc1(x))                     # (B, 128)
        x = F.relu(self.fc2(x))                     # (B, 64)
        return self.fc3(x)                          # (B, 1)