In [1]:
# 导入包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import zipfile
import pickle
import pandas as pd
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import paddle.optimizer as optim
import paddle.optimizer.lr as lr
from copy import deepcopy
from collections import defaultdict
from tqdm import tqdm
from paddle.io import DataLoader, TensorDataset
import json

# Fix the random seeds of Paddle and NumPy so that results are reproducible.
seed = 42
paddle.seed(seed)
np.random.seed(seed)

# 导入自定义包
sys.path.append("work")
from candle2 import Canva



# 模型-Multichannel

In [2]:
class DilatedCNN(nn.Layer):
    """1-D dilated convolution followed by a ReLU activation.

    The padding is ``(kernel_size - 1) // 2 * dilation``; with the default
    stride this keeps the sequence length unchanged for odd kernel sizes.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel_size: Width of the convolution kernel.
        dilation: Dilation factor of the convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        half_kernel = (kernel_size - 1) // 2
        self.conv = nn.Conv1D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=half_kernel * dilation,
            dilation=dilation,
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the convolution and then the ReLU to ``x``."""
        return self.relu(self.conv(x))

class BiLSTM(nn.Layer):
    """Batch-first bidirectional LSTM that returns only the output sequence.

    Args:
        input_dim: Feature size of each input step.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            direction='bidirectional',
            time_major=False,
        )

    def forward(self, x):
        """Run the LSTM on ``x`` and discard the final hidden/cell states."""
        outputs, _final_states = self.lstm(x)
        return outputs


class Attention(nn.Layer):
    """Attention pooling over axis 1 using a learned context vector.

    The layer works on features of size ``hidden_dim * 2``. For an input of
    shape (batch, steps, hidden_dim * 2) it returns the attention-weighted sum
    over the steps, of shape (batch, hidden_dim * 2).

    Args:
        hidden_dim: Half of the feature size of the incoming sequence.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        feature_dim = hidden_dim * 2
        self.attn = nn.Linear(feature_dim, feature_dim)
        self.softmax = nn.Softmax(axis=1)
        self.context_vector = paddle.create_parameter(
            shape=[feature_dim],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Normal(),
        )

    def forward(self, lstm_out):
        """Pool ``lstm_out`` along axis 1 with softmax-normalised scores."""
        # Score each step: ReLU(linear projection) dotted with the context vector.
        scores = F.relu(self.attn(lstm_out))
        scores = paddle.matmul(scores, self.context_vector)
        weights = self.softmax(scores)
        # Weighted sum over the steps, then drop the singleton axis.
        pooled = paddle.matmul(weights.unsqueeze(1), lstm_out)
        return pooled.squeeze(1)

class multichannel(nn.Layer):
    """Two-branch regressor: a dilated-CNN + BiLSTM branch and a direct BiLSTM branch.

    The flattened input is projected to a 128-dimensional embedding. One branch
    passes it through three parallel dilated convolutions (dilation 1, 2 and 3)
    followed by a bidirectional LSTM; the other feeds the embedding directly to
    a second bidirectional LSTM. Both branch outputs are pooled by the same
    ``local_attention`` layer, concatenated, pooled again by
    ``global_attention`` and mapped to ``output_dim`` values by two linear
    layers.
    """

    def __init__(self, input_dim, cnn_out_channels=128, cnn_kernel_size=3, lstm_hidden_dim=256, lstm_num_layers=2, output_dim=1, dropout=0.1):
        """Create and initialise all sub-layers.

        Args:
            input_dim: The embedding layer expects ``input_dim**2 * 3``
                flattened features per sample.
            cnn_out_channels: Output channels of each dilated convolution.
            cnn_kernel_size: Kernel size shared by the dilated convolutions.
            lstm_hidden_dim: Hidden size of each LSTM direction.
            lstm_num_layers: Number of stacked layers in each LSTM.
            output_dim: Size of the final output per sample.
            dropout: Dropout probability applied before the output layers.
        """
        super(multichannel, self).__init__()
        # Linear embedding: Kaiming-uniform weights, zero bias.
        self.embedding = nn.Linear(input_dim**2*3, 128)
        paddle.nn.initializer.KaimingUniform()(self.embedding.weight)
        paddle.nn.initializer.Constant(value=0.0)(self.embedding.bias)
        # Three parallel dilated convolutions over the 128 embedding channels.
        self.dilated_cnn1 = DilatedCNN(128, cnn_out_channels, cnn_kernel_size, dilation=1)
        self.dilated_cnn2 = DilatedCNN(128, cnn_out_channels, cnn_kernel_size, dilation=2)
        self.dilated_cnn3 = DilatedCNN(128, cnn_out_channels, cnn_kernel_size, dilation=3)

        # One BiLSTM for the concatenated CNN features, one for the raw embedding.
        self.lstm_cnn = BiLSTM(cnn_out_channels * 3, lstm_hidden_dim, lstm_num_layers)
        self.lstm_direct = BiLSTM(128, lstm_hidden_dim, lstm_num_layers)
        # LSTM parameters with two or more dimensions get Xavier-uniform init; the rest are zeroed.
        for layer in [self.lstm_cnn, self.lstm_direct]:
            for param in layer.parameters():
                if param.ndim >= 2:
                    paddle.nn.initializer.XavierUniform()(param)
                else:
                    paddle.nn.initializer.Constant(value=0.0)(param)

        # local_attention is shared by both branches; global_attention pools their concatenation.
        self.local_attention = Attention(lstm_hidden_dim)
        self.global_attention = Attention(lstm_hidden_dim * 2)
        self.dropout = nn.Dropout(dropout)
        # Output head: two linear layers with no activation in between.
        self.q = nn.Linear(lstm_hidden_dim * 4, lstm_hidden_dim * 2)
        self.fc = nn.Linear(lstm_hidden_dim * 2, output_dim)
        paddle.nn.initializer.XavierUniform()(self.q.weight)
        paddle.nn.initializer.XavierUniform()(self.fc.weight)
        paddle.nn.initializer.Constant(value=0.0)(self.q.bias)
        paddle.nn.initializer.Constant(value=0.0)(self.fc.bias)

    def forward(self, x):
        """Compute the prediction for a batch.

        Args:
            x: Tensor whose non-batch dimensions flatten to ``input_dim**2 * 3``
                values per sample.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Shift by 128 and scale by 255 (presumably 0-255 image-like values — TODO confirm).
        x = (x - 128.0) / 255.0
        # Flatten everything except the batch axis.
        x = x.reshape((x.shape[0], -1))
        # (batch, 128) -> (batch, 1, 128)
        x = self.embedding(x).unsqueeze(1)
        # -> (batch, 128, 1): 128 channels over a sequence of length 1.
        x = paddle.transpose(x, [0, 2, 1]) 
        # Multi-channel dilated convolutions, concatenated along the channel axis.
        x1 = self.dilated_cnn1(x)
        x2 = self.dilated_cnn2(x)
        x3 = self.dilated_cnn3(x)
        x_cnn = paddle.concat((x1, x2, x3), axis=1)
        x_cnn = paddle.transpose(x_cnn, [0, 2, 1])  # (batch_size, seq_len, cnn_out_channels * 3)

        # CNN branch: BiLSTM then attention pooling.
        lstm_cnn_out = self.lstm_cnn(x_cnn)
        local_cnn_attn_out = self.local_attention(lstm_cnn_out)

        # Direct branch: the embedding goes straight into its own BiLSTM, pooled by the same attention layer.
        lstm_direct_out = self.lstm_direct(paddle.transpose(x, [0, 2, 1]))
        local_direct_attn_out = self.local_attention(lstm_direct_out)


        # Concatenate both pooled vectors along the feature axis.
        combined_local_attn_out = paddle.concat((local_cnn_attn_out, local_direct_attn_out), axis=1)
        
        # NOTE(review): the attention here runs over an inserted axis of length 1, so the
        # softmax covers a single position; the trailing squeeze(1) then targets the feature
        # axis and looks like a no-op — confirm this is intended.
        global_attn_out = self.global_attention(paddle.unsqueeze(combined_local_attn_out, axis=1)).squeeze(1)
        

        output = self.dropout(global_attn_out)
        output = self.q(output)
        output = self.fc(output)
        return output

In [None]:
# Build the network with the same input size used for training.
model = multichannel(input_dim=160)

# Read the saved checkpoint from disk.
model_state_dict = paddle.load('work/trained_model/multichannel_gradclip_newdataset.pdparams')

# The network weights are stored under the checkpoint's 'net' key.
model.set_state_dict(model_state_dict['net'])

In [None]:
# Load the test features and convert them to a Paddle tensor.
# The test set is split across test_features_1.npy and test_features_2.npy;
# range(1, 2) selects part 1 only. To process part 2, use range(2, 3) instead.
X_test = np.concatenate(
    [np.load(f"work/test_dataset/test_features_{i}.npy") for i in range(1, 2)]
)
# unsqueeze(1) inserts a singleton axis right after the sample axis.
# NOTE(review): the original note gave the result as [100000, 1, 200, 200] — TODO confirm
# against the saved features and the model's input_dim.
X_test = paddle.to_tensor(X_test, dtype=paddle.float32).unsqueeze(1)

# All-zero dummy targets paired with the features (the original note states that
# TensorDataset needs two tensors); they are ignored at prediction time.
placeholder = paddle.to_tensor(
    np.zeros([X_test.shape[0], 1], dtype='float32'),
    dtype=paddle.float32,
)

# Dataset of (features, placeholder) pairs.
test_dataset = TensorDataset([X_test, placeholder])

# Iterate in the original order so predictions line up with the sample indices.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=512)

In [None]:
# Generate predictions for every batch of the test loader.
model.eval()

with paddle.no_grad():
    y_pred = [model(inputs) for inputs, _ in test_loader]

# Stack the per-batch outputs and move them to NumPy.
y_pred = paddle.concat(y_pred, axis=0).numpy()

# Each prediction stands for the mean over one week, so it is replicated 5 times
# along the last axis. A model that predicts a value per day would not need this.
y_pred = np.tile(y_pred, (5, ))
y_pred[:10]

In [None]:
# Persist the predictions of the current part; the parts are merged in a later cell.
# When processing part 2, save to "y_pred2.pdparams" instead.
paddle.save(obj=y_pred, path="y_pred1.pdparams")

In [None]:
# Read both saved prediction parts.
# NOTE: "y_pred2.pdparams" only exists after the loading/prediction/saving cells
# have been re-run for part 2 of the test features.
# The original notes give the shapes as (100000, 5) and (39306, 5) — TODO confirm.
y_pred1, y_pred2 = (
    paddle.load(part_path) for part_path in ("y_pred1.pdparams", "y_pred2.pdparams")
)

# Both parts must have the same number of columns to be stacked row-wise.
n_cols1, n_cols2 = y_pred1.shape[1], y_pred2.shape[1]
assert n_cols1 == n_cols2, "列数不匹配，无法连接！"

# Stack the two parts along the sample axis.
y_pred = paddle.concat([y_pred1, y_pred2], axis=0)

# 加载indices

In [None]:
# Read the per-sample keys of the test set from the JSON file.
with open("work/test_dataset/test_indices.json", encoding="utf-8") as indices_file:
    alls_indices = json.load(indices_file)

# 获取merged_test

In [None]:
# Location of the archive and of the CSV members inside it.
zip_file_path = 'data/data285396/初赛数据集.zip'
train_file_name = '数据集/初赛-训练集.csv'
test_file_name = '数据集/初赛-测试集.csv'

# Read the GBK-encoded test CSV straight out of the zip archive.
with zipfile.ZipFile(zip_file_path) as archive, archive.open(test_file_name) as csv_file:
    test_df = pd.read_csv(csv_file, encoding="gbk")


# Same grouping logic as for the training set: the group id is the running count
# of breaks (difference != 1) between consecutive unique date codes, shifted by
# one position; the missing values at both ends are filled from their neighbours.
# NOTE(review): because of that fill, the first and last date always inherit the
# neighbouring group even if a break separates them — confirm this matches the
# preprocessing that produced the test indices before changing it.
unique_dates = test_df["日期代码"].unique()
gap_counts = pd.Series((np.diff(unique_dates) != 1).cumsum()).shift(1)
grouper = pd.DataFrame([unique_dates, gap_counts]).T.bfill().ffill()
grouper.columns = ['日期代码', '组别']
merged_test = test_df.merge(grouper, on='日期代码', how='left')
grouped_test = merged_test.groupby(['股票', '组别'])

# Date codes belonging to every (stock, group) pair.
date_dict = {
    key: codes.tolist()
    for key, codes in merged_test.groupby(['股票', '组别'])['日期代码']
}

In [None]:
# Expand the per-sample predictions into per-(stock, date) rows.
final_dates_list = []
final_stocks_list = []
final_y_pred = []

# Wrap the list itself (not the enumerate object) so tqdm knows the total
# and can display a real progress bar.
for idx, indices in enumerate(tqdm(alls_indices)):
    indices = tuple(indices)  # JSON lists -> hashable (stock, group) key
    dates = date_dict[indices]
    stock = [indices[0]] * len(dates)
    # Keep the trailing len(dates) predictions of this sample.
    score = y_pred[idx][-len(dates):]
    # A group with more dates than prediction columns would yield fewer scores
    # than dates, silently shifting every later row when the three lists are
    # zipped together. Fail loudly instead.
    if len(score) != len(dates):
        raise ValueError(
            f"Sample {idx} {indices}: {len(dates)} dates but only "
            f"{len(score)} predictions are available"
        )
    final_dates_list.extend(dates)
    final_stocks_list.extend(stock)
    final_y_pred.append(score)

In [None]:

batch_size = 10000  # number of per-sample score arrays merged per chunk

# Concatenate chunk by chunk first, then join the chunks into one flat array.
final_y_pred_combined = np.concatenate([
    np.concatenate(final_y_pred[start:start + batch_size])
    for start in range(0, len(final_y_pred), batch_size)
])

In [None]:
# One row per (stock, date code) with its score; on duplicates the first row wins.
result_df = pd.DataFrame(
    zip(final_stocks_list, final_dates_list, final_y_pred_combined),
    columns=['股票', '日期代码', 'SCORE'],
).drop_duplicates(subset=['股票', '日期代码'], keep='first')

In [None]:
# Left-join onto the test set so every test row is kept, including rows without
# a prediction (their SCORE is NaN), then keep only the three output columns.
result_df = test_df.merge(
    result_df,
    on=['股票', '日期代码'],
    how='left',
)[['股票', '日期代码', 'SCORE']]

In [None]:
# Replace every date code by the code of the next trading date in the test set.
# The unique codes are sorted first: unique() returns them in order of first
# appearance, which is not chronological when the first stock lacks some dates.
date_codes = np.sort(result_df['日期代码'].unique())
# The last date has no successor in the data and is mapped to 40530
# (presumably the code of the following trading day — TODO confirm).
new_mapping = dict(zip(date_codes.tolist(), date_codes[1:].tolist() + [40530]))
# Every value is a key of the mapping, so .map is a complete, linear-time lookup.
result_df['日期代码'] = result_df['日期代码'].map(new_mapping)

In [None]:
# Rename the columns to the names required by the submission format.
result_df.columns = ['STOCK', 'NEXT_TRADE_DATE_CODE', 'SCORE']
# Rows without a prediction get a score of 0. Assign the result back explicitly:
# an in-place fillna on the attribute-accessed column is a chained assignment
# and leaves result_df untouched under pandas Copy-on-Write.
result_df['SCORE'] = result_df['SCORE'].fillna(0)

In [None]:
# Write the submission file, named with the current timestamp.
from datetime import datetime
from pathlib import Path

current_time = datetime.now().strftime("%Y%m%d%H%M")

# Create the output directory first; to_csv does not create missing folders.
result_dir = Path('work/result')
result_dir.mkdir(parents=True, exist_ok=True)

result_df.to_csv(result_dir / f'submission_{current_time}.csv', index=False)