In [None]:
from data_provider.data_loader import DataModule
from exp.exp_model import Model
from utils.exp_logger import Logger
from utils.exp_metrics_plotter import MetricsPlotter
from run_train import get_experiment_name
from utils.utils import set_settings
# Experiment Settings, logger, plotter
from utils.exp_config import get_config
config = get_config('FinancialConfig')
config.multi_dataset = True
set_settings(config)
log_filename, exper_detail = get_experiment_name(config)
plotter = MetricsPlotter(log_filename, config)
log = Logger(log_filename, exper_detail, plotter, config)
datamodule = DataModule(config)
model = Model(config)

In [None]:
import torch
import torch.nn as nn

# 假设输入
bs, seq_len, channels, dim = 16, 48, 33, 64
x_enc = torch.randn(bs, seq_len, channels, dim)

# 定义 attention 层（无 batch_first 参数）
attn_channel = nn.MultiheadAttention(embed_dim=dim, num_heads=8)  # expects (seq_len, batch, dim)
attn_time = nn.MultiheadAttention(embed_dim=dim, num_heads=8)

# ===== 1. 跨通道 attention =====
# 原始 x_enc: (bs, 48, 33, 64)
# 调整为 (33, bs*48, 64)
x_enc_reshaped = x_enc.permute(2, 0, 1, 3).reshape(channels, bs * seq_len, dim)

# 注意力：通道之间的 self-attention
x_channel_attn, _ = attn_channel(x_enc_reshaped, x_enc_reshaped, x_enc_reshaped)  # (33, bs*48, 64)

# 还原为 (bs, 48, 33, 64)
x_channel_attn = x_channel_attn.reshape(channels, bs, seq_len, dim).permute(1, 2, 0, 3)

# ===== 2. 跨时间 attention =====
# 调整为 (48, bs*33, 64)
x_time_input = x_channel_attn.permute(1, 0, 2, 3).reshape(seq_len, bs * channels, dim)

# 注意力：时间步之间的 self-attention
x_time_attn, _ = attn_time(x_time_input, x_time_input, x_time_input)  # (48, bs*33, 64)

# 还原为 (bs, 48, 33, 64)
x_time_attn = x_time_attn.reshape(seq_len, bs, channels, dim).permute(1, 0, 2, 3)

# 最终输出
print(x_time_attn.shape)  # torch.Size([16, 48, 33, 64])


In [None]:
from modules.backbone import Backbone
from run_train import *

from utils.exp_config import get_config
config = get_config()
# datamodule = DataModule(config)
# model = Model(datamodule, config)
model = Backbone(3, config)

In [None]:
bs, seq_len, channels, dim = 1, 48, 33, 3
random_inputs = torch.rand(bs, seq_len, channels, dim)
y = model(random_inputs, None, None)
# [1, 48, 32, 3]

In [None]:
bs, seq_len, channels, dim = 1, 48, 1, 3
random_inputs = torch.rand(bs, seq_len, channels, dim)
y = model(random_inputs, None, None)
# [1, 48, 32, 3]

In [None]:
bs, seq_len, channels, dim = 1, 48, 16, 3
random_inputs = torch.rand(bs, seq_len, channels, dim)
y = model(random_inputs, None, None)
# [1, 48, 32, 3]

In [None]:
import torch

x = torch.arange(2*3*4*10).reshape(2, 3, 4, 10)
patch_len = 4
stride = 2

x_unfolded = x.unfold(dimension=-1, size=patch_len, step=stride)
print(x_unfolded.shape)

In [None]:
import os

# 读取所有文件名
all_files = os.listdir('results/financial/20250701/log')

# 提取每个文件中 _Multi_ 与 .md 之间的数字
existing_ids = set()
for filename in all_files:
    try:
        num = int(filename.split('_Multi_')[1].split('.md')[0])
        existing_ids.add(num)
    except (IndexError, ValueError):
        continue

# 检查 1-130 中缺失的编号
missing_ids = [i for i in range(0, 150) if i not in existing_ids]

print("缺失的编号：", missing_ids)

In [None]:
import pickle 
from collections import Counter
data = pickle.load(open('./datasets/func_code_to_label_150.pkl', 'rb'))
# 提取组号列
group_ids = data[:, 1]

# 统计每个组号的基金数量
counts = Counter(group_ids)

# 打印结果
for group_id, count in sorted(counts.items()):
    print(f"组号 {group_id} 中有 {count} 个基金")

In [1]:
import os 
import pickle
all_code = os.listdir('./datasets/financial/S20200713_E20250628')
all_code_len = []
for code in all_code:
    if code.endswith('.pkl'):
        with open(os.path.join('./datasets/financial/S20200713_E20250628', code), 'rb') as f:
            data = pickle.load(f)
            all_code_len.append(len(data))

FileNotFoundError: [Errno 2] No such file or directory: './datasets/financial/S20200713_E20250628'

In [None]:
import numpy as np

# 假设你已经有 all_code_len
all_code_len = np.array(all_code_len)

print(f"📊 总文件数: {len(all_code_len)}")
print(f"📈 最大长度: {np.max(all_code_len)}")
print(f"📉 最小长度: {np.min(all_code_len)}")
print(f"📏 平均长度: {np.mean(all_code_len):.2f}")
print(f"📐 中位数: {np.median(all_code_len)}")
print(f"🔹 5%分位数: {np.percentile(all_code_len, 5)}")
print(f"🔹 6%分位数: {np.percentile(all_code_len, 6)}")
print(f"🔹 10%分位数: {np.percentile(all_code_len, 10)}")
print(f"🔹 25%分位数: {np.percentile(all_code_len, 25)}")
print(f"🔸 75%分位数: {np.percentile(all_code_len, 75)}")

In [None]:
import pandas as pd
from sqlalchemy import create_engine, text
import pickle
# 数据库配置
with open('./datasets/sql_token.pkl', 'rb') as f:
    DB_URI = pickle.load(f)
engine = create_engine(DB_URI)

def query_fund_data(fund, start_date, end_date):
    """查询数据库中某支基金的净值数据
        SELECT fund_code, date, nav, accnav, adj_nav
    """
    sql = text("""
        SELECT fund_code, date, accnav, adj_nav, nav
        FROM b_fund_nav_details_new
        WHERE fund_code IN :codes
          AND date BETWEEN :start AND :end
        ORDER BY date
    """)
    try:
        df = pd.read_sql_query(
            sql.bindparams(codes=tuple(fund), start=start_date, end=end_date),
            engine
        )
        fund_dict = {code: df_group.reset_index(drop=True)
                     for code, df_group in df.groupby("fund_code")}
        return fund_dict
    except Exception as e:
        print(f"[{fund}] 数据库查询失败: {str(e)}")
        return pd.DataFrame()
df = query_fund_data(['000001', '000003'], '2020-01-01', '2025-01-01')
    

In [54]:
import numpy as np 
with open('./datasets/func_code_to_label_150.pkl', 'rb') as f:
    data = pickle.load(f)
data = data[:, 0]
df = query_fund_data(data, '2020-01-01', '2025-01-01')

In [57]:
min_value, max_value = 1e9, -1e9
for fund_code, value in df.items():
    min_value = min(min_value, value['nav'].min())
    max_value = max(max_value, value['nav'].max())
print(f"最小值: {min_value}, 最大值: {max_value}")

最小值: 0.0871, 最大值: 141.426


In [53]:
df

'970135'

In [None]:
import numpy as np

def constrain_nav_prediction(predictions, bar=0.05, scale=0.9):
    """
    检测单位净值预测中是否存在超过bar的相邻涨跌幅，
    如果是，则整条基金的净值序列按相对首日值重新缩放（温和调整）

    参数：
    - predictions: np.ndarray [7, 64]，表示64支基金7天的预测单位净值
    - bar: float，单位净值日涨跌幅上限（如0.05表示5%）
    - scale: float，检测异常后，使用的趋势缩放系数（如0.9）

    返回：
    - adjusted: np.ndarray [7, 64]，处理后的单位净值预测
    - mask: np.ndarray [64]，表示哪些基金被缩放（True为缩放）
    """
    adjusted = predictions.copy()
    mask = np.zeros(predictions.shape[1], dtype=bool)
    for fund_idx in range(predictions.shape[1]):
        nav_series = predictions[:, fund_idx]
        # 计算相邻涨跌幅
        returns = nav_series[1:] / nav_series[:-1] - 1
        if np.any(np.abs(returns) > bar):
            # 以首日为锚点，重构温和曲线
            # 	•	以首日值为锚点，计算整个序列相对于首日的累计变化幅度；
	        #   •	然后将这些累计变化幅度乘以 scale（比如0.9），形成温和版本；
	        #   •	最后用 base * (1 + 相对变化 * 缩放因子) 得到缩放后的单位净值曲线；
	        #   •	更新 adjusted 和 mask。
            base = nav_series[0]
            relative_change = (nav_series - base) / base
            softened = base * (1 + relative_change * scale)
            adjusted[:, fund_idx] = softened
            mask[fund_idx] = True
    return adjusted, mask

# 模拟单位净值预测（中间人为插入一个异常）
np.random.seed(0)
preds = np.cumprod(1 + np.random.normal(0, 0.01, (7, 1)), axis=0)
preds[:, 0] *= [1, 1, 1.2, 1.5, 1.7, 10.0, 2.5]  # 第6支基金异常暴涨

In [25]:
preds.reshape(-1)

array([ 1.01764052,  1.02171269,  1.23805509,  1.58224823,  1.82670398,
       10.64030593,  2.68534956])

In [None]:
adjusted, flagged = constrain_nav_prediction(preds, bar=1, scale=0.5)
print(f"被缩放的基金编号：{np.where(flagged)[0]}")

被缩放的基金编号：[0]


In [27]:
adjusted.reshape(-1)

array([1.01764052, 1.0196766 , 1.12784781, 1.29994438, 1.42217225,
       5.82897323, 1.85149504])

In [1]:
240 * 4

960

In [None]:
import os
start_date: str = '2020-07-13' 
end_date: str = '2025-06-28'
dir_name = 'S' + (start_date + '_E' + end_date).replace('-', '')
all_address = os.listdir(f'./datasets/financial/{dir_name}')
all_code_list = [item.split('.')[0] for item in all_address]
len(all_code_list)

11674

In [7]:
import pickle 
with open('./datasets/all_code_list.pkl', 'wb') as f:
    pickle.dump(all_code_list, f)

In [2]:
240 * 4

960

In [None]:
from data_provider.get_nav_features import get_df_date_as_index

df = get_df_date_as_index("000010", "2025-03-01", "2025-4-14")
df.columns

Index(['fund_code', 'adj_nav', 'bid_close', 'ask_close', 'gdp', 'gdp_yoy',
       'pi_yoy', 'si_yoy', 'ti_yoy', '1w', '2w', '1m', '3m', '6m', '9m', '1y',
       'cumulative', 'annual_volatility', 'stability', 'monthwin',
       'winning_day', 'maxDrawdown'],
      dtype='object')

In [20]:
import numpy as np 
import pickle 
import pandas as pd
from sqlalchemy import create_engine, text

# 数据库配置
with open('./datasets/sql_token.pkl', 'rb') as f:
    DB_URI = pickle.load(f)
engine = create_engine(DB_URI)

create_date = '2023-7-13'
date_list = create_date.split('-')
base_date = str(int(date_list[0]) - 1) + '-' + date_list[1] + '-' + date_list[2]  # 筛选成立1年以上的基金
first_date = str(int(date_list[0]) - 2) + '-' + date_list[1] + '-' + date_list[2]  # 筛选成立1-2年的基金
second_date = str(int(date_list[0]) - 3) + '-' + date_list[1] + '-' + date_list[2]  # 筛选成立2-3年的基金
# print(base_date, first_date, second_date)
sql = f"""
    SELECT fund_code, fund_name, market, survival_status, tu_fund_type, establish_time, tu_invest_type
    FROM b_fund_list
    WHERE establish_time < '{base_date}' AND fund_code IN (
        SELECT fund_code FROM b_fund_nav
            WHERE date = (
                SELECT MAX(date) FROM b_fund_nav
            )
            AND sub_status NOT LIKE '%%暂停申购%%' 
            AND red_status NOT LIKE '%%封闭期%%'
        )
    """
sql = f"""
    SELECT fund_code, fund_name, market, survival_status, tu_fund_type, establish_time, tu_invest_type
    FROM b_fund_list
    WHERE establish_time < '{base_date}'
    AND fund_code IN (
        SELECT fund_code FROM b_fund_nav
        WHERE date = (SELECT MAX(date) FROM b_fund_nav)
        AND sub_status NOT LIKE '%%暂停申购%%'
        AND red_status NOT LIKE '%%封闭期%%'
    )
    """
df = pd.read_sql_query(sql, engine)
print(df.shape)  # 打印df的形状，确认是否有数据
df

(323, 7)


Unnamed: 0,fund_code,fund_name,market,survival_status,tu_fund_type,establish_time,tu_invest_type
0,159901,易方达深证100ETF,E,L,股票型,2006-03-24,被动指数型
1,159915,易方达创业板ETF,E,L,股票型,2011-09-20,被动指数型
2,159925,南方沪深300ETF,E,L,股票型,2013-02-18,被动指数型
3,159930,汇添富中证能源ETF,E,L,股票型,2013-08-23,被动指数型
4,159931,汇添富中证金融地产ETF,E,L,股票型,2013-08-23,被动指数型
...,...,...,...,...,...,...,...
318,588360,科创创业ETF,E,L,股票型,2021-06-29,被动指数型
319,159783,双创基金ETF,E,L,股票型,2021-06-24,被动指数型
320,159781,双创50ETF,E,L,股票型,2021-06-28,被动指数型
321,159780,双创ETF,E,L,股票型,2021-06-24,被动指数型


In [None]:

# 剔除定开、货币型基金
df = df[df['survival_status'] != 'D']
df = df[~df['fund_name'].str.contains('定开')]
df = df[~(df['tu_fund_type'] == '货币市场型')]
df = df.drop(['fund_name'], axis=1)
df = df.drop(['survival_status'], axis=1)

# 按成立时间划分，3类：1-2年，2-3年，3年以上
df['establish_time'] = pd.to_datetime(df['establish_time'], format='%Y-%m-%d')
df.loc[df['establish_time'] > first_date, 'establish_type'] = 1
df.loc[(df['establish_type'] != 1) & (df['establish_time'] > second_date), 'establish_type'] = 2
df.loc[(df['establish_type'] != 1) & (df['establish_type'] != 2), 'establish_type'] = 3
df = df.drop(['establish_time'], axis=1)

# 划分基金类型，五类：stock/bond/index(O or E)/other/mix
df.loc[(df['tu_invest_type'] == '被动指数型') & (df['market'] == 'O'), 'tu_fund_type'] = 'index_O'
df.loc[(df['tu_invest_type'] == '被动指数型') & (df['market'] == 'E'), 'tu_fund_type'] = 'index_E'
df.loc[df['tu_fund_type'] == '股票型', 'tu_fund_type'] = 'stock'
df.loc[df['tu_fund_type'] == '债券型', 'tu_fund_type'] = 'bond'
df.loc[df['tu_fund_type'] == '混合型', 'tu_fund_type'] = 'mix'
df.loc[(df['tu_fund_type'] != 'stock') & (df['tu_fund_type'] != 'bond') &
        (df['tu_fund_type'] != 'mix') & (df['tu_fund_type'] != 'index_E') &
        (df['tu_fund_type'] != 'index_O'), 'tu_fund_type'] = 'other'

df = df.drop(['tu_invest_type'], axis=1)
df = df.drop(['market'], axis=1)
code_list = df['fund_code']

# 2025年07月04日20:10:32，暂时用这里
with open('./datasets/all_code_list.pkl', 'rb') as f:
    code_list = pickle.load(f)

In [None]:
date_list = create_date.split('-')
base_date = str(int(date_list[0])-1)+'-'+date_list[1]+'-'+date_list[2]  # 筛选成立1年以上的基金
first_date = str(int(date_list[0])-2)+'-'+date_list[1]+'-'+date_list[2] # 筛选成立1-2年的基金
second_date = str(int(date_list[0])-3)+'-'+date_list[1]+'-'+date_list[2] # 筛选成立2-3年的基金
print("base date ",base_date)
# sql = f"select fund_code, fund_name, survival_status,tu_fund_type from b_fund_list WHERE establish_time < '{base_date}'"
# 获取以今天为界的成立一年以上的仍在每天更新数据的基金
sql = f"select fund_code, fund_name, market, survival_status,tu_fund_type,establish_time,tu_invest_type from b_fund_list WHERE establish_time < '{base_date}' )"
# print(sql)
df = pd.read_sql_query(sql, engine)
print(df.shape)
# 剔除定开、货币型基金
df=df[df['survival_status']!='D']
df=df[~df['fund_name'].str.contains('定开')]
df=df[~(df['tu_fund_type'] == '货币市场型')]
df = df.drop(['fund_name'], axis=1)
df = df.drop(['survival_status'], axis=1)

# 按成立时间划分，3类：1-2年，2-3年，3年以上
df['establish_time'] = pd.to_datetime(df['establish_time'], format='%Y-%m-%d')
df.loc[df['establish_time'] > first_date, 'establish_type'] = 1
df.loc[(df['establish_type'] != 1) & (df['establish_time'] > second_date), 'establish_type'] = 2
df.loc[(df['establish_type'] != 1) & (df['establish_type'] != 2), 'establish_type'] = 3
df = df.drop(['establish_time'], axis=1)

# 划分基金类型，五类：stock/bond/index(O or E)/other/mix
df.loc[(df['tu_invest_type'] == '被动指数型') & (df['market'] == 'O'), 'tu_fund_type'] = 'index_O'
df.loc[(df['tu_invest_type'] == '被动指数型') & (df['market'] == 'E'), 'tu_fund_type'] = 'index_E'
df.loc[df['tu_fund_type'] == '股票型', 'tu_fund_type'] = 'stock'
df.loc[df['tu_fund_type'] == '债券型', 'tu_fund_type'] = 'bond'
df.loc[df['tu_fund_type'] == '混合型', 'tu_fund_type'] = 'mix'
df.loc[(df['tu_fund_type'] != 'stock') & (df['tu_fund_type'] != 'bond') & (df['tu_fund_type'] != 'mix') & (df['tu_fund_type'] != 'index_E') & (df['tu_fund_type'] != 'index_O'), 'tu_fund_type'] = 'other'
df = df.drop(['tu_invest_type'], axis=1)
df = df.drop(['market'], axis=1)

base date  2022-7-13


ProgrammingError: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')' at line 1")
[SQL: select fund_code, fund_name, market, survival_status,tu_fund_type,establish_time,tu_invest_type from b_fund_list WHERE establish_time < '2022-7-13')]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [55]:
import numpy as np

def apply_delta_with_hist_constraints_np(hist, pred):
    """
    hist: shape [seq_len, n]
    pred: shape [T_pred, n]
    """
    pred_clipped = pred.copy().T  # shape: [n, T_pred]
    hist = hist.copy().T          # shape: [n, seq_len]
    N, T_pred = pred_clipped.shape

    # Step 1: 计算历史最大涨跌
    hist_diff = hist[:, 1:] - hist[:, :-1]
    max_gain = np.max(hist_diff, axis=1)  # [n]
    max_drop = np.min(hist_diff, axis=1)  # [n]

    print("基金历史最大涨跌幅：")
    for i in range(N):
        print(f"基金{i}: max_gain = {max_gain[i]:.4f}, max_drop = {max_drop[i]:.4f}")

    # Step 2: 应用递推约束
    for i in range(N):
        for t in range(1, T_pred):
            prev = pred_clipped[i, t - 1]
            curr = pred_clipped[i, t]
            delta = curr - prev

            if delta > max_gain[i]:
                low = prev
                high = prev + max_gain[i]
                if high < low:
                    low, high = high, low
                new_val = np.random.uniform(low, high)
                print(f"[基金{i}] 第{t}步: 涨幅超限 (Δ={delta:.4f} > {max_gain[i]:.4f})，原值={curr:.4f} → 新值={new_val:.4f}")
                pred_clipped[i, t] = new_val

            elif delta < max_drop[i]:
                low = prev + max_drop[i]  # 注意是 prev + max_drop（max_drop 是负数）
                high = prev
                if high < low:
                    low, high = high, low
                new_val = np.random.uniform(low, high)
                print(f"[基金{i}] 第{t}步: 跌幅超限 (Δ={delta:.4f} < {max_drop[i]:.4f})，原值={curr:.4f} → 新值={new_val:.4f}")
                pred_clipped[i, t] = new_val

    return pred_clipped.T  # shape: [T_pred, n]

# 示例数据
np.random.seed(42)
hist = np.abs(np.random.randn(7, 3))   # shape: [7, 3]
pred = np.abs(np.random.randn(9, 3))   # shape: [9, 3]

clipped = apply_delta_with_hist_constraints_np(hist, pred)

print("\n原始预测序列：\n", pred)
print("\n约束后预测序列：\n", clipped)

基金历史最大涨跌幅：
基金0: max_gain = 1.0263, max_drop = -1.0367
基金1: max_gain = 1.4499, max_drop = -0.9004
基金2: max_gain = 1.2592, max_drop = -1.4107
[基金1] 第4步: 跌幅超限 (Δ=-1.0297 < -0.9004)，原值=0.8225 → 新值=1.2048
[基金1] 第6步: 跌幅超限 (Δ=-1.2212 < -0.9004)，原值=0.7385 → 新值=1.5479
[基金1] 第7步: 跌幅超限 (Δ=-1.2468 < -0.9004)，原值=0.3011 → 新值=0.7743
[基金2] 第7步: 涨幅超限 (Δ=1.3072 > 1.2592)，原值=1.4785 → 新值=1.1815

原始预测序列：
 [[0.2257763  0.0675282  1.42474819]
 [0.54438272 0.11092259 1.15099358]
 [0.37569802 0.60063869 0.29169375]
 [0.60170661 1.85227818 0.01349722]
 [1.05771093 0.82254491 1.22084365]
 [0.2088636  1.95967012 1.32818605]
 [0.19686124 0.73846658 0.17136828]
 [0.11564828 0.3011037  1.47852199]
 [0.71984421 0.46063877 1.05712223]]

约束后预测序列：
 [[0.2257763  0.0675282  1.42474819]
 [0.54438272 0.11092259 1.15099358]
 [0.37569802 0.60063869 0.29169375]
 [0.60170661 1.85227818 0.01349722]
 [1.05771093 1.20479629 1.22084365]
 [0.2088636  1.95967012 1.32818605]
 [0.19686124 1.54789121 0.17136828]
 [0.11564828 0.77433718 

In [45]:
torch.empty(1).uniform_(4, 9).item()


8.208395957946777

In [20]:
import pickle 
with open('datasets/sql_token.pkl', 'rb') as f:
    df = pickle.load(f)
df

'mysql+pymysql://root:qilai123@123.57.74.222:3306/fund'

In [6]:
import pickle 
with open('datasets/func_code_to_label_160_balanced.pkl', 'rb') as f:
    df = pickle.load(f)

dic = {}
for i in range(len(df)):
    if df[i][-1] not in dic:
        dic[df[i][-1]] = 1
    else:
        dic[df[i][-1]] += 1
dic

{'0': 54,
 '1': 100,
 '2': 99,
 '3': 78,
 '4': 100,
 '5': 100,
 '6': 100,
 '7': 100,
 '8': 100,
 '9': 100,
 '10': 71,
 '11': 100,
 '12': 68,
 '13': 100,
 '14': 80,
 '15': 100,
 '16': 65,
 '17': 92,
 '18': 59,
 '19': 90,
 '20': 68,
 '21': 100,
 '22': 100,
 '23': 100,
 '24': 100,
 '25': 92,
 '26': 70,
 '27': 91,
 '28': 83,
 '29': 100,
 '30': 66,
 '31': 93,
 '32': 92,
 '33': 100,
 '34': 59,
 '35': 100,
 '36': 65,
 '37': 100,
 '38': 89,
 '39': 66,
 '40': 100,
 '41': 84,
 '42': 62,
 '43': 99,
 '44': 76,
 '45': 100,
 '46': 100,
 '47': 100,
 '48': 100,
 '49': 60,
 '50': 84,
 '51': 65,
 '52': 100,
 '53': 61,
 '54': 56,
 '55': 100,
 '56': 100,
 '57': 100,
 '58': 86,
 '59': 57,
 '60': 90,
 '61': 100,
 '62': 67,
 '63': 100,
 '64': 58,
 '65': 81,
 '66': 87,
 '67': 67,
 '68': 100,
 '69': 100,
 '70': 54,
 '71': 70,
 '72': 70,
 '73': 87,
 '74': 80,
 '75': 76,
 '76': 67,
 '77': 100,
 '78': 81,
 '79': 71,
 '80': 100,
 '81': 58,
 '82': 82,
 '83': 100,
 '84': 100,
 '85': 64,
 '86': 51,
 '87': 66,
 '88': 

In [None]:
from datetime import datetime, timedelta

from run_service import get_history_data
current_date = datetime.now().strftime('%Y-%m-%d')
DB_URI = 'mysql+pymysql://root:qilai123@123.57.74.222:3306/fund'

In [None]:
with open(f'./datasets/func_code_to_label_160_balanced.pkl', 'rb') as f:
    data = pickle.load(f)
all_func_code = []
for i in range(len(data)):
    if int(data[i][1]) == 1:
        all_func_code.append(data[i][0])
        
def get_history_data(get_group_idx, current_date, config):
    all_history_input = []
    start_date = get_start_date(current_date, window_size=2000)
    fund_dict = query_fund_data(get_group_idx, start_date, current_date)
    min_len = 1e9
    for key, value in fund_dict.items():
        min_len = min(len(value), min_len)

    for key, value in fund_dict.items():
        df = process_date_columns(value)
        df = df[-min_len:, :]
        all_history_input.append(df)
    data = all_history_input
    return data
history_input = get_history_data(all_func_code, current_date, config)

NameError: name 'get_history_data' is not defined