In [None]:
from wequant.wefetch import *
from wequant.wesu import *
from wequant.mongo import get_db

import os
import pandas as pd
import numpy as np



In [None]:
# Connect to the database and confirm the collections this notebook reads exist.
db = get_db()

# Fetch the collection names once and reuse them; asking the database again for
# every check repeats the same listing (assuming `db` is a PyMongo Database,
# each call is a separate server round trip).
collection_names = db.list_collection_names()

print('db:', db.name)
print('collections:', len(collection_names))

# Fail fast, naming the missing collection, before any later cell queries it.
for required in ('stock_list', 'dk_data'):
    assert required in collection_names, f'missing collection: {required}'



In [None]:
# Load the stock list; a later cell takes its codes from this object's index.
stock_list = fetch_stock_list()

# Stop here if the fetch returned nothing or an empty result.
assert not (stock_list is None or len(stock_list) == 0)

stock_list.head()



In [None]:
# Load the HK stock list; 'tdx' is presumably the data-source selector -- TODO confirm.
hkcode_list = fetch_get_hkstock_list('tdx')

# Stop here if the fetch returned nothing or an empty result.
assert not (hkcode_list is None or len(hkcode_list) == 0)

hkcode_list.head()



In [None]:
# DK fetch smoke test: take one code that exists in dk_data, find the date span
# stored for it, then fetch exactly that span through fetch_dk_data.
dk_coll = db['dk_data']

# Any document will do; only its 'code' field is needed.
sample = dk_coll.find_one({}, {'code': 1})
assert sample and 'code' in sample
sample_code = str(sample['code'])

# Earliest (ascending sort) and latest (descending sort) document for that code.
min_doc, max_doc = (
    dk_coll.find_one(
        {'code': sample_code},
        sort=[('datetime', direction)],
        projection={'datetime': 1},
    )
    for direction in (1, -1)
)
assert min_doc and max_doc

# Normalise both boundaries to 'YYYY-MM-DD' strings for the fetch call.
start, end = (
    pd.to_datetime(doc['datetime']).strftime('%Y-%m-%d')
    for doc in (min_doc, max_doc)
)

# Codes are passed as a list, even for a single code (Mongo $in requires a list).
dk_df = fetch_dk_data([sample_code], start, end, format='pd')
assert not (dk_df is None or len(dk_df) == 0)

dk_df.tail()



In [None]:
# Fetch stock adjustment factors for a subset: the first 200 entries of
# stock_list's index (presumably stock codes -- TODO confirm).
codes = list(stock_list.index[:200])

# Fixed date window passed to the fetch as (start, end).
adj_window = ('2023-01-01', '2025-01-01')
adj_df = fetch_stock_adj(codes, *adj_window, format='pd')

# Stop here if the fetch returned nothing or an empty result.
assert not (adj_df is None or len(adj_df) == 0)

adj_df.head()



In [None]:
# Optional: ingest DK excel files and save into Mongo via WESU (offline-safe).
# Set WEQUANT_DK_ROOT to the folder containing DK xlsx files.
root_path = os.getenv('WEQUANT_DK_ROOT')
if not root_path or not os.path.isdir(root_path):
    print('skip: set WEQUANT_DK_ROOT to ingest DK xlsx files')
else:
    file_names = [n for n in os.listdir(root_path) if n.lower().endswith(('.xlsx', '.xls'))]
    assert len(file_names) > 0, f'no excel files in {root_path}'

    file_name = sorted(file_names)[-1]
    xlsx_path = os.path.join(root_path, file_name)
    sheets = pd.read_excel(xlsx_path, sheet_name=None)

    rows = []
    for sheet_name, df in sheets.items():
        if df is None or df.empty:
            continue
        # Expect minimal columns: code + R_value/L_value. Adapt as needed.
        df = df.copy()
        df.columns = [str(c).strip() for c in df.columns]
        if 'code' not in df.columns:
            continue
        if 'datetime' not in df.columns:
            df['datetime'] = file_name.split('.')[0][-8:]
        rows.append(df)

    dk_new = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()
    assert not dk_new.empty, 'no usable DK rows from excel'

    n1 = save_dk_data(dk_new)
    n2 = save_dk_data(dk_new)
    print('saved dk_data (idempotent):', n1, 'then', n2)

