In [1]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False 

In [2]:
# Env module 
import sys
sys.path.append('../')


from env import os, glob, pdl, pd, msno, trange, tqdm, sleep, timeit, timedelta
from IPython.display import clear_output

from utils.datetimes import start_date, end_date, yesterday_date, today_date, \
    week_ago_date, month_ago_date, biquater_ago_date, bimonth_ago_date, quater_ago_date, \
    biquater_ago_date, triquater_ago_date, trade_day_util as tdu

from utils.calculators import *
from utils.psql_client import load_table, insert_df, load_stock_prices
from utils.stock_utils import *
from utils.datasource import *
from data_center import DataCenter
from utils.stock_filter import StockFilter
print(f'Today is {today_date}, Working from {start_date} to {end_date}')

from utils.strategy import *

from models import *

# Notebook-wide pandas display settings.
# Show every column of a frame instead of eliding the middle ones.
pd.set_option('display.max_columns', None)
# Cap the number of rows rendered for a frame at 100.
pd.set_option('display.max_rows', 100)
# Keep column headers aligned with their data when cells contain East Asian
# (wide / ambiguous-width) characters. The original note ended with "(ak?)",
# i.e. the author was unsure whether this is needed for `ak` output.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

[2022-03-04T23:15:33.102584+08:00] Enviroment loaded. Working Dir: /Users/tzhu/work/lab/neo_world/notebooks
Today is 2022-03-04, Working from 2021-02-10 to 2022-03-04


In [3]:
# BT module 

import backtrader as bt
import backtrader.indicators as btind
import backtrader.feeds as btfeeds

In [4]:
%matplotlib inline

from utils.plot_plotly import *
from utils.plot_mpl import *
from utils.plot_bokeh import *

In [5]:
def init_data(start_date, end_date, expire_days=30):
    """Create a DataCenter for the date range and obtain its merged price frame.

    Feather files under ``../tmp`` named ``price_{start_date}_{end_date}_*``
    act as a cache. When at least one exists, the first path returned by
    ``glob`` is read and re-indexed by ``(ts_code, trade_date)``. Otherwise
    the frame comes from ``DataCenter.merge_all()`` and is written to a new
    cache file whose name ends with a date ``expire_days`` days from today.

    Args:
        start_date: Range start; interpolated as-is into the cache file name.
        end_date: Range end; interpolated as-is into the cache file name.
        expire_days: Days from today used to stamp a newly written cache file.

    Returns:
        Tuple ``(dc, df_init)``: the DataCenter and the price DataFrame.
    """
    print(f'Initializing data from {start_date} to {end_date}...')
    dc = DataCenter(start_date, end_date)

    cached_paths = glob.glob(f'../tmp/price_{start_date}_{end_date}_*.feather')
    if cached_paths:
        # Only the first match is used; the expiry stamp in the file name is
        # not inspected here.
        cache_path = cached_paths[0]
        print(f'Found cache file: {cache_path}, loading...')
        cached_frame = pd.read_feather(cache_path)
        return dc, cached_frame.set_index(['ts_code', 'trade_date'])

    # Cache miss: build the frame and persist it for later runs.
    df_init = dc.merge_all()
    expire_date = pdl.today().add(days=expire_days).to_date_string()
    df_file_path = f'../tmp/price_{start_date}_{end_date}_{expire_date}.feather'
    # reset_index() turns the index into ordinary columns before writing.
    df_init.reset_index().to_feather(df_file_path)
    return dc, df_init

In [6]:
def clean_cache_files():
    """Delete ``../tmp/price_*`` cache files that are expired or oddly named.

    The part of each file name before its first ``.`` is split on ``_``. A
    file is kept only when that yields exactly four pieces and the fourth,
    parsed as a date and set to the ``Asia/Shanghai`` timezone, is not
    earlier than ``pdl.today()``. Every other match is removed; an
    ``OSError`` during removal is printed and does not stop the loop.
    """
    for path in glob.glob('../tmp/price_*'):
        file_name = path.split('/')[-1]
        name_parts = file_name.split('.')[0].split('_')

        # `and` short-circuits, so the date is parsed only for 4-part names.
        still_valid = (
            len(name_parts) == 4
            and pdl.parse(name_parts[3]).set(tz='Asia/Shanghai') >= pdl.today()
        )
        if still_valid:
            print(f'Preserving recent file {file_name}')
            continue

        print(f'Deleting old file {file_name}')
        try:
            os.remove(path)
        except OSError as e:
            print("Error: %s : %s" % (path, e.strerror))

In [7]:
def cache_data(df, name='custom', expire_days=30):
    """Write ``df`` to a feather cache file under ``../tmp`` and return its path.

    The file is named ``{name}_{start}_{end}_{expiry}.feather``. ``start`` and
    ``end`` are the second index component of the first and last rows,
    formatted as ``YYYY-MM-DD``; ``expiry`` is today plus ``expire_days``.

    Args:
        df: Frame whose index entries are tuples with a date-like object
            (supporting ``strftime``) in position 1.
        name: File name prefix.
        expire_days: Days from today used for the expiry stamp.

    Returns:
        The path of the written file.
    """
    expiry_stamp = pdl.today().add(days=expire_days).to_date_string()
    # NOTE(review): the first/last rows are used rather than the min/max of
    # the date level, so this presumably relies on the index ordering.
    first_row_date = df.index[0][1]
    last_row_date = df.index[-1][1]
    range_start = first_row_date.strftime('%Y-%m-%d')
    range_end = last_row_date.strftime('%Y-%m-%d')
    df_file_path = f'../tmp/{name}_{range_start}_{range_end}_{expiry_stamp}.feather'
    df.reset_index().to_feather(df_file_path)
    return df_file_path

    
def read_cache(name='custom', start_date='*', end_date='*'):
    """Load a cached frame from ``../tmp`` and index it by (ts_code, trade_date).

    Files are looked up with the glob pattern
    ``../tmp/{name}_{start_date}_{end_date}_*.feather``; the default ``'*'``
    for the dates therefore matches any date range. The first path returned
    by ``glob`` is loaded.

    Args:
        name: File name prefix used when the cache was written.
        start_date: Start-date part of the file name, or ``'*'`` for any.
        end_date: End-date part of the file name, or ``'*'`` for any.

    Returns:
        The cached DataFrame indexed by ``['ts_code', 'trade_date']``.

    Raises:
        FileNotFoundError: If no cache file matches the pattern. (Previously
            this case failed with an UnboundLocalError on ``return df``.)
    """
    pattern = f'../tmp/{name}_{start_date}_{end_date}_*.feather'
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f'No cache file matches pattern: {pattern}')

    cache_path = matches[0]
    print(f'Found cache file: {cache_path}, loading...')
    return pd.read_feather(cache_path).set_index(['ts_code', 'trade_date'])

In [8]:
def stock_summary(df, ts_code, end_date):
    """Print a one-line summary for one stock on ``end_date``.

    Args:
        df: Frame looked up with ``df.loc[ts_code, end_date]``; the selected
            row must provide ``name``, ``open_pct``, ``pct_chg``,
            ``vol_ratio``, ``amount`` and ``turnover_rate_f``.
        ts_code: A code of length 6 or 9, used as given. Any other length is
            passed through ``get_ts_code_from_name`` first.
        end_date: Date key of the row to summarise.

    Returns:
        None. The summary is printed.
    """
    if len(ts_code) not in (6, 9):
        ts_code = get_ts_code_from_name(ts_code)

    row = df.loc[ts_code, end_date]

    # The first row of the tick feed supplies the amount printed as the
    # auction turnover. `ak` is not imported explicitly in the visible cells;
    # presumably it arrives through one of the star imports.
    tick_df = ak.stock_zh_a_tick_tx_js(code=add_postfix(type='ak', ts_code=ts_code))
    auc_amount = tick_df.iloc[0]['成交金额']

    print(f'[{row["name"]}] 竞价成交{round(auc_amount/10000, 2)}万，开盘{round(row.open_pct, 2)}%，收盘{round(row.pct_chg,2)}%，量比{round(row.vol_ratio,2)}，成交额：{round(row.amount/100000, 2)}亿，实际换手率{round(row.turnover_rate_f,2)}%')

In [9]:
from functools import reduce

def str_join(series):
    """Fold the items of ``series`` into one comma-separated string.

    Items are combined left to right with ``reduce`` and no initial value, so
    a single-item input is returned unchanged (not converted to ``str``) and
    an empty input raises ``TypeError``.
    """
    def _append(joined, item):
        # Formats both sides, so non-string items are stringified here.
        return f'{joined},{item}'

    return reduce(_append, series)

    
def merge_plate_names(lst):
    """Collapse a comma-separated list of names into a ``+``-joined string.

    Each piece is stripped of surrounding whitespace and duplicates are
    dropped. Order follows first appearance in the input; the earlier
    ``set``-based version produced an order that could change between
    interpreter runs.

    Args:
        lst: Comma-separated names as a single string. Any non-string value
            (e.g. ``None`` or ``NaN``) is treated as missing.

    Returns:
        The unique names joined by ``'+'``, or ``'NA'`` for non-string input.
    """
    if not isinstance(lst, str):
        return 'NA'
    # dict.fromkeys de-duplicates while keeping insertion order.
    unique_names = dict.fromkeys(part.strip() for part in lst.split(','))
    return '+'.join(unique_names)
    
def stock_summaries(df, ts_codes, end_date, cols=None, get_auc=False, top_cons=None):
    """Print a per-stock summary for ``end_date`` and return the selected columns.

    Args:
        df: Frame with a ``trade_date`` index level; the ``end_date``
            cross-section is expected to be indexed by stock code.
        ts_codes: Iterable of codes or names. Entries of length 6 or 9 are
            used as given; others go through ``get_ts_code_from_name``.
        end_date: Trade date to cross-section on.
        cols: Columns to return. Defaults to a fixed limit-up oriented set.
        get_auc: When true, fetch tick data for every code, store the
            ``成交金额`` of its first row as ``auc_amt`` and print an
            auction-oriented line; ``auc_amt``, ``open_pct`` and ``pct_chg``
            are then appended to the returned columns. When false, print a
            numbered line per code that reads ``plate_name`` and ``up_type``
            among other columns.
        top_cons: Optional frame joined onto the day's rows (presumably the
            provider of ``plate_name`` — confirm against the caller).

    Returns:
        DataFrame restricted to the requested stocks and ``cols``.
    """
    ts_codes = [
        code if len(code) in (6, 9) else get_ts_code_from_name(code)
        for code in ts_codes
    ]
    today_df = df.xs(end_date, level='trade_date', drop_level=True)
    # Explicit copy: the auction branch assigns into this frame, and writing
    # to a boolean-filtered slice triggers pandas' SettingWithCopyWarning.
    target_df = today_df[today_df.index.isin(ts_codes)].copy()
    if top_cons is not None:
        target_df = target_df.join(top_cons)
    if cols is None:
        cols = ['name', 'close', 'pre5_pct_chg', 'pre20_pct_chg', 'circ_mv', 'total_mv', 'vol', 'turnover_rate_f', 'amount', 'conseq_up_num', 'strth', 'first_time', 'last_time', 'fd_amount']

    if get_auc:
        limit_tags = {'U': '【涨停】', 'D': '【跌停】'}
        for ts_code in tqdm(ts_codes):
            ak_code = add_postfix(type='ak', ts_code=ts_code)
            tick_df = ak.stock_zh_a_tick_tx_js(code=ak_code)
            target_df.loc[ts_code, 'auc_amt'] = tick_df.iloc[0]['成交金额']
            open_pct = target_df.loc[ts_code, 'open_pct']
            auc_amt = target_df.loc[ts_code, 'auc_amt']
            pct_chg = target_df.loc[ts_code, 'pct_chg']
            # Ratio of the auction amount to the `pre_amount` column; the
            # /1000 presumably reconciles the two columns' units — confirm.
            auc_v_pre_vol_ratio = auc_amt / target_df.loc[ts_code, 'pre_amount'] / 1000
            limit_tag = limit_tags.get(target_df.loc[ts_code, 'limit'], '')
            print(f'[{target_df.loc[ts_code, "name"]}] 开{round(open_pct,2)}%，竞价成交{round(auc_amt/100000000, 2)}亿 ({round(auc_v_pre_vol_ratio, 2)})，收{round(pct_chg,2)}%。{limit_tag}')
        # Append the auction columns without duplicating labels the caller
        # already asked for (and without mutating the caller's list).
        cols = list(dict.fromkeys([*cols, 'auc_amt', 'open_pct', 'pct_chg']))
    else:
        for i, ts_code in enumerate(ts_codes):
            row = target_df.loc[ts_code]
            print(f'{i+1}. {row["name"]} ({merge_plate_names(row.plate_name)}, {row.conseq_up_num}板 ({row.up_type})，流值{round(row.circ_mv/10000, 2)}亿，量比{round(row.vol_ratio,2)}，{round(row.amount/100000, 2)}亿，trf{round(row.turnover_rate_f,0)}%)：')
    return target_df[cols]

In [None]:
def calc_top_cons(cons_today, cons):
    """Build, per stock, a comma-joined string of its top three plates.

    Stock/plate pairs from ``cons_today`` are merged with plate statistics
    from ``cons`` on ``plate_name``, sorted by ``pct_chg`` descending, cut to
    the first three rows per ``name``, and the remaining ``plate_name``
    values are joined per ``ts_code`` with ``str_join``.

    Args:
        cons_today: Frame that exposes ``ts_code``, ``name`` and
            ``plate_name`` after ``reset_index()``.
        cons: Frame providing ``upstop_num``, ``pct_chg`` and ``p5_pct_chg``.
            Only these columns are selected before the merge, so
            ``plate_name`` is presumably an index level of ``cons`` — confirm.

    Returns:
        DataFrame indexed by ``ts_code`` with a single ``plate_name`` column.
    """
    stock_plates = cons_today.reset_index()[['ts_code', 'name', 'plate_name']]
    plate_stats = cons[['upstop_num', 'pct_chg', 'p5_pct_chg']]

    ranked = stock_plates.merge(plate_stats, on=['plate_name'])
    ranked = ranked.sort_values(['pct_chg'], ascending=False)

    # head(3) after the sort keeps the three highest-pct_chg rows per name.
    top_three = ranked.groupby('name').head(3)
    top_cons = top_three.groupby('ts_code').agg({'plate_name': str_join})
    return top_cons


In [None]:
def _north_net_flow(indicator, end_date):
    """Return the northbound net-inflow ``value`` for ``indicator`` on ``end_date``, divided by 10000 and rounded to 2 places."""
    flow = (
        ak.stock_em_hsgt_north_net_flow_in(indicator=indicator)
        .rename(columns={'date': 'trade_date'})
        .set_index('trade_date')
    )
    return round(flow.loc[end_date].value / 10000, 2)


def market_summary(df, end_date):
    """Print a two-line market overview for ``end_date``.

    The overview combines total turnover and the median percentage change of
    the filtered universe, northbound net inflows (total, Shanghai and
    Shenzhen connect), the ``activities`` table row for the day, limit-up
    statistics and the latest row of ``upstop_trend``.

    Args:
        df: Frame with a ``trade_date`` index level.
        end_date: Trade date to summarise.

    Returns:
        None. The summary is printed.
    """
    today_df = df.xs(end_date, level='trade_date', drop_level=True)
    today_hs = StockFilter(end_date=end_date).hs().filter(today_df)

    total_amt = round(today_hs.amount.sum()/100000, 2)

    bs_amount = _north_net_flow("北上", end_date)
    hgt_amount = _north_net_flow("沪股通", end_date)
    sgt_amount = _north_net_flow("深股通", end_date)

    # The figure is printed as the median change (中位涨幅); it was
    # previously computed with .mean(), contradicting both name and label.
    median_pct_chg = round(today_hs.pct_chg.median(), 2)
    emo = read_pg(table='activities')
    emo['trade_date'] = emo.trade_date.apply(lambda x: pdl.parse(x))
    emo.set_index('trade_date', inplace=True)
    emo = emo.loc[end_date]

    # Limit-up statistics, computed after additionally applying not_st().
    today_hs = StockFilter(end_date=end_date).hs().not_st().filter(today_df)
    today_uped = today_hs[today_hs.high == today_hs.upstop_price]
    today_up = today_hs[today_hs.limit=='U']
    # Share of stocks that touched the limit-up price but did not finish
    # with limit == 'U'. Guarded so a day with no touches does not raise
    # ZeroDivisionError.
    if len(today_uped) > 0:
        up_fail_rate = round((len(today_uped)-len(today_up))/len(today_uped)*100, 2)
    else:
        up_fail_rate = 0.0

    # Latest row of the trend table supplies the follow-through figures.
    upstop_trend_df = upstop_trend(df, end_date, n_days=3)
    latest_trend = upstop_trend_df.tail(1)
    pre_up_pct = round(latest_trend.pre_up_pct.iat[-1], 2)
    p_up_t_noup_pct = round(latest_trend.p_up_t_noup_pct.iat[-1], 2)

    print(f'【{end_date}】')
    print(f'总成交{total_amt}亿，北上总流入{bs_amount}亿（沪{hgt_amount}亿, 深{sgt_amount}亿）。涨跌比：{emo.up.astype("int")}/{emo.dn.astype("int")}，中位涨幅：{median_pct_chg}%，热度：{emo.vitality}。')
    print(f'涨跌停：{emo.real_upstop.astype("int")}/{emo.real_dnstop.astype("int")}，炸板率{up_fail_rate}%，连板高度{today_up.conseq_up_num.max()}板。昨涨停平均涨幅{pre_up_pct}%，掉队股平均涨幅{p_up_t_noup_pct}%。')

In [None]:
def get_tfp(end_date):
    """Fetch, display and return the suspension/resumption table for ``end_date``.

    The table comes from ``ak.stock_tfp_em`` and is indexed by ``代码``; rows
    whose code starts with ``'8'`` are dropped. Rows whose ``停牌原因`` equals
    ``交易异常波动`` are displayed first (only when there are any), then a
    separator line is printed, then the remaining rows are displayed.

    Args:
        end_date: Date parseable by ``pdl.parse``; it is sent to the API in
            ``YYYYMMDD`` form.

    Returns:
        The filtered table, indexed by ``代码``.
    """
    ak_date = pdl.parse(end_date).strftime('%Y%m%d')
    tfp_df = ak.stock_tfp_em(date=ak_date).set_index('代码')
    tfp_df = tfp_df[~tfp_df.index.str.startswith('8')]

    abnormal_rows = tfp_df[tfp_df['停牌原因'] == '交易异常波动']
    if len(abnormal_rows) > 0:
        display(abnormal_rows)
    print('=================================================================================')
    display(tfp_df[tfp_df['停牌原因'] != '交易异常波动'])
    return tfp_df


In [None]:
import re
def round_print(text, *numbers):
    """Print ``text`` with each ``{}`` replaced by a number rounded to 2 places.

    Placeholders are filled left to right, one per positional number.

    Args:
        text: Template containing literal ``{}`` placeholders.
        *numbers: Values to round with ``round(value, 2)`` and insert.

    Raises:
        ValueError: If the number of placeholders differs from the number of
            values supplied.
    """
    placeholder = '{}'
    if text.count(placeholder) != len(numbers):
        raise ValueError('Number of numbers does not match the pattern')
    for value in numbers:
        # Replace only the first remaining placeholder on each pass.
        text = text.replace(placeholder, str(round(value, 2)), 1)
    print(text)

In [None]:
def check_performance(df):
    """Print summary statistics of ``cvo`` and limit-up counts for ``df``.

    Three lines are printed: min/max/mean/median of ``cvo`` over all rows,
    the same statistics over rows with ``open_pct >= 0``, and counts of rows
    with ``limit == "U"``, of those whose ``up_type`` is not ``"Y"``, and of
    all rows.

    Args:
        df: Frame with ``cvo``, ``open_pct``, ``limit`` and ``up_type`` columns.
    """
    def _report_cvo(template, frame):
        # One statistics line; round_print handles the rounding and printing.
        cvo = frame.cvo
        round_print(template, cvo.min(), cvo.max(), cvo.mean(), cvo.median())

    _report_cvo('[Total] [CVO] Min: {}%, Max: {}%, Avg: {}%, Median: {}%', df)
    _report_cvo('[Open >=0][CVO] Min: {}%, Max: {}%, Avg: {}%, Median: {}%', df[df.open_pct >= 0])

    is_upstop = df.limit == "U"
    upstop_count = len(df[is_upstop])
    non_y_count = len(df[is_upstop & (df.up_type != "Y")])
    print(f'[Count] Upstop: {upstop_count}, Non_Y: {non_y_count}, Total {len(df)}')


In [10]:
# Column presets for slicing frames in later cells.
# Minimal set: identity, price, circulating market value, turnover and amount.
norm_cols = ['name', 'close', 'circ_mv', 'turnover_rate_f', 'amount']
# Same list as the default `cols` used by stock_summaries().
up_cols = ['name', 'close', 'pre5_pct_chg', 'pre20_pct_chg', 'circ_mv', 'total_mv', 'vol', 'turnover_rate_f', 'amount', 'conseq_up_num', 'strth', 'first_time', 'last_time', 'fd_amount']
# Wider preset adding open/high, volume type and ratio, and the fl_/fc_ ratio
# columns (presumably for limit-up analysis, going by the name — confirm).
UPSTOP_COLS = ['name', 'open', 'high', 'close', 'circ_mv', 'total_mv', 'vol', 'vol_type', 'vol_ratio', 'amount', 'open_pct', 'pct_chg', 'up_type', 'first_time', 'last_time', 'open_times', 'strth', 'turnover_rate_f', 'fl_ratio', 'fc_ratio']
