In [None]:
import tushare as ts
import pandas as pd
import numpy as np
import os
import time
import tqdm
import talib as ta
import mplfinance as mpf
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path
import shutil

# TOEKN_PATH = os.path.expanduser("~/.tushare.token")

# with open(TOEKN_PATH, "r") as f:
#     token = f.read().strip()
#     ts.set_token(token=token)
#     pro = ts.pro_api(token=token)


In [2]:
stock_basic_df = pd.read_csv("./data/stock_basic_df.csv")
ggt_df = pd.read_csv("./data/ggt_basic_df.csv")
etf_df = pd.read_csv("./data/etf_basic_df.csv")
basic_df = pd.read_csv("./data/basic_df.csv")

In [3]:
industry_df = pd.read_csv("./data/industry.csv")
buy_list = pd.read_csv("./data/buy_list.csv")['Column1'].tolist()
up_list = pd.read_csv("./data/up_list.csv")['Column1'].tolist()

In [4]:
def apply_rsi(df, period=24):
    df = df.sort_values(by="trade_date", ascending=True)
    df["rsi12"] = ta.RSI(df["close"].values, timeperiod=12)
    df["rsi60"] = ta.RSI(df["close"].values, timeperiod=60)
    df["rsi120"] = ta.RSI(df["close"].values, timeperiod=120)
    df['min_rsi'] = df[['rsi12', 'rsi60', 'rsi120']].min(axis=1)

    return df

In [5]:
# 计算每个组的比例
close_greater_than_ma60 = industry_df['close'] > industry_df['ma60']
close_greater_than_ma20 = industry_df['close'] > industry_df['ma20']
close_greater_than_ma5 = industry_df['close'] > industry_df['ma5']

industry_proportion_ma60 = close_greater_than_ma60.groupby(industry_df['industry']).mean() * 100
industry_proportion_ma20 = close_greater_than_ma20.groupby(industry_df['industry']).mean() * 100
industry_proportion_ma5 = close_greater_than_ma5.groupby(industry_df['industry']).mean() * 100

# 创建包含比例的 DataFrame
industry_proportion = pd.DataFrame({
    'MA5 Proportion': round(industry_proportion_ma5, 2),
    'MA20 Proportion': round(industry_proportion_ma20, 2),
    'MA60 Proportion': round(industry_proportion_ma60, 2),
})
industry_proportion.sort_values(by='MA5 Proportion', ascending=False, inplace=True)

# 计算每个组的行数
industry_counts = close_greater_than_ma5.groupby(industry_df['industry']).size().reset_index(name='MA5 Count')

# 将比例和行数合并到一个 DataFrame 中
industry_proportion_with_counts = pd.merge(industry_proportion, industry_counts, on='industry')

# 设置显示选项以显示所有行
pd.set_option('display.max_rows', None)

# 输出结果
industry_proportion_with_counts

Unnamed: 0,industry,MA5 Proportion,MA20 Proportion,MA60 Proportion,MA5 Count
0,公路,100.0,100.0,100.0,4
1,白酒,100.0,30.0,100.0,20
2,电器连锁,100.0,50.0,100.0,2
3,超市连锁,100.0,44.44,100.0,9
4,渔业,100.0,50.0,100.0,6
5,旅游服务,100.0,50.0,100.0,8
6,乳制品,95.0,85.0,100.0,20
7,影视音像,94.29,82.86,100.0,35
8,文教休闲,92.16,72.55,92.16,51
9,纺织,89.74,79.49,97.44,39


In [6]:
industry_list = ['银行','普钢','煤炭开采','小金属']
industry_proportion_with_counts[industry_proportion_with_counts['industry'].isin(industry_list)]

Unnamed: 0,industry,MA5 Proportion,MA20 Proportion,MA60 Proportion,MA5 Count
13,银行,85.71,54.76,97.62,42
100,普钢,26.92,11.54,96.15,26
104,小金属,21.82,23.64,90.91,55
106,煤炭开采,16.0,12.0,52.0,25


In [7]:
# recent sell
recent_sell_dict = {
    # 11.27
    '515220.SH':1.166, # 煤炭
    # 11.26
    '601006.SH':6.66, # 大秦铁路
    # 11.25
    '513090.SH':1.479, # 香港证券
    '603816.SH':28.06, # 顾家家居
    '600008.SH':3.25 , # 首创
    '301359.SH':23.35 , # 东南
    '159922.SZ':6.065 , # 500
    '512200.SH':1.569 , # 地产
    '512480.SH':0.961 , # 半导体
    # 11.21
    '001256.SZ': 18.28, # 炜冈
    '001259.SZ': 24.02, # 利仁
    '601069.SH': 12.64, # 西部黄金
    # 11.20
    '603272.SH': 13.93, # 联翔
    '301234.SZ': 27.7, # 五洲医疗
    '001209.SZ': 14.22, # 洪兴
    '301309.SZ': 27.85, # 万德凯
    '001226.SZ': 25.56, # 拓山
    '301061.SZ': 55.61, # 匠心家居
    '000581.SZ': 18.29, # 威孚高科
    '603048.SH': 14.96, # 浙江黎明
    '688750.SH': 29.85, # 金天
    '002895.SZ': 21.4, # 川恒股份
    # 11.19
    '301043.SZ': 27.8, # 绿岛风
    '301388.SZ': 22.58, # 欣灵电气
    '001368.SZ': 19.55, # 通达创智
    '603216.SH': 10.49, # 梦天家居
    '603307.SH': 31.25, # 金泉
    '601919.SH': 14.6, # 中远海控
}

print(len(buy_list), len(up_list),len(industry_df))

for k,v in recent_sell_dict.items():
    if not os.path.exists(os.path.join('./data/daily', f"{k}.csv")):
        continue
    df = pd.read_csv(os.path.join('./data/daily', f"{k}.csv"))
    if df['close'].iloc[-1] > v:
        if k in buy_list:
            buy_list.remove(k)
        if k in up_list:
            up_list.remove(k)
        if k in industry_df['ts_code'].tolist():
            industry_df = industry_df[industry_df['ts_code'] != k]

print(len(buy_list), len(up_list),len(industry_df))



36 3418 5078
36 3410 5067


In [8]:
mini_df = industry_df[industry_df['pe'] > 0]
mini_df = mini_df[mini_df['ts_code'].isin(up_list)]    
print(len(mini_df))
mini_df = mini_df.sort_values(by='circ_mv')
mini_df['circ_mv'] = mini_df['circ_mv'] / 10000
mini_df = mini_df.drop_duplicates(subset='industry', keep='first')
print(len(mini_df))
mini_df.to_csv("data/mini_df.csv")
mini_df[['ts_code', 'name','industry','circ_mv','pe','pb','dv_ttm']].head(20)


2527
108


Unnamed: 0,ts_code,name,industry,circ_mv,pe,pb,dv_ttm
2322,001260.SZ,坤泰股份,汽车配件,5.178425,39.1661,2.3511,0.7664
4307,688638.SH,誉辰智能,专用机械,5.186151,29.9763,1.6084,0.0
4566,001234.SZ,泰慕士,服饰,5.188406,29.964,2.1913,3.9119
1440,301287.SZ,康力源,文教休闲,5.282723,21.7985,1.9059,0.7889
4168,301107.SZ,瑜欣电子,机械基件,5.811086,32.4765,2.2036,1.4307
196,603280.SH,南方路机,工程机械,5.971134,19.2385,1.7782,1.591
348,001231.SZ,农心科技,农药化肥,6.291674,32.6517,1.8136,0.7503
3309,301212.SZ,联盛化学,化工原料,6.3504,35.7202,1.9017,0.8418
834,001366.SZ,播恩集团,饲料,6.447744,38.2018,2.2514,1.0363
139,688695.SH,中创股份,软件服务,6.477726,43.187,3.6801,0.0


In [9]:
base_df_per = industry_df["pb"].quantile(0.1)
base_pb = 1 if base_df_per < 1 else base_df_per
print("base_pb: ", base_pb, "base_df_per: ",base_df_per)

pb_df = industry_df[industry_df["pb"] <= base_pb]
print(len(pb_df))
pb_df = pb_df[pb_df['pe'] > 0]
print(len(pb_df))
pb_df = pb_df[pb_df['ts_code'].isin(up_list)]
print(len(pb_df))
pb_df.loc[:, 'circ_mv'] = pb_df['circ_mv'] / 10000

pb_df = pb_df.sort_values(by='dv_ttm', ascending=False)
pb_df = pb_df.drop_duplicates(subset='industry', keep='first')
pb_df.to_csv("data/pb_df.csv")
print(len(pb_df))
pb_df[['ts_code', 'name','industry','circ_mv','pe','pb','dv_ttm']].head(20)


base_pb:  1.10272 base_df_per:  1.10272
504
417
283
65


Unnamed: 0,ts_code,name,industry,circ_mv,pe,pb,dv_ttm
172,000517.SZ,荣安地产,区域地产,59.558475,21.286,0.9673,13.3891
511,000937.SZ,冀中能源,煤炭开采,180.20919,4.4309,1.0432,12.9032
4760,603167.SH,渤海轮渡,水运,40.768657,16.2815,1.1027,9.206
1193,600016.SH,民生银行,银行,1400.753867,4.8276,0.319,8.7595
4056,600162.SH,香江控股,全国地产,61.773481,88.8988,1.0457,8.4656
16,603588.SH,高能环境,环境保护,79.969809,15.8469,0.8862,8.2078
1554,601006.SH,大秦铁路,铁路,1236.078491,10.3611,0.8155,7.5145
148,600153.SH,建发股份,仓储物流,278.355701,2.159,0.5131,7.4326
1453,000581.SZ,威孚高科,汽车配件,148.201641,9.7621,0.9315,6.5759
955,002191.SZ,劲嘉股份,广告包装,64.548939,54.9461,0.9712,6.5221


In [10]:
buy_df = pd.DataFrame()
daily_data_dir = './data/daily'

for b in buy_list:
    # 从 daily 里读 csv,只读第一行然后按 circ_mv 排序
    df = pd.read_csv(os.path.join(daily_data_dir, f"{b}.csv"))
    df = apply_rsi(df)
    
    df = df.sort_values(by="trade_date", ascending=False)
    df = df.head(1)
    ts_code = df['ts_code'].iloc[0]
    name = basic_df.loc[basic_df['ts_code'] == ts_code, 'name'].values[0]
    industry = basic_df.loc[basic_df['ts_code'] == ts_code, 'industry'].values[0]
    df['name'] = name
    df['industry'] = industry
    buy_df = pd.concat([buy_df, df])

buy_df["min_rsi"] = buy_df['rsi12'] + buy_df['rsi60'] + buy_df['rsi120']
buy_df = buy_df.sort_values(by="min_rsi", ascending=True)    
buy_df['circ_mv'] = buy_df['circ_mv'] / 1e4
buy_df.to_csv("data/buy_df.csv")
print(len(buy_df))
buy_df[['ts_code', 'name','industry','circ_mv','rsi12','rsi60','rsi120','min_rsi']].head(50)

36


Unnamed: 0,ts_code,name,industry,circ_mv,rsi12,rsi60,rsi120,min_rsi
0,301525.SZ,儒竞科技,家用电器,30.574401,58.844397,52.508914,47.883453,159.236764
223,00772.HK,阅文集团,,0.0,60.123364,51.874116,50.705067,162.702548
0,603466.SH,风语筑,文教休闲,55.790176,61.265122,53.076124,49.664491,164.005737
0,605299.SH,舒华体育,文教休闲,33.90168,64.105558,52.595454,49.675326,166.376339
0,688578.SH,艾力斯,化学制药,260.415,60.142907,53.365445,53.230462,166.738815
0,300769.SZ,德方纳米,化工原料,121.367421,60.696146,56.24561,50.677345,167.619101
0,300651.SZ,金陵体育,文教休闲,11.05841,63.549978,54.170682,50.265948,167.986608
0,603320.SH,迪贝电气,电气设备,20.62837,61.52405,54.549265,52.609315,168.68263
0,600229.SH,城市传媒,出版业,52.354224,62.439267,54.981455,52.422809,169.843531
0,301018.SZ,申菱环境,专用机械,51.556816,62.854716,55.248659,51.762627,169.866002


In [11]:
up_df = pd.DataFrame()

for b in tqdm.tqdm(up_list, desc="Processing"):
    # 从 daily 里读 csv,只读第一行然后按 circ_mv 排序
    df = pd.read_csv(os.path.join(daily_data_dir, f"{b}.csv"))
    df = apply_rsi(df)
    df = df.sort_values(by="trade_date", ascending=False)
    df = df.head(1)
    ts_code = df['ts_code'].iloc[0]
    name = basic_df.loc[basic_df['ts_code'] == ts_code, 'name'].values[0]
    industry = basic_df.loc[basic_df['ts_code'] == ts_code, 'industry'].values[0]
    df['name'] = name
    df['industry'] = industry
    df['name'] = name
    up_df = pd.concat([up_df, df])
    
print(len(up_df))
up_df["min_rsi"] = up_df['rsi12'] + up_df['rsi60'] + up_df['rsi120']
up_df = up_df.sort_values(by="min_rsi", ascending=True)    
up_df['circ_mv'] = up_df['circ_mv'] / 1e4
up_df.to_csv("data/up_df.csv")
up_df[['ts_code', 'name', 'industry','circ_mv','rsi12','rsi60','rsi120','min_rsi']].head(50)

Processing: 100%|██████████| 3410/3410 [00:14<00:00, 240.61it/s]


3410


Unnamed: 0,ts_code,name,industry,circ_mv,rsi12,rsi60,rsi120,min_rsi
213,02169.HK,沧港铁路,,0.0,59.42776,39.923437,26.97411,126.325307
223,09880.HK,优必选,,0.0,33.549009,45.684861,48.98745,128.221319
223,00316.HK,东方海外国际,,0.0,36.265564,47.335324,48.885965,132.486853
0,600060.SH,海信视像,家用电器,248.269594,38.136278,48.028827,48.752806,134.91791
0,603107.SH,上海汽配,汽车配件,36.456875,44.230931,47.939346,42.95507,135.125347
223,01268.HK,美东汽车,,0.0,41.609939,47.999901,45.630221,135.240062
223,01541.HK,宜明昂科－Ｂ,,0.0,46.385324,45.313146,43.581643,135.280112
0,002237.SZ,恒邦股份,黄金,96.144342,38.321703,48.525849,49.09457,135.942121
0,301539.SZ,宏鑫科技,汽车配件,7.4111,47.924725,45.872315,42.27708,136.07412
0,600916.SH,中国黄金,服饰,146.664,42.486202,47.561457,47.298043,137.345702
