In [63]:
import akshare
from typing import Dict

In [64]:
style_dataset = akshare.stock_board_concept_name_em()
style_dataset.to_csv("./output/stock-style.csv")

In [65]:
from open_quant_data.dataset.thirdparty.akshare.AkshareDataset import AkshareDataset

stock_dataset = AkshareDataset.stock_spot()
stock_dataset.to_csv('./output/stock-data.csv')

In [66]:
# pre-handle
# add xtquant stock code prefix
def add_suffix(stock_id):
    if stock_id.startswith('6'):
        return stock_id + '.SH'
    elif stock_id.startswith('0') or stock_id.startswith('3'):
        return stock_id + '.SZ'
    else:
        return stock_id + '.X'


stock_dataset['stock_id'] = stock_dataset['stock_id'].apply(add_suffix)

In [67]:
# params
min_pe = 0
max_pe = 30
max_market_value = 1e10
head_len = 400
# filter
stock_dataset = stock_dataset[~stock_dataset['stock_id'].str.endswith('.X')]
stock_dataset = stock_dataset[~stock_dataset['stock_name'].str.contains('ST')]
stock_dataset = stock_dataset[(stock_dataset['pe'] >= min_pe) & (stock_dataset['pe'] <= max_pe)]
stock_dataset = stock_dataset[stock_dataset['market_value'] <= max_market_value]
stock_dataset = stock_dataset.dropna(subset=['price'])
# sort
stock_dataset = stock_dataset.sort_values(by='market_value')
stock_dataset = stock_dataset.head(head_len).copy()

In [68]:
# loop styles
style_names = style_dataset['板块名称'].tolist()
stock_id_pool = stock_dataset['stock_id'].tolist()
style_names, stock_id_pool

(['昨日连板',
  '昨日连板_含一字',
  '短剧互动游戏',
  '拼多多概念',
  '盲盒经济',
  '快手概念',
  '昨日涨停_含一字',
  '电子竞技',
  '供销社概念',
  '噪声防治',
  '知识产权',
  '影视概念',
  '网红直播',
  '退税商店',
  '户外露营',
  '在线旅游',
  '昨日涨停',
  '字节概念',
  'NFT概念',
  '农业种植',
  '刀片电池',
  '进口博览',
  '跨境电商',
  'AIGC概念',
  '广电',
  '网络游戏',
  '云游戏',
  '手游概念',
  '免税概念',
  '抖音小店',
  '屏下摄像',
  '土地流转',
  '租售同权',
  '虚拟数字人',
  'ChatGPT概念',
  '京津冀',
  '体育产业',
  '空间计算',
  '股权转让',
  '元宇宙概念',
  '赛马概念',
  '彩票概念',
  '宠物经济',
  '世界杯',
  '沪企改革',
  '工程机械概念',
  'Web3.0',
  '动力电池回收',
  '装配建筑',
  'HIT电池',
  '猪肉概念',
  '混合现实',
  '固态电池',
  '星闪概念',
  '培育钻石',
  '虚拟现实',
  '券商概念',
  '增强现实',
  '新零售',
  '建筑节能',
  '基本金属',
  '华为欧拉',
  '抗菌面料',
  '电商概念',
  '钠离子电池',
  '光伏建筑一体化',
  '粮食概念',
  '东盟自贸区概念',
  '磷化工',
  '宁组合',
  '盐湖提锂',
  '熔盐储能',
  '北京冬奥',
  '锂电池',
  '钙钛矿电池',
  'TOPCon电池',
  '贬值受益',
  '无线耳机',
  '预制菜概念',
  '钛白粉',
  '深证100R',
  '鸡肉概念',
  '粤港自贸',
  '地摊经济',
  '智能穿戴',
  'REITs概念',
  '茅指数',
  '百度概念',
  'RCS概念',
  '债转股',
  '太阳能',
  'C2M概念',
  '草甘膦',
  '杭州亚运会',
  '东北振兴',
  '生态农业',
  '成

In [69]:
from typing import Dict

# add prefix
target_stock_ids = set([])
style_dict: Dict[str, str] = dict()
style_ban_list = ['昨日连板','昨日连板_含一字','昨日涨停_含一字','昨日涨停']
for style_name in style_names[:20]:
    if style_name in style_ban_list:
        continue
    print(f"=== {style_name} ===")
    curr_dataset = akshare.stock_board_concept_cons_em(style_name)
    curr_dataset['代码'] = curr_dataset['代码'].apply(add_suffix)
    curr_codes = curr_dataset['代码'].tolist()
    for code in curr_codes:
        if code in stock_id_pool:
            print(f"{code}")
            target_stock_ids.add(code)
            style_dict[code] = style_name

=== 短剧互动游戏 ===
=== 拼多多概念 ===
002718.SZ
603365.SH
=== 盲盒经济 ===
=== 快手概念 ===
=== 电子竞技 ===
=== 供销社概念 ===
603970.SH
=== 噪声防治 ===
=== 知识产权 ===
=== 影视概念 ===
=== 网红直播 ===
603898.SH
002853.SZ
300908.SZ
603808.SH
002615.SZ
002264.SZ
002763.SZ
002820.SZ
=== 退税商店 ===
002404.SZ
=== 户外露营 ===
300993.SZ
002887.SZ
002615.SZ
300577.SZ
603307.SH
605080.SH
603908.SH
=== 在线旅游 ===
603199.SH
002627.SZ
=== 字节概念 ===
=== NFT概念 ===
603808.SH
001216.SZ
002615.SZ
002627.SZ
=== 农业种植 ===
600731.SH
002133.SZ
000850.SZ
300908.SZ
300387.SZ
002772.SZ
600883.SH
300970.SZ
300829.SZ


In [72]:
stock_dataset = stock_dataset[stock_dataset['stock_id'].isin(target_stock_ids)]

In [74]:
stock_dataset['style_name'] = stock_dataset['stock_id'].map(style_dict)
stock_dataset

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_dataset['style_name'] = stock_dataset['stock_id'].map(style_dict)


Unnamed: 0,index,stock_id,stock_name,price,ratio,ratio_value,volume,volume_value,amplitude,high,...,turnover_rate,pe,pb,market_value,flow_market_value,ratio_acc,5m_ret,60d_ret,year_ret,style_name
1531,1532,300970.SZ,华绿生物,17.25,0.12,0.02,11879.0,20488010.0,0.81,17.31,...,1.71,20.13,1.29,2033578000.0,1198066000.0,-0.06,0.0,-12.7,-1.99,农业种植
1205,1206,600883.SH,博闻科技,8.7,0.35,0.03,35783.0,31108100.0,3.0,8.88,...,1.52,18.75,2.29,2053966000.0,2053966000.0,0.0,0.12,10.55,22.02,农业种植
533,534,002718.SZ,友邦吊顶,16.92,1.32,0.22,11104.0,18725410.0,1.74,16.97,...,1.8,23.56,2.06,2224097000.0,1040917000.0,-0.18,0.0,6.28,29.46,拼多多概念
1022,1023,300387.SZ,富邦股份,7.77,0.52,0.04,50042.0,38657820.0,3.1,7.85,...,1.78,27.43,1.71,2245973000.0,2184844000.0,0.0,0.26,6.0,20.84,农业种植
2299,2300,002820.SZ,桂发祥,11.5,-0.17,-0.02,62339.0,71697220.0,2.17,11.63,...,3.11,27.6,2.35,2309985000.0,2303859000.0,-0.52,-0.43,10.47,-15.13,网红直播
473,474,002853.SZ,皮阿诺,12.96,1.49,0.19,67247.0,87978150.0,5.01,13.41,...,5.08,29.42,1.82,2417594000.0,1714075000.0,-0.15,-0.08,-23.54,-33.47,网红直播
2926,2927,603908.SH,牧高笛,37.32,-0.48,-0.18,4195.0,15736120.0,1.89,37.88,...,0.63,16.97,4.41,2488871000.0,2488871000.0,-0.37,0.11,-11.46,-37.45,户外露营
1083,1084,002887.SZ,绿茵生态,8.75,0.46,0.04,17420.0,15220010.0,1.26,8.79,...,1.05,26.65,1.24,2730000000.0,1447973000.0,0.0,0.11,2.1,7.63,户外露营
641,642,002133.SZ,广宇集团,3.72,1.09,0.04,125915.0,47150760.0,2.45,3.79,...,1.64,13.67,0.71,2879816000.0,2863917000.0,-0.27,-0.27,6.9,8.14,农业种植
1401,1402,603307.SH,扬州金泉,44.14,0.2,0.09,2216.0,9758424.0,2.2,44.75,...,1.32,11.05,2.26,2957380000.0,739345000.0,-0.02,-0.05,-11.19,43.87,户外露营


In [76]:
stock_dataset.to_csv('./output/result.csv')
stock_dataset.to_excel('./output/result.xlsx')

In [78]:
stock_dataset['ratio'].mean()

0.4996551724137931

In [82]:
akshare.stock_board_concept_hist_em('拼多多概念',start_date='20231201', end_date='20231215')

Unnamed: 0,日期,开盘,收盘,最高,最低,涨跌幅,涨跌额,成交量,成交额,振幅,换手率
0,2023-12-01,990.74,1024.18,1026.34,990.74,3.2,31.76,5017071,6240825000.0,3.59,4.52
1,2023-12-04,1022.94,1032.25,1037.53,1021.73,0.79,8.07,4190761,5571055000.0,1.54,3.78
2,2023-12-05,1031.73,1009.81,1035.75,1009.81,-2.17,-22.44,4404798,5899795000.0,2.51,3.97
3,2023-12-06,1002.99,1014.81,1027.11,995.34,0.5,5.0,4160139,5551940000.0,3.15,3.75
4,2023-12-07,1005.5,1021.29,1026.21,1005.5,0.64,6.48,3720279,5265614000.0,2.04,3.35
5,2023-12-08,1015.63,1011.21,1018.59,1007.59,-0.99,-10.08,3813677,5115535000.0,1.08,3.44
6,2023-12-11,1005.76,1030.86,1032.28,1005.76,1.94,19.65,4020422,6208465000.0,2.62,3.62
7,2023-12-12,1032.79,1036.91,1039.86,1029.45,0.59,6.05,3643671,5689842000.0,1.01,3.28
8,2023-12-13,1032.73,1026.78,1040.42,1025.98,-0.98,-10.13,3033723,4186316000.0,1.39,2.73
9,2023-12-14,1030.18,1025.85,1045.88,1023.09,-0.09,-0.93,2862025,3757883000.0,2.22,2.58
