# 对本月的历史数据按板块名称分组，统计各市值区间内每个板块的股票个数，结果保存到 ../data/股票代码个数_<市值区间>.csv 文件

In [1]:
import pandas as pd
from datetime import datetime
# import sys
# sys.path.append('../utils')
# import constants
from functools import lru_cache
# Market-cap buckets: label -> (lower bound, upper bound).
# The export loop multiplies both bounds by 100_000_000 before comparing them
# with 总市值, so the numbers below are expressed in units of 100 million.
_CAP_EDGES = (0, 100, 500, 1000, 30000)
OPTION_DICT = {"all": (float('-inf'), float('inf'))}
OPTION_DICT.update(
    (f"{low}-{high}", (low, high))
    for low, high in zip(_CAP_EDGES, _CAP_EDGES[1:])
)

# Labels for daily price-change bands, listed from 跌停 up to 涨停.
# Not referenced by any other cell visible in this notebook.
RANGE = [
    "跌停",
    "跌<-5%",
    "-3%<-5%",
    "-3<-1%",
    "平盘",
    "<3%",
    "3-5%",
    "5%-涨停",
    "涨停",
]


In [2]:
def get_data(data_dir: str = "../data", date=None) -> pd.DataFrame:
    """
    Load the merged quote history and attach share-capital / market-cap columns.

    Reads ``{data_dir}/merge_{date}.csv`` (first column becomes the index and
    the ``日期`` column is parsed as dates) and ``{data_dir}/总股本.csv`` (first
    column becomes the lookup key), then adds two columns:

    * ``总股本`` - looked up by ``股票代码``; NaN when the code has no entry.
    * ``总市值`` - ``总股本 * 收盘``.

    Args:
        data_dir: Directory holding both CSV files. Defaults to ``"../data"``.
        date: Snapshot date used in the merge file name, either a
            ``YYYY-MM-DD`` string or an object with ``strftime``. Defaults to
            today's date, so by default the file must have been produced today.

    Returns:
        The merged DataFrame with the two extra columns.
    """
    if date is None:
        date = datetime.now()
    if not isinstance(date, str):
        date = date.strftime('%Y-%m-%d')

    # 股票代码 is read as text so that leading zeros in the codes survive.
    df = pd.read_csv(
        f"{data_dir}/merge_{date}.csv", parse_dates=['日期'], index_col=0, dtype={"股票代码": object})
    value = pd.read_csv(f"{data_dir}/总股本.csv", index_col=0, dtype={"股票代码": object})

    # Going through a plain dict keeps the "last entry wins" behaviour if the
    # share-capital file lists a code twice; Series.map then yields NaN for
    # codes that are missing from the dict.
    value_dict = value['总股本'].to_dict()
    df['总股本'] = df['股票代码'].map(value_dict)
    df['总市值'] = df['总股本'] * df['收盘']

    return df


In [3]:
# Sanity check: show the configured market-cap buckets.
print(OPTION_DICT)

{'all': (-inf, inf), '0-100': (0, 100), '100-500': (100, 500), '500-1000': (500, 1000), '1000-30000': (1000, 30000)}


In [4]:
# Load the merged history once; the cells below all read from `df`.
df = get_data()
# In the recorded run 总股本 / 总市值 have fewer non-null values than the other
# columns, i.e. some stock codes had no entry in the share-capital file.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170816 entries, 2023-03-01 to 2023-03-24
Data columns (total 15 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   开盘      170816 non-null  float64
 1   收盘      170816 non-null  float64
 2   最高      170816 non-null  float64
 3   最低      170816 non-null  float64
 4   成交量     170816 non-null  int64  
 5   成交额     170816 non-null  float64
 6   振幅      170816 non-null  float64
 7   涨跌幅     170816 non-null  float64
 8   涨跌额     170816 non-null  float64
 9   换手率     170816 non-null  float64
 10  股票代码    170816 non-null  object 
 11  股票名称    170816 non-null  object 
 12  板块名称    170816 non-null  object 
 13  总股本     163580 non-null  float64
 14  总市值     163580 non-null  float64
dtypes: float64(11), int64(1), object(3)
memory usage: 20.9+ MB


In [5]:
# List the available columns, including the derived 总股本 / 总市值.
df.columns

Index(['开盘', '收盘', '最高', '最低', '成交量', '成交额', '振幅', '涨跌幅', '涨跌额', '换手率', '股票代码',
       '股票名称', '板块名称', '总股本', '总市值'],
      dtype='object')

In [6]:
# Preview the first rows. In the recorded output the same stock/date appears
# once per 板块名称 it belongs to, so rows are not unique per stock and date.
df.head()

Unnamed: 0_level_0,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率,股票代码,股票名称,板块名称,总股本,总市值
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-03-01,22.55,22.42,22.83,22.18,98735,221958533.0,2.9,0.18,0.04,3.03,300119,瑞普生物,农业服务,468018786.0,10492980000.0
2023-03-01,22.55,22.42,22.83,22.18,98735,221958533.0,2.9,0.18,0.04,3.03,300119,瑞普生物,动物保健,468018786.0,10492980000.0
2023-03-02,22.44,21.73,22.54,21.64,53425,117040504.0,4.01,-3.08,-0.69,1.64,300119,瑞普生物,农业服务,468018786.0,10170050000.0
2023-03-02,22.44,21.73,22.54,21.64,53425,117040504.0,4.01,-3.08,-0.69,1.64,300119,瑞普生物,动物保健,468018786.0,10170050000.0
2023-03-03,21.89,21.27,22.18,21.12,57008,122635566.0,4.88,-2.12,-0.46,1.75,300119,瑞普生物,农业服务,468018786.0,9954760000.0


In [9]:
# Rows per sector: counts every non-null 股票名称 entry, so a stock is counted
# once for each row it contributes rather than once overall.
counts = df['股票名称'].groupby(df['板块名称']).count()
counts

板块名称
IT服务    2286
LED      720
专业工程     612
专业服务     496
专业连锁     144
        ... 
饮料制造     846
饰品       288
饲料       320
高速公路     360
黑色家电     198
Name: 股票名称, Length: 288, dtype: int64

In [7]:
# Number of distinct stocks in each sector. nunique() is used instead of
# count() because a stock contributes many rows to the frame.
counts = df.groupby('板块名称')['股票名称'].nunique()
# The former `counts.columns = [...]` assignment was dropped: `counts` is a
# Series, so it only attached a stray attribute and renamed nothing.
# Show the result.
print(counts)


板块名称
IT服务    127
LED      40
专业工程     34
专业服务     28
专业连锁      8
       ... 
饮料制造     47
饰品       16
饲料       18
高速公路     20
黑色家电     11
Name: 股票名称, Length: 288, dtype: int64


## 正式运行

In [13]:
# For every market-cap bucket, write the number of distinct stocks per sector
# to ../data/股票代码个数_<bucket>.csv.
for key, (start_value, end_value) in OPTION_DICT.items():
    # Bucket bounds are given in units of 100 million. Both ends are
    # inclusive, so a value exactly on a boundary lands in two adjacent
    # buckets. Rows whose 总市值 is NaN fail the comparison and are excluded
    # from every bucket, including "all".
    cur_df = df[df['总市值'].between(start_value * 100_000_000,
                                     end_value * 100_000_000)]

    # Group the *filtered* rows; grouping the unfiltered `df` would write the
    # same counts to every bucket file.
    counts = cur_df.groupby('板块名称')['股票名称'].nunique()
    counts.name = "股票个数"
    counts.to_csv(
        f"../data/股票代码个数_{key}.csv", index=True)
