# 对本月的历史数据按板块名称分组，统计各市值区间内每个板块的股票个数，结果保存到 ../data/股票代码个数_{区间}.csv 文件

In [69]:
import pandas as pd
from datetime import datetime
# import sys
# sys.path.append('../utils')
# import constants
from functools import lru_cache
# Buckets keyed by label; each value is a (lower, upper) pair. The cells below
# multiply both bounds by 100_000_000 before comparing them with 总市值.
OPTION_DICT = {
    "all":        (-float('inf'), float('inf')),
    "0-100":      (0, 100),
    "100-500":    (100, 500),
    "500-1000":   (500, 1000),
    "1000-30000": (1000, 30000),
}

# Labels for price-change buckets, listed from limit-down (跌停) to limit-up (涨停).
RANGE = [
    "跌停",
    "跌<-5%",
    "-3%<-5%",
    "-3<-1%",
    "平盘",
    "<3%",
    "3-5%",
    "5%-涨停",
    "涨停",
]


In [70]:
def get_data(date: "str | None" = None) -> pd.DataFrame:
    """
    Load the merged daily history and attach total market value.

    Reads ``../data/merge_<date>.csv`` (first column as the index, ``日期``
    parsed as dates, ``股票代码`` kept as strings), looks up each row's
    ``总股本`` in ``../data/总股本_em.csv`` and adds ``总市值 = 总股本 * 收盘``.

    Args:
        date: ``YYYY-MM-DD`` stamp of the merge file to read. When omitted,
            today's date is used, which matches the previous behaviour.

    Returns:
        The history DataFrame with ``总股本`` and ``总市值`` columns added.
        Codes absent from the share-capital file get NaN in both columns.
    """
    if date is None:
        date = datetime.now().strftime('%Y-%m-%d')
    df = pd.read_csv(
        f"../data/merge_{date}.csv", parse_dates=['日期'], index_col=0,
        dtype={"股票代码": object})
    value = pd.read_csv("../data/总股本_em.csv", index_col="股票代码",
                        dtype={"股票代码": object})
    value_dict = value['总股本'].to_dict()
    # Series.map performs the dictionary lookup in one vectorised call and
    # yields NaN (never None) for unknown codes, so the product below stays numeric.
    df['总股本'] = df['股票代码'].map(value_dict)
    df['总市值'] = df['总股本'] * df['收盘']
    return df


In [None]:
# Print the configured buckets (label -> (lower, upper) bounds).
print(OPTION_DICT)

In [71]:
# Load the merged history frame, then summarise its columns, dtypes and non-null counts.
df = get_data()
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170816 entries, 2023-03-01 to 2023-03-24
Data columns (total 15 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   开盘      170816 non-null  float64
 1   收盘      170816 non-null  float64
 2   最高      170816 non-null  float64
 3   最低      170816 non-null  float64
 4   成交量     170816 non-null  int64  
 5   成交额     170816 non-null  float64
 6   振幅      170816 non-null  float64
 7   涨跌幅     170816 non-null  float64
 8   涨跌额     170816 non-null  float64
 9   换手率     170816 non-null  float64
 10  股票代码    170816 non-null  object 
 11  股票名称    170816 non-null  object 
 12  板块名称    170816 non-null  object 
 13  总股本     170816 non-null  float64
 14  总市值     170816 non-null  float64
dtypes: float64(11), int64(1), object(3)
memory usage: 20.9+ MB


In [72]:
# Number of missing values in each column.
df.isnull().sum()

开盘      0
收盘      0
最高      0
最低      0
成交量     0
成交额     0
振幅      0
涨跌幅     0
涨跌额     0
换手率     0
股票代码    0
股票名称    0
板块名称    0
总股本     0
总市值     0
dtype: int64

In [None]:
# Column labels of the loaded frame.
df.columns

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [73]:
# Non-null 股票名称 entries per 板块名称 group. This counts rows, so a stock
# that appears on several dates contributes once per row.
counts = df.groupby('板块名称')['股票名称'].count()
counts

板块名称
IT服务    2286
LED      720
专业工程     612
专业服务     496
专业连锁     144
        ... 
饮料制造     846
饰品       288
饲料       320
高速公路     360
黑色家电     198
Name: 股票名称, Length: 288, dtype: int64

In [74]:
# Number of distinct stock names in each sector.
counts = df.groupby('板块名称')['股票名称'].nunique()
# The groupby result is a Series, which has no `columns`; assigning to that
# attribute has no effect on the data. Label the values through `name` instead.
counts.name = "股票个数"
# Show the result as the cell's last expression.
counts


板块名称
IT服务    127
LED      40
专业工程     34
专业服务     28
专业连锁      8
       ... 
饮料制造     47
饰品       16
饲料       18
高速公路     20
黑色家电     11
Name: 股票名称, Length: 288, dtype: int64


In [75]:
# Spot-check one bucket: both bounds are scaled by 100_000_000 and compared
# inclusively against 总市值.
start_value, end_value = 0, 100
in_bucket = df['总市值'].between(start_value * 100_000_000,
                              end_value * 100_000_000)
cur_df = df[in_bucket]

# Distinct stock names per sector among the rows that fall in the bucket.
counts = cur_df.groupby('板块名称')['股票名称'].nunique().rename("股票个数")
counts


板块名称
IT服务    92
LED     36
专业工程    24
专业服务    23
专业连锁     4
        ..
饮料制造    22
饰品      12
饲料      11
高速公路    12
黑色家电     5
Name: 股票个数, Length: 277, dtype: int64

In [79]:
# Rows of the filtered frame whose sector is 农业服务, then only those at the
# 2023-03-01 index label.
da=cur_df[cur_df["板块名称"] == "农业服务"]
da.loc['2023-03-01']

Unnamed: 0_level_0,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率,股票代码,股票名称,板块名称,总股本,总市值
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-03-01,4.43,4.45,4.48,4.4,99091,43942150.0,1.81,0.68,0.03,0.87,2141,贤丰控股,农业服务,1134657000.0,5049222000.0
2023-03-01,4.98,4.96,4.99,4.94,132872,65984540.0,1.0,-0.4,-0.02,1.9,300021,大禹节水,农业服务,862299800.0,4277007000.0
2023-03-01,8.37,8.36,8.38,8.32,24992,20870380.0,0.72,0.0,0.0,0.61,688098,申联生物,农业服务,410644000.0,3432984000.0
2023-03-01,21.79,21.84,22.08,21.66,15582,34016500.0,1.92,-0.27,-0.06,1.88,300871,回盛生物,农业服务,166054500.0,3626631000.0
2023-03-01,11.58,11.54,11.6,11.44,29497,33960330.0,1.39,0.0,0.0,0.46,603718,海利生物,农业服务,644000000.0,7431760000.0
2023-03-01,8.81,8.9,8.9,8.78,69040,61102880.0,1.36,0.91,0.08,0.74,2556,辉隆股份,农业服务,953993000.0,8490538000.0
2023-03-01,3.42,3.39,3.42,3.35,1772,595397.7,2.06,-0.29,-0.01,0.2,838275,驱动力,农业服务,160277600.0,543341100.0
2023-03-01,8.53,8.45,8.53,8.26,354,298534.2,3.2,0.24,0.02,0.04,839729,永顺生物,农业服务,273350000.0,2309808000.0
2023-03-01,3.44,3.46,3.46,3.44,1434,495342.4,0.58,0.0,0.0,0.08,830964,润农节水,农业服务,261208000.0,903779700.0
2023-03-01,5.55,5.5,5.55,5.49,38164,21045020.0,1.09,-0.36,-0.02,0.5,2688,金河生物,农业服务,780422400.0,4292323000.0


In [76]:
# Look up the 农业服务 entry of the current `counts` Series.
counts.loc['农业服务']


13

## 正式运行

In [None]:
# For every bucket in OPTION_DICT, count the distinct stock names per sector
# and write the result to ../data/股票代码个数_<bucket>.csv.
CAP_UNIT = 100_000_000  # bucket bounds are multiplied by this before comparing with 总市值
for key, value in OPTION_DICT.items():
    start_value, end_value = value
    if key == "all":
        # No filtering: keeps every row, including rows whose 总市值 is NaN
        # (a range comparison would drop those).
        cur_df = df
    else:
        # Boolean indexing already returns a new frame, so df is never
        # modified and no defensive copy of the full frame is needed.
        # NOTE(review): both bounds are inclusive, so a value sitting exactly
        # on a shared boundary is counted in two neighbouring buckets.
        cur_df = df[df['总市值'].between(start_value * CAP_UNIT,
                                      end_value * CAP_UNIT)]

    # Number of distinct stock names in each sector for this bucket.
    counts = cur_df.groupby('板块名称')['股票名称'].nunique()
    counts.name = "股票个数"
    counts.to_csv(
        f"../data/股票代码个数_{key}.csv", index=True)


In [None]:
# Reload the "all" export (first column as index) and turn its 股票个数 column
# into a plain {index label: count} dictionary.
data = pd.read_csv("../data/股票代码个数_all.csv", index_col=0)
data["股票个数"].to_dict()
