# 用于对本月的历史数据按照板块名称进行分组统计，结果按市值区间分别保存到 ../data/result_{市值区间}_{日期}.csv 文件

In [1]:
import pandas as pd
from datetime import datetime
# import sys
# sys.path.append('../utils')
# import constants
from functools import lru_cache
# Market-cap buckets keyed by label. Each value is a (low, high) bound that
# runit() multiplies by 100_000_000 and applies inclusively on both ends;
# the "all" key skips filtering altogether.
OPTION_DICT = dict([
    ("all", (float('-inf'), float('inf'))),
    ("0-100", (0, 100)),
    ("100-500", (100, 500)),
    ("500-1000", (500, 1000)),
    ("1000-30000", (1000, 30000)),
])

# Column labels for the nine percent-change buckets produced by get_range(),
# listed in the same order as the bucket intervals (most negative first).
RANGE = [
    "跌停",
    "跌<-5%",
    "-3%<-5%",
    "-3<-1%",
    "平盘",
    "<3%",
    "3-5%",
    "5%-涨停",
    "涨停",
]


In [2]:
def get_data(date: "str | None" = None, data_dir: str = "../data") -> pd.DataFrame:
    """
    Load the merged daily quotes and attach share capital and market value.

    Reads ``{data_dir}/merge_{date}.csv`` (first column used as the index,
    ``日期`` parsed as dates, ``股票代码`` kept as text) and
    ``{data_dir}/总股本_em.csv`` (indexed by ``股票代码``). Two columns are added:

    * ``总股本`` - looked up from the share-capital file by ``股票代码``
    * ``总市值`` - ``总股本 * 收盘``

    Args:
        date: Date stamp in ``YYYY-MM-DD`` form used in the merge file name.
            Defaults to today's date.
        data_dir: Directory holding both CSV files.

    Returns:
        The quote frame with the two extra columns and every missing value
        replaced by 0.
    """
    if date is None:
        date = datetime.now().strftime('%Y-%m-%d')

    df = pd.read_csv(
        f"{data_dir}/merge_{date}.csv", parse_dates=['日期'], index_col=0, dtype={"股票代码": object})
    value = pd.read_csv(f"{data_dir}/总股本_em.csv",
                        index_col="股票代码", dtype={"股票代码": object})
    value_dict = value['总股本'].to_dict()

    # Series.map leaves NaN for codes missing from the lookup, so the
    # multiplication below stays numeric even when nothing matches.
    df['总股本'] = df['股票代码'].map(value_dict)
    df['总市值'] = df['总股本'] * df['收盘']

    # NOTE(review): this zero-fills missing values in *every* column, not only
    # 总市值; rows without share capital therefore end up with a market value of 0.
    return df.fillna(0)


In [3]:
# Show the configured market-cap buckets.
print(OPTION_DICT)

{'all': (-inf, inf), '0-100': (0, 100), '100-500': (100, 500), '500-1000': (500, 1000), '1000-30000': (1000, 30000)}


In [4]:
# Load the merged quotes (with 总股本 / 总市值 attached) and inspect the schema.
df = get_data()
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170816 entries, 2023-03-01 to 2023-03-24
Data columns (total 15 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   开盘      170816 non-null  float64
 1   收盘      170816 non-null  float64
 2   最高      170816 non-null  float64
 3   最低      170816 non-null  float64
 4   成交量     170816 non-null  int64  
 5   成交额     170816 non-null  float64
 6   振幅      170816 non-null  float64
 7   涨跌幅     170816 non-null  float64
 8   涨跌额     170816 non-null  float64
 9   换手率     170816 non-null  float64
 10  股票代码    170816 non-null  object 
 11  股票名称    170816 non-null  object 
 12  板块名称    170816 non-null  object 
 13  总股本     170816 non-null  float64
 14  总市值     170816 non-null  float64
dtypes: float64(11), int64(1), object(3)
memory usage: 20.9+ MB


In [5]:
# List the column names of the loaded frame.
df.columns

Index(['开盘', '收盘', '最高', '最低', '成交量', '成交额', '振幅', '涨跌幅', '涨跌额', '换手率', '股票代码',
       '股票名称', '板块名称', '总股本', '总市值'],
      dtype='object')

## 按日期、板块名称分组，并统计涨幅大于0和小于0的股票数量

In [6]:

def get_count(_df):
    """
    Count rising, falling and flat rows per (日期, 板块名称) group.

    Args:
        _df: Frame providing ``日期`` and ``板块名称`` as group keys and a
            numeric ``涨跌幅`` column. The frame is not modified.

    Returns:
        DataFrame indexed by (日期, 板块名称) with columns

        * ``涨的数量`` - rows with ``涨跌幅 > 0``
        * ``跌的数量`` - rows with ``涨跌幅 < 0``
        * ``平的数量`` - rows with ``涨跌幅 == 0``
        * ``涨幅比``  - ``涨的数量`` as a percentage of the three counts combined
    """
    pct_change = _df.groupby(['日期', '板块名称'])['涨跌幅']
    # Use the vectorized Series.sum() on each boolean mask instead of the
    # Python built-in sum(), which walks the Series one element at a time.
    result = pct_change.agg(
        [('涨的数量', lambda x: (x > 0).sum()),
         ('跌的数量', lambda x: (x < 0).sum()),
         ('平的数量', lambda x: (x == 0).sum())])
    total = result['涨的数量'] + result['跌的数量'] + result['平的数量']
    result['涨幅比'] = result['涨的数量'] / total * 100
    return result


## 按日期、板块名称分组，并统计涨幅平均值以及总市值求和

In [7]:

def get_sum(_df):
    """
    Aggregate percent change and market value per (日期, 板块名称) group.

    Returns a DataFrame indexed by (日期, 板块名称) holding the mean of
    ``涨跌幅`` and the sum of ``总市值`` for each group. No filtering is
    applied here; the input frame is left untouched.
    """
    by_day_and_sector = _df.groupby(['日期', "板块名称"])
    return by_day_and_sector.agg(
        **{"涨跌幅": ("涨跌幅", "mean"), "总市值": ("总市值", "sum")})


In [8]:

def get_range(_df):
    """
    Count rows per percent-change bucket for every (日期, 板块名称) group.

    ``涨跌幅`` is cut into nine right-closed intervals bounded by
    -20, -10, -5, -3, -0.099, 0.099, 3, 5, 10 and 20. Values outside
    (-20, 20] fall into no bucket and are therefore not counted.

    Args:
        _df: Frame providing ``日期`` and ``板块名称`` as group keys and a
            numeric ``涨跌幅`` column. The frame is not modified.

    Returns:
        DataFrame indexed by (日期, 板块名称) with one column per interval,
        in ascending interval order, holding the row count for that bucket.
    """
    bins = [-20, -10, -5, -3, -0.099, 0.099, 3, 5, 10, 20]
    cuts = pd.cut(_df['涨跌幅'], bins=bins)
    # observed=False is spelled out on purpose: it keeps a column for every
    # interval (filled with 0) even when a bucket is empty, which callers rely
    # on when they relabel the nine columns positionally. Newer pandas warns
    # that the implicit default is going to change.
    grouped = _df.groupby(["日期", "板块名称", cuts], observed=False)
    pct_chg_counts = grouped['涨跌幅'].count()
    # Move the interval level into the columns: one row per (日期, 板块名称).
    return pct_chg_counts.unstack()


### 用于测试

In [9]:
# Work on a copy of the loaded frame for this trial run.
cur_df = df.copy()
# Optional market-cap filter, disabled here (start_value / end_value are not defined in this cell):
# cur_df = cur_df[(cur_df['总市值'] >= (start_value)*100_000_000)
#                 & (cur_df['总市值'] <= (end_value)*100_000_000)]
result = get_count(cur_df)
value_df = get_sum(cur_df)
# Display the per-day, per-sector rise/fall/flat counts.
result


Unnamed: 0_level_0,Unnamed: 1_level_0,涨的数量,跌的数量,平的数量,涨幅比
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-03-01,IT服务,120,7,0,94.488189
2023-03-01,LED,31,7,2,77.500000
2023-03-01,专业工程,31,3,0,91.176471
2023-03-01,专业服务,20,6,1,74.074074
2023-03-01,专业连锁,6,1,1,75.000000
...,...,...,...,...,...
2023-03-24,饮料制造,29,18,0,61.702128
2023-03-24,饰品,4,11,1,25.000000
2023-03-24,饲料,13,4,1,72.222222
2023-03-24,高速公路,2,17,1,10.000000


In [10]:
# Display the per-day, per-sector mean 涨跌幅 and summed 总市值.
value_df

Unnamed: 0_level_0,Unnamed: 1_level_0,涨跌幅,总市值
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-01,IT服务,4.119685,1.270194e+12
2023-03-01,LED,1.099000,3.117635e+11
2023-03-01,专业工程,1.343529,3.937400e+11
2023-03-01,专业服务,0.585556,1.873936e+11
2023-03-01,专业连锁,0.792500,6.455047e+10
...,...,...,...
2023-03-24,饮料制造,0.271064,5.040966e+12
2023-03-24,饰品,-0.426250,1.350491e+11
2023-03-24,饲料,0.875556,2.820931e+11
2023-03-24,高速公路,-1.489500,2.722461e+11


In [11]:
# Combine the counts with the mean/sum aggregates, matching on the (日期, 板块名称) keys.
final_df = result.join(value_df, on=["日期", "板块名称"])
final_df 

Unnamed: 0_level_0,Unnamed: 1_level_0,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-01,IT服务,120,7,0,94.488189,4.119685,1.270194e+12
2023-03-01,LED,31,7,2,77.500000,1.099000,3.117635e+11
2023-03-01,专业工程,31,3,0,91.176471,1.343529,3.937400e+11
2023-03-01,专业服务,20,6,1,74.074074,0.585556,1.873936e+11
2023-03-01,专业连锁,6,1,1,75.000000,0.792500,6.455047e+10
...,...,...,...,...,...,...,...
2023-03-24,饮料制造,29,18,0,61.702128,0.271064,5.040966e+12
2023-03-24,饰品,4,11,1,25.000000,-0.426250,1.350491e+11
2023-03-24,饲料,13,4,1,72.222222,0.875556,2.820931e+11
2023-03-24,高速公路,2,17,1,10.000000,-1.489500,2.722461e+11


In [12]:
# final_df.reset_index(inplace=True)
# final_df


In [13]:
# d1=final_df.loc[("2023-03-01", "其他传媒")]
# d1

In [14]:
# Add 总市值亿元: 总市值 divided by 100,000,000 and rendered as a
# two-decimal string.
final_df['总市值亿元'] = (final_df['总市值'] / 100000000).map('{:.2f}'.format)

# Attach the per-bucket percent-change counts to the summary table.
db = get_range(cur_df)
result = pd.merge(final_df, db, on=["日期", '板块名称'])
result = result.reset_index(level=[0, 1])
# Keep the first nine column names as they are and relabel the remaining
# interval columns with the RANGE labels.
a = result.columns[:9].to_list() + RANGE
result.columns = a


In [15]:
# Confirm the final column layout after relabelling.
result.columns

Index(['日期', '板块名称', '涨的数量', '跌的数量', '平的数量', '涨幅比', '涨跌幅', '总市值', '总市值亿元',
       '跌停', '跌<-5%', '-3%<-5%', '-3<-1%', '平盘', '<3%', '3-5%', '5%-涨停', '涨停'],
      dtype='object')

## 正式运行

In [16]:
def runit():
    """
    Write one per-sector summary CSV for every bucket in OPTION_DICT.

    For each bucket the module-level ``df`` is restricted to rows whose
    ``总市值`` lies within the bucket bounds (bounds are multiplied by
    100_000_000 and are inclusive on both ends; the ``"all"`` bucket is not
    filtered). The rise/fall counts, mean/sum aggregates and percent-change
    bucket counts are combined into one table and saved as
    ``../data/result_{key}_{YYYYMMDD}.csv`` without the index.

    Reads the globals ``df``, ``OPTION_DICT`` and ``RANGE``; returns nothing.
    """
    # Resolve the date stamp once so every file written by a single run shares
    # it, even if the loop happens to straddle midnight.
    stamp = datetime.now().strftime('%Y%m%d')

    for key, (start_value, end_value) in OPTION_DICT.items():
        if key == "all":
            cur_df = df.copy()
        else:
            in_bucket = ((df['总市值'] >= start_value * 100_000_000)
                         & (df['总市值'] <= end_value * 100_000_000))
            cur_df = df[in_bucket]

        final_df = get_count(cur_df).join(
            get_sum(cur_df), on=["日期", "板块名称"])
        # 总市值 divided by 100,000,000, rendered as a two-decimal string.
        final_df['总市值亿元'] = final_df['总市值'].apply(
            lambda x: '{:.2f}'.format(x/100000000))

        result = pd.merge(final_df, get_range(cur_df), on=["日期", '板块名称'])
        result = result.reset_index(level=[0, 1])
        # Keep the first nine column names and relabel the remaining interval
        # columns with the RANGE labels.
        result.columns = result.columns[:9].to_list() + RANGE

        result.to_csv(f"../data/result_{key}_{stamp}.csv", index=False)
# Generate the result CSV for every market-cap bucket.
runit()

## 校验一下数据结果

In [17]:
# Read back today's "100-500" result file, using its first column as a parsed date index.
# NOTE(review): this rebinds `df`. From here on it is the saved summary, not the
# quote frame from get_data(); runit() reads the global `df`, so re-running it
# after this cell would operate on the wrong data.
df = pd.read_csv(
    f"../data/result_100-500_{datetime.now().strftime('%Y%m%d')}.csv", index_col=0, parse_dates=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4728 entries, 2023-03-01 to 2023-03-24
Data columns (total 17 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   板块名称     4728 non-null   object 
 1   涨的数量     4728 non-null   int64  
 2   跌的数量     4728 non-null   int64  
 3   平的数量     4728 non-null   int64  
 4   涨幅比      4728 non-null   float64
 5   涨跌幅      4728 non-null   float64
 6   总市值      4728 non-null   float64
 7   总市值亿元    4728 non-null   float64
 8   跌停       4728 non-null   int64  
 9   跌<-5%    4728 non-null   int64  
 10  -3%<-5%  4728 non-null   int64  
 11  -3<-1%   4728 non-null   int64  
 12  平盘       4728 non-null   int64  
 13  <3%      4728 non-null   int64  
 14  3-5%     4728 non-null   int64  
 15  5%-涨停    4728 non-null   int64  
 16  涨停       4728 non-null   int64  
dtypes: float64(4), int64(12), object(1)
memory usage: 664.9+ KB


In [18]:
# Preview the first rows of the reloaded summary.
df.head()

Unnamed: 0_level_0,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值,总市值亿元,跌停,跌<-5%,-3%<-5%,-3<-1%,平盘,<3%,3-5%,5%-涨停,涨停
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-01,IT服务,33,1,0,97.058824,4.469118,637888900000.0,6378.89,0,0,0,0,1,13,10,8,2
2023-03-01,LED,3,0,0,100.0,1.78,44999580000.0,450.0,0,0,0,0,1,1,1,0,0
2023-03-01,专业工程,7,1,0,87.5,2.38375,146894800000.0,1468.95,0,0,0,1,0,6,0,1,0
2023-03-01,专业服务,4,2,0,66.666667,0.201667,97227310000.0,972.27,0,0,0,2,1,2,1,0,0
2023-03-01,专业连锁,3,0,1,75.0,0.845,54123660000.0,541.24,0,0,0,0,1,3,0,0,0


In [19]:
# One bar colour per RANGE label, in the same order (nine entries).
color = ["green", "green", "green", "green",
         "yellow", "red", "red", "red", "red"]


In [20]:
# Select the rows dated 2023-03-01, then show the 其他传媒 sector only.
my_df=df.loc["2023-03-01"]
my_df[my_df['板块名称'] == "其他传媒"]


Unnamed: 0_level_0,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值,总市值亿元,跌停,跌<-5%,-3%<-5%,-3<-1%,平盘,<3%,3-5%,5%-涨停,涨停
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-01,其他传媒,13,0,0,100.0,4.456154,270459100000.0,2704.59,0,0,0,0,0,5,4,3,1


In [21]:
# Select the 其他传媒 sector on 2023-03-01.
my_df=df.loc["2023-03-01"]
my_df = my_df[my_df['板块名称'] == "其他传媒"]

# One bar colour per RANGE label, in the same order (nine entries).
color = ["green", "green", "green", "green",
         "yellow", "red", "red", "red", "red"]

# Reshape the RANGE columns into long form: after reset_index the three
# columns are the bucket label (x), the date and the count (y).
my=my_df[RANGE].unstack()
my=my.reset_index()
my.columns=['x',"date","y"]
# my
# Pair each bucket and its count with a colour; `color` is matched to the rows by position.
data = pd.DataFrame({"x": my["x"], "y": my["y"], "color": color})
data

Unnamed: 0,x,y,color
0,跌停,0,green
1,跌<-5%,0,green
2,-3%<-5%,0,green
3,-3<-1%,0,green
4,平盘,0,yellow
5,<3%,5,red
6,3-5%,4,red
7,5%-涨停,3,red
8,涨停,1,red


In [22]:
# NOTE(review): GridOptionsBuilder is imported but not referenced in this cell;
# confirm whether the st_aggrid dependency is still needed.
from st_aggrid.grid_options_builder import GridOptionsBuilder
import plotly.graph_objs as go
code_df=data
# Bar chart of the count per percent-change bucket, coloured per bar and labelled with the count.
fig = go.Figure([go.Bar(x=code_df['x'], y=code_df['y'], marker={
    'color': code_df["color"]}, text=code_df['y'], textposition='auto')])
# Override the label format and place the labels outside the bars.
fig.update_traces(
    texttemplate='%{text:.2d}', textposition='outside')
fig.update_layout(autosize=True, margin=dict(
    l=70, r=70, t=70, b=70))
# Axis titles: x = 区间 (bucket), y = 数量 (count).
fig.update_layout(
    xaxis_title='区间', yaxis_title='数量')

fig.show()

In [23]:
# Sum every numeric column per date. numeric_only=True is passed explicitly so
# the text column 板块名称 stays out of the result and pandas no longer warns
# about the changing default.
df.groupby(df.index).sum(numeric_only=True)


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值,总市值亿元,跌停,跌<-5%,-3%<-5%,-3<-1%,平盘,<3%,3-5%,5%-涨停,涨停
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2023-03-01,1573,831,133,16612.394968,176.057483,52785110000000.0,527851.06,0,12,49,711,235,1261,169,85,14
2023-03-02,893,1546,89,10824.692015,-52.85061,52454480000000.0,524544.87,4,46,125,1346,130,782,54,36,2
2023-03-03,1261,1198,75,12352.523791,13.299943,52566630000000.0,525666.38,0,12,56,1077,200,1024,118,45,2
2023-03-06,1307,1176,64,12667.674176,48.399389,52855130000000.0,528551.3,2,16,75,1062,119,996,187,81,9
2023-03-07,254,2220,27,2530.155644,-481.419489,51751030000000.0,517510.34,0,61,511,1646,47,203,17,12,4
2023-03-08,1470,954,101,15081.165407,74.292519,52093140000000.0,520931.41,0,8,40,871,176,1293,90,43,4
2023-03-09,935,1500,104,8550.203487,-23.974448,52258030000000.0,522580.23,0,8,56,1401,174,780,81,28,9
2023-03-10,400,2079,23,3618.003605,-374.619351,51413020000000.0,514130.18,2,38,213,1803,68,321,32,15,10
2023-03-13,1379,1053,74,15720.750284,153.77678,51662590000000.0,516625.83,0,14,74,944,128,1055,178,105,8
2023-03-14,654,1774,50,6085.395386,-220.597835,50966680000000.0,509666.85,3,22,181,1543,95,478,76,72,8
