# 用于对本月的历史数据按照板块名称进行分组统计，结果保存到result.csv文件

In [103]:
import pandas as pd
from datetime import datetime
# import sys
# sys.path.append('../utils')
# import constants
from functools import lru_cache
OPTION_DICT = {
    "all": (float('-inf'), float('inf')),
    "0-100": (0, 100),
    "100-500": (100, 500),
    "500-1000": (500, 1000),
    "1000-30000": (1000, 30000),
}

RANGE = ["跌停", "跌<-5%",  "-3%<-5%",     "-3<-1%",
         "平盘", "<3%",     "3-5%",   "5%-涨停", "涨停"]


In [104]:
def get_data() -> tuple[pd.DataFrame, list]:
    """
    获得股票历史信息，并计算总市值
    """
    # 显示结果
    df = pd.read_csv(
        f"../data/merge_{datetime.now().strftime('%Y-%m-%d')}.csv", parse_dates=['日期'], index_col=0, dtype={"股票代码": object})
    # dates = df.index.unique().sort_values().to_list()
    # print(type(dates[0]))
    # dates = [x.strftime("%Y-%m-%d") for x in dates]
    # 获得当前结果集的日期列表
    # dates_list = [date.strftime('%Y-%m-%d') for date in dates]
    value = pd.read_csv("../data/总股本.csv", index_col=0, dtype={"股票代码": object})
    value_dict = value['总股本'].to_dict()
    df['总股本'] = df['股票代码'].apply(lambda x: value_dict.get(x))
    df['总市值'] = df['总股本']*df['收盘']

    return df


In [105]:
print(OPTION_DICT)

{'all': (-inf, inf), '0-100': (0, 100), '100-500': (100, 500), '500-1000': (500, 1000), '1000-30000': (1000, 30000)}


In [106]:
df = get_data()
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170816 entries, 2023-03-01 to 2023-03-24
Data columns (total 15 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   开盘      170816 non-null  float64
 1   收盘      170816 non-null  float64
 2   最高      170816 non-null  float64
 3   最低      170816 non-null  float64
 4   成交量     170816 non-null  int64  
 5   成交额     170816 non-null  float64
 6   振幅      170816 non-null  float64
 7   涨跌幅     170816 non-null  float64
 8   涨跌额     170816 non-null  float64
 9   换手率     170816 non-null  float64
 10  股票代码    170816 non-null  object 
 11  股票名称    170816 non-null  object 
 12  板块名称    170816 non-null  object 
 13  总股本     163580 non-null  float64
 14  总市值     163580 non-null  float64
dtypes: float64(11), int64(1), object(3)
memory usage: 20.9+ MB


In [107]:
df.columns

Index(['开盘', '收盘', '最高', '最低', '成交量', '成交额', '振幅', '涨跌幅', '涨跌额', '换手率', '股票代码',
       '股票名称', '板块名称', '总股本', '总市值'],
      dtype='object')

## 按日期、板块名称分组，并统计涨幅大于0和小于0的股票数量

In [108]:

def get_count(df):
    """
    根据日期,板块名称对涨跌幅进行
    """
    cur_df = df.copy()
    # 按股票名称分组，并统计涨幅大于0和小于0的股票数量
    result = cur_df.groupby(['日期', '板块名称'])['涨跌幅'].agg(
        [('涨的数量', lambda x: sum(x > 0)), ('跌的数量', lambda x: sum(x < 0)), ('平的数量', lambda x: sum(x == 0))])
    result['涨幅比'] = result['涨的数量'] / \
        (result['涨的数量']+result['跌的数量']+result['平的数量'])*100
    # result.reset_index(inplace=True)
    return result


## 按日期、板块名称分组，并统计涨幅平均值以及总市值求和

In [109]:

def get_sum(df):
    """
    根据start_value, end_value过滤总市值
    """
    cur_df = df.copy()
    value_df = cur_df.groupby(['日期', "板块名称"]).agg(
        {"涨跌幅": "mean", "总市值": "sum"})
    # value_df.reset_index(inplace=True)
    return value_df


In [110]:

def get_range(cur_df):  # 按涨跌幅统计
      
     # db = df.loc['2023-03-01']
     cur_df = df.copy()
     bins = [-20, -10, -5, -3, -0.099, 0.099, 3, 5, 10, 20]
     # bins = list(range(-11, 12))
     cuts = pd.cut(cur_df['涨跌幅'], bins=bins)
     pct_chg_list = cur_df.groupby(["日期", "板块名称", cuts])['涨跌幅'].count()
     cur_df = pct_chg_list.unstack()
     return cur_df


### 用于测试

In [111]:
cur_df = df.copy()
# cur_df = cur_df[(cur_df['总市值'] >= (start_value)*100_000_000)
#                 & (cur_df['总市值'] <= (end_value)*100_000_000)]
result = get_count(cur_df)
value_df = get_sum(cur_df)
result


Unnamed: 0_level_0,Unnamed: 1_level_0,涨的数量,跌的数量,平的数量,涨幅比
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-03-01,IT服务,120,7,0,94.488189
2023-03-01,LED,31,7,2,77.500000
2023-03-01,专业工程,31,3,0,91.176471
2023-03-01,专业服务,20,6,1,74.074074
2023-03-01,专业连锁,6,1,1,75.000000
...,...,...,...,...,...
2023-03-24,饮料制造,29,18,0,61.702128
2023-03-24,饰品,4,11,1,25.000000
2023-03-24,饲料,13,4,1,72.222222
2023-03-24,高速公路,2,17,1,10.000000


In [112]:
value_df

Unnamed: 0_level_0,Unnamed: 1_level_0,涨跌幅,总市值
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-01,IT服务,4.119685,1.235165e+12
2023-03-01,LED,1.099000,2.977415e+11
2023-03-01,专业工程,1.343529,3.725464e+11
2023-03-01,专业服务,0.585556,1.848354e+11
2023-03-01,专业连锁,0.792500,6.418736e+10
...,...,...,...
2023-03-24,饮料制造,0.271064,4.812782e+12
2023-03-24,饰品,-0.426250,1.216285e+11
2023-03-24,饲料,0.875556,2.813987e+11
2023-03-24,高速公路,-1.489500,2.310754e+11


In [113]:
final_df = result.join(value_df, on=["日期", "板块名称"])
final_df 

Unnamed: 0_level_0,Unnamed: 1_level_0,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-03-01,IT服务,120,7,0,94.488189,4.119685,1.235165e+12
2023-03-01,LED,31,7,2,77.500000,1.099000,2.977415e+11
2023-03-01,专业工程,31,3,0,91.176471,1.343529,3.725464e+11
2023-03-01,专业服务,20,6,1,74.074074,0.585556,1.848354e+11
2023-03-01,专业连锁,6,1,1,75.000000,0.792500,6.418736e+10
...,...,...,...,...,...,...,...
2023-03-24,饮料制造,29,18,0,61.702128,0.271064,4.812782e+12
2023-03-24,饰品,4,11,1,25.000000,-0.426250,1.216285e+11
2023-03-24,饲料,13,4,1,72.222222,0.875556,2.813987e+11
2023-03-24,高速公路,2,17,1,10.000000,-1.489500,2.310754e+11


In [114]:
final_df.reset_index(inplace=True)
final_df


Unnamed: 0,日期,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值
0,2023-03-01,IT服务,120,7,0,94.488189,4.119685,1.235165e+12
1,2023-03-01,LED,31,7,2,77.500000,1.099000,2.977415e+11
2,2023-03-01,专业工程,31,3,0,91.176471,1.343529,3.725464e+11
3,2023-03-01,专业服务,20,6,1,74.074074,0.585556,1.848354e+11
4,2023-03-01,专业连锁,6,1,1,75.000000,0.792500,6.418736e+10
...,...,...,...,...,...,...,...,...
5179,2023-03-24,饮料制造,29,18,0,61.702128,0.271064,4.812782e+12
5180,2023-03-24,饰品,4,11,1,25.000000,-0.426250,1.216285e+11
5181,2023-03-24,饲料,13,4,1,72.222222,0.875556,2.813987e+11
5182,2023-03-24,高速公路,2,17,1,10.000000,-1.489500,2.310754e+11


In [115]:
final_df[final_df['板块名称'] == "其他传媒"]


Unnamed: 0,日期,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值
28,2023-03-01,其他传媒,53,5,1,89.830508,2.644746,541649900000.0
316,2023-03-02,其他传媒,38,18,3,64.40678,1.087288,551776600000.0
604,2023-03-03,其他传媒,27,31,1,45.762712,0.308644,551440600000.0
892,2023-03-06,其他传媒,9,49,1,15.254237,-1.60661,542280600000.0
1180,2023-03-07,其他传媒,7,51,1,11.864407,-2.082542,530684700000.0
1468,2023-03-08,其他传媒,47,9,3,79.661017,1.255424,536046500000.0
1756,2023-03-09,其他传媒,23,36,0,38.983051,-0.424068,532315200000.0
2044,2023-03-10,其他传媒,11,44,4,18.644068,-1.024576,523158000000.0
2332,2023-03-13,其他传媒,46,11,2,77.966102,1.391695,534719000000.0
2620,2023-03-14,其他传媒,15,44,0,25.423729,-1.193559,527236500000.0


In [116]:
# final_df.dropna(inplace=True, axis=0)
# 将Salary列格式化为亿元
final_df['总市值亿元'] = final_df['总市值'].apply(
    lambda x: '{:.2f}'.format(x/100000000))

db = get_range(cur_df)
result = pd.merge(final_df, db, on=["日期",'板块名称'])
result.reset_index(inplace=True,level=[0,1])
a=result.columns[:9].to_list()
a.extend(RANGE)
result.columns = a


IndexError: Too many levels: Index has only 1 level, not 2

In [None]:
result.columns

## 正式运行

In [117]:
for key,value in OPTION_DICT.items():
    start_value, end_value = value
    cur_df = df.copy()
    cur_df = cur_df[(cur_df['总市值'] >= (start_value)*100_000_000)
                    & (cur_df['总市值'] <= (end_value)*100_000_000)]
    result = get_count(cur_df)
    value_df = get_sum(cur_df)
    final_df = result.join(value_df, on=["日期", "板块名称"])
    # final_df.dropna(inplace=True, axis=0)
    # 将Salary列格式化为亿元
    final_df['总市值亿元'] = final_df['总市值'].apply(
        lambda x: '{:.2f}'.format(x/100000000))
    
    db = get_range(cur_df)
    result = pd.merge(final_df, db, on=["日期",'板块名称'])
    result.reset_index(inplace=True,level=[0,1])
    # 获得字段的前八列
    a = result.columns[:9].to_list()
    a.extend(RANGE)
    result.columns = a

    result.to_csv(
        f"../data/result_{key}_{datetime.now().strftime('%Y%m%d')}.csv", index=False)


## 校验一下数据结果

In [141]:
df = pd.read_csv(
    f"../data/result_100-500_{datetime.now().strftime('%Y%m%d')}.csv", index_col=0, parse_dates=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4710 entries, 2023-03-01 to 2023-03-24
Data columns (total 17 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   板块名称     4710 non-null   object 
 1   涨的数量     4710 non-null   int64  
 2   跌的数量     4710 non-null   int64  
 3   平的数量     4710 non-null   int64  
 4   涨幅比      4710 non-null   float64
 5   涨跌幅      4710 non-null   float64
 6   总市值      4710 non-null   float64
 7   总市值亿元    4710 non-null   float64
 8   跌停       4710 non-null   int64  
 9   跌<-5%    4710 non-null   int64  
 10  -3%<-5%  4710 non-null   int64  
 11  -3<-1%   4710 non-null   int64  
 12  平盘       4710 non-null   int64  
 13  <3%      4710 non-null   int64  
 14  3-5%     4710 non-null   int64  
 15  5%-涨停    4710 non-null   int64  
 16  涨停       4710 non-null   int64  
dtypes: float64(4), int64(12), object(1)
memory usage: 662.3+ KB


In [134]:
df.head()

Unnamed: 0_level_0,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值,总市值亿元,跌停,跌<-5%,-3%<-5%,-3<-1%,平盘,<3%,3-5%,5%-涨停,涨停
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-01,IT服务,33,1,0,97.058824,4.469118,635050800000.0,6350.51,0,0,0,0,1,13,10,8,2
2023-03-01,LED,3,0,0,100.0,1.78,45087630000.0,450.88,0,0,0,0,1,1,1,0,0
2023-03-01,专业工程,7,1,0,87.5,2.38375,141288100000.0,1412.88,0,0,0,1,0,6,0,1,0
2023-03-01,专业服务,4,2,0,66.666667,0.201667,96574760000.0,965.75,0,0,0,2,1,2,1,0,0
2023-03-01,专业连锁,3,0,1,75.0,0.845,53759390000.0,537.59,0,0,0,0,1,3,0,0,0


In [135]:
color = ["green", "green", "green", "green",
         "yellow", "red", "red", "red", "red"]


In [142]:
my_df=df.loc["2023-03-01"]
my_df[my_df['板块名称'] == "其他传媒"]


Unnamed: 0_level_0,板块名称,涨的数量,跌的数量,平的数量,涨幅比,涨跌幅,总市值,总市值亿元,跌停,跌<-5%,-3%<-5%,-3<-1%,平盘,<3%,3-5%,5%-涨停,涨停
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-01,其他传媒,13,0,0,100.0,4.456154,270383500000.0,2703.84,0,0,0,0,0,5,4,3,1


In [139]:
my_df=df.loc["2023-03-01"]
my_df = my_df[my_df['板块名称'] == "其他传媒"]

color = ["green", "green", "green", "green",
         "yellow", "red", "red", "red", "red"]

my=my_df[RANGE].unstack()
my=my.reset_index()
my.columns=['x',"date","y"]
# my
data = pd.DataFrame({"x": my["x"], "y": my["y"], "color": color})
data

Unnamed: 0,x,y,color
0,跌停,0,green
1,跌<-5%,0,green
2,-3%<-5%,0,green
3,-3<-1%,0,green
4,平盘,0,yellow
5,<3%,5,red
6,3-5%,4,red
7,5%-涨停,3,red
8,涨停,1,red


In [140]:
from st_aggrid.grid_options_builder import GridOptionsBuilder
import plotly.graph_objs as go
code_df=data
fig = go.Figure([go.Bar(x=code_df['x'], y=code_df['y'], marker={
    'color': code_df["color"]}, text=code_df['y'], textposition='auto')])
fig.update_traces(
    texttemplate='%{text:.2d}', textposition='outside')
fig.update_layout(autosize=True, margin=dict(
    l=70, r=70, t=70, b=70))
fig.update_layout(
    xaxis_title='区间', yaxis_title='数量')

fig.show()

In [None]:
df.groupby(df.index).sum() 