# 计算冰点和沸点距离现在的天数


In [134]:
import pandas as pd
import datetime as dt

# Load the historical quotes. The first CSV column becomes the index and is
# parsed as dates; stock codes are read as plain objects instead of numbers
# (presumably so codes with leading zeros stay intact - TODO confirm).
data = pd.read_csv(
    "../data/Hist_2023-03-26.csv",
    dtype={"股票代码": object},
    parse_dates=True,
    index_col=0,
)

data.head()


Unnamed: 0_level_0,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率,板块名称,股票代码
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-03-01,22.55,22.42,22.83,22.18,98735,221958533.0,2.9,0.18,0.04,3.03,动物保健,300119
2023-03-02,22.44,21.73,22.54,21.64,53425,117040504.0,4.01,-3.08,-0.69,1.64,动物保健,300119
2023-03-03,21.89,21.27,22.18,21.12,57008,122635566.0,4.88,-2.12,-0.46,1.75,动物保健,300119
2023-03-06,21.3,21.73,21.89,21.17,49037,106350983.0,3.39,2.16,0.46,1.5,动物保健,300119
2023-03-07,21.82,21.27,22.22,21.17,41433,89342602.0,4.83,-2.12,-0.46,1.27,动物保健,300119


In [135]:
# Summarise the loaded frame: index range, per-column dtype, non-null counts and memory use.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 91506 entries, 2023-03-01 to 2023-03-24
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   开盘      91506 non-null  float64
 1   收盘      91506 non-null  float64
 2   最高      91506 non-null  float64
 3   最低      91506 non-null  float64
 4   成交量     91506 non-null  int64  
 5   成交额     91506 non-null  float64
 6   振幅      91506 non-null  float64
 7   涨跌幅     91506 non-null  float64
 8   涨跌额     91506 non-null  float64
 9   换手率     91506 non-null  float64
 10  板块名称    91506 non-null  object 
 11  股票代码    91506 non-null  object 
dtypes: float64(9), int64(1), object(2)
memory usage: 9.1+ MB


## 标记沸点和冰点的日期

In [136]:
# Flag every (date, sector) pair as a "freezing point" (冰点: every row of the
# sector fell that day) and/or a "boiling point" (沸点: every row rose).
# A change of exactly 0 counts as neither rising nor falling.
#
# Both flags are computed in one vectorised GroupBy.all() pass. This replaces
# two groupby.apply(lambda ...: all(...)) passes that were merged afterwards:
# same values, same column order and same row order, without the per-group
# Python callback.
flags = data.assign(**{"冰点": data["涨跌幅"] < 0, "沸点": data["涨跌幅"] > 0})
result = (
    flags.groupby(["日期", "板块名称"])[["冰点", "沸点"]]
    .all()
    .reset_index()
)
# Keep the single-flag frames available under their original names and layout.
ice = result[["日期", "板块名称", "冰点"]]
hot = result[["日期", "板块名称", "沸点"]]
result


Unnamed: 0,日期,板块名称,冰点,沸点
0,2023-03-01,IT服务,False,False
1,2023-03-01,LED,False,False
2,2023-03-01,专业工程,False,False
3,2023-03-01,专业服务,False,False
4,2023-03-01,专业连锁,False,False
...,...,...,...,...
4225,2023-03-24,食品及饲料添加剂,False,False
4226,2023-03-24,餐饮,False,False
4227,2023-03-24,饰品,False,False
4228,2023-03-24,饲料,False,False


In [137]:
# Display-only view keyed by (日期, 板块名称): the returned frame is not assigned,
# so `result` keeps its flat columns for the cells below.
result.set_index(['日期',"板块名称"])

Unnamed: 0_level_0,Unnamed: 1_level_0,冰点,沸点
日期,板块名称,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-01,IT服务,False,False
2023-03-01,LED,False,False
2023-03-01,专业工程,False,False
2023-03-01,专业服务,False,False
2023-03-01,专业连锁,False,False
...,...,...,...
2023-03-24,食品及饲料添加剂,False,False
2023-03-24,餐饮,False,False
2023-03-24,饰品,False,False
2023-03-24,饲料,False,False


## 提取各板块的沸点日期

In [138]:
df = result.copy()
# Keep only the (date, sector) rows flagged as a boiling point (沸点).
# The stable sort groups rows by sector while preserving the chronological
# order inside each sector, which is the layout the previous
# groupby/apply/reset_index round trip produced.
hot_dates = (
    df.loc[df["沸点"], ["日期", "板块名称"]]
    .sort_values("板块名称", kind="mergesort")
    .reset_index(drop=True)
)
hot_dates


Unnamed: 0,日期,板块名称
0,2023-03-21,专业连锁
1,2023-03-21,个护用品
2,2023-03-21,乘用车
3,2023-03-15,产业地产
4,2023-03-24,人工景点
...,...,...
248,2023-03-21,风电设备
249,2023-03-02,餐饮
250,2023-03-06,餐饮
251,2023-03-21,餐饮


## 提取各板块的冰点日期

In [139]:
df = result.copy()
# Keep only the (date, sector) rows flagged as a freezing point (冰点).
# The stable sort groups rows by sector while preserving the chronological
# order inside each sector, which is the layout the previous
# groupby/apply/reset_index round trip produced.
ice_dates = (
    df.loc[df["冰点"], ["日期", "板块名称"]]
    .sort_values("板块名称", kind="mergesort")
    .reset_index(drop=True)
)
ice_dates


Unnamed: 0,日期,板块名称
0,2023-03-14,专业连锁
1,2023-03-20,专业连锁
2,2023-03-07,乘用车
3,2023-03-10,乘用车
4,2023-03-13,乘用车
...,...,...
388,2023-03-09,餐饮
389,2023-03-10,餐饮
390,2023-03-15,餐饮
391,2023-03-16,餐饮


## 计算距最近一次沸点/冰点的天数

In [140]:
# Days elapsed since a sector's most recent boiling point (沸点).
def hot_date(specified_date, sect, dates=None):
    """Return the calendar-day gap to the sector's latest boiling point.

    Args:
        specified_date: ``datetime.date`` being evaluated.
        sect: Sector name, matched against the ``板块名称`` column.
        dates: Optional frame with a ``日期`` column of timestamps and a
            ``板块名称`` column. Defaults to the notebook-level ``hot_dates``.

    Returns:
        int: ``-1`` if ``specified_date`` is itself a boiling-point date of
        the sector; ``0`` if the sector has no boiling point on or before
        ``specified_date`` (including sectors with none at all); otherwise
        the number of calendar days (weekends included) since the most
        recent earlier boiling point.
    """
    source = hot_dates if dates is None else dates
    sector_days = [
        stamp.date() for stamp in source.loc[source['板块名称'] == sect, '日期']
    ]
    # Filtering with <= instead of scanning neighbouring pairs keeps the
    # result correct even when the rows are not in chronological order.
    earlier = [day for day in sector_days if day <= specified_date]
    if not earlier:
        return 0
    latest = max(earlier)
    if latest == specified_date:
        return -1
    return (specified_date - latest).days


# Evaluate hot_date once for every (sector, date) key present in `result` and
# collect the answers into a frame that can be joined back on those two keys.
grouped = result.groupby(["板块名称", "日期"])
mydata = [
    [sector, day, hot_date(day.date(), sector)]
    for (sector, day), _rows in grouped
]
final_hot = pd.DataFrame(mydata)
final_hot.columns = ["板块名称", '日期', "距上次沸点天数"]
# Normalise the key column to datetime so it matches the dtype in `result`.
final_hot['日期'] = pd.to_datetime(final_hot['日期'])
final_hot.head()


Unnamed: 0,板块名称,日期,距上次沸点天数
0,IT服务,2023-03-01,0
1,IT服务,2023-03-02,0
2,IT服务,2023-03-03,0
3,IT服务,2023-03-06,0
4,IT服务,2023-03-07,0


In [141]:
# Check the row count and dtypes of the boiling-point gap table before merging it.
final_hot.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4230 entries, 0 to 4229
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   板块名称     4230 non-null   object        
 1   日期       4230 non-null   datetime64[ns]
 2   距上次沸点天数  4230 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 99.3+ KB


In [142]:
# Attach the boiling-point gap to `result`. Any copy of the column left by an
# earlier run is dropped first so the cell is idempotent: without that,
# re-running would merge the column in twice and pandas would rename the pair
# with _x/_y suffixes.
result = pd.merge(
    result.drop(columns=["距上次沸点天数"], errors="ignore"),
    final_hot,
    on=['日期', '板块名称'],
)
result

Unnamed: 0,日期,板块名称,冰点,沸点,距上次沸点天数
0,2023-03-01,IT服务,False,False,0
1,2023-03-01,LED,False,False,0
2,2023-03-01,专业工程,False,False,0
3,2023-03-01,专业服务,False,False,0
4,2023-03-01,专业连锁,False,False,0
...,...,...,...,...,...
4225,2023-03-24,食品及饲料添加剂,False,False,0
4226,2023-03-24,餐饮,False,False,3
4227,2023-03-24,饰品,False,False,0
4228,2023-03-24,饲料,False,False,0


In [143]:
# Days elapsed since a sector's most recent freezing point (冰点).
def ice_date(specified_date, sect, dates=None):
    """Return the calendar-day gap to the sector's latest freezing point.

    Args:
        specified_date: ``datetime.date`` being evaluated.
        sect: Sector name, matched against the ``板块名称`` column.
        dates: Optional frame with a ``日期`` column of timestamps and a
            ``板块名称`` column. Defaults to the notebook-level ``ice_dates``.

    Returns:
        int: ``-1`` if ``specified_date`` is itself a freezing-point date of
        the sector; ``0`` if the sector has no freezing point on or before
        ``specified_date`` (including sectors with none at all); otherwise
        the number of calendar days (weekends included) since the most
        recent earlier freezing point.
    """
    source = ice_dates if dates is None else dates
    sector_days = [
        stamp.date() for stamp in source.loc[source['板块名称'] == sect, '日期']
    ]
    # Filtering with <= instead of scanning neighbouring pairs keeps the
    # result correct even when the rows are not in chronological order.
    earlier = [day for day in sector_days if day <= specified_date]
    if not earlier:
        return 0
    latest = max(earlier)
    if latest == specified_date:
        return -1
    return (specified_date - latest).days


# Evaluate ice_date once for every (sector, date) key present in `result` and
# collect the answers into a frame that can be joined back on those two keys.
grouped = result.groupby(["板块名称", "日期"])
mydata = [
    [sector, day, ice_date(day.date(), sector)]
    for (sector, day), _rows in grouped
]
ice_hot = pd.DataFrame(mydata)
ice_hot.columns = ["板块名称", '日期', "距上次冰点天数"]
# Normalise the key column to datetime so it matches the dtype in `result`.
ice_hot['日期'] = pd.to_datetime(ice_hot['日期'])
ice_hot.head()


Unnamed: 0,板块名称,日期,距上次冰点天数
0,IT服务,2023-03-01,0
1,IT服务,2023-03-02,0
2,IT服务,2023-03-03,0
3,IT服务,2023-03-06,0
4,IT服务,2023-03-07,0


In [144]:
# Check the row count and dtypes of the freezing-point gap table before merging it.
ice_hot.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4230 entries, 0 to 4229
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   板块名称     4230 non-null   object        
 1   日期       4230 non-null   datetime64[ns]
 2   距上次冰点天数  4230 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 99.3+ KB


In [145]:
# Attach the freezing-point gap to `result`. Any copy of the column left by an
# earlier run is dropped first so the cell is idempotent: without that,
# re-running would merge the column in twice and pandas would rename the pair
# with _x/_y suffixes.
result = pd.merge(
    result.drop(columns=["距上次冰点天数"], errors="ignore"),
    ice_hot,
    on=['日期', '板块名称'],
)
result


Unnamed: 0,日期,板块名称,冰点,沸点,距上次沸点天数,距上次冰点天数
0,2023-03-01,IT服务,False,False,0,0
1,2023-03-01,LED,False,False,0,0
2,2023-03-01,专业工程,False,False,0,0
3,2023-03-01,专业服务,False,False,0,0
4,2023-03-01,专业连锁,False,False,0,0
...,...,...,...,...,...,...
4225,2023-03-24,食品及饲料添加剂,False,False,0,0
4226,2023-03-24,餐饮,False,False,3,8
4227,2023-03-24,饰品,False,False,0,0
4228,2023-03-24,饲料,False,False,0,0


In [146]:
# Write the merged frame to a CSV (path is relative to the working directory), omitting the row index.
result.to_csv("临时冰点沸点.csv",index=False)

## 检查结果

In [147]:
# Spot-check one sector: show only the rows whose 板块名称 is "IT服务".
result[result['板块名称'] == "IT服务"]


Unnamed: 0,日期,板块名称,冰点,沸点,距上次沸点天数,距上次冰点天数
0,2023-03-01,IT服务,False,False,0,0
235,2023-03-02,IT服务,False,False,0,0
470,2023-03-03,IT服务,False,False,0,0
705,2023-03-06,IT服务,False,False,0,0
940,2023-03-07,IT服务,False,False,0,0
1175,2023-03-08,IT服务,False,False,0,0
1410,2023-03-09,IT服务,False,False,0,0
1645,2023-03-10,IT服务,False,False,0,0
1880,2023-03-13,IT服务,False,False,0,0
2115,2023-03-14,IT服务,False,False,0,0


In [148]:
# Spot-check a sector that has both flags set on some days: rows whose 板块名称 is "化学制药".
result[result['板块名称'] == "化学制药"]


Unnamed: 0,日期,板块名称,冰点,沸点,距上次沸点天数,距上次冰点天数
61,2023-03-01,化学制药,True,False,0,-1
296,2023-03-02,化学制药,False,False,0,1
531,2023-03-03,化学制药,False,False,0,2
766,2023-03-06,化学制药,False,False,0,5
1001,2023-03-07,化学制药,False,False,0,6
1236,2023-03-08,化学制药,False,False,0,7
1471,2023-03-09,化学制药,False,True,-1,8
1706,2023-03-10,化学制药,True,False,1,-1
1941,2023-03-13,化学制药,False,True,-1,3
2176,2023-03-14,化学制药,True,False,1,-1
