In [1]:
# Configure the data directory and the module search path used by this notebook.
# NOTE: every path is derived from the kernel's current working directory, so this
# assumes the notebook is run from a folder two levels below the project root.
from pathlib import Path
import os
import sys

CURRENT_DIR = Path(os.getcwd())
PJ_DIR = CURRENT_DIR.parent.parent
LIB_DIR = PJ_DIR / "lib"
DATA_DIR = PJ_DIR / "data"

# Append only once, so that re-running this cell does not pile up duplicate
# entries in sys.path.
if str(LIB_DIR) not in sys.path:
    sys.path.append(str(LIB_DIR))

# Modules used throughout the notebook.
# lib_dataprocess is presumably resolved through LIB_DIR added above, which is why
# these imports must stay below the sys.path setup.
from lib_dataprocess import KessanPl, PricelistPl, read_data
import polars as pl
from datetime import date

In [3]:
# Load the settlement (kessan) data and prepare it for processing.
# When no df is passed, the data is read from the parquet file at the default path and set as KessanPl.df.
# KessanPl(df: Union[pl.DataFrame, None]=None)
KPL = KessanPl()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3
1301,2019-03-31,"""本""",2019-05-13,256151,3831,4434,2914,269.6,70.0,4
…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1


In [4]:
#### Methods whose names start with filter_
#### They extract specific records from KessanPl.df.
#### KessanPl.df itself is modified and overwritten.

In [5]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Keep only quarterly ("quaterly") or full-year ("yearly") settlements and overwrite KessanPl.df with the result.
# def filter_settlement_type(self, settlement_type: Literal["quaterly", "yearly"]) -> None:
KPL.filter_settlement_type("quaterly")
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3
1301,2019-03-31,"""四""",2019-05-13,58368,551,511,413,38.2,0.9,4
1301,2019-06-30,"""四""",2019-08-02,62644,770,1015,706,65.3,1.2,1
1301,2019-09-30,"""四""",2019-11-05,64240,-35,113,4,0.4,-0.1,2
…,…,…,…,…,…,…,…,…,…,…
9997,2023-09-30,"""四""",2023-10-31,44263,1939,2466,1878,19.4,4.4,2
9997,2023-12-31,"""四""",2024-01-31,60173,2630,2605,1818,18.8,4.4,3
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1


In [6]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Keep only the settlement records of the stock given by `code` and overwrite KessanPl.df with the result.
# def filter_code(self, code: int) -> None:
KPL.filter_code(9997)
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
9997,2017-03-31,"""本""",2017-05-11,146083,10882,12188,5802,59.7,12.5,-2
9997,2018-03-31,"""本""",2018-05-11,161673,13008,13248,9665,99.4,12.5,4
9997,2018-09-30,"""四""",2018-10-31,37182,1374,2101,1362,14.0,3.7,2
9997,2018-12-31,"""四""",2019-01-31,54394,4610,4690,2949,30.3,8.5,3
9997,2019-03-31,"""四""",2019-05-13,44093,3545,4001,3166,32.6,8.0,4
…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1


In [7]:
#### Methods whose names start with get_
#### They process KessanPl.df and return the result as a pl.DataFrame instance.
#### KessanPl.df itself is left untouched.

In [8]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Build and return a pl.DataFrame of quarterly progress rates for every stock whose progress rate
# is available as of the day given by evaluation_date.
# The progress rate is the quarterly result measured against the latest forecast for the current
# fiscal period as of evaluation_date.
# No date argument is passed in this call, so the method's default applies
# (presumably today's date — TODO confirm against the method signature).
# Note that `df` is rebound here from the raw input frame to the result frame.
df = KPL.get_expected_quatery_settlements_progress_rate()
df

code,settlement_date,yearly_settlement_date,quater,announcement_date,sales_pr(%),operating_income_pr(%),ordinary_profit_pr(%),final_profit_pr(%),q_sales,q_operating_income,q_ordinary_profit,q_final_profit,forcast_announcement_date,forcast_sales,forcast_operating_income,forcast_ordinary_profit,forcast_final_profit
i64,date,date,i64,date,f64,f64,f64,f64,i64,i64,i64,i64,date,i64,i64,i64,i64
1301,2024-06-30,2025-03-31,1,2024-08-05,22.7,31.0,33.0,34.5,68204,3102,3295,2412,2024-05-10,300000,10000,10000,7000
1301,2024-09-30,2025-03-31,2,2024-11-06,46.9,54.6,53.2,38.7,140565,5464,5315,2709,2024-05-10,300000,10000,10000,7000
1332,2024-06-30,2025-03-31,1,2024-08-06,25.2,29.9,27.2,22.0,220644,9724,9516,5289,2024-05-14,875000,32500,35000,24000
1332,2024-09-30,2025-03-31,2,2024-11-06,50.4,53.2,48.5,43.7,440682,17276,16976,10499,2024-05-14,875000,32500,35000,24000
1333,2024-06-30,2025-03-31,1,2024-08-05,24.5,25.5,30.8,29.7,256928,7657,9868,6540,2024-08-05,1050000,30000,32000,22000
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9994,2024-09-30,2025-03-31,2,2024-11-14,47.9,38.0,38.2,41.8,79570,2434,2485,1548,2024-05-15,166000,6400,6500,3700
9996,2024-06-30,2025-03-31,1,2024-08-09,24.3,20.9,23.2,22.7,11994,326,414,283,2024-05-13,49430,1560,1785,1245
9996,2024-09-30,2025-03-31,2,2024-11-13,48.7,51.3,52.5,51.3,24097,801,938,639,2024-05-13,49430,1560,1785,1245
9997,2024-06-30,2025-03-31,1,2024-07-31,23.7,8.5,13.4,12.4,51368,982,1681,994,2024-05-13,217000,11500,12500,8000


In [9]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Build and return a pl.DataFrame of each quarter's progress rate measured against the
# actual full-year results of the same fiscal year.
# def get_actual_quatery_settlements_progress_rate(self) -> pl.DataFrame:
# Note that `df` is rebound here from the raw input frame to the result frame.
df = KPL.get_actual_quatery_settlements_progress_rate()
df

code,settlement_date,yearly_settlement_date,quater,announcement_date,sales_pr(%),operating_income_pr(%),ordinary_profit_pr(%),final_profit_pr(%),q_sales,q_operating_income,q_ordinary_profit,q_final_profit,yearly_announcement_date,yearly_sales,yearly_operating_income,yearly_ordinary_profit,yearly_final_profit
i64,date,date,i64,date,f64,f64,f64,f64,i64,i64,i64,i64,date,i64,i64,i64,i64
1301,2019-03-31,2019-03-31,4,2019-05-13,100.0,100.0,100.0,100.0,256151,3831,4434,2914,2019-05-13,256151,3831,4434,2914
1301,2019-06-30,2020-03-31,1,2019-08-02,23.9,26.4,28.1,34.7,62644,770,1015,706,2020-05-12,262519,2918,3608,2037
1301,2019-09-30,2020-03-31,2,2019-11-05,48.3,25.2,31.3,34.9,126884,735,1128,710,2020-05-12,262519,2918,3608,2037
1301,2019-12-31,2020-03-31,3,2020-02-07,78.3,89.7,82.6,85.4,205436,2616,2979,1739,2020-05-12,262519,2918,3608,2037
1301,2020-03-31,2020-03-31,4,2020-05-12,100.0,100.0,100.0,100.0,262519,2918,3608,2037,2020-05-12,262519,2918,3608,2037
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2023-03-31,2023-03-31,4,2023-05-12,100.0,100.0,100.0,100.0,212376,11217,12459,7417,2023-05-12,212376,11217,12459,7417
9997,2023-06-30,2024-03-31,1,2023-07-31,24.9,11.8,15.7,23.7,51842,1156,1859,1385,2024-05-13,208298,9787,11831,5839
9997,2023-09-30,2024-03-31,2,2023-10-31,46.1,31.6,36.6,55.9,96105,3095,4325,3263,2024-05-13,208298,9787,11831,5839
9997,2023-12-31,2024-03-31,3,2024-01-31,75.0,58.5,58.6,87.0,156278,5725,6930,5081,2024-05-13,208298,9787,11831,5839


In [10]:
fp = DATA_DIR / "kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# For the stock given by `code`, get the announcement dates that open and close the settlement
# period (of the given settlement_type) that contains valuation_date.
# When valuation_date is recent enough that the period-end settlement has not been announced yet
# (e.g. valuation_date = date.today()), date(2999, 12, 31) is returned as the period end.
# def get_current_settlement_period(self, code: int, valuation_date: date, settlement_type: Literal["四", "本"]) -> tuple:
# NOTE(review): the signature above is named get_current_settlement_period, while the call below
# uses get_current_settlement_period_by_announcement_date — confirm which name is current.

# Parameters
code = 1301
# A fixed date keeps this example reproducible; assign date.today() instead to evaluate as of now.
valuation_date = date(2023, 12, 11)
settlement_type = "本"

# Returns a (period_start, period_end) tuple of announcement dates.
KPL.get_current_settlement_period_by_announcement_date(code, valuation_date, settlement_type)

(datetime.date(2023, 5, 12), datetime.date(2024, 5, 10))

In [11]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Get a pl.DataFrame of quarterly progress rates for every stock whose progress rate is available
# as of the day given by evaluation_date.
# The progress rate is the quarterly result measured against the latest forecast for the current
# fiscal period as of evaluation_date.
# KessanPl.df is narrowed to the settlements on or before valuation_date.
# def with_columns_expected_quatery_settlements_progress_rate(self, valuation_date: date=date.today()) -> None:
# NOTE(review): the signature above names with_columns_expected_quatery_settlements_progress_rate,
# but the call below is get_expected_quatery_settlements_progress_rate() — identical to the call in
# an earlier cell of this notebook. Confirm which method this cell is meant to demonstrate.
df = KPL.get_expected_quatery_settlements_progress_rate()
df

code,settlement_date,yearly_settlement_date,quater,announcement_date,sales_pr(%),operating_income_pr(%),ordinary_profit_pr(%),final_profit_pr(%),q_sales,q_operating_income,q_ordinary_profit,q_final_profit,forcast_announcement_date,forcast_sales,forcast_operating_income,forcast_ordinary_profit,forcast_final_profit
i64,date,date,i64,date,f64,f64,f64,f64,i64,i64,i64,i64,date,i64,i64,i64,i64
1301,2024-06-30,2025-03-31,1,2024-08-05,22.7,31.0,33.0,34.5,68204,3102,3295,2412,2024-05-10,300000,10000,10000,7000
1301,2024-09-30,2025-03-31,2,2024-11-06,46.9,54.6,53.2,38.7,140565,5464,5315,2709,2024-05-10,300000,10000,10000,7000
1332,2024-06-30,2025-03-31,1,2024-08-06,25.2,29.9,27.2,22.0,220644,9724,9516,5289,2024-05-14,875000,32500,35000,24000
1332,2024-09-30,2025-03-31,2,2024-11-06,50.4,53.2,48.5,43.7,440682,17276,16976,10499,2024-05-14,875000,32500,35000,24000
1333,2024-06-30,2025-03-31,1,2024-08-05,24.5,25.5,30.8,29.7,256928,7657,9868,6540,2024-08-05,1050000,30000,32000,22000
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9994,2024-09-30,2025-03-31,2,2024-11-14,47.9,38.0,38.2,41.8,79570,2434,2485,1548,2024-05-15,166000,6400,6500,3700
9996,2024-06-30,2025-03-31,1,2024-08-09,24.3,20.9,23.2,22.7,11994,326,414,283,2024-05-13,49430,1560,1785,1245
9996,2024-09-30,2025-03-31,2,2024-11-13,48.7,51.3,52.5,51.3,24097,801,938,639,2024-05-13,49430,1560,1785,1245
9997,2024-06-30,2025-03-31,1,2024-07-31,23.7,8.5,13.4,12.4,51368,982,1681,994,2024-05-13,217000,11500,12500,8000


In [12]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

code = 1301
# Return the quarterly settlements of the stock given by `code` that were already announced as of
# valuation_date, newest first, up to `num` records.
#def get_latest_quater_settlement(self, code: int, valuation_date: date=date.today(), num: int=8) -> None:
# NOTE(review): the signature above is annotated "-> None", yet the return value is assigned to
# `df` and displayed below — confirm the annotation.

df = KPL.get_latest_quater_settlement(code)
df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2024-09-30,"""四""",2024-11-06,72361,2362,2020,297,25.0,3.3,2
1301,2024-06-30,"""四""",2024-08-05,68204,3102,3295,2412,203.1,4.5,1
1301,2024-03-31,"""四""",2024-05-10,59990,1857,1863,951,87.9,3.1,4
1301,2023-12-31,"""四""",2024-02-02,75539,2953,2851,2534,236.3,3.9,3
1301,2023-09-30,"""四""",2023-11-06,63708,1575,1594,624,58.2,2.5,2
1301,2023-06-30,"""四""",2023-08-04,62367,2421,2548,1827,170.5,3.9,1
1301,2023-03-31,"""四""",2023-05-12,58780,-19,-157,-182,-17.0,0.0,4
1301,2022-12-31,"""四""",2023-02-03,79768,2749,2534,1756,163.7,3.4,3


In [27]:
# Return the list of the most recent quarterly settlement of every stock that was already
# announced as of valuation_date.
#def get_latest_quater_settlements(self, valuation_date: date=date.today()) -> pl.DataFrame:
# KessanPl() is built without a df here, so the data comes from KessanPl's default source.
KPL = KessanPl()
df = KPL.get_latest_quater_settlements()
df

code,announcement_date,settlement_date,settlement_type,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,date,str,i64,i64,i64,i64,f64,f64,i64
1301,2024-11-06,2024-09-30,"""四""",72361,2362,2020,297,25.0,3.3,2
1332,2024-11-06,2024-09-30,"""四""",220038,7552,7460,5210,16.8,3.4,2
1333,2024-11-11,2024-09-30,"""四""",275206,8413,5821,7270,144.4,3.1,2
1375,2024-11-07,2024-09-30,"""四""",11383,1393,1264,833,20.9,12.2,2
1376,2024-10-04,2024-08-31,"""四""",14832,131,192,106,9.4,0.9,1
…,…,…,…,…,…,…,…,…,…,…
9991,2024-10-29,2024-09-30,"""四""",26827,1386,1354,854,25.3,5.2,2
9993,2024-10-10,2024-08-31,"""四""",25989,-274,-52,-870,-80.7,-1.1,2
9994,2024-11-14,2024-09-30,"""四""",40419,1148,1164,731,67.4,2.8,2
9996,2024-11-13,2024-09-30,"""四""",12103,475,524,356,42.7,3.9,2


In [13]:
fp = DATA_DIR / "kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Return, as a pl.DataFrame, the history of settlement forecasts already announced as of
# valuation_date for the stock given by `code`.
# this_settlement_period=True narrows the result to forecasts for the fiscal period that contains valuation_date.
# descending=True sorts the result with the newest announcement first.
#def get_settlement_forcast(self, code: int, valuation_date: date=date.today(), this_settlement_period=True, descending=True) -> pl.DataFrame:

# Parameters
code = 1301
valuation_date = date.today()
this_settlement_period = True

df = KPL.get_settlement_forcast(
    code,
    valuation_date=valuation_date,
    this_settlement_period=this_settlement_period,
)
df


code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2025-03-31,"""予""",2024-05-10,300000,10000,10000,7000,589.4,110.0,4


In [14]:
fp = DATA_DIR / "kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Return a pl.DataFrame that adds, for each quarterly or full-year settlement in KessanPl, a column
# with the stock's price change rate from that settlement's announcement date to the next
# announcement date, plus a column with the Nikkei average's change rate over the same span.
# NOTE(review): the output captured below this cell shows only a price_change_rate column —
# confirm whether the Nikkei column is still produced.
# The computation is heavy in time and memory, so it is better to narrow KessanPl.df and
# pricelist_df beforehand (e.g. by period).
# settlement_type chooses whether change rates are taken per full-year or per quarterly settlement.
# When pricelist_df is an empty dataframe (the default), prices are loaded from a parquet file.
# overnight_bigining=True uses the price on the announcement day itself as the starting price;
# False uses the price on the next business day after the announcement.
# overnight_end=True uses the price on the business day after the announcement as the ending price;
# False uses the price on the announcement day itself.
# *_point selects which point of the daily candle is used as the starting or ending price at the
# period start (bigining) and period end (end).
#def get_settlement_performance(self,
#    settlement_type: Literal["本", "四"],
#    pricelist_df: pl.DataFrame = pl.DataFrame(),
#    overnight_bigining: bool = False,
#    overnight_end: bool = True,
#    bigining_point: Literal["open", "high", "low", "close"] = "open",
#    end_point: Literal["open", "high", "low", "close"] = "open"
#) -> pl.DataFrame:

# Arguments (swap in the commented-out line to evaluate quarterly settlements instead)
# settlement_type = "四"
settlement_type = "本"
fp = DATA_DIR / "reviced_pricelist.parquet"
df = read_data(fp)
RevPl = PricelistPl(df)
pricelist_df = RevPl.df.filter(pl.col("date") >= date(2021, 1, 1))

# Run the method on a narrowed KessanPl.df
KPL.df = KPL.df.filter(pl.col("settlement_date") >= date(2022, 1, 1))
df = KPL.get_settlement_performance(settlement_type, pricelist_df)
df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,price_change_rate
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,f64
1301,2023-03-31,"""本""",2023-05-12,272167,8105,8182,5782,539.1,100.0,4,10.8
1301,2024-03-31,"""本""",2024-05-10,261604,8806,8856,5936,548.6,100.0,4,4.1
1332,2023-03-31,"""本""",2023-05-12,768181,24488,27776,21233,68.2,18.0,4,18.1
1332,2024-03-31,"""本""",2024-05-14,831375,29663,31963,23850,76.7,24.0,4,47.1
1333,2023-03-31,"""本""",2023-05-08,1020456,29575,33500,18596,363.7,65.0,4,6.6
…,…,…,…,…,…,…,…,…,…,…,…
9995,2024-03-31,"""本""",2024-05-07,73361,862,1067,994,35.0,0.0,4,84.4
9996,2023-03-31,"""本""",2023-05-12,43667,1240,1354,968,116.1,31.0,4,-8.6
9996,2024-03-31,"""本""",2024-05-13,47606,1527,1756,1227,147.2,40.0,4,60.5
9997,2023-03-31,"""本""",2023-05-12,212376,11217,12459,7417,76.7,20.0,4,11.0


In [15]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)


fp = DATA_DIR/"reviced_pricelist.parquet"
df = read_data(fp)
RPL = PricelistPl(df)
pricelist_df = RPL.df
# Get, as a pl.DataFrame, the list of arguments needed to compute the stock price change over each settlement period.
# A settlement period runs from that period's announcement date to the next announcement date.
# settlement_type is either quarterly ("四") or full-year ("本").
# pricelist_df is used so that start_date and end_date fall on trading days (weekdays).
# Passing the whole pricelist_df as in this example is convenient but resource-hungry; filtering KPL.df
# (e.g. by fiscal year) and narrowing pricelist_df to match makes it lighter.
# overnight_* specify whether the span crosses the announcement date: True crosses it, False does not.
# Columns of the returned df: "code", "settlement_date", "start_date", "end_date"
#def get_settlement_performance_items_df(self,
#    settlement_type: Literal["本", "四"],
#    pricelist_df: pl.DataFrame,
#    overnight_bigining: bool = False,
#    overnight_end: bool = True,
#) -> pl.DataFrame:
# Alternative call for full-year settlements:
#df = KPL.get_settlement_performance_items_df("本", pricelist_df)
df = KPL.get_settlement_performance_items_df("四", pricelist_df)
df

code,settlement_date,start_date,end_date
i64,date,date,date
1301,2018-12-31,2018-11-06,2019-02-12
1301,2019-03-31,2019-02-12,2019-05-14
1301,2019-06-30,2019-05-14,2019-08-05
1301,2019-09-30,2019-08-05,2019-11-06
1301,2019-12-31,2019-11-06,2020-02-10
…,…,…,…
9997,2023-09-30,2023-08-01,2023-11-01
9997,2023-12-31,2023-11-01,2024-02-01
9997,2024-03-31,2024-02-01,2024-05-14
9997,2024-06-30,2024-05-14,2024-08-01


In [16]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# For development use.
# For the stock given by `code`, get the period-start and period-end announcement dates of the
# settlement identified by settlement_date and settlement_type.
# The period start is the announcement date of the previous period's settlement.
#def get_settlement_period_by_announcement_date(self, code: int, settlement_date: date, settlement_type: Literal["四", "本"]) -> tuple:
# Parameters
code = 1301
settlement_date = date(2024, 9, 30)
settlement_type = "四"

# Returns a (period_start, period_end) tuple of announcement dates.
KPL.get_settlement_period_by_announcement_date(code, settlement_date, settlement_type)

(datetime.date(2024, 8, 5), datetime.date(2024, 11, 6))

In [17]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Return the list (history) of full-year settlements of the stock given by `code`.
# When valuation_date is given, full-year settlements up to that date are returned.
# With get_latest_forcast = True, the latest settlement forecast as of valuation_date is returned
# (the output captured below ends with a "予" row).
# def get_target_stock_yearly_settlements(self, code: int, get_latest_forcast=True, valuation_date: date=date.today()) -> pl.DataFrame:
df = KPL.get_target_stock_yearly_settlements(1301)
df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4
1301,2019-03-31,"""本""",2019-05-13,256151,3831,4434,2914,269.6,70.0,4
1301,2020-03-31,"""本""",2020-05-12,262519,2918,3608,2037,188.5,70.0,4
1301,2021-03-31,"""本""",2021-05-14,249197,4657,4879,3838,356.9,80.0,4
1301,2022-03-31,"""本""",2022-05-13,253575,6392,6904,4634,430.8,90.0,4
1301,2023-03-31,"""本""",2023-05-12,272167,8105,8182,5782,539.1,100.0,4
1301,2024-03-31,"""本""",2024-05-10,261604,8806,8856,5936,548.6,100.0,4
1301,2025-03-31,"""予""",2024-05-10,300000,10000,10000,7000,589.4,110.0,4


In [18]:
#### Methods whose names start with with_columns_
#### They add columns to KessanPl.df.
#### KessanPl.df itself is modified and overwritten.

In [19]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Compared with the same period of the previous year, compute the differential profit ratio:
#   (this year's profit margin - last year's profit margin) / (this year's sales - last year's sales)
# for each differential profit from operating income through final profit, and add the results
# to KessanPl.df as columns.
# (The formula is translated as written in the original note — confirm against the implementation.)
# For sales, a sales growth-rate column is added.
# Used to see how much profitability has improved.
# For forecasts, the ratio is computed against the previous year's actual results.
# Forecasts for the following period and later are null.
# def with_columns_diff_growthrate(self) -> None:
KPL.with_columns_diff_growthrate()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,sales_growthrate,diff_operating_income_growthrate,diff_ordinary_profit_growthrate,diff_final_profit_growthrate
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64,f64
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2,,,,
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4,,,,
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2,,,,
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3,,,,
1301,2019-03-31,"""四""",2019-05-13,58368,551,511,413,38.2,0.9,4,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4,4.2,19.7,7.7,24.8
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,0.2,733.7,1304.1,-1303.1
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,-1.9,35.1,15.4,38.7
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,-0.9,36.7,37.6,82.5


In [20]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Add columns holding the growth rate versus the same period of the previous year,
# for sales through final profit.
# def with_columns_growthrate_lastyear(self):
KPL.with_columns_growthrate_lastyear()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,sales_growthrate,operating_income_growthrate,ordinary_profit_growthrate,final_profit_growthrate
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64,f64
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2,,,,
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4,7.7,9.2,19.6,32.6
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2,,,,
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3,,,,
1301,2019-03-31,"""四""",2019-05-13,58368,551,511,413,38.2,0.9,4,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4,4.2,17.5,5.7,37.0
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,0.2,21.5,35.3,-62.8
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,-1.9,-12.7,-5.0,-21.3
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,-0.9,-15.1,-9.6,-28.2


In [21]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Append a fiscal-year-end date column, "yearly_settlement_date", as the last column of KessanPl.df.
# A quarterly record's settlement_date holds the quarter's closing date, so this column makes it
# clear which fiscal year (year/month term) the quarter belongs to.
# def with_columns_yearly_settlement_date(self) -> None:
KPL.with_columns_yearly_settlement_date()
KPL.df


code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,yearly_settlement_date
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,date
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2,2017-03-31
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4,2018-03-31
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2,2019-03-31
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3,2019-03-31
1301,2019-03-31,"""本""",2019-05-13,256151,3831,4434,2914,269.6,70.0,4,2019-03-31
…,…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,2024-03-31
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,2024-03-31
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4,2025-03-31
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,2025-03-31


In [22]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Add settlement progress-rate columns to the quarterly records of KessanPl.df
# (pl.col("settlement_type")=="四") and set KessanPl.df to the quarterly records only.
# The added columns are the progress rates of sales, operating income, ordinary profit and final profit.
# def with_columns_settlements_progress_rate(self) -> None:
# NOTE(review): the output captured below this cell shows joined "*_right" columns instead of
# progress-rate columns, and it still contains "本" rows — confirm whether the method or this
# description is out of date.
KPL.with_columns_settlements_progress_rate()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,yearly_settlement_date,settlement_date_right,settlement_type_right,announcement_date_right,sales_right,operating_income_right,ordinary_profit_right,final_profit_right,reviced_eps_right,dividend_right,quater_right
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,date,date,str,date,i64,i64,i64,i64,f64,f64,i64
1301,2020-06-30,"""四""",2020-08-07,54320,633,697,565,52.6,1.2,1,2021-03-31,2021-03-31,"""予""",2020-05-12,270000,4200,4500,3000,279.2,70.0,4
1301,2020-09-30,"""四""",2020-11-06,62541,746,808,1012,94.2,1.2,2,2021-03-31,2021-03-31,"""予""",2020-05-12,270000,4200,4500,3000,279.2,70.0,4
1301,2020-12-31,"""四""",2021-02-05,77527,2302,2316,1645,153.0,3.0,3,2021-03-31,2021-03-31,"""予""",2020-05-12,270000,4200,4500,3000,279.2,70.0,4
1301,2021-03-31,"""四""",2021-05-14,54809,976,1058,616,57.3,1.8,4,2021-03-31,2021-03-31,"""予""",2020-05-12,270000,4200,4500,3000,279.2,70.0,4
1301,2021-03-31,"""本""",2021-05-14,249197,4657,4879,3838,356.9,80.0,4,2021-03-31,2021-03-31,"""予""",2020-05-12,270000,4200,4500,3000,279.2,70.0,4
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,2024-03-31,2024-03-31,"""予""",2023-10-30,208500,10000,11000,7000,72.4,20.5,4
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,2024-03-31,2024-03-31,"""予""",2023-05-12,219000,14000,14700,9400,97.2,20.5,4
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,2024-03-31,2024-03-31,"""予""",2023-10-30,208500,10000,11000,7000,72.4,20.5,4
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,2025-03-31,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4


In [23]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Add quarter-cumulative columns for sales through final profit to the records of KessanPl.df.
# For full-year and forecast records, the added columns simply copy the sales-through-final-profit values.
# def with_columns_accumulated_quaterly_settlement(self) -> None:
KPL.with_columns_accumulated_quaterly_settlement()
# Display only the quarterly ("四") records.
KPL.df.filter(pl.col("settlement_type")=="四")

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,yearly_settlement_date,acc_sales,acc_operating_income,acc_ordinary_profit,acc_final_profit
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,date,i64,i64,i64,i64
1301,2019-03-31,"""四""",2019-05-13,58368,551,511,413,38.2,0.9,4,2019-03-31,256151,3831,4434,2914
1301,2019-06-30,"""四""",2019-08-02,62644,770,1015,706,65.3,1.2,1,2020-03-31,62644,770,1015,706
1301,2019-09-30,"""四""",2019-11-05,64240,-35,113,4,0.4,-0.1,2,2020-03-31,126884,735,1128,710
1301,2019-12-31,"""四""",2020-02-07,78552,1881,1851,1029,95.1,2.4,3,2020-03-31,205436,2616,2979,1739
1301,2020-03-31,"""四""",2020-05-12,57083,302,629,298,27.6,0.5,4,2020-03-31,262519,2918,3608,2037
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2023-09-30,"""四""",2023-10-31,44263,1939,2466,1878,19.4,4.4,2,2024-03-31,96105,3095,4325,3263
9997,2023-12-31,"""四""",2024-01-31,60173,2630,2605,1818,18.8,4.4,3,2024-03-31,156278,5725,6930,5081
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,2024-03-31,208298,9787,11831,5839
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,2025-03-31,51368,982,1681,994


In [24]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Add a column that renders the fiscal period in Japanese, to make result output easier to read.
# With add_settlement_type_string=True, the settlement kind is appended after the "〇年〇月期" label;
# with False, only "〇年〇月期" is shown.
#   full-year settlement -> 〇年〇月期通期決算
#   quarterly settlement -> 〇年〇月第〇(単体|累積)四半期決算
# KessanPl's DataFrame cannot tell whether quarterly data is standalone or cumulative, so —
# inelegant as it is — that has to be specified by the caller.
# This call passes no arguments, so the method's defaults apply.
KPL.with_columns_financtial_period()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,yearly_settlement_date,fy,fm,決算期
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,date,str,str,str
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,4,2017-03-31,"""2017""","""3""","""2017年3月期"""
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4,2018-03-31,"""2018""","""3""","""2018年3月期"""
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2,2019-03-31,"""2019""","""3""","""2019年3月期"""
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3,2019-03-31,"""2019""","""3""","""2019年3月期"""
1301,2019-03-31,"""四""",2019-05-13,58368,551,511,413,38.2,0.9,4,2019-03-31,"""2019""","""3""","""2019年3月期"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,2024-03-31,"""2024""","""3""","""2024年3月期"""
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,2024-03-31,"""2024""","""3""","""2024年3月期"""
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4,2025-03-31,"""2025""","""3""","""2025年3月期"""
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,2025-03-31,"""2025""","""3""","""2025年3月期"""


In [25]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Add a week-group index column derived from the date, so that records can be grouped by week.
# Handy when linking (joining) a daily df with a weekly df.
# The column is added to KessanPl.df.
#def with_columns_weekid(self) -> None:

KPL.with_columns_weekid()
KPL.df

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater,weekid
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64,i16
1301,2017-03-31,"""本""",2017-05-11,236561,3723,3709,2422,230.7,60.0,-2,10
1301,2018-03-31,"""本""",2018-05-10,254783,4066,4437,3211,304.3,60.0,4,62
1301,2018-09-30,"""四""",2018-11-05,61245,507,595,269,24.9,0.8,2,88
1301,2018-12-31,"""四""",2019-02-08,78581,2208,2591,1677,155.2,2.8,3,101
1301,2019-03-31,"""本""",2019-05-13,256151,3831,4434,2914,269.6,70.0,4,115
…,…,…,…,…,…,…,…,…,…,…,…
9997,2024-03-31,"""四""",2024-05-13,52020,4062,4901,758,7.8,7.8,4,376
9997,2024-03-31,"""本""",2024-05-13,208298,9787,11831,5839,60.4,20.5,4,376
9997,2025-03-31,"""予""",2024-05-13,217000,11500,12500,8000,82.7,29.0,4,376
9997,2024-06-30,"""四""",2024-07-31,51368,982,1681,994,10.3,1.9,1,387


In [26]:
fp = DATA_DIR/"kessan.parquet"
df = read_data(fp)
KPL = KessanPl(df)

# Records whose announcement date could not be scraped correctly were inserted into stockdb with a
# placeholder date(1900, 1, 1). That breaks analysis, so the announcement date of those records in
# KessanPl.df is provisionally rewritten to 60 days after the settlement date.
#def _revice_irregular_announcement_date(self) -> None:
KPL._revice_irregular_announcement_date()
# Show the records whose announcement date is exactly 60 days after the settlement date.
KPL.df.filter((pl.col("announcement_date")-pl.col("settlement_date"))==pl.duration(days=60))

code,settlement_date,settlement_type,announcement_date,sales,operating_income,ordinary_profit,final_profit,reviced_eps,dividend,quater
i64,date,str,date,i64,i64,i64,i64,f64,f64,i64
1375,2019-03-31,"""本""",2019-05-30,47592,6491,6321,4389,110.2,0.0,-2
1375,2020-03-31,"""本""",2020-05-30,50759,6691,6646,4346,109.1,4230.0,4
1375,2019-12-31,"""四""",2020-02-29,17366,3500,3435,2180,54.7,20.2,3
1375,2020-06-30,"""四""",2020-08-29,10624,909,775,487,12.2,8.6,1
1375,2020-03-31,"""四""",2020-05-30,12033,438,749,566,14.2,3.6,4
…,…,…,…,…,…,…,…,…,…,…
9564,2021-09-30,"""本""",2021-11-29,3522,331,324,211,70.3,0.0,4
9565,2021-10-31,"""本""",2021-12-30,1671,128,131,82,32.9,0.0,4
9565,2020-10-31,"""本""",2020-12-30,831,4,8,-8,-3.2,0.0,-2
9636,2020-07-31,"""四""",2020-09-29,552,-14,-4,-5,-1.8,-2.5,2
