In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import japanize_matplotlib

volume_dfs = {}
for year in range(2018, 2025):
    filename = f"volume/Vol_{year}.csv"
    # 1ファイル内の全シートを読み込み
    volume_dfs[year] = pd.read_csv(filename, header=None)
    volume_dfs[year].columns = ['year', 'kg', 'volume']
    volume_dfs[year]['year'] = pd.to_datetime(volume_dfs[year]['year'].astype(str), format='%Y%m', errors='coerce')
    

future_dfs = {}
for year in range(24, 26):
    filename = f"future/RIC_IDX{year}.csv"
    # 1ファイル内の全シートを読み込み
    future_dfs[year] = pd.read_csv(filename)
    future_dfs[year]['取引日'] = pd.to_datetime(future_dfs[year]['取引日'].astype(str), format='%Y%m%d', errors='coerce')
    # future_dfs[year]['限月'] = pd.to_datetime(future_dfs[year]['限月'].astype(str), format='%Y%m', errors='coerce')
    future_dfs[year] = future_dfs[year].drop('ダミー', axis=1)
    future_dfs[year] = future_dfs[year].set_index('取引日')
    
# 表示する行数の最大値をNoneに設定（全ての行を表示）
pd.set_option('display.max_rows', None)

# 表示する列数の最大値をNoneに設定（全ての列を表示）
pd.set_option('display.max_columns', None)

# 列の幅制限を解除（長いテキストも省略せず表示）
pd.set_option('display.max_colwidth', None)

# 折り返し表示を有効にする（オプション）
pd.set_option('display.expand_frame_repr', False)



In [13]:
future_dfs[24].head()
# for group in future_dfs[2024].groupby(by=0):
#     print(group)

Unnamed: 0_level_0,取引種別,商品,限月,始値,高値,安値,終値,帳入値,出来高,取組高
取引日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-13,61,51,202502,17200.0,17200.0,17200.0,17200.0,17200,1,1
2024-08-13,61,51,202504,17200.0,17200.0,17200.0,17200.0,17200,1,1
2024-08-13,61,51,202506,17200.0,17200.0,17200.0,17200.0,17200,1,1
2024-08-14,61,51,202502,17390.0,17390.0,17390.0,17390.0,17390,1,0
2024-08-14,61,51,202504,17370.0,17370.0,17370.0,17370.0,17370,1,0


In [14]:
volume_df = pd.DataFrame()
for key in volume_dfs:
    volume_df = pd.concat([volume_df,volume_dfs[key]])

volume_df_by_kg = {}
for kg, df in volume_df.groupby(by='kg'):
    volume_df_by_kg[kg] = df
    volume_df_by_kg[kg] = volume_df_by_kg[kg].set_index('year')
    # print(volume_df_by_kg[kg].head())


In [18]:
future_df = pd.DataFrame()
for key in future_dfs:
    future_df = pd.concat([future_df,future_dfs[key]])
future_columns = ['始値', '高値', '安値', '終値', '帳入値']
print(future_columns)

future_df_by_delivery = {}
for delivery, df in future_df.groupby(by='限月'):
    future_df_by_delivery[delivery] = df
    future_df_by_delivery[delivery]['帳入値'].plot()
    # plt.show()
    plt.close()
print(future_df_by_delivery.keys())
# future_df_by_delivery[202502]
# future_df_by_delivery[202502][['始値', '高値', '安値', '終値', '帳入値']].plot()

['始値', '高値', '安値', '終値', '帳入値']
dict_keys([202502, 202504, 202506, 202508, 202510, 202512, 202602, 202604])


In [31]:
from statsmodels.tsa.stattools import adfuller, kpss, range_unit_root_test, zivot_andrews

print('adf')
adf_test_dict = {}
for key in future_df_by_delivery:
    for name in future_columns:
        stationary_list = []
        for i in range(3, len(future_df_by_delivery[key])):
            # 2. ADF検定
            # print(future_df_by_delivery[key].tail(i))
            adf_result = adfuller(future_df_by_delivery[key][name].diff().dropna().tail(i+1))
            if adf_result[1] < 0.05:
                # print(f'{i+1} 定常')
                stationary_list.append('定常')
            else:
                # print(f'{i+1} 非定常')
                stationary_list.append('非定常')
        adf_test_dict[f'{name}_{key}'] = stationary_list
        # print(len(stationary_list))
adf_test_df = pd.DataFrame.from_dict(adf_test_dict, orient='index').T
print(adf_test_df)

print('adf')
kpss_test_dict = {}
for key in future_df_by_delivery:
    for name in future_columns:
        stationary_list = []
        for i in range(3, len(future_df_by_delivery[key])):
            # 2. kpss検定
            # print(future_df_by_delivery[key].tail(i))
            kpss_result = kpss(future_df_by_delivery[key][name].diff().tail(i+1))
            if kpss_result[1] < 0.05:
                # print(f'{i+1} 定常')
                stationary_list.append('定常')
            else:
                # print(f'{i+1} 非定常')
                stationary_list.append('非定常')
        kpss_test_dict[f'{name}_{key}'] = stationary_list
        # print(len(stationary_list))
kpss_test_df = pd.DataFrame.from_dict(adf_test_dict, orient='index').T
print(adf_test_df)

adf
    始値_202502 高値_202502 安値_202502 終値_202502 帳入値_202502 始値_202504 高値_202504 安値_202504 終値_202504 帳入値_202504 始値_202506 高値_202506 安値_202506 終値_202506 帳入値_202506 始値_202508 高値_202508 安値_202508 終値_202508 帳入値_202508 始値_202510 高値_202510 安値_202510 終値_202510 帳入値_202510 始値_202512 高値_202512 安値_202512 終値_202512 帳入値_202512 始値_202602 高値_202602 安値_202602 終値_202602 帳入値_202602 始値_202604 高値_202604 安値_202604 終値_202604 帳入値_202604
0         非定常        定常       非定常        定常        非定常        定常       非定常       非定常       非定常         定常       非定常       非定常       非定常       非定常        非定常       非定常       非定常       非定常        定常         定常       非定常        定常       非定常        定常         定常        定常        定常        定常        定常        非定常       非定常       非定常       非定常       非定常        非定常       非定常       非定常        定常       非定常        非定常
1         非定常        定常        定常        定常        非定常        定常        定常        定常       非定常         定常       非定常       非定常       非定常       非定常        非定常       非定常      

ValueError: cannot convert float NaN to integer