In [None]:
import sys
import os

from yahoo_finance_api2 import share
from yahoo_finance_api2.exceptions import YahooFinanceError
import pandas as pd
import pickle

from sktime.utils.plotting import plot_series

# Securities code handed to share.Share() below and embedded in the pickle
# file names written at the end of the notebook.
# Per the comment at the fetch step: Tokyo Stock Exchange code for Nintendo.
SECURITIES_CODE = "7974.T"

In [None]:
def data_shaping(symbol_data):
    '''
    Turn the data returned by yahoo_finance_api2 into a DataFrame.

    symbol_data is passed straight to the pandas.DataFrame constructor and
    must yield a ``timestamp`` column holding Unix epoch milliseconds.
    Returns the DataFrame with ``timestamp`` converted to datetime values;
    every other column is left untouched.
    '''
    return pd.DataFrame(symbol_data).assign(
        # Epoch milliseconds -> datetime, replacing the column in place.
        timestamp=lambda frame: pd.to_datetime(frame.timestamp, unit='ms')
    )

In [None]:
my_share = share.Share(SECURITIES_CODE)  # Tokyo Stock Exchange securities code: Nintendo

symbol_data = None

try:
    # Fetch five years of history at one-day intervals.
    symbol_data = my_share.get_historical(
        share.PERIOD_TYPE_YEAR, 5,
        share.FREQUENCY_TYPE_DAY, 1,
    )
except YahooFinanceError as e:
    print(e.message)
    sys.exit(1)

# If the call produced nothing, stop here with a clear message rather than
# letting data_shaping fail on the missing ``timestamp`` column.
# NOTE(review): presumably get_historical returns None when no data is
# available -- confirm against the yahoo_finance_api2 documentation.
if symbol_data is None:
    raise ValueError("No historical data returned for " + str(SECURITIES_CODE))

# Shape the raw payload into a DataFrame.
df_symbol_data = data_shaping(symbol_data)
# Keep data up to the previous day: drop the final row (positional slice).
df_symbol_data = df_symbol_data.iloc[:-1]

df_symbol_data

In [None]:
def set_datetimeindex_with_freq(index):
    '''
    Build a DatetimeIndex from ``index`` and attach its inferred frequency.

    The frequency found by pandas.infer_freq is printed (as before) and,
    when one is found, set on the returned index. When no regular frequency
    can be inferred the index is returned without a frequency.

    Any exception raised by pandas.DatetimeIndex or pandas.infer_freq
    (for example when too few values are supplied) propagates unchanged.
    '''
    index = pd.DatetimeIndex(index)
    freq = pd.infer_freq(index)
    print(freq)
    if freq is not None:
        # Previously the inferred frequency was only printed and then
        # discarded; apply it so the function does what its name says.
        index = pd.DatetimeIndex(index, freq=freq)
    return index


In [None]:
# Build a Series holding only the closing price, indexed by trading day.
# Take an explicit copy so that re-indexing y below cannot affect the column
# object held by df_symbol_data.
y = df_symbol_data['close'].copy()
index = df_symbol_data['timestamp']
y.index = pd.PeriodIndex(index, freq="D")
y

In [None]:
# Feature frame: the OHLCV columns, indexed by trading day.
# .copy() makes X an independent frame: later cells add columns to X in
# place, which would otherwise write into a slice of df_symbol_data and can
# trigger SettingWithCopyWarning.
X = df_symbol_data[['open', 'high', 'low', 'volume', 'close']].copy()
# Derive the index straight from the source frame rather than from a
# variable left over from an earlier cell.
X.index = pd.PeriodIndex(df_symbol_data['timestamp'], freq="D")

X

In [None]:
# Add simple moving averages (SMA) of the closing price,
# one column per window length in rolling_days.
rolling_days = [5, 25, 75, 100, 200]
for window in rolling_days:
    sma_column = f"close_sma_{window}"
    X[sma_column] = X["close"].rolling(window).mean()

X

In [None]:
# Plot the closing price together with each SMA series.
sma_labels = ["close", "close_sma_5", "close_sma_25", "close_sma_75", "close_sma_100", "close_sma_200"]
plot_series(*[X[label] for label in sma_labels], labels=sma_labels)

In [None]:
# MACD
# MACD = short-term EMA (exponential moving average) - long-term EMA
# pandas' ewm reportedly differs slightly from the strict definition:
# https://turtlechan.hatenablog.com/entry/2019/08/15/195048
#
# The 12- and 26-span EMAs are only intermediates, so they are computed
# inline and never stored as columns; only their difference is kept.
X = X.assign(
    close_macd_12_26=(
        X["close"].ewm(span=12, adjust=False).mean()
        - X["close"].ewm(span=26, adjust=False).mean()
    )
)
X


In [None]:
# Plot the closing price alongside the MACD series.
macd_labels = ["close", "close_macd_12_26"]
plot_series(*[X[label] for label in macd_labels], labels=macd_labels)

In [None]:
# RSI (Relative Strength Index)
# RS  = (mean of closing-price gains over n days) / (mean of closing-price losses over n days)
# RSI = 100 - (100 / (RS + 1))
# Computed with n = 14. Using rolling sums instead of means gives the same
# ratio because both windows have the same length.

# Day-over-day change of the closing price.
close_change = X['close'].diff()

# Split the change into its gain part and the magnitude of its loss part.
gains = close_change.clip(lower=0)
losses = close_change.clip(upper=0).abs()

gain_total = gains.rolling(14).sum()
loss_total = losses.rolling(14).sum()

X["close_rsi_14"] = gain_total / (gain_total + loss_total) * 100

# Drop the intermediates that are no longer needed.
del close_change, gains, losses, gain_total, loss_total
X


In [None]:
# Plot the 14-day RSI.
rsi_label = "close_rsi_14"
plot_series(X[rsi_label], labels=[rsi_label])

In [None]:
# Fast stochastic oscillator
# n=14, m=3
# Lowest low / highest high over the trailing 14 rows, computed once each.
lowest_low = X['low'].rolling(window=14, center=False).min()
highest_high = X['high'].rolling(window=14, center=False).max()

X['%K'] = (X['close'] - lowest_low) / (highest_high - lowest_low) * 100
X['%D'] = X['%K'].rolling(window=3, center=False).mean()

# Slow stochastic oscillator
# x=3
X['Slow%D'] = X['%D'].rolling(window=3, center=False).mean()

# Drop the intermediates that are no longer needed.
del lowest_low, highest_high

X


In [None]:
# Plot the stochastic oscillator series.
stochastic_labels = ["%K", "%D", "Slow%D"]
plot_series(*[X[label] for label in stochastic_labels], labels=stochastic_labels)

In [None]:
# Display the feature frame with every indicator column added so far.
X

In [None]:
# Shift every feature down by one row so that each row carries the previous
# row's values, then mark the raw OHLCV columns as shifted.
# DataFrame.rename returns a new frame; the result has to be assigned back,
# otherwise the columns keep their original names.
# NOTE: this cell is not idempotent -- re-running it shifts X by another row.
X = X.shift(1).rename(
    columns={
        'open': 'open_shift1',
        'high': 'high_shift1',
        'low': 'low_shift1',
        'volume': 'volume_shift1',
        })
X

In [None]:
# Remove every row that contains a NaN, then remove the close column.
X = X.dropna(how='any').drop(columns='close')
X

In [None]:
# Align y with the PeriodIndex of X.
# start / end record the span covered by X.
start = X.index[0]
end = X.index[-1]
# Keep exactly the periods that survived in X. Filtering only by the
# start..end range would keep y rows whose X rows were removed by dropna in
# the middle of the span, leaving X and y with different indexes.
y = y[y.index.isin(X.index)]
y

In [None]:
# Save the datasets as pickle files under ./datasets (created if missing).
os.makedirs('./datasets', exist_ok=True)

# Write y first, then X, each to "<SECURITIES_CODE><suffix>".
for suffix, dataset in (('_y.pkl', y), ('_X.pkl', X)):
    with open('./datasets/' + str(SECURITIES_CODE) + suffix, 'wb') as f:
        pickle.dump(dataset, f)