In [1]:
!pip install -q yahoo_finance_api2

  Building wheel for yahoo-finance-api2 (setup.py) ... [?25l[?25hdone


In [133]:
import sys
import os 
from yahoo_finance_api2 import share
from yahoo_finance_api2.exceptions import YahooFinanceError
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px 
import plotly.graph_objects as go
import datetime 
import gc 
import warnings

from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor

warnings.simplefilter("ignore")
np.random.seed(42)

ある特定の株式会社における株価データを取得する。  


In [30]:
# Download three years of daily bars for ticker 7203.T and keep them in `df`.
my_share = share.Share('7203.T')
symbol_data = None

try:
    symbol_data = my_share.get_historical(
        share.PERIOD_TYPE_YEAR, 3,
        share.FREQUENCY_TYPE_DAY, 1,
    )
except YahooFinanceError as err:
    print(err.message)
    sys.exit(1)

df = pd.DataFrame(symbol_data)
# Epoch-millisecond timestamps become a `datetime` column shifted by +9 hours
# (presumably UTC -> JST; confirm against the API), replacing `timestamp`.
df = (
    df.assign(datetime=pd.to_datetime(df["timestamp"], unit="ms") + datetime.timedelta(hours=9))
      .drop(columns="timestamp")
)
df.head()

Unnamed: 0,open,high,low,close,volume,datetime
0,1304.800049,1309.0,1285.199951,1285.199951,36240000,2018-10-29 09:00:00
1,1289.800049,1311.800049,1287.400024,1297.0,96631000,2018-10-30 09:00:00
2,1300.199951,1324.599976,1294.599976,1323.0,39510000,2018-10-31 09:00:00
3,1325.599976,1328.0,1309.800049,1315.199951,29424500,2018-11-01 09:00:00
4,1317.0,1319.199951,1285.599976,1311.400024,49483000,2018-11-02 09:00:00


# シャープレシオの算出
+ 始値と終値における上昇率の平均
+ 上昇率の標準偏差（リスク値）  
+ 国債の利回り（無リスク資産の収益率）  
  
これらから算出される値。一般的に値が大きいほど優良証券であることを示す。  
つまり、リスクが小さく利回りが高いということ。  
なお、ここでは**月**をグループ化して算出した。

In [None]:
class SPM:
    """Download price history for several tickers, plot it, and score each one.

    For every ticker code the instance fetches daily bars, shows diagnostic
    plots, reduces the bars to monthly mean/std of the open-to-close growth,
    writes that table to ``result/<code>.csv`` and computes
    ``(mean growth - tax_rate) / mean volatility``.  All of this runs from
    ``__init__`` via :meth:`main`, so constructing the object performs network
    access, file writes and plotting.

    Args:
        code: Ticker codes to process (e.g. ``["7201.T", "3323.T"]``).
        year: Number of years of history to request.
        day: Bar frequency in days.
        tax_rate: Value subtracted from the mean growth in :meth:`calc`.

    Attributes:
        sharp_list: Mapping of ticker code to its computed ratio.
        mean: Mean monthly growth per ticker, in ``code`` order.
        std: Mean monthly volatility per ticker, in ``code`` order.
    """

    def __init__(self, code: list, year: int=3, day: int=1, tax_rate: float=0.0163):
        # The notebook-global `df` is deliberately NOT captured here: it was
        # never read and made the class depend on another cell's state.
        self.code_list = code
        self.year = year
        self.day = day
        self.tax = tax_rate
        self.sharp_list = {}
        self.mean = []
        self.std = []

        self.main()

    def main(self):
        """Run fetch -> plot -> aggregate -> save -> score for every code."""
        for code in self.code_list:
            print(f"================================code: {code}=======================================")
            prices = self.read_csv(code)
            self.vizual(prices)
            monthly = self.preprocess(prices)
            self.save_csv(monthly, code)
            sharp, mean, std = self.calc(monthly)
            self.sharp_list[code] = sharp
            self.mean.append(mean)
            self.std.append(std)

        self.viz_sharp(pd.DataFrame({"growth": self.mean, "volality": self.std, "code": self.code_list}))

    @staticmethod
    def _growth_pct(df):
        """Return each row's open-to-close change, in percent."""
        return (df["close"] / df["open"] - 1.0) * 100.0

    def read_csv(self, code):
        """Fetch the price history for ``code`` and return it as a DataFrame.

        Despite the name, the data comes from ``share.Share.get_historical``,
        not from a CSV file.

        Raises:
            SystemExit: If the API raises ``YahooFinanceError`` (the message is
                printed first).
            ValueError: If the API returns no data for ``code``.
        """
        try:
            symbol_data = share.Share(code).get_historical(
                share.PERIOD_TYPE_YEAR, self.year,
                share.FREQUENCY_TYPE_DAY, self.day)
        except YahooFinanceError as e:
            print(e.message)
            sys.exit(1)

        # Without this guard an empty response fails later with an
        # AttributeError on the missing `timestamp` column.
        if not symbol_data:
            raise ValueError(f"no historical data returned for {code!r}")

        df = pd.DataFrame(symbol_data)
        # Epoch milliseconds -> timestamps shifted by +9 hours
        # (presumably UTC -> JST; confirm against the API).
        df["datetime"] = pd.to_datetime(df["timestamp"], unit="ms") + datetime.timedelta(hours=9)
        return df.drop(columns="timestamp")

    def preprocess(self, df):
        """Reduce daily bars to per-month mean and std of the growth rate.

        The input frame is left untouched.

        Returns:
            DataFrame with columns ``year``, ``month``, ``growth`` (monthly
            mean of the daily growth in percent) and ``volality`` (monthly
            standard deviation of the same series), one row per month.
        """
        stamps = df["datetime"].dt
        keys = [stamps.year.rename("year"), stamps.month.rename("month")]
        # Aggregate only the growth series instead of every column of the frame.
        monthly = (
            self._growth_pct(df)
            .groupby(keys)
            .agg(["mean", "std"])
            .rename(columns={"mean": "growth", "std": "volality"})
            .reset_index()
        )
        return monthly[["year", "month", "growth", "volality"]]

    def save_csv(self, df, code):
        """Write ``df`` to ``result/<code>.csv``, creating the folder if needed."""
        os.makedirs("result", exist_ok=True)
        df.to_csv(f"result/{code}.csv", index=False)

    def calc(self, df):
        """Compute the Sharpe-style ratio from the monthly table.

        Returns:
            Tuple ``(ratio, mean_growth, mean_volatility)`` where
            ``ratio = (mean_growth - self.tax) / mean_volatility``.
        """
        # NOTE(review): `growth` is a percentage while the default tax_rate is
        # 0.0163 -- confirm both are expressed in the same unit and period.
        mean_growth = np.mean(df["growth"])
        mean_volatility = np.mean(df["volality"])
        sharp = (mean_growth - self.tax) / mean_volatility
        return sharp, mean_growth, mean_volatility

    def viz_sharp(self, df):
        """Scatter-plot growth against volatility, one colour per ticker code."""
        # NOTE(review): main() passes one row per code, so the per-colour OLS
        # trendline is fitted to a single point each -- confirm this is wanted.
        fig = px.scatter(df, x="volality", y="growth", color="code", trendline="ols")
        fig.show()

    def vizual(self, df):
        """Show the three diagnostic plots for one ticker.

        In order: the price transition with moving/yearly averages, the
        scatter matrix of the numeric columns, and the growth-rate histogram.
        """
        self.viz_transition(df)
        self.viz_corr(df)
        self.viz_growth_hist(df)

    def viz_growth_hist(self, df: pd.DataFrame):
        """Plot a histogram of the daily growth rate, in percent.

        Uses the same growth definition as :meth:`preprocess`.
        """
        fig = px.histogram(df.assign(growth=self._growth_pct(df)), x="growth")
        fig.show()

    def viz_corr(self, df: pd.DataFrame):
        """Plot a scatter matrix of every column except ``datetime``."""
        fig = px.scatter_matrix(df.drop("datetime", axis=1))
        fig.show()

    def viz_transition(self, df: pd.DataFrame):
        """Plot the close price with its 30-row moving average and yearly mean."""
        dfs = df.assign(
            moving_avg30=df["close"].rolling(window=30).mean(),
            # Broadcast each calendar year's mean close back onto its rows.
            close_year=df.groupby(df["datetime"].dt.year)["close"].transform("mean"),
        )

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=dfs["datetime"], y=dfs["close"], name="close", text="close"))
        fig.add_trace(go.Scatter(x=dfs["datetime"], y=dfs["moving_avg30"], name="moving_avg_close", text="moving_avg_close"))
        fig.add_trace(go.Scatter(x=dfs["datetime"], y=dfs["close_year"], name="year_mean", text="year_mean"))
        fig.update_traces(hoverinfo='text+name', mode='lines+markers')
        fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
        fig.show()

# Constructing SPM runs the whole pipeline (fetch, plots, CSV, ratio) for these codes.
s = SPM(code=["7201.T", "3323.T"])

In [87]:
# Second batch of ticker codes to run through the same pipeline.
code = ["9020.T", "4776.T", "9433.T", "3925.T"]

SPM(code)









<__main__.SPM at 0x7f79eb6ba090>

# 明日の株価予測モデル
特定の株式会社について、当日のデータから翌営業日の終値を推定する。  
+ LinearRegression
+ RandomForest
+ LightGBM (LGBMRegressor)
  
以上のモデルを使用した。

In [134]:
def add_feature(df):
    """Turn today's rows into (features, next-row close) training pairs.

    The existing ``close`` column is renamed to ``close_lag_1`` and a new
    ``close`` column holds the following row's close, so the final row's
    target is NaN.  The result is indexed by ``datetime``.
    """
    lagged = df.rename(columns={"close": "close_lag_1"})
    lagged = lagged.assign(close=lagged["close_lag_1"].shift(-1))
    return lagged.set_index("datetime")

def mae(pred, corr):
    """Return the mean absolute difference between ``pred`` and ``corr``."""
    residual = pred - corr
    return np.mean(np.absolute(residual))

def _rf_absolute_error_criterion():
    """Return the random-forest criterion name for mean absolute error.

    scikit-learn 1.0 renamed ``"mae"`` to ``"absolute_error"`` and 1.2 removed
    the old spelling, so the name is chosen from the installed version.
    """
    import re
    import sklearn

    version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
    if version and (int(version.group(1)), int(version.group(2))) < (1, 0):
        return "mae"
    return "absolute_error"


def build_model(model_type):
    """Create an unfitted regressor for the given short name.

    Args:
        model_type: ``"linear"`` (LinearRegression), ``"rf"``
            (RandomForestRegressor, 500 trees, absolute-error criterion) or
            ``"lgb"`` (LGBMRegressor, 500 estimators).

    Returns:
        A new, unfitted estimator instance.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type == "linear":
        return LinearRegression()
    if model_type == "rf":
        return RandomForestRegressor(
            random_state=42, n_estimators=500, criterion=_rf_absolute_error_criterion()
        )
    if model_type == "lgb":
        return LGBMRegressor(random_state=42, n_estimators=500)
    # Previously an unknown name fell through to `return model` and raised
    # UnboundLocalError.
    raise ValueError(
        f"unknown model_type {model_type!r}; expected 'linear', 'rf' or 'lgb'"
    )

def show_predict(df):
    """Plot the actual ``close`` and the ``pred`` column against ``datetime``."""
    fig = go.Figure()
    # (column, legend/hover label) for each line, drawn in this order.
    for column, label in (("close", "close"), ("pred", "predict")):
        fig.add_trace(go.Scatter(x=df["datetime"], y=df[column], name=label, text=label))
    fig.update_traces(hoverinfo='text+name', mode='lines+markers')
    fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
    fig.show()

def _fit_and_predict(train, target, model_type, is_scaler):
    """Fit a fresh scaler (optional) and model on ``train``; predict ``target``.

    Both frames must contain the ``close`` target column, which is dropped
    from the features.  Returns the predictions as a flat NumPy array.
    """
    x_train, y_train = train.drop("close", axis=1), train["close"]
    x_target = target.drop("close", axis=1)

    if is_scaler:
        # The scaler is fitted on the training rows only and reused for target.
        scaler = RobustScaler()
        x_train = scaler.fit_transform(x_train)
        x_target = scaler.transform(x_target)

    model = build_model(model_type)
    model.fit(x_train, y_train)
    return np.asarray(model.predict(x_target)).flatten()


def main(df, model_type="linear", is_scaler=True, val_size=30):
    """Validate a model on recent rows, then forecast the next close.

    The frame is passed through ``add_feature``, whose last row has no target.
    First the model is trained on everything except the final
    ``val_size + 1`` rows and scored (MAE, plus a plot) on the ``val_size``
    rows before the last one.  Then a fresh model is trained on every row
    except the last and used to predict the last row's missing target.

    Args:
        df: Price frame accepted by ``add_feature`` (it is copied, not mutated).
        model_type: Model name understood by ``build_model``.
        is_scaler: Whether to scale the features with ``RobustScaler``.
        val_size: Number of rows held out for validation.

    Returns:
        The predicted value for the final row.

    Raises:
        ValueError: If ``val_size`` is below 1 or the frame is too short to
            leave at least one training row.
    """
    frame = add_feature(df.copy())
    holdout = val_size + 1  # validation rows plus the final, target-less row
    if val_size < 1 or len(frame) <= holdout:
        raise ValueError(
            f"need val_size >= 1 and more than {holdout} rows, got val_size={val_size} and {len(frame)} rows"
        )

    # validation metrics
    train, val = frame.iloc[:-holdout, :], frame.iloc[-holdout:-1, :]
    predv = _fit_and_predict(train, val, model_type, is_scaler)
    actual = val["close"].to_numpy()
    print(f"mae: {mae(predv, actual)}")
    show_predict(pd.DataFrame({"datetime": val.index, "pred": predv, "close": actual}))

    # test predict
    train, test = frame.iloc[:-1, :], frame.iloc[-1:, :]
    predt = _fit_and_predict(train, test, model_type, is_scaler)[0]

    # display prediction
    print("\n")
    print("###########################################################################")
    print(f"明日は {predt:.3f} 円と予想されます。")
    print("###########################################################################")
    print("\n")

    return predt



In [136]:
if __name__ == "__main__":
    # Validate and forecast once with each supported model type.
    for model_name in ("linear", "rf", "lgb"):
        main(df, model_type=model_name)

mae: 30.31973214568451




###########################################################################
明日は 1978.297 円と予想されます。
###########################################################################


mae: 30.1462632405599




###########################################################################
明日は 1969.511 円と予想されます。
###########################################################################


mae: 32.63590222956788




###########################################################################
明日は 1963.922 円と予想されます。
###########################################################################


