# 各暗号資産につき一目均衡表を作成する（日足：2021/01/01-2021/06/13）

## Libraries

In [None]:
# Install "TA-Lib"
# Step 1: copy the TA-Lib C source archive out of the input dataset, unpack it,
# then configure/build/install it under /usr.
# NOTE(review): "> null" redirects stdout into a file literally named "null" in
# the working directory; if the intent was to discard output, "/dev/null" is
# presumably what was meant - confirm.
!cp ../input/talib/ta-lib-0.4.0-src.tar.gzh  ./ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz > null
!cd ta-lib && ./configure --prefix=/usr > null && make  > null && make install > null

# Step 2: install the TA-Lib Python package from the archive copied out of the
# dataset, then the numpy wheel shipped alongside it.
# NOTE(review): presumably the numpy wheel pins a version compatible with this
# TA-Lib build - confirm.
!cp ../input/talib/TA-Lib-0.4.21.tar.gzh TA-Lib-0.4.21.tar.gz
!pip install TA-Lib-0.4.21.tar.gz
!pip install ../input/talib/numpy-1.21.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
# Importing here fails fast if the build/installation above did not succeed.
import talib as ta

In [None]:
!pip install mplfinance

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm.auto import tqdm
import time

import datetime
import math

import mplfinance as mpf
import talib as ta
from decimal import Decimal, ROUND_HALF_UP

# Warningの無効化
import warnings
warnings.simplefilter("ignore")

# データフレームcolumnの全表示
pd.set_option("display.max_columns", None)

## Load Data

In [None]:
# Load asset_details.csv and order its rows by Asset_ID.
df_asset_details = pd.read_csv(r"../input/g-research-crypto-forecasting/asset_details.csv").sort_values("Asset_ID")
# Bare expression as the last line of the cell: the notebook renders the frame.
df_asset_details

In [None]:
def reduce_mem_usage(df, use_float16=True):
    """Shrink the numeric columns of ``df`` to the narrowest dtype holding their range.

    The frame is modified in place and also returned, so the function can be
    used with ``DataFrame.pipe``. Memory usage before and after is printed.

    Only plain NumPy signed-integer and floating-point columns are converted.
    Every other dtype (object, bool, unsigned integer, datetime, categorical,
    pandas extension dtypes, ...) is left untouched, because forcing those
    through a float cast would either corrupt the values or raise.

    Args:
        df: DataFrame whose columns are converted in place.
        use_float16: When True (the default, matching the historical
            behaviour) float columns whose range fits are stored as float16.
            float16 keeps only about 3 significant decimal digits, so pass
            False when precision matters; float32 is then the narrowest
            float type considered.

    Returns:
        The same ``df`` object, with converted columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))

    # Candidates are ordered narrowest first; the first one that fits wins.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        # kind "i" = signed integer, "f" = floating point. Anything else is
        # skipped rather than being pushed through the float branch.
        if not isinstance(col_type, np.dtype) or col_type.kind not in ("i", "f"):
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == "i":
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the type's min/max fits.
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # An empty column has NaN min/max, so nothing matches and the
            # column keeps its dtype.
        else:
            # Falls back to float64 when no narrower type covers the range
            # (this includes all-NaN columns and columns containing +/-inf).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem))

    return df

In [None]:
def read_csv_strict(file_name="/kaggle/input/g-research-crypto-forecasting/train.csv",
                    start="2021-01-01 00:00:00", end="2021-06-13 00:00:00"):
    """Load the training CSV and keep only the rows inside a time window.

    The CSV is read, passed through ``reduce_mem_usage``, and its ``timestamp``
    column (interpreted as seconds since the Unix epoch) is converted into a
    ``datetime`` column. Rows with ``start <= datetime < end`` are kept and
    sorted by ``datetime``; a ``day`` column holding the ``YYYY-MM-DD`` string
    of each row is added.

    Args:
        file_name: Path of the CSV file to read.
        start: Inclusive lower bound of the window (anything comparable with a
            datetime64 column, e.g. a date string or ``pd.Timestamp``).
        end: Exclusive upper bound of the window.

    Returns:
        The filtered, time-sorted DataFrame with ``datetime`` and ``day``
        columns added.
    """
    df = pd.read_csv(file_name).pipe(reduce_mem_usage)
    df["datetime"] = pd.to_datetime(df["timestamp"], unit="s")

    # Half-open window [start, end): the end bound itself is excluded.
    in_window = (df["datetime"] >= start) & (df["datetime"] < end)
    df = df[in_window].sort_values("datetime")

    # Calendar day as a string; used downstream as the grouping key.
    df["day"] = df["datetime"].dt.strftime("%Y-%m-%d")
    return df

In [None]:
# Load the training rows using read_csv_strict's default CSV path.
df_train = read_csv_strict()

In [None]:
df_train

## Data Visualization

In [None]:
def convert_ohclv(df, chart_term=1440):
    """Aggregate rows into OHLCV bars of ``chart_term`` minutes.

    Rows are grouped by their ``day`` value, and each day is cut into
    consecutive windows of ``chart_term`` minutes starting at midnight. For
    every window that contains at least one row, one output bar is produced:

    * ``Open``   - ``Open`` of the first row in the window (input order)
    * ``Close``  - ``Close`` of the last row in the window (input order)
    * ``High``   - maximum of ``High``
    * ``Low``    - minimum of ``Low``
    * ``Volume`` - sum of ``Volume``

    Because Open/Close rely on row order, the input should be sorted by
    ``datetime``. Windows without any rows are skipped.

    Args:
        df: DataFrame with ``day`` (date string parseable by
            ``pd.to_datetime``), ``datetime``, ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns.
        chart_term: Bar length in minutes. The default of 1440 yields one bar
            per day.

    Returns:
        DataFrame with columns ``datetime`` (window start), ``Open``,
        ``High``, ``Low``, ``Close``, ``Volume``, sorted by ``datetime``.
        It is empty (but still has these columns) when no bar was produced.

    Raises:
        ValueError: If ``chart_term`` is not positive.
    """
    if chart_term <= 0:
        raise ValueError("chart_term must be a positive number of minutes")

    columns = ["datetime", "Open", "High", "Low", "Close", "Volume"]
    minutes_per_day = 1440
    windows_per_day = math.ceil(minutes_per_day / chart_term)

    result_list = []

    # groupby visits the days in sorted order and keeps each day's rows in
    # their input order, which Open/Close below depend on.
    for day, day_df in df.groupby("day"):
        day_start = pd.to_datetime(day)

        # Window starts are derived from the window number, so an empty
        # window can never stall the walk through the day.
        for window_no in range(windows_per_day):
            start_time = day_start + pd.Timedelta(minutes=chart_term * window_no)
            # Inclusive end one minute before the next window starts.
            end_time = start_time + pd.Timedelta(minutes=chart_term - 1)

            in_window = (day_df["datetime"] >= start_time) & (day_df["datetime"] <= end_time)
            term_df = day_df[in_window]
            if term_df.empty:
                continue

            # Reduce only the column that is needed; reducing the whole frame
            # would also try to max/min/sum the string and datetime columns.
            result_list.append([
                start_time,
                term_df["Open"].values[0],
                term_df["High"].max(),
                term_df["Low"].min(),
                term_df["Close"].values[-1],
                term_df["Volume"].sum(),
            ])

    # Passing the column names to the constructor keeps an empty result valid.
    result_df = pd.DataFrame(result_list, columns=columns)
    return result_df.sort_values("datetime")

In [None]:
def generate_stock_chart_image(df, asset_name):
    """Draw a candlestick chart of ``df`` with Ichimoku-style lines and indicators.

    Indicator columns are added to ``df`` in place (``basic_line``,
    ``turn_line``, ``span1``, ``span2``, ``slow_line``, ``upper``, ``middle``,
    ``lower``, ``macd``, ``macdsignal``, ``macdhist``, ``RSI``) and the figure
    is shown with ``plt.show()``. Nothing is returned.

    Panel layout passed to mplfinance: the main panel holds the candles, the
    Bollinger bands, the base/conversion/lagging lines and the shaded band
    between ``span1`` and ``span2``; panel 1 holds the MACD histogram, panel 2
    the RSI and panel 3 the volume.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume`` columns. It is handed to ``mpf.plot`` as-is -
            presumably it must be indexed by datetime; verify against the
            caller.
        asset_name: Text used as the chart title.
    """
    # Base line: midpoint of the 26-row rolling high and rolling low
    high = df["High"]
    low = df["Low"]

    max26 = high.rolling(window=26).max()
    min26 = low.rolling(window=26).min()

    df["basic_line"] = (max26 + min26) / 2

    # Conversion line: midpoint of the 9-row rolling high and rolling low
    high9 = high.rolling(window=9).max()
    low9 = low.rolling(window=9).min()

    df["turn_line"] = (high9 + low9) / 2

    # Cloud: span1 is the average of the base and conversion lines,
    # span2 is the midpoint of the 52-row rolling high and rolling low.
    # NOTE(review): neither span is shifted forward in time here; the usual
    # Ichimoku definition plots both spans ahead of the current bar - confirm
    # whether the missing shift is intentional.
    df["span1"] = (df["basic_line"] + df["turn_line"]) / 2

    high52 = high.rolling(window=52).max()
    low52 = low.rolling(window=52).min()

    df["span2"] = (high52 + low52) / 2

    # Lagging line: each row receives the Close found 25 rows later, so the
    # last 25 rows are NaN.
    df["slow_line"] = df["Close"].shift(-25)

    # Bollinger band columns (25-period, 2 deviations up and down)
    df["upper"], df["middle"], df["lower"] = ta.BBANDS(
        df["Close"], timeperiod=25, nbdevup=2, nbdevdn=2, matype=0)

    # MACD columns (fast 12, slow 26, signal 9); only "macdhist" is plotted
    # below, "macd" and "macdsignal" are stored on df but not drawn.
    df["macd"], df["macdsignal"], df["macdhist"] = ta.MACD(
        df["Close"], fastperiod=12, slowperiod=26, signalperiod=9)

    # RSI column (25-period)
    df["RSI"] = ta.RSI(df["Close"], timeperiod=25)

    # Extra plots layered onto the chart: Bollinger bands, MACD histogram, RSI,
    # then the base, conversion and lagging lines. The cloud is not part of
    # this list; it is drawn through fill_between in the mpf.plot call.
    apds = [mpf.make_addplot(df["upper"], color="g"),
            mpf.make_addplot(df["middle"], color="b"),
            mpf.make_addplot(df["lower"], color="r"),
            mpf.make_addplot(df["macdhist"], type="bar",
                             width=1.0, panel=1, color="gray", alpha=0.5, ylabel="MACD"),
            mpf.make_addplot(df["RSI"], panel=2,
                             type="line", ylabel="RSI"),
            mpf.make_addplot(df["basic_line"]),  # base line
            mpf.make_addplot(df["turn_line"]),  # conversion line
            mpf.make_addplot(df["slow_line"]),  # lagging line
            ]

    # NOTE(review): these four labels are handed to legend() without handles,
    # so they are matched to the main axes' artists in drawing order;
    # presumably that does not line up with the base/conversion/lagging/cloud
    # artists - verify the rendered legend.
    labels = ["basic", "turn", "slow", "span"]

    fig, ax = mpf.plot(df, type="candle", figsize=(16, 9), title=(asset_name),
                       style="yahoo", xrotation=0, volume=True, addplot=apds, returnfig=True,
                       volume_panel=3, panel_ratios=(5, 2, 2, 1),
                       fill_between=dict(
                           y1=df["span1"].values, y2=df["span2"].values, alpha=0.5, color="gray"),
                       #savefig=f"chart_{asset_id}.png"
                       )
    ax[0].legend(labels)
    plt.grid()
    plt.show()

In [None]:
def get_Xy_and_model_for_asset(df_train, asset_id, asset_name):
    """Chart a single asset: select its rows, aggregate them and plot the result.

    Despite its name, this function builds neither features/targets nor a
    model; it only produces the chart and returns nothing.

    Args:
        df_train: DataFrame holding the rows of all assets, with an
            ``Asset_ID`` column plus the columns ``convert_ohclv`` needs.
        asset_id: Value of ``Asset_ID`` selecting the asset to chart.
        asset_name: Asset name, used as the chart title.
    """
    df = df_train[df_train["Asset_ID"] == asset_id]
    df = convert_ohclv(df)
    # The chart function receives the bars indexed by their start time.
    df = df.set_index("datetime")
    generate_stock_chart_image(df, asset_name)

In [None]:
# Draw one chart per row of df_asset_details, pairing each Asset_ID with its Asset_Name.
for asset_id, asset_name in zip(df_asset_details["Asset_ID"], df_asset_details["Asset_Name"]):
    print(f"Visualizing for  {asset_name:<16} (ID={asset_id:<2})")
    get_Xy_and_model_for_asset(df_train, asset_id, asset_name)