In [41]:
import os

import numpy as np
import pandas as pd
import scipy.sparse
import talib as ta
import xgboost as xgb
import oandapyV20.endpoints.instruments as instruments
from oandapyV20 import API
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    roc_auc_score,
    precision_recall_curve,
    auc,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [42]:
# Credentials come from the environment so the secret token is never stored in
# (or committed with) the notebook. The token that used to be hardcoded here
# should be treated as leaked and revoked.
accountID = os.environ.get("OANDA_ACCOUNT_ID", "101-009-17701910-001")
access_token = os.environ.get("OANDA_ACCESS_TOKEN")
if not access_token:
    raise RuntimeError(
        "Set the OANDA_ACCESS_TOKEN environment variable before running this notebook."
    )

# API client; `environment` selects the demo ("practice") or the production account.
oanda_api = API(access_token=access_token, environment="practice")

In [43]:
# Wrapper that calls the OANDA API and returns the candle data in several shapes.
class DataFromOanda:
    """Fetch candlestick data from the OANDA API via the module-level ``oanda_api`` client.

    Args:
        instrument: instrument name sent to the candles endpoint (default "USD_JPY").
        params: dict of query parameters sent to the candles endpoint. This
            notebook uses the keys "granularity", "count" and "price".
    """

    # Maps the "price" request parameter to the response column that holds the
    # per-candle OHLC dict.
    _PRICE_COLUMNS = {"M": "mid", "A": "ask", "B": "bid"}

    def __init__(self, instrument="USD_JPY", params=None):
        self.instrument = instrument
        self.params = params

    def get_candles(self, params=None):
        """Fetch one batch of candles and convert it to a DataFrame.

        Args:
            params: optional query parameters for this request only. When
                omitted, ``self.params`` is used (the original behaviour).

        Returns:
            DataFrame built from the ``"candles"`` list of the API response.
        """
        request_params = self.params if params is None else params
        instruments_candles = instruments.InstrumentsCandles(
            instrument=self.instrument, params=request_params
        )

        oanda_api.request(instruments_candles)
        response = instruments_candles.response  # parsed JSON
        return pd.DataFrame(response["candles"])  # response["candles"] is a list

    def get_candles_over5000(self, nb_itr=1):
        """Fetch up to ``nb_itr`` consecutive batches of candles, paging backwards in time.

        After each batch the ``"to"`` request parameter is set to the time of
        that batch's first candle, so the next request returns older candles.
        Paging is done on a private copy of ``self.params``; the caller's dict
        is never modified, which keeps repeated calls reproducible.

        Args:
            nb_itr: maximum number of requests to issue.

        Returns:
            The batches concatenated oldest-first, each keeping its own row
            index, or ``None`` when no candles were fetched.
        """
        request_params = dict(self.params or {})
        batches = []
        for _ in range(nb_itr):
            new_candles = self.get_candles(request_params)
            if new_candles.empty:
                # Nothing older is available; stop instead of failing on "time".
                break
            request_params["to"] = new_candles["time"].iloc[0]
            print(request_params["to"])
            batches.append(new_candles)

        if not batches:
            return None
        # Batches were fetched newest-first; reverse for chronological order.
        return pd.concat(batches[::-1])

    def get_close_price_over5000(self, nb_itr=1):
        """Return the close prices of the fetched candles as a list of floats.

        The price column is chosen from the ``"price"`` request parameter
        ("M" -> mid, "A" -> ask, "B" -> bid). A missing ``"price"`` is treated
        as "M".

        Raises:
            ValueError: if ``"price"`` is not one of "M", "A" or "B".
        """
        price = (self.params or {}).get("price", "M")
        # Validate before fetching so a bad configuration fails without any request.
        if price not in self._PRICE_COLUMNS:
            raise ValueError(
                f"Unsupported price component {price!r}; expected one of 'M', 'A', 'B'."
            )
        price_column = self._PRICE_COLUMNS[price]

        candles = self.get_candles_over5000(nb_itr)
        if candles is None:
            return []

        return [float(dict(ohlc)["c"]) for ohlc in candles[price_column]]

In [44]:
# --- Data-acquisition settings ---
INSTRUMENT = "USD_JPY"
GRANULARITY = "H1"
COUNT = 5000  # candles fetched per request (max: 5000)
NB_ITR = 12  # COUNT * NB_ITR candles are fetched in total: 30 for M15, 12 for H1
# Number of leading training rows to skip. Technical indicators are computed
# from past values, so they cannot be calculated without skipping these rows.
SKIP = 500
TARGET_OFFSET = 3  # how many candles ahead the target variable is taken from

# Query parameters handed to the OANDA API when requesting candles.
params = dict(
    granularity=GRANULARITY,
    count=COUNT,
    price="M",
)

xgb_param = dict(
    max_depth=5,
    eta=0.2,
    subsample=1,
    objective="binary:logistic",
    n_estimators=2000,
)

In [45]:
# Instantiate the OANDA wrapper, fetch NB_ITR batches of candles, and renumber
# the rows 0..n-1 (the concatenated batches each keep their own index).
data_from_oanda = DataFromOanda(instrument=INSTRUMENT, params=params)
candles = data_from_oanda.get_candles_over5000(NB_ITR).reset_index(drop=True)
candles

2020-04-24T13:00:00.000000000Z
2019-07-05T12:00:00.000000000Z
2018-09-14T11:00:00.000000000Z
2017-11-24T04:00:00.000000000Z
2017-02-06T22:00:00.000000000Z
2016-04-18T11:00:00.000000000Z
2015-06-28T22:00:00.000000000Z
2014-09-05T08:00:00.000000000Z
2013-11-14T18:00:00.000000000Z
2013-01-28T20:00:00.000000000Z
2012-04-16T03:00:00.000000000Z
2011-07-25T09:00:00.000000000Z


Unnamed: 0,complete,volume,time,mid
0,True,536,2011-07-25T09:00:00.000000000Z,"{'o': '78.163', 'h': '78.199', 'l': '78.059', ..."
1,True,423,2011-07-25T10:00:00.000000000Z,"{'o': '78.076', 'h': '78.216', 'l': '78.064', ..."
2,True,506,2011-07-25T11:00:00.000000000Z,"{'o': '78.192', 'h': '78.348', 'l': '78.190', ..."
3,True,605,2011-07-25T12:00:00.000000000Z,"{'o': '78.219', 'h': '78.276', 'l': '78.194', ..."
4,True,680,2011-07-25T13:00:00.000000000Z,"{'o': '78.217', 'h': '78.229', 'l': '78.125', ..."
...,...,...,...,...
59995,True,1290,2021-02-12T17:00:00.000000000Z,"{'o': '104.952', 'h': '104.958', 'l': '104.919..."
59996,True,1376,2021-02-12T18:00:00.000000000Z,"{'o': '104.929', 'h': '104.954', 'l': '104.917..."
59997,True,759,2021-02-12T19:00:00.000000000Z,"{'o': '104.944', 'h': '104.977', 'l': '104.935..."
59998,True,908,2021-02-12T20:00:00.000000000Z,"{'o': '104.974', 'h': '104.988', 'l': '104.946..."


In [46]:
# One plain dict per candle, holding the 'o', 'h', 'l', 'c' price strings.
candle = list(map(dict, candles["mid"]))
candle[0]

{'o': '78.163', 'h': '78.199', 'l': '78.059', 'c': '78.073'}

In [47]:
# Split the per-candle dicts into four parallel lists of floats
# (open, high, low, close), converting the price strings on the way.
candle_open, candle_high, candle_low, candle_close = [
    [float(bar[field]) for bar in candle] for field in ("o", "h", "l", "c")
]

In [48]:
# Wrap each price list in a single-column DataFrame named after its field.
open_df = pd.DataFrame({"open": candle_open})
high_df = pd.DataFrame({"high": candle_high})
low_df = pd.DataFrame({"low": candle_low})
close_df = pd.DataFrame({"close": candle_close})
close_df

Unnamed: 0,close
0,78.073
1,78.190
2,78.216
3,78.215
4,78.219
...,...
59995,104.928
59996,104.944
59997,104.972
59998,104.950


In [49]:
# Put the numeric OHLC columns next to the raw candles, then discard the
# columns that are no longer needed: the raw "mid" dicts and the "complete" flag.
ohlc_frames = [open_df, high_df, low_df, close_df]
candles_concat = pd.concat([candles, *ohlc_frames], axis=1).drop(
    columns=["mid", "complete"]
)
candles_concat

Unnamed: 0,volume,time,open,high,low,close
0,536,2011-07-25T09:00:00.000000000Z,78.163,78.199,78.059,78.073
1,423,2011-07-25T10:00:00.000000000Z,78.076,78.216,78.064,78.190
2,506,2011-07-25T11:00:00.000000000Z,78.192,78.348,78.190,78.216
3,605,2011-07-25T12:00:00.000000000Z,78.219,78.276,78.194,78.215
4,680,2011-07-25T13:00:00.000000000Z,78.217,78.229,78.125,78.219
...,...,...,...,...,...,...
59995,1290,2021-02-12T17:00:00.000000000Z,104.952,104.958,104.919,104.928
59996,1376,2021-02-12T18:00:00.000000000Z,104.929,104.954,104.917,104.944
59997,759,2021-02-12T19:00:00.000000000Z,104.944,104.977,104.935,104.972
59998,908,2021-02-12T20:00:00.000000000Z,104.974,104.988,104.946,104.950


In [50]:
# Save the flattened candle table as CSV (the row index is written as the first column).
candles_concat.to_csv(path_or_buf="usd_jpy_1H.csv")