In [66]:
from datetime import datetime, timedelta
import oandapy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import os
import talib
sys.path.append('/Users/toshio/project/fx')
from config import token
from lib.indicator import ichimoku

class Preprocess:
    """Build a table of technical-indicator features from OHLC candles.

    The constructor runs the whole pipeline eagerly: every ``prep_*`` method
    is called once and its result stored on the instance, and ``data`` holds
    the final joined table.

    Attributes (all DataFrames share the index of ``df`` unless noted):
        res: the raw response object passed in; only read when ``df`` is None.
        df: candle table with at least 'open', 'high', 'low', 'close' columns.
        arr_ohlc: float64 array, one row per candle, columns in the order
            open, high, low, close.
        ohlc: the four price columns of ``df``.
        delta, sma, macd, rsi, bband, adx, di, sar, ichi, updown: the
            individual feature groups (see the matching ``prep_*`` method).
        data: ``df`` joined with every feature group, with the 'chiko' column
            and all rows containing NaN removed.
    """

    # Column order of the price frame; arr_ohlc columns follow the same order,
    # so arr_ohlc[:, 1] is high, [:, 2] is low and [:, 3] is close.
    _OHLC_COLUMNS = ['open', 'high', 'low', 'close']
    _SMA_PERIODS = (5, 25, 50, 75, 100, 500)
    _BBAND_COLUMNS = ['-3sigma', '-2sigma', '-1sigma',
                      '+1sigma', '+2sigma', '+3sigma']

    def __init__(self, res, df = None):
        """Run the preprocessing pipeline.

        Args:
            res: response mapping with a 'candles' entry; it is converted to
                a DataFrame by ``res_to_df`` only when ``df`` is None.
            df: optional ready-made candle DataFrame. When given, ``res`` is
                stored but not otherwise used.
        """
        self.res = res
        self.df = self.res_to_df() if df is None else df
        # Order matters: later steps read attributes set by earlier ones
        # (arr_ohlc/ohlc first, delta before updown, everything before concat).
        self.arr_ohlc, self.ohlc = self.prep_ohlcv()
        self.delta = self.prep_delta()
        self.sma = self.prep_sma()
        self.macd = self.prep_macd()
        self.rsi = self.prep_rsi()
        self.bband = self.prep_bband()
        self.adx = self.prep_adx()
        self.di = self.prep_di()
        self.sar = self.prep_sar()
        self.ichi = self.prep_ichi()
        self.updown = self.prep_updown()
        self.data = self.prep_concat()

    def res_to_df(self):
        """Convert ``self.res['candles']`` into a DataFrame indexed by time.

        The 'complete' column is dropped. The last 8 characters of every
        'time' string are cut off before parsing.
        NOTE(review): presumably that suffix is a fractional-second/zone tail
        such as '.000000Z' - confirm against the API response format.

        Returns:
            DataFrame indexed by the parsed 'time' column.
        """
        df = pd.DataFrame(self.res['candles'])
        df = df.drop(['complete'], axis = 1)
        # Parse 'YYYY-MM-DDTHH:MM:SS' in one vectorised call instead of a
        # per-row strptime loop.
        df['time'] = pd.to_datetime(df['time'].str[:-8],
                                    format = '%Y-%m-%dT%H:%M:%S')
        return df.set_index('time', drop = True)

    def prep_ohlcv(self):
        """Extract the price columns.

        Returns:
            (arr_ohlc, ohlc): a float64 ndarray and a DataFrame, both with
            columns in the order open, high, low, close.
        """
        ohlc = self.df[self._OHLC_COLUMNS].copy()
        # Force float64 so integer- or object-typed price columns do not reach
        # the talib calls as a non-double array.
        arr_ohlc = np.asarray(ohlc, dtype = np.float64)
        return arr_ohlc, ohlc

    def prep_delta(self):
        """Return a one-column frame 'delta_close': close minus previous close.

        The first row is NaN because it has no predecessor.
        """
        return self.ohlc['close'].diff().to_frame('delta_close')

    def prep_sma(self):
        """Return simple moving averages of close for each period in
        ``_SMA_PERIODS``, as columns 'sma5' ... 'sma500'."""
        close = self.arr_ohlc[:, 3]
        names = ['sma%d' % period for period in self._SMA_PERIODS]
        values = {
            name: talib.SMA(close, timeperiod = period)
            for name, period in zip(names, self._SMA_PERIODS)
        }
        return pd.DataFrame(values, index = self.df.index, columns = names)

    def prep_macd(self):
        """Return MACD (12, 26, 9) of close as columns 'macd', 'macdsignal'
        and 'macdhist'."""
        # One call yields all three series; unpack instead of recomputing.
        macd_line, signal_line, histogram = talib.MACD(
            self.arr_ohlc[:, 3], fastperiod=12, slowperiod=26, signalperiod=9)
        return pd.DataFrame(
            {'macd': macd_line, 'macdsignal': signal_line, 'macdhist': histogram},
            index = self.df.index,
            columns = ['macd', 'macdsignal', 'macdhist'])

    def prep_rsi(self):
        """Return the 14-period RSI of close as a one-column frame 'rsi'."""
        return pd.DataFrame(
            {'rsi': talib.RSI(self.arr_ohlc[:, 3], timeperiod = 14)},
            index = self.df.index, columns = ['rsi'])

    def prep_bband(self):
        """Return 15-period Bollinger bands of close at 1, 2 and 3 deviations.

        Columns are '-3sigma' ... '+3sigma'; '+k' is the first and '-k' the
        third series returned by ``talib.BBANDS`` for deviation k.
        """
        close = self.arr_ohlc[:, 3]
        bands = {}
        for k in (1, 2, 3):
            # One call per deviation gives both the upper and the lower band.
            upper, _middle, lower = talib.BBANDS(
                close, timeperiod=15, nbdevup=k, nbdevdn=k)
            bands['+%dsigma' % k] = upper
            bands['-%dsigma' % k] = lower
        return pd.DataFrame(bands, index = self.df.index,
                            columns = self._BBAND_COLUMNS)

    def prep_adx(self):
        """Return the 14-period ADX (from high, low, close) as column 'adx'."""
        high, low, close = (self.arr_ohlc[:, i] for i in (1, 2, 3))
        return pd.DataFrame(
            {'adx': talib.ADX(high, low, close, timeperiod = 14)},
            index = self.df.index, columns = ['adx'])

    def prep_di(self):
        """Return the 14-period directional indicators as columns '+di', '-di'."""
        high, low, close = (self.arr_ohlc[:, i] for i in (1, 2, 3))
        return pd.DataFrame(
            {'+di': talib.PLUS_DI(high, low, close, timeperiod = 14),
             '-di': talib.MINUS_DI(high, low, close, timeperiod = 14)},
            index = self.df.index, columns = ['+di', '-di'])

    def prep_sar(self):
        """Return the parabolic SAR (acceleration 0.05, maximum 0.2) computed
        from high and low, as column 'sar'."""
        sar_values = talib.SAR(self.arr_ohlc[:, 1], self.arr_ohlc[:, 2],
                               acceleration=0.05, maximum=0.2)
        return pd.DataFrame({'sar': sar_values},
                            index = self.df.index, columns = ['sar'])

    def prep_ichi(self):
        """Return the output of the project's ``ichimoku`` helper for the
        price frame, minus its 'close' column (already present in ``df``)."""
        return ichimoku(self.ohlc).drop('close', axis = 1)

    def prep_updown(self, threshold = 0.01):
        """One-hot label every candle by the sign/size of 'delta_close'.

        Args:
            threshold: minimum absolute change that counts as a move.

        Returns:
            Frame with integer columns 'up' (delta > threshold), 'down'
            (delta < -threshold) and 'nochange' (|delta| <= threshold).
            A NaN delta (the first row) gets 0 in all three columns.
        """
        delta = self.delta['delta_close']
        # 'nochange' is inclusive so that a delta exactly equal to +/-threshold
        # still belongs to exactly one class; with three strict comparisons
        # such a row would be labelled 0/0/0.
        labels = {
            'up': (delta > threshold).astype(np.int64),
            'down': (delta < -threshold).astype(np.int64),
            'nochange': (delta.abs() <= threshold).astype(np.int64),
        }
        return pd.DataFrame(labels, columns = ['up', 'down', 'nochange'])

    def prep_concat(self):
        """Join every feature group onto ``df`` and clean the result.

        The 'chiko' column is dropped and then every row containing a NaN is
        removed, so the leading rows consumed by indicator warm-up (up to the
        longest look-back, e.g. the 500-period SMA) do not appear in the
        output.
        NOTE(review): 'chiko' is expected to come from ``ichimoku``; the drop
        raises if that column is absent.
        """
        adder = [self.delta, self.sma, self.macd, self.rsi, self.bband,
                 self.adx, self.di, self.sar, self.ichi, self.updown]
        data = self.df.join(adder)
        data = data.drop('chiko', axis = 1)
        return data.dropna()

## Main

In [67]:
# Run parameters. `gran` and `look_back` are only referenced by the
# commented-out live OANDA request further down (granularity and extra candle
# count); the date range matches the dates in the cached pickle's file name.
gran = 'M15'
look_back = 10

# No trailing comma here: `datetime(...),` would bind a 1-tuple, not a datetime.
start_t = datetime(2010, 1, 1)
end_t = datetime(2018, 8, 23)

In [68]:
# Load the cached USD/JPY M15 candles and index them by their 'date' column.
# NOTE: pickle is only safe on files this project produced itself.
with open("../intermediate_data/USDJPY_M15_20100101@000000_20180823@000000.pkl", 'rb') as f:
    df = pickle.load(f).set_index('date')

# Placeholder response: Preprocess does not read `res` when a DataFrame is given.
res = 0
# Live alternative (disabled): fetch the candles from the OANDA practice API.
# oanda = oandapy.API(environment="practice", access_token=token)
# res = oanda.get_history(instrument="USD_JPY",granularity=gran, count = 77 + look_back)

In [69]:
# Run the indicator pipeline on the cached candles; `data` is the joined
# feature table (indicators plus the up/down/nochange label columns).
prep = Preprocess(res, df = df)
data = prep.data

### 出力

In [64]:
# Regression dataset: same features without the one-hot direction labels.
# Note that this rebinds `data` to the label-free frame.
data = data.drop(columns = ['up', 'down', 'nochange'])
with open('../intermediate_data/prep_for_reg_USDJPY_M15_20100101@000000_20180823@000000.pkl', 'wb') as f:
    pickle.dump(data, f)

In [70]:
# Classification dataset: must keep the up/down/nochange label columns.
# Dump `prep.data` rather than the module-level `data`, because the regression
# export rebinds `data` to a frame with those columns dropped; when the file
# is run top to bottom, `data` would no longer contain the labels here.
with open('../intermediate_data/prep_for_class_USDJPY_M15_20100101@000000_20180823@000000.pkl', mode='wb') as f:
    pickle.dump(prep.data, f)