In [1]:
from datetime import datetime, timedelta
import oandapy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import os
import talib
sys.path.append('/Users/toshio/project/fx')
from config import token
from lib.indicator import ichimoku

class Preprocess:
    """Build a technical-indicator feature table from OHLC candles.

    Every ``prep_*`` step runs eagerly in ``__init__``; the joined result is
    available as ``self.data``.

    Parameters
    ----------
    res : dict
        API response holding a ``'candles'`` list. It is only read when
        ``df`` is None, so any placeholder works when ``df`` is supplied.
    df : pandas.DataFrame, optional
        Pre-built candle frame with at least ``open``, ``high``, ``low`` and
        ``close`` columns. When given, ``res`` is not parsed.

    NOTE(review): ``prep_ohlcv`` needs ``open``/``high``/``low``/``close``
    columns; confirm that frames produced by ``res_to_df`` carry those names.
    """

    def __init__(self, res, df = None):
        self.res = res
        if df is None:
            self.df = self.res_to_df()
        else:
            self.df = df
        # Order matters: later steps read attributes set by earlier ones
        # (arr_ohlc/ohlc -> delta -> updown, everything -> data).
        self.arr_ohlc, self.ohlc = self.prep_ohlcv()
        self.delta = self.prep_delta()
        self.sma = self.prep_sma()
        self.macd = self.prep_macd()
        self.rsi = self.prep_rsi()
        self.bband = self.prep_bband()
        self.adx = self.prep_adx()
        self.di = self.prep_di()
        self.sar = self.prep_sar()
        self.ichi = self.prep_ichi()
        self.updown = self.prep_updown()
        self.data = self.prep_concat()

    def res_to_df(self):
        """Convert ``self.res['candles']`` into a frame indexed by ``time``.

        The ``complete`` column is dropped and the ``time`` strings are parsed
        into ``datetime`` objects.
        """
        df = pd.DataFrame(self.res['candles']).drop(['complete'], axis = 1)
        # Drop the last 8 characters of each timestamp (presumably the
        # fractional-second/'Z' suffix -- TODO confirm) and replace the 'T'
        # separator so the value matches the strptime pattern below.
        stamps = df['time'].str[:-8].str.replace('T', ' ')
        df['time'] = [datetime.strptime(v, '%Y-%m-%d %H:%M:%S') for v in stamps]
        return df.set_index('time', drop = True)

    def prep_ohlcv(self):
        """Return ``(arr_ohlc, ohlc)`` for the open/high/low/close columns.

        ``arr_ohlc`` is a 2-D float64 array whose columns are, in order,
        open (0), high (1), low (2) and close (3); ``ohlc`` is the matching
        DataFrame.
        """
        ohlc = self.df[['open', 'high', 'low', 'close']].copy()
        # Force float64 so integer-typed prices are still accepted by the
        # TA-Lib calls made by the other prep_* methods.
        arr_ohlc = np.array(ohlc, dtype = np.float64)
        return arr_ohlc, ohlc

    def prep_delta(self):
        """Return a one-column frame ``delta_close``: close minus previous close."""
        return self.ohlc['close'].diff().to_frame('delta_close')

    def prep_sma(self):
        """Return simple moving averages of the close for several periods."""
        close = self.arr_ohlc[:, 3]
        periods = (5, 25, 50, 75, 100, 500)
        sma = pd.DataFrame(index = self.df.index, columns = ['sma{}'.format(p) for p in periods])
        for period in periods:
            sma['sma{}'.format(period)] = talib.SMA(close, timeperiod = period)
        return sma

    def prep_macd(self):
        """Return MACD line, signal line and histogram of the close (12/26/9)."""
        close = self.arr_ohlc[:, 3]
        # talib.MACD returns all three series at once; compute it a single time.
        macd_line, macd_signal, macd_hist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        macd = pd.DataFrame(index = self.df.index, columns = ['macd', 'macdsignal', 'macdhist'])
        macd['macd'] = macd_line
        macd['macdsignal'] = macd_signal
        macd['macdhist'] = macd_hist
        return macd

    def prep_rsi(self):
        """Return the 14-period RSI of the close."""
        rsi = pd.DataFrame(index = self.df.index, columns = ['rsi'])
        rsi['rsi'] = talib.RSI(self.arr_ohlc[:, 3], timeperiod = 14)
        return rsi

    def prep_bband(self):
        """Return 15-period Bollinger bands at 1, 2 and 3 standard deviations."""
        close = self.arr_ohlc[:, 3]
        bband = pd.DataFrame(index = self.df.index, columns = ['-3sigma', '-2sigma', '-1sigma', '+1sigma', '+2sigma', '+3sigma'])
        for n_sigma in (1, 2, 3):
            # One call yields (upper, middle, lower); the middle band is unused.
            upper, _middle, lower = talib.BBANDS(close, timeperiod=15, nbdevup=n_sigma, nbdevdn=n_sigma)
            bband['+{}sigma'.format(n_sigma)] = upper
            bband['-{}sigma'.format(n_sigma)] = lower
        return bband

    def prep_adx(self):
        """Return the 14-period ADX computed from high, low and close."""
        high, low, close = self.arr_ohlc[:, 1], self.arr_ohlc[:, 2], self.arr_ohlc[:, 3]
        adx = pd.DataFrame(index = self.df.index, columns = ['adx'])
        adx['adx'] = talib.ADX(high, low, close, timeperiod = 14)
        return adx

    def prep_di(self):
        """Return the 14-period +DI and -DI computed from high, low and close."""
        high, low, close = self.arr_ohlc[:, 1], self.arr_ohlc[:, 2], self.arr_ohlc[:, 3]
        di = pd.DataFrame(index = self.df.index, columns = ['+di', '-di'])
        di['+di'] = talib.PLUS_DI(high, low, close, timeperiod = 14)
        di['-di'] = talib.MINUS_DI(high, low, close, timeperiod = 14)
        return di

    def prep_sar(self):
        """Return the parabolic SAR from high and low (acceleration 0.05, max 0.2)."""
        sar = pd.DataFrame(index = self.df.index, columns = ['sar'])
        sar['sar'] = talib.SAR(self.arr_ohlc[:, 1], self.arr_ohlc[:, 2], acceleration=0.05, maximum=0.2)
        return sar

    def prep_ichi(self):
        """Return the output of the project ``ichimoku`` helper without ``close``.

        ``close`` is removed here because ``self.df`` already provides it when
        the frames are joined in ``prep_concat``.
        """
        return ichimoku(self.ohlc).drop('close', axis = 1)

    def prep_updown(self, threshold = 0.1):
        """Return one-hot ``up`` / ``down`` / ``nochange`` labels from ``delta_close``.

        Parameters
        ----------
        threshold : float, default 0.1
            Absolute close-to-close change that must be exceeded for a move to
            count as ``up`` (``> threshold``) or ``down`` (``< -threshold``).

        A change inside ``[-threshold, threshold]``, bounds included, is
        labelled ``nochange`` so that every non-NaN row gets exactly one label.
        Rows whose delta is NaN get 0 in all three columns.
        """
        change = self.delta['delta_close']
        is_up = change > threshold
        is_down = change < -threshold
        # Inclusive bounds: a move of exactly +/-threshold is neither up nor
        # down, so it must be counted as "no change".
        is_flat = (change >= -threshold) & (change <= threshold)
        return pd.DataFrame(
            {
                'up': is_up.astype(int),
                'down': is_down.astype(int),
                'nochange': is_flat.astype(int),
            },
            index = self.df.index,
            columns = ['up', 'down', 'nochange'],
        )

    def prep_concat(self):
        """Join the candles with every indicator frame and drop incomplete rows.

        The ``chiko`` column is removed before ``dropna``; any row that still
        contains a NaN (e.g. where an indicator has no value yet) is discarded.
        """
        adder = [self.delta, self.sma, self.macd, self.rsi, self.bband, self.adx, self.di, self.sar, self.ichi, self.updown]
        data = self.df.join(adder).drop('chiko', axis = 1)
        return data.dropna()

## Main

In [2]:
# Run configuration.
# gran: candle granularity code; it is interpolated into the output pickle names.
gran = 'M15'
# look_back: extra candles requested on top of the base count in the
# (currently commented-out) live API call.
look_back = 10

# Date range of the dataset. A stray trailing comma previously turned
# start_t into a 1-tuple instead of a datetime.
start_t = datetime(2010, 1, 1)
end_t = datetime(2018, 8, 23)

In [25]:
# Load the cached candle frame from disk and index it by its 'date' column.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
candle_pickle = "../intermediate_data/USDJPY_M15_20100101@000000_20180823@000000.pkl"
with open(candle_pickle, mode='rb') as f:
    df = pickle.load(f).set_index('date')

# Placeholder: Preprocess only parses `res` when no DataFrame is passed in.
res = 0
# Live alternative that fetches the candles from the OANDA API instead:
# oanda = oandapy.API(environment="practice", access_token=token)
# res = oanda.get_history(instrument="USD_JPY",granularity=gran, count = 77 + look_back)

In [27]:
# Compute every indicator for the loaded candles; `df` is supplied, so the
# `res` placeholder is never parsed.
prep = Preprocess(res=res, df=df)
# Joined feature table (candles + indicators + labels) with NaN rows removed.
data = prep.data

In [28]:
# Preview only the first rows instead of rendering the whole feature table.
data.head(10)

Unnamed: 0_level_0,open,close,high,low,delta_close,sma5,sma25,sma50,sma75,macd,...,+di,-di,sar,tenkan,kijun,senko1,senko2,up,down,nochange
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04 13:15:00,92.8580,92.8280,92.899,92.799,-0.0280,92.8604,92.87934,92.87775,92.895860,-0.011894,...,18.448222,18.011887,92.778531,92.84800,92.97375,92.899750,92.91300,0,0,1
2010-01-04 13:30:00,92.8260,92.6270,92.842,92.623,-0.2010,92.8132,92.86174,92.87194,92.891767,-0.028857,...,15.452352,28.137673,92.926000,92.76200,92.89050,92.909000,92.91300,0,1,0
2010-01-04 13:45:00,92.6310,92.6430,92.663,92.611,0.0160,92.7624,92.84130,92.86561,92.887927,-0.040542,...,14.836271,27.935902,92.910850,92.76200,92.89050,92.912250,92.91300,0,0,1
2010-01-04 14:00:00,92.6410,92.5440,92.649,92.477,-0.0990,92.6996,92.82154,92.85535,92.882633,-0.057133,...,12.991243,34.150271,92.880865,92.72050,92.79100,92.964000,92.94950,0,0,1
2010-01-04 14:15:00,92.5420,92.5450,92.616,92.524,0.0010,92.6374,92.80658,92.84665,92.877473,-0.069400,...,12.122830,31.867461,92.820285,92.72050,92.75450,92.964000,92.94950,0,0,1
2010-01-04 14:30:00,92.5480,92.6680,92.684,92.541,0.1230,92.6054,92.79470,92.84215,92.873793,-0.068409,...,15.688256,28.660508,92.768792,92.72050,92.75450,92.964000,92.94950,1,0,0
2010-01-04 14:45:00,92.6700,92.6140,92.729,92.611,-0.0540,92.6028,92.78292,92.83671,92.869447,-0.071160,...,17.530902,26.307839,92.477000,92.70100,92.74300,92.989250,92.94950,0,0,1
2010-01-04 15:00:00,92.6160,92.5830,92.710,92.534,-0.0310,92.5908,92.77202,92.83061,92.864660,-0.074977,...,15.488664,28.339739,92.489600,92.70000,92.74300,92.990375,92.94950,0,0,1
2010-01-04 15:15:00,92.5850,92.3810,92.627,92.343,-0.2020,92.5582,92.75404,92.82075,92.857287,-0.093228,...,12.881048,34.891123,92.729000,92.60450,92.66150,92.977125,92.94950,0,1,0
2010-01-04 15:30:00,92.3790,92.2510,92.442,92.202,-0.1300,92.4994,92.73098,92.80807,92.847940,-0.116834,...,11.169665,38.061028,92.710000,92.45950,92.59650,92.970875,92.94950,0,1,0


### 出力

In [21]:
# Regression dataset: the feature table without the classification labels.
# The result gets its own name so that `data` is not overwritten: this cell
# stays re-runnable (a second run no longer raises KeyError on the already
# dropped columns) and later cells still see the label columns.
reg_data = data.drop(['up', 'down', 'nochange'], axis = 1)
with open('../intermediate_data/prep_reg_data_{}.pickle'.format(gran), mode='wb') as f:
    pickle.dump(reg_data, f)

In [14]:
# Classification dataset: the full feature table including the
# up/down/nochange labels. It is read from `prep.data` so this export always
# contains the label columns, regardless of what `data` currently holds.
with open('../intermediate_data/prep_class_data_{}.pickle'.format(gran), mode='wb') as f:
    pickle.dump(prep.data, f)

In [4]:
# Preview only the first rows of the raw candle frame instead of dumping it all.
df.head(10)

Unnamed: 0_level_0,closeAsk,closeBid,highAsk,highBid,lowAsk,lowBid,openAsk,openBid,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-03-05 09:00:00,115.207,115.189,115.490,115.472,115.167,115.149,115.450,115.432,1380
2007-03-05 10:00:00,115.306,115.288,115.427,115.409,115.216,115.198,115.218,115.200,1424
2007-03-05 11:00:00,115.555,115.537,115.586,115.568,115.282,115.264,115.317,115.299,1365
2007-03-05 12:00:00,115.575,115.557,115.705,115.687,115.435,115.417,115.558,115.540,1228
2007-03-05 13:00:00,115.545,115.527,115.655,115.637,115.357,115.339,115.595,115.577,1593
2007-03-05 14:00:00,115.935,115.905,116.001,115.983,115.484,115.466,115.535,115.517,1690
2007-03-05 15:00:00,115.984,115.966,116.159,116.141,115.701,115.683,115.927,115.897,2084
2007-03-05 16:00:00,116.024,116.006,116.256,116.238,115.971,115.953,115.974,115.956,1532
2007-03-05 17:00:00,116.007,115.989,116.126,116.108,115.888,115.870,116.026,116.008,865
2007-03-05 18:00:00,115.902,115.884,116.012,115.994,115.820,115.802,116.012,115.994,760
