In [2]:
from datetime import datetime, timedelta
import oandapy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import os
import talib
sys.path.append('/Users/toshio/project/fx')
from config import token
from lib.indicator import ichimoku

class Preprocess:
    """Turn bid/ask candle data into a table of technical-indicator features.

    The whole pipeline runs inside ``__init__``; every intermediate frame is
    kept as an attribute and the joined, NaN-free result is ``self.data``.

    Parameters
    ----------
    res : dict
        History response holding a ``'candles'`` list. It is only parsed
        (via ``res_to_df``) when ``df`` is None.
    df : pandas.DataFrame, optional
        Ready-made candle frame with the columns ``openAsk``, ``highAsk``,
        ``lowAsk``, ``closeAsk``, ``openBid``, ``highBid``, ``lowBid``,
        ``closeBid`` and ``volume``. When given it is used as is.
    """

    def __init__(self, res, df = None):
        self.res = res
        self.df = self.res_to_df() if df is None else df
        # Order matters below: later steps read attributes set by earlier ones.
        self.arr_ask, self.arr_bid, self.df_ask = self.prep_ohlcv()
        self.delta = self.prep_delta()
        self.sma = self.prep_sma()
        self.macd = self.prep_macd()
        self.rsi = self.prep_rsi()
        self.bband = self.prep_bband()
        self.adx = self.prep_adx()
        self.di = self.prep_di()
        self.sar = self.prep_sar()
        self.ichi = self.prep_ichi()
        self.updown = self.prep_updown()
        self.data = self.prep_concat()

    def res_to_df(self):
        """Convert ``self.res['candles']`` into a frame indexed by candle time.

        Returns
        -------
        pandas.DataFrame
            One row per candle, indexed by ``time`` (parsed to datetimes),
            without the ``complete`` column.
        """
        df = pd.DataFrame(self.res['candles'])
        # NOTE(review): the 'complete' flag is discarded without filtering on
        # it, so a candle flagged incomplete stays in the frame.
        df = df.drop(['complete'], axis = 1)
        # Cut the last 8 characters of each stamp and parse what remains,
        # which is expected to look like 'YYYY-MM-DDTHH:MM:SS'.
        df['time'] = pd.to_datetime(df['time'].str[:-8], format = '%Y-%m-%dT%H:%M:%S')
        return df.set_index('time', drop = True)

    def _ohlcv_for(self, side):
        """Return an open/high/low/close/volume frame for ``side`` ('Ask' or 'Bid')."""
        fields = ['open', 'high', 'low', 'close']
        frame = self.df[[field + side for field in fields] + ['volume']].copy()
        frame.columns = fields + ['volume']
        return frame

    def prep_ohlcv(self):
        """Split the candle frame into ask-side and bid-side OHLCV data.

        Returns
        -------
        tuple
            ``(arr_ask, arr_bid, df_ask)``: the ask and bid data as 2-D numpy
            arrays with columns open, high, low, close, volume (in that
            order), plus the ask data as a DataFrame.
        """
        df_ask = self._ohlcv_for('Ask')
        df_bid = self._ohlcv_for('Bid')
        return np.array(df_ask), np.array(df_bid), df_ask

    def prep_delta(self):
        """Return the bar-to-bar change of the ask close as ``delta_close``."""
        delta = pd.DataFrame(index = self.df.index, columns = ['delta_close'])
        delta['delta_close'] = self.df_ask['close'].diff()
        return delta

    def prep_sma(self):
        """Return 5/25/50/75-period simple moving averages of the ask close."""
        close = self.arr_ask[:, 3]
        sma = pd.DataFrame(index = self.df.index, columns = ['sma5', 'sma25', 'sma50', 'sma75'])
        for period in (5, 25, 50, 75):
            sma['sma{}'.format(period)] = talib.SMA(close, timeperiod = period)
        return sma

    def prep_macd(self):
        """Return MACD (12, 26, 9) line, signal and histogram of the ask close."""
        macd = pd.DataFrame(index = self.df.index, columns = ['macd', 'macdsignal', 'macdhist'])
        # One call yields all three series; no need to recompute per column.
        line, signal, hist = talib.MACD(self.arr_ask[:, 3], fastperiod=12, slowperiod=26, signalperiod=9)
        macd['macd'] = line
        macd['macdsignal'] = signal
        macd['macdhist'] = hist
        return macd

    def prep_rsi(self):
        """Return the 14-period RSI of the ask close."""
        rsi = pd.DataFrame(index = self.df.index, columns = ['rsi'])
        rsi['rsi'] = talib.RSI(self.arr_ask[:, 3], timeperiod = 14)
        return rsi

    def prep_bband(self):
        """Return 15-period Bollinger bands at 1, 2 and 3 deviations.

        Columns are ordered ``-3sigma`` ... ``+3sigma``; the middle band is
        not kept.
        """
        close = self.arr_ask[:, 3]
        bband = pd.DataFrame(index = self.df.index, columns = ['-3sigma', '-2sigma', '-1sigma', '+1sigma', '+2sigma', '+3sigma'])
        for width in (1, 2, 3):
            # BBANDS returns (upper, middle, lower); compute once per width.
            upper, _middle, lower = talib.BBANDS(close, timeperiod=15, nbdevup=width, nbdevdn=width)
            bband['+{}sigma'.format(width)] = upper
            bband['-{}sigma'.format(width)] = lower
        return bband

    def prep_adx(self):
        """Return the 14-period ADX computed from ask high/low/close."""
        high, low, close = self.arr_ask[:, 1], self.arr_ask[:, 2], self.arr_ask[:, 3]
        adx = pd.DataFrame(index = self.df.index, columns = ['adx'])
        adx['adx'] = talib.ADX(high, low, close, timeperiod = 14)
        return adx

    def prep_di(self):
        """Return the 14-period +DI and -DI computed from ask high/low/close."""
        high, low, close = self.arr_ask[:, 1], self.arr_ask[:, 2], self.arr_ask[:, 3]
        di = pd.DataFrame(index = self.df.index, columns = ['+di', '-di'])
        di['+di'] = talib.PLUS_DI(high, low, close, timeperiod = 14)
        di['-di'] = talib.MINUS_DI(high, low, close, timeperiod = 14)
        return di

    def prep_sar(self):
        """Return the parabolic SAR of ask high/low (acceleration 0.05, maximum 0.2)."""
        sar = pd.DataFrame(index = self.df.index, columns = ['sar'])
        sar['sar'] = talib.SAR(self.arr_ask[:, 1], self.arr_ask[:, 2], acceleration=0.05, maximum=0.2)
        return sar

    def prep_ichi(self):
        """Return the output of ``ichimoku`` on the ask frame, minus its ``close`` column."""
        return ichimoku(self.df_ask).drop('close', axis = 1)

    def prep_updown(self, threshold = 0.1):
        """One-hot encode the direction of ``delta_close``.

        Parameters
        ----------
        threshold : float, optional
            Size a move must exceed to count as up or down (default 0.1,
            in the same units as the close price).

        Returns
        -------
        pandas.DataFrame
            Integer columns ``up`` (delta > threshold), ``down``
            (delta < -threshold) and ``nochange`` (everything in between,
            bounds included). Rows whose delta is NaN are 0 in all three.
        """
        change = self.delta['delta_close']
        updown = pd.DataFrame(index = self.df.index, columns = ['up', 'down', 'nochange'])
        updown['up'] = (change > threshold).astype('int64')
        updown['down'] = (change < -threshold).astype('int64')
        # Inclusive bounds: a move of exactly +/-threshold must land in one
        # class, otherwise the row would have no label at all.
        updown['nochange'] = ((change >= -threshold) & (change <= threshold)).astype('int64')
        return updown

    def prep_concat(self):
        """Join every feature frame onto the candle frame and drop incomplete rows.

        Returns
        -------
        pandas.DataFrame
            ``self.df`` plus all indicator and label columns except
            ``chiko``, restricted to rows with no NaN in any column.
        """
        adder = [self.delta, self.sma, self.macd, self.rsi, self.bband, self.adx, self.di, self.sar, self.ichi, self.updown]
        data = self.df.join(adder)
        # NOTE(review): 'chiko' is expected among ichimoku()'s columns;
        # presumably removed so its NaNs do not make dropna() discard
        # otherwise usable rows — confirm against lib.indicator.
        data = data.drop('chiko', axis = 1)
        return data.dropna()

## Main

In [3]:
# Candle granularity code; used in the cache file name and in the API request.
gran = 'H1'
# Number of candles requested on top of the fixed 77.
# NOTE(review): 77 presumably covers the longest indicator warm-up — confirm.
look_back = 10

# Candle frame cached on disk for this granularity.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
cache_path = '../intermediate_data/data_{}.pickle'.format(gran)
with open(cache_path, mode='rb') as f:
    df = pickle.load(f)

# Recent candles from the OANDA practice environment.
n_candles = 77 + look_back
oanda = oandapy.API(environment="practice", access_token=token)
res = oanda.get_history(instrument="USD_JPY", granularity=gran, count=n_candles)

In [18]:
# Because `df` is supplied, Preprocess uses it directly and does not parse `res`.
prep = Preprocess(res, df = df)
# Joined feature table with incomplete rows removed.
data = prep.data

### 出力

In [21]:
# Regression export: the feature table without the one-hot direction labels.
# The stripped frame gets its own name so that `data` keeps its label columns;
# reassigning `data` made a second run of this cell raise KeyError and left
# any later consumer of `data` without the labels.
reg_data = data.drop(['up', 'down', 'nochange'], axis = 1)
with open('../intermediate_data/prep_reg_data_{}.pickle'.format(gran), mode='wb') as f:
    pickle.dump(reg_data, f)

In [14]:
# Classification export: dump the untouched Preprocess output so the
# up/down/nochange label columns are present no matter what has been done to
# the `data` variable in other cells or in which order the cells were run.
with open('../intermediate_data/prep_class_data_{}.pickle'.format(gran), mode='wb') as f:
    pickle.dump(prep.data, f)

In [4]:
# Preview only the first rows instead of rendering the whole candle history.
df.head(10)

Unnamed: 0_level_0,closeAsk,closeBid,highAsk,highBid,lowAsk,lowBid,openAsk,openBid,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-03-05 09:00:00,115.207,115.189,115.490,115.472,115.167,115.149,115.450,115.432,1380
2007-03-05 10:00:00,115.306,115.288,115.427,115.409,115.216,115.198,115.218,115.200,1424
2007-03-05 11:00:00,115.555,115.537,115.586,115.568,115.282,115.264,115.317,115.299,1365
2007-03-05 12:00:00,115.575,115.557,115.705,115.687,115.435,115.417,115.558,115.540,1228
2007-03-05 13:00:00,115.545,115.527,115.655,115.637,115.357,115.339,115.595,115.577,1593
2007-03-05 14:00:00,115.935,115.905,116.001,115.983,115.484,115.466,115.535,115.517,1690
2007-03-05 15:00:00,115.984,115.966,116.159,116.141,115.701,115.683,115.927,115.897,2084
2007-03-05 16:00:00,116.024,116.006,116.256,116.238,115.971,115.953,115.974,115.956,1532
2007-03-05 17:00:00,116.007,115.989,116.126,116.108,115.888,115.870,116.026,116.008,865
2007-03-05 18:00:00,115.902,115.884,116.012,115.994,115.820,115.802,116.012,115.994,760
