Implementation of LSTM Model on EUR-JPY prediction of the close price 1day ahead based on data of previous 10 days

## 1. Data Preparation

### 1.1 Read in data file

In [41]:
import talib as ta

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

from utils import series_to_supervised

In [42]:
data = pd.read_csv('../data/AUD_USD_M15.csv')
data

Unnamed: 0,datetime,open,high,low,close
0,2005-01-02T18:45:00.000000000Z,0.78230,0.78230,0.78230,0.78230
1,2005-01-02T19:15:00.000000000Z,0.78200,0.78280,0.78180,0.78230
2,2005-01-02T19:45:00.000000000Z,0.78180,0.78180,0.78180,0.78180
3,2005-01-02T20:00:00.000000000Z,0.78140,0.78160,0.78060,0.78060
4,2005-01-02T20:15:00.000000000Z,0.78080,0.78130,0.78080,0.78090
...,...,...,...,...,...
407395,2021-02-28T22:30:00.000000000Z,0.77134,0.77135,0.77082,0.77134
407396,2021-02-28T22:45:00.000000000Z,0.77129,0.77232,0.77125,0.77203
407397,2021-02-28T23:00:00.000000000Z,0.77210,0.77337,0.77140,0.77294
407398,2021-02-28T23:15:00.000000000Z,0.77290,0.77483,0.77258,0.77469


### 1.2 Datetime formatting

In [43]:
data['datetime'] = pd.to_datetime(data['datetime'])
data['datetime'] = data['datetime'].dt.date
# data['datetime'] = data['datetime'].dt.tz_localize(None)
data.set_index('datetime', inplace=True)
data.head()

Unnamed: 0_level_0,open,high,low,close
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-02,0.7823,0.7823,0.7823,0.7823
2005-01-02,0.782,0.7828,0.7818,0.7823
2005-01-02,0.7818,0.7818,0.7818,0.7818
2005-01-02,0.7814,0.7816,0.7806,0.7806
2005-01-02,0.7808,0.7813,0.7808,0.7809


### 1.3 TA indicators¶

In [44]:
open_ = data['open'].values
high_ = data['high'].values
low_ = data['low'].values
close_ = data['close'].values
dt = data.index

In [45]:
tp = 24 ## set time period window

## overlap studies indicators
# BBANDS - Bollinger Bands
data['upperband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[0]
data['middleband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[1]
data['lowerband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[2]
# DEMA - Double Exponential Moving Average
data['dema'] = ta.DEMA(close_, timeperiod=tp)
# EMA - Exponential Moving Average
data['ema'] = ta.EMA(close_, timeperiod=tp)
# HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
data['ht'] = ta.HT_TRENDLINE(close_)
# KAMA - Kaufman Adaptive Moving Average
data['kama'] = ta.KAMA(close_, timeperiod=tp)
# MA - Moving average
data['ma'] = ta.MA(close_, timeperiod=tp, matype=0)
# MAMA - MESA Adaptive Moving Average
# data['mama'], data['fama'] = ta.MAMA(close_, fastlimit=5, slowlimit=10)
# MAVP - Moving average with variable period
# data['mavp'] = ta.MAVP(close_, periods, minperiod=2, maxperiod=30, matype=0)
# MIDPOINT - MidPoint over period
data['midpoint'] = ta.MIDPOINT(close_, timeperiod=tp)
# MIDPRICE - Midpoint Price over period
data['midprice'] = ta.MIDPRICE(high_, low_, timeperiod=tp)
# SAR - Parabolic SAR
data['sar'] = ta.SAR(high_, low_, acceleration=0, maximum=0)
# SAREXT - Parabolic SAR - Extended
data['sarext'] = ta.SAREXT(high_, low_, startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0)
# SMA - Simple Moving Average
data['sma5'] = ta.SMA(close_, timeperiod=tp)
data['sma10'] = ta.SMA(close_, timeperiod=tp*2)
data['sma30'] = ta.SMA(close_, timeperiod=tp*6)
# T3 - Triple Exponential Moving Average (T3)
data['t3'] = ta.T3(close_, timeperiod=tp, vfactor=0)
# TEMA - Triple Exponential Moving Average
data['tema'] = ta.TEMA(close_, timeperiod=tp*6)
# TRIMA - Triangular Moving Average
data['trima5'] = ta.TRIMA(close_, timeperiod=tp)
data['trima30'] = ta.TRIMA(close_, timeperiod=tp*6)
# WMA - Weighted Moving Average
data['wma5'] = ta.WMA(close_, timeperiod=tp)
data['wma30'] = ta.WMA(close_, timeperiod=tp*6)

In [46]:
## Momentum indicators
# ADX - Average Directional Movement Index
data['adx'] = ta.ADX(high_, low_, close_, timeperiod=tp)
# ADXR - Average Directional Movement Index Rating
data['adxr'] = ta.ADXR(high_, low_, close_, timeperiod=tp)
# APO - Absolute Price Oscillator
data['apo'] = ta.APO(close_, fastperiod=12, slowperiod=24, matype=0)
# AROON - Aroon
data['aroondown'], data['aroonup'] = ta.AROON(high_, low_, timeperiod=tp)
# AROONOSC - Aroon Oscillator
data['aroonosc'] = ta.AROONOSC(high_, low_, timeperiod=tp)
# BOP - Balance Of Power
data['bop'] = ta.BOP(open_, high_, low_, close_)
# CCI - Commodity Channel Index
data['cci'] = ta.CCI(high_, low_, close_, timeperiod=tp)
# CMO - Chande Momentum Oscillator
data['cmo'] = ta.CMO(close_, timeperiod=tp)
# DX - Directional Movement Index
data['dx'] = ta.DX(high_, low_, close_, timeperiod=tp)
# MACD - Moving Average Convergence/Divergence
data['macd'], data['macdsignal'], data['macdhist'] = ta.MACD(close_, fastperiod=12, slowperiod=24, signalperiod=9)
# MACDEXT - MACD with controllable MA type
data['macdext'], data['macdsignalext'], data['macdhistext'] = ta.MACDEXT(close_, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
# MACDFIX - Moving Average Convergence/Divergence Fix 12/26
data['macdfix'], data['macdsignalfix'], data['macdhistfix'] = ta.MACDFIX(close_, signalperiod=9)
# MINUS_DI - Minus Directional Indicator
data['minus_di'] = ta.MINUS_DI(high_, low_, close_, timeperiod=tp)
# MINUS_DM - Minus Directional Movement
data['minus_dm'] = ta.MINUS_DM(high_, low_, timeperiod=tp)
# MOM - Momentum
data['mom'] = ta.MOM(close_, timeperiod=tp)
# PLUS_DI - Plus Directional Indicator
data['plus_di'] = ta.PLUS_DI(high_, low_, close_, timeperiod=tp)
# PLUS_DM - Plus Directional Movement
data['plus_dm'] = ta.PLUS_DM(high_, low_, timeperiod=tp)
# PPO - Percentage Price Oscillator
data['ppo'] = ta.PPO(close_, fastperiod=12, slowperiod=24, matype=0)
# ROC - Rate of change : ((price/prevPrice)-1)*100
data['roc'] = ta.ROC(close_, timeperiod=tp)
# ROCP - Rate of change Percentage: (price-prevPrice)/prevPrice
data['rocp'] = ta.ROCP(close_, timeperiod=tp)
# ROCR - Rate of change ratio: (price/prevPrice)
data['rocr'] = ta.ROCR(close_, timeperiod=tp)
# ROCR100 - Rate of change ratio 100 scale: (price/prevPrice)*100
data['rocr100'] = ta.ROCR100(close_, timeperiod=tp)
# RSI - Relative Strength Index
data['rsi'] = ta.RSI(close_, timeperiod=tp)
# STOCH - Stochastic
data['slowk'], data['slowd'] = ta.STOCH(high_, low_, close_, fastk_period=24, slowk_period=12, slowk_matype=0, slowd_period=3, slowd_matype=0)
# STOCHF - Stochastic Fast
data['fastk'], data['fastd'] = ta.STOCHF(high_, low_, close_, fastk_period=24, fastd_period=12, fastd_matype=0)
# STOCHRSI - Stochastic Relative Strength Index
data['fastkrsi'], data['fastdrsi'] = ta.STOCHRSI(close_, timeperiod=tp, fastk_period=5, fastd_period=3, fastd_matype=0)
# TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
data['trix'] = ta.TRIX(close_, timeperiod=tp)
# ULTOSC - Ultimate Oscillator
data['ultosc'] = ta.ULTOSC(high_, low_, close_, timeperiod1=tp, timeperiod2=tp*2, timeperiod3=tp*3)
# WILLR - Williams' %R
data['willr'] = ta.WILLR(high_, low_, close_, timeperiod=tp)

In [47]:
## volatility indicators

# ATR - Average True Range
data['atr'] = ta.ATR(high_, low_, close_, timeperiod=tp)
# NATR - Normalized Average True Range
data['natr'] = ta.NATR(high_, low_, close_, timeperiod=tp)
# TRANGE - True Range
data['trange'] = ta.TRANGE(high_, low_, close_)


In [48]:
## price transform

# AVGPRICE - Average Price
data['avgprice'] = ta.AVGPRICE(open_, high_, low_, close_)
# MEDPRICE - Median Price
data['medprice'] = ta.MEDPRICE(high_, low_)
# TYPPRICE - Typical Price
data['typprice'] = ta.TYPPRICE(high_, low_, close_)
# WCLPRICE - Weighted Close Price
data['wclprice'] = ta.WCLPRICE(high_, low_, close_)


In [50]:
data.to_csv('AUD_USD_M15_TA_71.csv', index=True)

## 2. Data Processing

In [9]:
## drop rows with NANs
data.dropna(axis=0, inplace=True)
data = data.reset_index(drop=True)
print('shape of data: ', data.shape)
data.head()

shape of data:  (406971, 71)


Unnamed: 0,open,high,low,close,upperband,middleband,lowerband,dema,ema,ht,...,trix,ultosc,willr,atr,natr,trange,avgprice,medprice,typprice,wclprice
0,0.76195,0.76225,0.76135,0.76175,0.763098,0.761877,0.760656,0.762221,0.761941,0.762053,...,0.002865,53.012169,-27.160494,0.000715,0.093845,0.0009,0.761825,0.7618,0.761783,0.761775
1,0.76185,0.76185,0.76105,0.76135,0.762799,0.761946,0.761092,0.762108,0.761894,0.762056,...,0.002831,54.124132,-37.037037,0.000718,0.09436,0.0008,0.761525,0.76145,0.761417,0.7614
2,0.76145,0.7622,0.76145,0.7621,0.762682,0.762004,0.761326,0.762123,0.76191,0.762068,...,0.002799,53.396229,-34.883721,0.000724,0.094986,0.00085,0.7618,0.761825,0.761917,0.761962
3,0.7622,0.7627,0.7618,0.7618,0.762665,0.761988,0.76131,0.762089,0.761902,0.76208,...,0.002746,48.871698,-58.333333,0.000731,0.095987,0.0009,0.762125,0.76225,0.7621,0.762025
4,0.7618,0.7626,0.7615,0.7624,0.762705,0.762008,0.761312,0.76215,0.761941,0.762102,...,0.002717,50.633319,-25.0,0.000747,0.097927,0.0011,0.762075,0.76205,0.762167,0.762225


In [10]:
data = data[-100000:]
data.reset_index(drop=True, inplace=True)

In [11]:
train_split_frac = 0.8
val_split_frac = 0.9

train_split = int(train_split_frac * int(data.shape[0]))  ## end of train index (exclusive)
val_split = int(val_split_frac * int(data.shape[0]))      ## end of val index (exclusive)

### 2.1 Train - validation - test split

In [12]:
training_data = data[: train_split]
validation_data = data[train_split: val_split]
test_data = data[val_split:]

In [13]:
print('Shape of training_data:   ', training_data.shape)
print('Shape of validation_data: ', validation_data.shape)  # xgboost test
print('Shape of test_data:       ', test_data.shape)

Shape of training_data:    (80000, 71)
Shape of validation_data:  (10000, 71)
Shape of test_data:        (10000, 71)


### 2.2 Supervised dataframe

In [14]:
## convert to supervised data frame
data_supervised = series_to_supervised(training_data, 3, 2)
data_supervised

Unnamed: 0,var1(t-3),var2(t-3),var3(t-3),var4(t-3),var5(t-3),var6(t-3),var7(t-3),var8(t-3),var9(t-3),var10(t-3),...,var62(t+1),var63(t+1),var64(t+1),var65(t+1),var66(t+1),var67(t+1),var68(t+1),var69(t+1),var70(t+1),var71(t+1)
3,0.77070,0.77076,0.77065,0.77074,0.771009,0.769917,0.768826,0.770340,0.770055,0.770050,...,0.001541,50.906105,-11.267606,0.000469,0.060788,0.00038,0.771075,0.771050,0.771020,0.771005
4,0.77071,0.77096,0.77071,0.77092,0.771119,0.769981,0.768843,0.770450,0.770124,0.770154,...,0.001806,50.084762,-14.436620,0.000456,0.059127,0.00016,0.770937,0.770940,0.770917,0.770905
5,0.77095,0.77128,0.77088,0.77096,0.771228,0.770038,0.768847,0.770552,0.770191,0.770255,...,0.002014,48.931796,-38.022814,0.000471,0.061089,0.00081,0.770583,0.770575,0.770477,0.770428
6,0.77100,0.77126,0.77084,0.77120,0.771365,0.770106,0.768847,0.770678,0.770272,0.770343,...,0.002138,48.187709,-56.653992,0.000478,0.062045,0.00064,0.770055,0.770060,0.769970,0.769925
7,0.77124,0.77124,0.77086,0.77096,0.771441,0.770170,0.768898,0.770752,0.770327,0.770418,...,0.002197,46.729569,-64.035088,0.000471,0.061135,0.00031,0.769853,0.769915,0.769883,0.769867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79994,0.65168,0.65240,0.65168,0.65212,0.653893,0.652530,0.651168,0.652234,0.652187,0.652523,...,0.007000,49.285428,-53.618421,0.000643,0.098592,0.00035,0.652322,0.652315,0.652267,0.652243
79995,0.65216,0.65233,0.65203,0.65222,0.653818,0.652487,0.651155,0.652235,0.652190,0.652494,...,0.006508,49.269999,-48.684211,0.000626,0.095995,0.00024,0.652260,0.652260,0.652280,0.652290
79996,0.65218,0.65254,0.65212,0.65240,0.653728,0.652447,0.651166,0.652264,0.652207,0.652483,...,0.006053,48.439015,-50.000000,0.000609,0.093406,0.00022,0.652305,0.652330,0.652313,0.652305
79997,0.65237,0.65263,0.65230,0.65246,0.653649,0.652413,0.651178,0.652298,0.652227,0.652467,...,0.005557,46.672612,-82.236842,0.000626,0.096174,0.00102,0.651765,0.651755,0.651603,0.651528


In [15]:
len(data_supervised.columns.tolist())

355

In [16]:
indicators = data.columns.tolist()

In [17]:
column_names = []
for t in range(-3, 2):
    
    if t < 0:
        for var in range(1, 72):  ## exclude var(datetime)

            column_names.append(indicators[var-1]+'(t'+str(t)+')')
        
    elif t == 0:
        for var in range(1, 72):  ## exclude var(datetime)

            column_names.append(indicators[var-1]+'(t)')
    else:
        for var in range(1, 72):  ## exclude var(datetime)

            column_names.append(indicators[var-1]+'(t+1)')

In [18]:
len(column_names)

355

In [19]:
data_supervised.columns = column_names

In [20]:
data_supervised

Unnamed: 0,open(t-3),high(t-3),low(t-3),close(t-3),upperband(t-3),middleband(t-3),lowerband(t-3),dema(t-3),ema(t-3),ht(t-3),...,trix(t+1),ultosc(t+1),willr(t+1),atr(t+1),natr(t+1),trange(t+1),avgprice(t+1),medprice(t+1),typprice(t+1),wclprice(t+1)
3,0.77070,0.77076,0.77065,0.77074,0.771009,0.769917,0.768826,0.770340,0.770055,0.770050,...,0.001541,50.906105,-11.267606,0.000469,0.060788,0.00038,0.771075,0.771050,0.771020,0.771005
4,0.77071,0.77096,0.77071,0.77092,0.771119,0.769981,0.768843,0.770450,0.770124,0.770154,...,0.001806,50.084762,-14.436620,0.000456,0.059127,0.00016,0.770937,0.770940,0.770917,0.770905
5,0.77095,0.77128,0.77088,0.77096,0.771228,0.770038,0.768847,0.770552,0.770191,0.770255,...,0.002014,48.931796,-38.022814,0.000471,0.061089,0.00081,0.770583,0.770575,0.770477,0.770428
6,0.77100,0.77126,0.77084,0.77120,0.771365,0.770106,0.768847,0.770678,0.770272,0.770343,...,0.002138,48.187709,-56.653992,0.000478,0.062045,0.00064,0.770055,0.770060,0.769970,0.769925
7,0.77124,0.77124,0.77086,0.77096,0.771441,0.770170,0.768898,0.770752,0.770327,0.770418,...,0.002197,46.729569,-64.035088,0.000471,0.061135,0.00031,0.769853,0.769915,0.769883,0.769867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79994,0.65168,0.65240,0.65168,0.65212,0.653893,0.652530,0.651168,0.652234,0.652187,0.652523,...,0.007000,49.285428,-53.618421,0.000643,0.098592,0.00035,0.652322,0.652315,0.652267,0.652243
79995,0.65216,0.65233,0.65203,0.65222,0.653818,0.652487,0.651155,0.652235,0.652190,0.652494,...,0.006508,49.269999,-48.684211,0.000626,0.095995,0.00024,0.652260,0.652260,0.652280,0.652290
79996,0.65218,0.65254,0.65212,0.65240,0.653728,0.652447,0.651166,0.652264,0.652207,0.652483,...,0.006053,48.439015,-50.000000,0.000609,0.093406,0.00022,0.652305,0.652330,0.652313,0.652305
79997,0.65237,0.65263,0.65230,0.65246,0.653649,0.652413,0.651178,0.652298,0.652227,0.652467,...,0.005557,46.672612,-82.236842,0.000626,0.096174,0.00102,0.651765,0.651755,0.651603,0.651528


In [21]:
indicators.remove('close')

In [22]:
for i in indicators:
    data_supervised.drop(columns=[i+'(t+1)'],inplace=True)

In [23]:
data_supervised.shape

(79996, 285)

## 3. XGBoost

In [24]:
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

In [25]:
training_fraction = 0.8
training = int(training_fraction * int(data_supervised.shape[0]))
training_data_sp = data_supervised[:training]
test_data_sp = data_supervised[training:]


In [26]:
features = data_supervised.columns.tolist()
features.remove('close(t+1)')

X_train = training_data_sp[features]
y_train = training_data_sp[['close(t+1)']]
X_test = test_data_sp[features]
y_test = test_data_sp[['close(t+1)']]

In [27]:
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)

xgb_rmse = mean_squared_error(y_true=y_test, y_pred=xgb_pred, squared=False)
xgb_mae = mean_absolute_error(y_true=y_test, y_pred=xgb_pred)
xgb_mape = mean_absolute_percentage_error(y_true=y_test, y_pred=xgb_pred)

print('Evaluation results')
print(f'RMSE: {xgb_rmse:.6f}')
print(f'MAE : {xgb_mae:.6f}')
print(f'MAPE: {xgb_mape:.6f}')

Evaluation results
RMSE: 0.026227
MAE : 0.013154
MAPE: 0.021318


In [28]:
xgb_fi_df =  pd.DataFrame(columns=['Features', 'Importance'])
xgb_fi_df.Features = [f for f in features]
xgb_fi_df.Importance = xgb.feature_importances_
xgb_fi_df.sort_values(by='Importance', ascending=False)

Unnamed: 0,Features,Importance
283,wclprice(t),0.650174
216,close(t),0.175095
282,typprice(t),0.170059
212,wclprice(t-1),0.001661
280,avgprice(t),0.001254
...,...,...
156,sar(t-1),0.000000
157,sarext(t-1),0.000000
158,sma5(t-1),0.000000
159,sma10(t-1),0.000000


In [29]:
xgb_fi_df[xgb_fi_df['Importance'] > 0].sort_values(by='Importance', ascending=False)[:10]

Unnamed: 0,Features,Importance
283,wclprice(t),0.650174
216,close(t),0.175095
282,typprice(t),0.170059
212,wclprice(t-1),0.001661
280,avgprice(t),0.001254
214,high(t),0.000476
3,close(t-3),0.000356
140,typprice(t-2),0.000147
215,low(t),3.2e-05
230,sma10(t),3.1e-05


## 4. Random Forest

In [30]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor()
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

rf_rmse = mean_squared_error(y_true=y_test, y_pred=rf_pred, squared=False)
rf_mae = mean_absolute_error(y_true=y_test, y_pred=rf_pred)
rf_mape = mean_absolute_percentage_error(y_true=y_test, y_pred=rf_pred)

print('Evaluation results')
print(f'RMSE: {rf_rmse:.6f}')
print(f'MAE : {rf_mae:.6f}')
print(f'MAPE: {rf_mape:.6f}')

  rf.fit(X_train, y_train)


Evaluation results
RMSE: 0.026180
MAE : 0.013044
MAPE: 0.021152


In [31]:
rf_fi_df =  pd.DataFrame(columns=['Features', 'Importance'])
rf_fi_df.Features = [f for f in features]
rf_fi_df.Importance = rf.feature_importances_
rf_fi_df.sort_values(by='Importance', ascending=False)

Unnamed: 0,Features,Importance
216,close(t),0.540558
283,wclprice(t),0.354558
282,typprice(t),0.076035
280,avgprice(t),0.026809
214,high(t),0.001281
...,...,...
14,sar(t-3),0.000000
15,sarext(t-3),0.000000
156,sar(t-1),0.000000
227,sar(t),0.000000


In [32]:
rf_fi_df.sort_values(by='Importance', ascending=False)[:10]

Unnamed: 0,Features,Importance
216,close(t),0.540558
283,wclprice(t),0.354558
282,typprice(t),0.076035
280,avgprice(t),0.026809
214,high(t),0.001281
281,medprice(t),0.000438
215,low(t),0.000207
247,dx(t),2e-06
208,trange(t-1),2e-06
279,trange(t),2e-06


In [35]:
rf_fi_df =  pd.DataFrame(columns=['Features', 'Importance'])
rf_fi_df.Features = [f for f in features]
rf_fi_df.Importance = rf.feature_importances_
rf_fi_df

Unnamed: 0,Features,Importance
0,open(t-3),8.115607e-07
1,high(t-3),6.353195e-07
2,low(t-3),6.634952e-07
3,close(t-3),4.473910e-07
4,upperband(t-3),3.714295e-07
...,...,...
279,trange(t),1.800633e-06
280,avgprice(t),2.680901e-02
281,medprice(t),4.375500e-04
282,typprice(t),7.603469e-02


In [36]:
rf_fi_df.to_csv('rf_feature_importances.csv', index=False)

In [37]:
xgb_fi_df =  pd.DataFrame(columns=['Features', 'Importance'])
xgb_fi_df.Features = [f for f in features]
xgb_fi_df.Importance = xgb.feature_importances_
xgb_fi_df

Unnamed: 0,Features,Importance
0,open(t-3),0.000013
1,high(t-3),0.000005
2,low(t-3),0.000002
3,close(t-3),0.000356
4,upperband(t-3),0.000022
...,...,...
279,trange(t),0.000004
280,avgprice(t),0.001254
281,medprice(t),0.000018
282,typprice(t),0.170059


In [38]:
xgb_fi_df.to_csv('xgb_feature_importances.csv', index=False)

In [40]:
pd.read_csv('rf_feature_importances.csv')

Unnamed: 0,Features,Importance
0,open(t-3),8.115607e-07
1,high(t-3),6.353195e-07
2,low(t-3),6.634952e-07
3,close(t-3),4.473910e-07
4,upperband(t-3),3.714295e-07
...,...,...
279,trange(t),1.800633e-06
280,avgprice(t),2.680901e-02
281,medprice(t),4.375500e-04
282,typprice(t),7.603469e-02
