## Feature engineering

What Drives the Price of Gasoline? 
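Gasoline prices can fluctuate for many reasons, but the most important ones include the following (the candidate proxy feature for each driver is given in parentheses after `~`; `?` means no proxy has been identified yet):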
1. Crude oil prices (~crude oil price t-1)
2. Refining costs and profits (?)
3. Distribution and miscellaneous costs (?)
4. Seasonal demand (~time)
5. Political events (~Google News --> high noise and close to stationary)
6. Global demand picture (~S&P 500)

Source: https://commodity.com/energy/rbob-gasoline/

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.pylab as pylab
plt.style.use('bmh')
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (20, 10),
         'axes.labelsize': 'x-large',
         'axes.titlesize':'x-large',
         'xtick.labelsize':'x-large',
         'ytick.labelsize':'x-large'}
from IPython.core.display import display, HTML
pylab.rcParams.update(params)
display(HTML("<style>.container { width:80% !important; }</style>"))
pd.set_option('display.max_columns', None)

In [2]:
# Load the daily RBOB gasoline price file. Its headers are not in English, so
# they are mapped to English names first. Note that "Najwyzszy" -- the column
# the crude-oil cell maps to "High" -- is renamed straight to "y" here: it is
# the only column kept below (presumably the modelling target -- confirm).
column_names = {"Otwarcie":"Open","Najwyzszy":"y","Najnizszy":"Low","Zamkniecie":"Close","Data":"Date"}
rbob_raw = pd.read_csv("rb_c_d.csv").rename(columns=column_names)
rbob_raw["Date"] = pd.to_datetime(rbob_raw["Date"])

# Keep observations from 2010 onwards, index by date, and retain only `y`.
from_2010 = rbob_raw["Date"] >= "2010-01-01"
df = rbob_raw.loc[from_2010].set_index("Date")[["y"]]

### Time

In [3]:
# Calendar features derived from the DatetimeIndex. They are known in advance
# for any date, which is why they are absent from the `to_shift` list used in
# the final adjustment.
df["day_of_week"] = df.index.dayofweek
df["day_of_year"] = df.index.dayofyear
# `DatetimeIndex.week` is deprecated (and removed in pandas 2.0);
# `isocalendar().week` gives the same ISO week number. It comes back as a
# UInt32 Series, so cast to int64 and assign the raw values positionally to
# keep the column identical to what `.week` used to produce.
df["week"] = df.index.isocalendar().week.astype("int64").to_numpy()
df["quarter"] = df.index.quarter
df["month"] = df.index.month

In [4]:
# Preview the target together with the newly added calendar columns.
df.head()

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,2.0856,0,4,1,1,1
2010-01-05,2.1137,1,5,1,1,1
2010-01-06,2.1228,2,6,1,1,1
2010-01-07,2.12,3,7,1,1,1
2010-01-08,2.145,4,8,1,1,1


### S&P

In [5]:
# Load the daily S&P 500 file (the "global demand picture" proxy from the intro list).
df_sandp = pd.read_csv("spx_d.csv")

In [6]:
# Parse the date strings and promote them to the index so the frame can be
# aligned with the gasoline data by date.
df_sandp = df_sandp.assign(Date=pd.to_datetime(df_sandp["Date"])).set_index("Date")

In [7]:
# Prefix every column with "S&P_" so the names stay unambiguous after merging.
df_sandp = df_sandp.add_prefix("S&P_")

In [8]:
# Left join on the date index: every gasoline row is kept, and dates missing
# from the S&P file end up as NaN in the S&P_ columns.
df = df.merge(df_sandp, how="left", left_index=True, right_index=True)

### Crude Oil

In [9]:
# Load the crude-oil file, translate its headers to English and keep only the
# date and the closing price.
co_column_names = {"Otwarcie":"Open","Najwyzszy":"High","Najnizszy":"Low","Zamkniecie":"Close","Data":"Date"}
df_co = pd.read_csv("cl_c_d.csv").rename(columns=co_column_names)
df_co = df_co.loc[:, ["Date", "Close"]]

In [10]:
# Parse the dates and use them as the index so the frame can be joined by date.
# `df_co` was produced by selecting a subset of columns, so writing a column
# into it in place can raise SettingWithCopyWarning; building a new frame with
# `assign` + `set_index` gives the same result without mutating a derived frame.
df_co = df_co.assign(Date=pd.to_datetime(df_co["Date"])).set_index("Date")

In [11]:
# Prefix with "CO_" so the single remaining column becomes "CO_Close".
df_co = df_co.add_prefix("CO_")

In [12]:
# Left join on the date index: gasoline rows are preserved, and dates absent
# from the crude-oil file get NaN in CO_Close.
df = df.merge(df_co, how="left", left_index=True, right_index=True)

In [13]:
# The auxiliary frames are already merged into `df`; drop the names to free
# memory and avoid accidental reuse further down.
del df_sandp, df_co

### Gasoline RBOB - futures

In [14]:
# Load the RBOB futures file, restrict it to 2010 onwards, index it by date and
# prefix every column with "RBOB_f__" before left-joining it onto `df`.
tmp = pd.read_csv("rb_f_d.csv")
tmp["Date"] = pd.to_datetime(tmp["Date"])
futures_from_2010 = tmp.loc[tmp["Date"] >= "2010-01-01"].set_index("Date")
tmp = futures_from_2010.add_prefix("RBOB_f__")
df = df.merge(tmp, how="left", left_index=True, right_index=True)

### DF

In [15]:
# Inspect the merged frame: target, calendar, S&P, crude-oil and futures columns.
df.head()

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month,S&P_Open,S&P_High,S&P_Low,S&P_Close,S&P_Volume,CO_Close,RBOB_f__Open,RBOB_f__High,RBOB_f__Low,RBOB_f__Close,RBOB_f__Volume,RBOB_f__OpenInt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2010-01-04,2.0856,0,4,1,1,1,1115.1,1133.87,1115.1,1132.99,820109000.0,81.51,2.075,2.1136,2.0671,2.1044,66994,232937.0
2010-01-05,2.1137,1,5,1,1,1,1132.99,1136.63,1129.66,1136.52,974375200.0,81.77,2.1111,2.1315,2.1004,2.125,78884,239903.0
2010-01-06,2.1228,2,6,1,1,1,1136.52,1139.19,1133.95,1137.14,915084700.0,83.18,2.1187,2.1462,2.0933,2.1366,101809,246666.0
2010-01-07,2.12,3,7,1,1,1,1137.14,1142.46,1131.32,1141.69,1046512000.0,82.66,2.14,2.1455,2.1212,2.1349,92094,251785.0
2010-01-08,2.145,4,8,1,1,1,1141.69,1145.39,1136.22,1144.98,860963300.0,82.75,2.137,2.172,2.1162,2.1553,81527,256842.0


In [16]:
# Spread between the gasoline price and the crude-oil close scaled down by 42.
# NOTE(review): 42 is presumably the number of US gallons in a barrel, i.e. it
# converts a per-barrel crude quote to per-gallon so both terms share a unit --
# confirm the units of both input series.
GALLONS_PER_BARREL = 42
df["RBOB_minus_CO"] = df["y"] - df["CO_Close"] / GALLONS_PER_BARREL

In [17]:
# Check the most recent rows, including the new RBOB_minus_CO column.
df.tail()

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month,S&P_Open,S&P_High,S&P_Low,S&P_Close,S&P_Volume,CO_Close,RBOB_f__Open,RBOB_f__High,RBOB_f__Low,RBOB_f__Close,RBOB_f__Volume,RBOB_f__OpenInt,RBOB_minus_CO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-11-22,1.6754,4,326,47,4,11,3111.41,3112.87,3099.26,3110.29,418027927.0,57.88,1.6854,1.7036,1.6608,1.6698,217784,414291.0,0.297305
2019-11-25,1.6743,0,329,48,4,11,3117.44,3133.83,3117.44,3133.64,513728761.0,57.98,1.6698,1.6766,1.6465,1.6715,197810,397040.0,0.293824
2019-11-26,1.7034,1,330,48,4,11,3134.85,3142.69,3131.0,3140.52,986041660.0,58.38,1.67,1.7015,1.6683,1.6976,186722,397582.0,0.3134
2019-11-27,1.6791,2,331,48,4,11,3145.49,3154.26,3143.41,3153.63,421853938.0,58.12,1.6916,1.6983,1.6594,1.6765,189449,383413.0,0.29529
2019-11-29,1.6012,4,333,48,4,11,3147.18,3150.3,3139.34,3140.98,,55.42,1.6764,1.6885,1.5877,1.591,154868,,0.281676


In [18]:
# Lagged copies of the target: y_shif<k> holds y from k rows earlier (k = 1..5).
for lag in range(1, 6):
    df[f"y_shif{lag}"] = df["y"].shift(lag)

In [19]:
# Rolling mean and standard deviation of the target over several window
# lengths (in rows). Each window includes the current row; the features are
# lagged later in the "Final Adjustment" section.
# The two dicts are ordered so that columns are created in the original order.
mean_windows = {"week": 5, "month": 21, "2days": 2, "3days": 3}
std_windows = {"week": 5, "month": 21, "3days": 3, "2days": 2}

for label, window in mean_windows.items():
    df[f"rolling_mean_{label}"] = df["y"].rolling(window).mean()

for label, window in std_windows.items():
    df[f"rolling_std_{label}"] = df["y"].rolling(window).std()

In [20]:
# Row-over-row relative change of the target: y_t / y_{t-1} - 1.
df["y_rr"] = df.y.pct_change()

In [21]:
# The first rows show the NaN warm-up period of the lag and rolling features.
df.head()

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month,S&P_Open,S&P_High,S&P_Low,S&P_Close,S&P_Volume,CO_Close,RBOB_f__Open,RBOB_f__High,RBOB_f__Low,RBOB_f__Close,RBOB_f__Volume,RBOB_f__OpenInt,RBOB_minus_CO,y_shif1,y_shif2,y_shif3,y_shif4,y_shif5,rolling_mean_week,rolling_mean_month,rolling_mean_2days,rolling_mean_3days,rolling_std_week,rolling_std_month,rolling_std_3days,rolling_std_2days,y_rr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
2010-01-04,2.0856,0,4,1,1,1,1115.1,1133.87,1115.1,1132.99,820109000.0,81.51,2.075,2.1136,2.0671,2.1044,66994,232937.0,0.144886,,,,,,,,,,,,,,
2010-01-05,2.1137,1,5,1,1,1,1132.99,1136.63,1129.66,1136.52,974375200.0,81.77,2.1111,2.1315,2.1004,2.125,78884,239903.0,0.166795,2.0856,,,,,,,2.09965,,,,,0.01987,0.013473
2010-01-06,2.1228,2,6,1,1,1,1136.52,1139.19,1133.95,1137.14,915084700.0,83.18,2.1187,2.1462,2.0933,2.1366,101809,246666.0,0.142324,2.1137,2.0856,,,,,,2.11825,2.107367,,,0.019392,0.006435,0.004305
2010-01-07,2.12,3,7,1,1,1,1137.14,1142.46,1131.32,1141.69,1046512000.0,82.66,2.14,2.1455,2.1212,2.1349,92094,251785.0,0.151905,2.1228,2.1137,2.0856,,,,,2.1214,2.118833,,,0.004661,0.00198,-0.001319
2010-01-08,2.145,4,8,1,1,1,1141.69,1145.39,1136.22,1144.98,860963300.0,82.75,2.137,2.172,2.1162,2.1553,81527,256842.0,0.174762,2.12,2.1228,2.1137,2.0856,,2.11742,,2.1325,2.129267,0.021346,,0.013697,0.017678,0.011792


### Technical Analysis

In [22]:
import talib

In [23]:
# List every TA-Lib function, grouped by category, to choose indicators from.
for group_name, function_names in talib.get_function_groups().items():
    print(group_name, ":", function_names, '\n')

Cycle Indicators : ['HT_DCPERIOD', 'HT_DCPHASE', 'HT_PHASOR', 'HT_SINE', 'HT_TRENDMODE'] 

Math Operators : ['ADD', 'DIV', 'MAX', 'MAXINDEX', 'MIN', 'MININDEX', 'MINMAX', 'MINMAXINDEX', 'MULT', 'SUB', 'SUM'] 

Math Transform : ['ACOS', 'ASIN', 'ATAN', 'CEIL', 'COS', 'COSH', 'EXP', 'FLOOR', 'LN', 'LOG10', 'SIN', 'SINH', 'SQRT', 'TAN', 'TANH'] 

Momentum Indicators : ['ADX', 'ADXR', 'APO', 'AROON', 'AROONOSC', 'BOP', 'CCI', 'CMO', 'DX', 'MACD', 'MACDEXT', 'MACDFIX', 'MFI', 'MINUS_DI', 'MINUS_DM', 'MOM', 'PLUS_DI', 'PLUS_DM', 'PPO', 'ROC', 'ROCP', 'ROCR', 'ROCR100', 'RSI', 'STOCH', 'STOCHF', 'STOCHRSI', 'TRIX', 'ULTOSC', 'WILLR'] 

Overlap Studies : ['BBANDS', 'DEMA', 'EMA', 'HT_TRENDLINE', 'KAMA', 'MA', 'MAMA', 'MAVP', 'MIDPOINT', 'MIDPRICE', 'SAR', 'SAREXT', 'SMA', 'T3', 'TEMA', 'TRIMA', 'WMA'] 

Pattern Recognition : ['CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD

In [24]:
# Add one column per TA-Lib candlestick-pattern function, computed on the
# futures open/high/low/close series. Columns are named "RBOB_f_<FUNCTION>".
# The function is looked up with getattr instead of building a source string
# and eval()-ing it: same call, no dynamic code execution.
futures_ohlc = (df.RBOB_f__Open, df.RBOB_f__High, df.RBOB_f__Low, df.RBOB_f__Close)
for pattern_name in talib.get_function_groups()['Pattern Recognition']:
    pattern_func = getattr(talib, pattern_name)
    df[f'RBOB_f_{pattern_name}'] = pattern_func(*futures_ohlc)

In [25]:
# A handful of momentum indicators with TA-Lib's default parameters.
# All are computed on the futures series except RSI, which uses the target y.
fut_open = df.RBOB_f__Open
fut_high = df.RBOB_f__High
fut_low = df.RBOB_f__Low
fut_close = df.RBOB_f__Close

df["CCI"] = talib.CCI(fut_high, fut_low, fut_close)
df["BOP"] = talib.BOP(fut_open, fut_high, fut_low, fut_close)
df["WILLR"] = talib.WILLR(fut_high, fut_low, fut_close)
df["RSI"] = talib.RSI(df.y)
df["PLUS_DM"] = talib.PLUS_DM(fut_high, fut_low)

In [26]:
# Add every TA-Lib "Overlap Studies" function that can be called with a single
# price series, applied to the target y with default parameters.
# NOTE(review): the columns are prefixed "RBOB_f_" although they are computed
# from y, not from the futures series; the name is kept because the hard-coded
# `to_shift` list refers to it.
for overlap_name in talib.get_function_groups()['Overlap Studies']:
    try:
        # getattr dispatch instead of eval() on a generated source string.
        df[f'RBOB_f_{overlap_name}'] = getattr(talib, overlap_name)(df.y)
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit. Functions that cannot be used this way are skipped and
        # their name is printed (presumably they need extra inputs or return
        # several series -- confirm against the TA-Lib docs).
        print(overlap_name)

BBANDS
MAMA
MAVP
MIDPRICE
SAR
SAREXT


In [27]:
# Sanity check on the size of the feature matrix (rows, columns).
df.shape

(2490, 110)

### Final Adjustment

In [28]:
# Columns that must NOT be lagged: the target itself and the calendar
# features, which are known in advance for any date.
not_shifted = ["y", "day_of_week", "day_of_year", "week", "quarter", "month"]

# Every other column is built from market data and has to be lagged before
# modelling. Deriving the list from the frame (instead of hard-coding each
# name) guarantees that a feature added later -- e.g. an indicator exposed by a
# different TA-Lib version -- cannot silently stay unlagged and leak same-day
# information. Column order follows `df.columns`.
to_shift = [column for column in df.columns if column not in not_shifted]

In [29]:
# Snapshot of the first rows before the features are lagged.
df.head(5)

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month,S&P_Open,S&P_High,S&P_Low,S&P_Close,S&P_Volume,CO_Close,RBOB_f__Open,RBOB_f__High,RBOB_f__Low,RBOB_f__Close,RBOB_f__Volume,RBOB_f__OpenInt,RBOB_minus_CO,y_shif1,y_shif2,y_shif3,y_shif4,y_shif5,rolling_mean_week,rolling_mean_month,rolling_mean_2days,rolling_mean_3days,rolling_std_week,rolling_std_month,rolling_std_3days,rolling_std_2days,y_rr,RBOB_f_CDL2CROWS,RBOB_f_CDL3BLACKCROWS,RBOB_f_CDL3INSIDE,RBOB_f_CDL3LINESTRIKE,RBOB_f_CDL3OUTSIDE,RBOB_f_CDL3STARSINSOUTH,RBOB_f_CDL3WHITESOLDIERS,RBOB_f_CDLABANDONEDBABY,RBOB_f_CDLADVANCEBLOCK,RBOB_f_CDLBELTHOLD,RBOB_f_CDLBREAKAWAY,RBOB_f_CDLCLOSINGMARUBOZU,RBOB_f_CDLCONCEALBABYSWALL,RBOB_f_CDLCOUNTERATTACK,RBOB_f_CDLDARKCLOUDCOVER,RBOB_f_CDLDOJI,RBOB_f_CDLDOJISTAR,RBOB_f_CDLDRAGONFLYDOJI,RBOB_f_CDLENGULFING,RBOB_f_CDLEVENINGDOJISTAR,RBOB_f_CDLEVENINGSTAR,RBOB_f_CDLGAPSIDESIDEWHITE,RBOB_f_CDLGRAVESTONEDOJI,RBOB_f_CDLHAMMER,RBOB_f_CDLHANGINGMAN,RBOB_f_CDLHARAMI,RBOB_f_CDLHARAMICROSS,RBOB_f_CDLHIGHWAVE,RBOB_f_CDLHIKKAKE,RBOB_f_CDLHIKKAKEMOD,RBOB_f_CDLHOMINGPIGEON,RBOB_f_CDLIDENTICAL3CROWS,RBOB_f_CDLINNECK,RBOB_f_CDLINVERTEDHAMMER,RBOB_f_CDLKICKING,RBOB_f_CDLKICKINGBYLENGTH,RBOB_f_CDLLADDERBOTTOM,RBOB_f_CDLLONGLEGGEDDOJI,RBOB_f_CDLLONGLINE,RBOB_f_CDLMARUBOZU,RBOB_f_CDLMATCHINGLOW,RBOB_f_CDLMATHOLD,RBOB_f_CDLMORNINGDOJISTAR,RBOB_f_CDLMORNINGSTAR,RBOB_f_CDLONNECK,RBOB_f_CDLPIERCING,RBOB_f_CDLRICKSHAWMAN,RBOB_f_CDLRISEFALL3METHODS,RBOB_f_CDLSEPARATINGLINES,RBOB_f_CDLSHOOTINGSTAR,RBOB_f_CDLSHORTLINE,RBOB_f_CDLSPINNINGTOP,RBOB_f_CDLSTALLEDPATTERN,RBOB_f_CDLSTICKSANDWICH,RBOB_f_CDLTAKURI,RBOB_f_CDLTASUKIGAP,RBOB_f_CDLTHRUSTING,RBOB_f_CDLTRISTAR,RBOB_f_CDLUNIQUE3RIVER,RBOB_f_CDLUPSIDEGAP2CROWS,RBOB_f_CDLXSIDEGAP3METHODS,CCI,BOP,WILLR,RSI,PLUS_DM,RBOB_f_DEMA,RBOB_f_EMA,RBOB_f_HT_TRENDLINE,RBOB_f_KAMA,RBOB_f_MA,RBOB_f_MIDPOINT,RBOB_f_SMA,RBOB_f_T3,RBOB_f_TEMA,RBOB_f_TRIMA,RBOB_f_WMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1
2010-01-04,2.0856,0,4,1,1,1,1115.1,1133.87,1115.1,1132.99,820109000.0,81.51,2.075,2.1136,2.0671,2.1044,66994,232937.0,0.144886,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0.632258,,,,,,,,,,,,,,
2010-01-05,2.1137,1,5,1,1,1,1132.99,1136.63,1129.66,1136.52,974375200.0,81.77,2.1111,2.1315,2.1004,2.125,78884,239903.0,0.166795,2.0856,,,,,,,2.09965,,,,,0.01987,0.013473,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0.446945,,,,,,,,,,,,,,
2010-01-06,2.1228,2,6,1,1,1,1136.52,1139.19,1133.95,1137.14,915084700.0,83.18,2.1187,2.1462,2.0933,2.1366,101809,246666.0,0.142324,2.1137,2.0856,,,,,,2.11825,2.107367,,,0.019392,0.006435,0.004305,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0.338374,,,,,,,,,,,,,,
2010-01-07,2.12,3,7,1,1,1,1137.14,1142.46,1131.32,1141.69,1046512000.0,82.66,2.14,2.1455,2.1212,2.1349,92094,251785.0,0.151905,2.1228,2.1137,2.0856,,,,,2.1214,2.118833,,,0.004661,0.00198,-0.001319,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,-0.209877,,,,,,,,,,,,,,
2010-01-08,2.145,4,8,1,1,1,1141.69,1145.39,1136.22,1144.98,860963300.0,82.75,2.137,2.172,2.1162,2.1553,81527,256842.0,0.174762,2.12,2.1228,2.1137,2.0856,,2.11742,,2.1325,2.129267,0.021346,,0.013697,0.017678,0.011792,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,0.327957,,,,,,,,,,,,,,


In [30]:
# Move every feature in `to_shift` down by one row, so the row for a given date
# carries the feature values of the previous row while `y` stays in place.
# NOTE(review): this cell is not idempotent -- running it again lags the
# features by an additional row.
# NOTE(review): y_shif1..y_shif5 are part of `to_shift` and were already lagged,
# so after this step they hold lags 2..6 of y -- confirm that this is intended.
df[to_shift] = df[to_shift].shift(1)

In [31]:
# Keep rows dated 2010-07-01 or later (presumably to discard the initial
# period in which rolling / indicator features are still NaN -- confirm).
df = df.loc[df.index >= "2010-07-01"]

In [32]:
# Inspect the last rows after lagging and trimming the frame.
df.tail()

Unnamed: 0_level_0,y,day_of_week,day_of_year,week,quarter,month,S&P_Open,S&P_High,S&P_Low,S&P_Close,S&P_Volume,CO_Close,RBOB_f__Open,RBOB_f__High,RBOB_f__Low,RBOB_f__Close,RBOB_f__Volume,RBOB_f__OpenInt,RBOB_minus_CO,y_shif1,y_shif2,y_shif3,y_shif4,y_shif5,rolling_mean_week,rolling_mean_month,rolling_mean_2days,rolling_mean_3days,rolling_std_week,rolling_std_month,rolling_std_3days,rolling_std_2days,y_rr,RBOB_f_CDL2CROWS,RBOB_f_CDL3BLACKCROWS,RBOB_f_CDL3INSIDE,RBOB_f_CDL3LINESTRIKE,RBOB_f_CDL3OUTSIDE,RBOB_f_CDL3STARSINSOUTH,RBOB_f_CDL3WHITESOLDIERS,RBOB_f_CDLABANDONEDBABY,RBOB_f_CDLADVANCEBLOCK,RBOB_f_CDLBELTHOLD,RBOB_f_CDLBREAKAWAY,RBOB_f_CDLCLOSINGMARUBOZU,RBOB_f_CDLCONCEALBABYSWALL,RBOB_f_CDLCOUNTERATTACK,RBOB_f_CDLDARKCLOUDCOVER,RBOB_f_CDLDOJI,RBOB_f_CDLDOJISTAR,RBOB_f_CDLDRAGONFLYDOJI,RBOB_f_CDLENGULFING,RBOB_f_CDLEVENINGDOJISTAR,RBOB_f_CDLEVENINGSTAR,RBOB_f_CDLGAPSIDESIDEWHITE,RBOB_f_CDLGRAVESTONEDOJI,RBOB_f_CDLHAMMER,RBOB_f_CDLHANGINGMAN,RBOB_f_CDLHARAMI,RBOB_f_CDLHARAMICROSS,RBOB_f_CDLHIGHWAVE,RBOB_f_CDLHIKKAKE,RBOB_f_CDLHIKKAKEMOD,RBOB_f_CDLHOMINGPIGEON,RBOB_f_CDLIDENTICAL3CROWS,RBOB_f_CDLINNECK,RBOB_f_CDLINVERTEDHAMMER,RBOB_f_CDLKICKING,RBOB_f_CDLKICKINGBYLENGTH,RBOB_f_CDLLADDERBOTTOM,RBOB_f_CDLLONGLEGGEDDOJI,RBOB_f_CDLLONGLINE,RBOB_f_CDLMARUBOZU,RBOB_f_CDLMATCHINGLOW,RBOB_f_CDLMATHOLD,RBOB_f_CDLMORNINGDOJISTAR,RBOB_f_CDLMORNINGSTAR,RBOB_f_CDLONNECK,RBOB_f_CDLPIERCING,RBOB_f_CDLRICKSHAWMAN,RBOB_f_CDLRISEFALL3METHODS,RBOB_f_CDLSEPARATINGLINES,RBOB_f_CDLSHOOTINGSTAR,RBOB_f_CDLSHORTLINE,RBOB_f_CDLSPINNINGTOP,RBOB_f_CDLSTALLEDPATTERN,RBOB_f_CDLSTICKSANDWICH,RBOB_f_CDLTAKURI,RBOB_f_CDLTASUKIGAP,RBOB_f_CDLTHRUSTING,RBOB_f_CDLTRISTAR,RBOB_f_CDLUNIQUE3RIVER,RBOB_f_CDLUPSIDEGAP2CROWS,RBOB_f_CDLXSIDEGAP3METHODS,CCI,BOP,WILLR,RSI,PLUS_DM,RBOB_f_DEMA,RBOB_f_EMA,RBOB_f_HT_TRENDLINE,RBOB_f_KAMA,RBOB_f_MA,RBOB_f_MIDPOINT,RBOB_f_SMA,RBOB_f_T3,RBOB_f_TEMA,RBOB_f_TRIMA,RBOB_f_WMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1
2019-11-22,1.6754,4,326,47,4,11,3108.49,3110.11,3094.55,3103.54,476836171.0,58.58,1.6474,1.7,1.6358,1.6983,177670.0,425127.0,0.309138,1.6571,1.6031,1.6221,1.6337,1.6154,1.64398,1.64449,1.6805,1.6547,0.038788,0.029491,0.050443,0.033093,0.028242,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,164.23811,0.792835,-1.430976,60.810694,0.134242,1.637104,1.636823,1.639892,1.644171,1.637757,1.6535,1.637757,1.633362,1.65179,1.6416,1.639888
2019-11-25,1.6743,0,329,48,4,11,3111.41,3112.87,3099.26,3110.29,418027927.0,57.88,1.6854,1.7036,1.6608,1.6698,217784.0,414291.0,0.297305,1.7039,1.6571,1.6031,1.6221,1.6337,1.65232,1.645043,1.68965,1.6788,0.040472,0.029976,0.023585,0.020153,-0.016726,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.468024,-0.364486,-27.614379,55.681382,0.128253,1.641903,1.639312,1.639728,1.644488,1.63897,1.6535,1.63897,1.647181,1.657803,1.641825,1.642316
2019-11-26,1.7034,1,330,48,4,11,3117.44,3133.83,3117.44,3133.64,513728761.0,57.98,1.6698,1.6766,1.6465,1.6715,197810.0,397040.0,0.293824,1.6754,1.7039,1.6571,1.6031,1.6221,1.66276,1.64521,1.67485,1.684533,0.037339,0.030135,0.016781,0.000778,-0.000657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,111.834294,0.056478,-26.22549,55.486845,0.119092,1.646105,1.641569,1.640083,1.644979,1.64102,1.6535,1.64102,1.659193,1.662798,1.642025,1.644596
2019-11-27,1.6791,2,331,48,4,11,3134.85,3142.69,3131.0,3140.52,986041660.0,58.38,1.67,1.7015,1.6683,1.6976,186722.0,397582.0,0.3134,1.6743,1.6754,1.7039,1.6571,1.6031,1.68282,1.646624,1.68885,1.684367,0.020354,0.032168,0.016493,0.020577,0.01738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.420833,0.831325,-4.901961,59.516389,0.135486,1.653533,1.645558,1.642743,1.646352,1.644013,1.6535,1.644013,1.671547,1.672366,1.642126,1.64862
2019-11-29,1.6012,4,333,48,4,11,3145.49,3154.26,3143.41,3153.63,421853938.0,58.12,1.6916,1.6983,1.6594,1.6765,189449.0,383413.0,0.29529,1.7034,1.6743,1.6754,1.7039,1.6571,1.68722,1.646419,1.69125,1.6856,0.015105,0.031935,0.015601,0.017183,-0.014266,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94.421246,-0.388175,-22.140523,55.036032,0.125808,1.657207,1.647722,1.645137,1.64681,1.645867,1.6535,1.645867,1.68034,1.676237,1.642187,1.650884


In [33]:
# Forward-fill remaining gaps (e.g. dates missing from a left-joined source)
# with the last known value, which only ever uses past information.
# `fillna(method='ffill')` is deprecated in recent pandas, and filling in place
# on a frame produced by boolean filtering can raise SettingWithCopyWarning;
# `ffill()` returns a new frame with the same values.
df = df.ffill()

In [35]:
# Persist the final feature matrix; the Date index is written as the first column.
df.to_csv("ml_dataset.csv")