In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting.
# autoreload mode 2 re-imports modules before each cell runs, so edits to local
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inside the notebook output.
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt

# Default size applied to every figure created in this notebook (width, height).
plt.rcParams.update({'figure.figsize': [18.0, 12.0]})

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [4]:
# import my Finance Analysis Tools (fat)
import finance as fat

In [5]:
# Ticker symbol under analysis; the directory and file names below are derived from it
# so that changing the ticker cannot silently keep pointing at another ticker's files.
TICKER = 'IAU'
# Timestamp portion of the predictions file name (identifies which prediction run to load).
PREDICTIONS_STAMP = '20180223-003722'
# Directory the predictions CSV is read from.
PATH = f'data/{TICKER}-test'
# Name of the predictions CSV inside PATH.
FILE = f'{TICKER}-predictions-{PREDICTIONS_STAMP}.csv'

In [9]:
# Load the price history for TICKER through the project's `finance` helper module.
# NOTE(review): presumably a Date-indexed DataFrame with Open/High/Low/Close/Adj Close/Volume
# columns, as the tables further down suggest — confirm against fat.get_price_data.
data = fat.get_price_data(TICKER)

Loaded data for IAU: 2005-01-28 to 2018-02-21.


In [12]:
# Pull the following row's open and adjusted close onto each row (the last row gets NaN).
data['Next Day Open'] = data['Open'].shift(periods=-1)
data['Next Day Adj Close'] = data['Adj Close'].shift(periods=-1)
# Gain of the following row: its adjusted close minus its open, in price units and
# as a percentage of that open.
# NOTE(review): the tables shown after the predictions are joined have no 'Next Day Open'
# column and their 'Next Day Gain' equals Next Day Adj Close - Adj Close, so they appear to
# come from a different version of this cell — confirm which definition is intended.
data['Next Day Gain'] = data['Next Day Adj Close'].sub(data['Next Day Open'])
data['Next Day % Gain'] = data['Next Day Gain'].mul(100).div(data['Next Day Open'])

In [14]:
# Rank the rows by next-day percentage gain, largest first, and show the top of the ranking.
# The ranking lives in its own variable so `data` keeps its original row order for the
# cells that follow, and the table is produced by an explicit display expression rather
# than by leftover kernel state.
top_gains = data.sort_values('Next Day % Gain', ascending=False)
top_gains.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Open,Next Day Adj Close,Next Day Gain,Next Day % Gain
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2008-09-16,7.675000,7.735000,7.634000,7.659000,7.659000,5443000,7.717000,8.555000,0.838000,10.859142
2013-09-17,12.750000,12.770000,12.680000,12.720000,12.720000,2327800,12.600000,13.280000,0.680000,5.396825
2009-03-17,9.029000,9.064000,8.985000,9.016000,9.016000,3536000,8.880000,9.322000,0.442000,4.977477
2008-10-23,6.981000,7.222000,6.956000,7.105000,7.105000,8329000,6.923000,7.212000,0.289000,4.174491
2008-10-08,9.081000,9.085000,8.819000,8.975000,8.975000,15305000,8.742000,9.100000,0.358000,4.095173
2008-11-20,7.318000,7.424000,7.249000,7.376000,7.376000,6909000,7.604000,7.915000,0.311000,4.089953
2013-06-27,11.980000,12.000000,11.630000,11.650000,11.650000,10556900,11.570000,11.990000,0.420000,3.630078
2008-11-03,7.155000,7.220000,7.112000,7.125000,7.125000,2876000,7.319000,7.565000,0.246000,3.361115
2008-08-18,7.849000,7.919000,7.785000,7.899000,7.899000,4790000,7.797000,8.059000,0.262000,3.360267
2012-01-24,16.230000,16.290001,16.200001,16.240000,16.240000,3215700,16.160000,16.700001,0.540001,3.341590


In [19]:
# Rows whose next-day percentage gain exceeds 1.0, ordered by index; the cell's
# output is the number of such rows.
big_gain = data[data['Next Day % Gain'].gt(1.0)].sort_index()
len(big_gain)

243

In [20]:
# Display the rows selected into big_gain (next-day gain above 1.0%, ordered by index).
big_gain

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Open,Next Day Adj Close,Next Day Gain,Next Day % Gain
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005-04-18,4.258,4.280,4.258,4.273,4.273,183000,4.280,4.333,0.053,1.238318
2005-06-09,4.233,4.244,4.217,4.232,4.232,151000,4.227,4.270,0.043,1.017270
2005-08-10,4.354,4.366,4.349,4.362,4.362,408000,4.392,4.450,0.058,1.320583
2005-09-20,4.650,4.657,4.628,4.630,4.630,759000,4.663,4.710,0.047,1.007935
2005-09-23,4.614,4.629,4.599,4.628,4.628,486000,4.586,4.658,0.072,1.569996
2005-09-27,4.620,4.636,4.573,4.628,4.628,2277000,4.636,4.685,0.049,1.056946
2005-10-06,4.686,4.730,4.686,4.727,4.727,661000,4.704,4.752,0.048,1.020408
2005-11-15,4.684,4.689,4.662,4.664,4.664,320000,4.727,4.775,0.048,1.015443
2005-12-05,5.051,5.085,5.034,5.078,5.078,1324000,5.040,5.096,0.056,1.111111
2005-12-07,5.125,5.152,5.117,5.140,5.140,1114000,5.136,5.196,0.060,1.168224


In [240]:
# Read the predictions CSV from PATH and index it by its 'Date' column.
predict = pd.read_csv(f'{PATH}/{FILE}').set_index('Date')

In [241]:
# Preview the first five rows of the predictions frame.
predict.head(n=5)

Unnamed: 0_level_0,Month,Week,Day,Dayofweek,Dayofyear,Days since updated gold price usd,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,...,Volume SMA100,Volume SMA200,pct diff Volume SMA3,pct diff Volume SMA5,pct diff Volume SMA10,pct diff Volume SMA20,pct diff Volume SMA50,pct diff Volume SMA100,pct diff Volume SMA200,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-08-21,8,34,21,4,233,20.0,False,False,False,False,...,2549062.0,3641629.5,-15.692302,18.573483,42.078293,49.529427,41.621037,78.60295,25.01821,26.086124
2015-08-24,8,35,24,0,236,23.0,False,False,False,False,...,2604616.0,3638480.0,6.049517,48.946842,109.83934,132.04971,128.14676,192.99904,109.74418,48.456825
2015-08-25,8,35,25,1,237,24.0,False,False,False,False,...,2643510.0,3644522.5,-4.794828,-3.983591,43.462845,62.879505,65.80061,114.26815,55.416794,57.43276
2015-08-26,8,35,26,2,238,25.0,False,False,False,False,...,2644652.0,3642706.0,-46.94225,-52.561325,-25.427914,-18.604883,-17.126911,8.017993,-21.57753,143.16562
2015-08-27,8,35,27,3,239,26.0,False,False,False,False,...,2688441.0,3656939.5,18.409332,5.786842,32.00227,50.331715,58.115566,106.66996,51.93579,50.143867


In [242]:
# Attach the prediction column to the price data, matching rows on the index.
# A copy of the column left behind by an earlier run of this cell is dropped first:
# DataFrame.join raises ValueError when column names overlap, so without the drop this
# cell could only be executed once per kernel session.
PREDICTION_COL = 'Will Close Higher Tomorrow'
data = data.drop(columns=PREDICTION_COL, errors='ignore').join(predict[PREDICTION_COL])

In [243]:
# Discard every row that has a missing value in any column, then preview the first five rows.
data = data.dropna(axis=0, how='any')
data.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Adj Close,Next Day Gain,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-08-21,11.18,11.23,11.11,11.2,11.2,4552700,11.14,-0.06,26.086124
2015-08-24,11.26,11.31,11.07,11.14,11.14,7631500,11.02,-0.12,48.456825
2015-08-25,11.09,11.1,10.96,11.02,11.02,5664200,10.87,-0.15,57.43276
2015-08-26,10.87,10.9,10.8,10.87,10.87,2856700,10.85,-0.02,143.16562
2015-08-27,10.83,10.9,10.8,10.85,10.85,5556200,10.95,0.1,50.143867


In [245]:
# Rows whose 'Will Close Higher Tomorrow' value is at least 100.0, as a standalone frame.
thresh100 = data[data['Will Close Higher Tomorrow'].ge(100.0)].copy()
thresh100

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Adj Close,Next Day Gain,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-08-26,10.87,10.9,10.8,10.87,10.87,2856700,10.85,-0.02,143.16562
2015-12-17,10.18,10.19,10.12,10.15,10.15,10515000,10.29,0.14,102.56679
2015-12-31,10.24,10.27,10.22,10.23,10.23,5240700,10.38,0.15,108.538216
2016-02-22,11.65,11.73,11.64,11.65,11.65,4084700,11.84,0.19,116.74172
2016-03-24,11.78,11.81,11.74,11.75,11.75,26374100,11.78,0.03,104.84429
2016-06-16,12.65,12.69,12.34,12.36,12.36,14666900,12.52,0.16,122.53122
2016-08-24,12.81,12.82,12.75,12.77,12.77,4695300,12.75,-0.02,102.327415
2016-11-14,11.72,11.83,11.67,11.73,11.73,17358800,11.83,0.1,196.81398
2016-11-15,11.77,11.84,11.75,11.83,11.83,9865900,11.79,-0.04,143.4878
2016-11-16,11.84,11.85,11.77,11.79,11.79,9302700,11.74,-0.05,142.97365


In [246]:
# Total 'Next Day Gain' over the rows selected at the 100.0 threshold.
thresh100.loc[:, 'Next Day Gain'].sum()

0.7300000000000022

In [252]:
# Rows whose 'Will Close Higher Tomorrow' value is at least 95.0, as a standalone frame.
thresh95 = data[data['Will Close Higher Tomorrow'].ge(95.0)].copy()
thresh95

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Adj Close,Next Day Gain,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-08-26,10.87,10.9,10.8,10.87,10.87,2856700,10.85,-0.02,143.16562
2015-12-17,10.18,10.19,10.12,10.15,10.15,10515000,10.29,0.14,102.56679
2015-12-31,10.24,10.27,10.22,10.23,10.23,5240700,10.38,0.15,108.538216
2016-02-22,11.65,11.73,11.64,11.65,11.65,4084700,11.84,0.19,116.74172
2016-03-24,11.78,11.81,11.74,11.75,11.75,26374100,11.78,0.03,104.84429
2016-04-04,11.77,11.78,11.72,11.72,11.72,5664300,11.88,0.16,98.224625
2016-06-16,12.65,12.69,12.34,12.36,12.36,14666900,12.52,0.16,122.53122
2016-08-24,12.81,12.82,12.75,12.77,12.77,4695300,12.75,-0.02,102.327415
2016-11-14,11.72,11.83,11.67,11.73,11.73,17358800,11.83,0.1,196.81398
2016-11-15,11.77,11.84,11.75,11.83,11.83,9865900,11.79,-0.04,143.4878


In [253]:
# Total 'Next Day Gain' over the rows selected at the 95.0 threshold.
thresh95.loc[:, 'Next Day Gain'].sum()

1.0100000000000016

In [247]:
# Rows whose 'Will Close Higher Tomorrow' value is at least 90.0, as a standalone frame.
thresh90 = data[data['Will Close Higher Tomorrow'].ge(90.0)].copy()
thresh90

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Adj Close,Next Day Gain,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-08-26,10.87,10.9,10.8,10.87,10.87,2856700,10.85,-0.02,143.16562
2015-12-17,10.18,10.19,10.12,10.15,10.15,10515000,10.29,0.14,102.56679
2015-12-31,10.24,10.27,10.22,10.23,10.23,5240700,10.38,0.15,108.538216
2016-01-14,10.48,10.51,10.34,10.39,10.39,5323600,10.51,0.12,92.996185
2016-02-22,11.65,11.73,11.64,11.65,11.65,4084700,11.84,0.19,116.74172
2016-03-24,11.78,11.81,11.74,11.75,11.75,26374100,11.78,0.03,104.84429
2016-04-01,11.72,11.81,11.66,11.81,11.81,11475200,11.72,-0.09,91.60598
2016-04-04,11.77,11.78,11.72,11.72,11.72,5664300,11.88,0.16,98.224625
2016-05-26,11.87,11.87,11.75,11.76,11.76,4090400,11.68,-0.08,90.13611
2016-06-16,12.65,12.69,12.34,12.36,12.36,14666900,12.52,0.16,122.53122


In [248]:
# Total 'Next Day Gain' over the rows selected at the 90.0 threshold.
thresh90.loc[:, 'Next Day Gain'].sum()

0.9100000000000019

In [249]:
# Rows whose 'Will Close Higher Tomorrow' value is at least 80.0, as a standalone frame.
thresh80 = data[data['Will Close Higher Tomorrow'].ge(80.0)].copy()
thresh80

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Next Day Adj Close,Next Day Gain,Will Close Higher Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-08-26,10.87,10.9,10.8,10.87,10.87,2856700,10.85,-0.02,143.16562
2015-11-05,10.69,10.71,10.65,10.65,10.65,5383600,10.5,-0.15,85.250244
2015-12-17,10.18,10.19,10.12,10.15,10.15,10515000,10.29,0.14,102.56679
2015-12-31,10.24,10.27,10.22,10.23,10.23,5240700,10.38,0.15,108.538216
2016-01-14,10.48,10.51,10.34,10.39,10.39,5323600,10.51,0.12,92.996185
2016-02-22,11.65,11.73,11.64,11.65,11.65,4084700,11.84,0.19,116.74172
2016-03-04,12.23,12.37,12.14,12.17,12.17,22198400,12.23,0.06,82.81406
2016-03-24,11.78,11.81,11.74,11.75,11.75,26374100,11.78,0.03,104.84429
2016-04-01,11.72,11.81,11.66,11.81,11.81,11475200,11.72,-0.09,91.60598
2016-04-04,11.77,11.78,11.72,11.72,11.72,5664300,11.88,0.16,98.224625


In [251]:
# Total 'Next Day Gain' over the rows selected at the 80.0 threshold.
thresh80.loc[:, 'Next Day Gain'].sum()

0.8400000000000034

In [254]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 70.0 and
# total their 'Next Day Gain'.
thresh70 = data[data['Will Close Higher Tomorrow'].ge(70.0)].copy()
thresh70.loc[:, 'Next Day Gain'].sum()

0.09000000000000696

In [255]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 60.0 and
# total their 'Next Day Gain'.
thresh60 = data[data['Will Close Higher Tomorrow'].ge(60.0)].copy()
thresh60.loc[:, 'Next Day Gain'].sum()

7.105427357601002e-15

In [256]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 50.0 and
# total their 'Next Day Gain'.
thresh50 = data[data['Will Close Higher Tomorrow'].ge(50.0)].copy()
thresh50.loc[:, 'Next Day Gain'].sum()

0.7800000000000029

In [257]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 40.0 and
# total their 'Next Day Gain'.
thresh40 = data[data['Will Close Higher Tomorrow'].ge(40.0)].copy()
thresh40.loc[:, 'Next Day Gain'].sum()

-0.05999999999999872

In [258]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 30.0 and
# total their 'Next Day Gain'.
thresh30 = data[data['Will Close Higher Tomorrow'].ge(30.0)].copy()
thresh30.loc[:, 'Next Day Gain'].sum()

0.909999999999993

In [259]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 20.0 and
# total their 'Next Day Gain'.
thresh20 = data[data['Will Close Higher Tomorrow'].ge(20.0)].copy()
thresh20.loc[:, 'Next Day Gain'].sum()

0.5699999999999914

In [260]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 10.0 and
# total their 'Next Day Gain'.
thresh10 = data[data['Will Close Higher Tomorrow'].ge(10.0)].copy()
thresh10.loc[:, 'Next Day Gain'].sum()

1.5099999999999998

In [262]:
# Select the rows whose 'Will Close Higher Tomorrow' value is at least 5.0 and
# total their 'Next Day Gain'.
thresh5 = data[data['Will Close Higher Tomorrow'].ge(5.0)].copy()
thresh5.loc[:, 'Next Day Gain'].sum()

1.5600000000000005