In [18]:
import pandas as pd
from pandas_datareader import data as web
from pandas import read_csv
from pandas import concat
import os.path
import datetime 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils.np_utils import to_categorical
import numpy

mypath='C:\\Users\\mwnuk\\Dropbox\\Quotes\\'
settingsPath='C:\\Users\\mwnuk\\Dropbox\\Settings\\'

# Stochastic Oscilator %K
def STOK(close, low, high, n): 
 STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
 return STOK

# Stochastic Oscilator %D
def STOD(close, low, high, n):
    STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
    STOD = pd.Series.rolling(STOK, 3).mean()
    return STOD
    #STOD

    
def RSI(series, period):
    delta = series.diff().dropna()
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    u[u.index[period-1]] = np.mean( u[:period] ) #first value is sum of avg gains
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean( d[:period] ) #first value is sum of avg losses
    d = d.drop(d.index[:(period-1)])
   # rs = pd.stats.moments.ewma(u, com=period-1, adjust=False) / \
   #      pd.stats.moments.ewma(d, com=period-1, adjust=False)
    rs= pd.Series.ewm(u,com=period-1, min_periods=0,adjust=False,ignore_na=False).mean() / \
        pd.Series.ewm(d,com=period-1,min_periods=0,adjust=False,ignore_na=False).mean()
    return 100 - 100 / (1 + rs)    


def getData(item):
    start = datetime.datetime(2016, 12, 21)
    end = datetime.datetime.now()
    #print( start,end) 
    file_path=mypath+item +'.csv'
    if not os.path.exists(file_path):
        print("No data")
    else:
        print("Read from file")  
        try:     
            df = read_csv(file_path)
            df['Date']=pd.to_datetime(df['Date']) #important for sorting
            df.set_index("Date",inplace=True)
        except: print("Exception caught")
    return df


def cleanData( df):
    # drop Close and Volume, keep Adj Close
    
    df1 = df.drop('Close', 1)
    df1 = df1.drop('Volume', 1)
    if 'y' in df1.columns:
        df1 = df1.drop('y', 1)
    if 'yhat' in df1.columns:
        df1 = df1.drop('yhat', 1)
    if 'yhathat' in df1.columns:
        df1 = df1.drop('yhathat', 1)
    df1.rename(columns={'Adj Close': 'Close', 'oldName2': 'newName2'}, inplace=True)
    return df1


def engFeatures(df):
    df['%K'] = STOK(df['Close'], df['Low'], df['High'], 14)
    df['%D'] = STOD(df['Close'], df['Low'], df['High'], 14)
    df['Avg5'] =pd.Series.rolling(df['Close'],5).mean()
    df['Avg10'] =pd.Series.rolling(df['Close'],10).mean()
    df['RSI14'] = RSI(df['Close'],14)
    df['RSI7'] = RSI(df['Close'],7)
    #df['Date']=pd.to_datetime(df['Date'])
    df.sort_index(ascending=False,inplace=True)
    df['Rise'] = (  pd.Series.rolling(df['Close'],5).max()-df['Close'] >df['Close']*0.04)*1
    df.sort_index(ascending=True,inplace=True)
    return df


# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
#from sklearn.preprocessing import MinMaxScaler

def rescale( df):
    df.dropna(inplace=True)
    values = df.values
    # ensure all data is float
    values = values.astype('float')
    #drop columns to simplify test
    timesteps=3
    features = values.shape[1]-1
    # normalize features
    scaler = MinMaxScaler()
    #scaled = scaler.fit_transform(values)
    scaled = values #scaler.fit_transform(values)
    # frame as supervised learning
    reframed = series_to_supervised(scaled,timesteps, 1)
    # drop columns we don't want to predict
    #reframed.drop(reframed.columns[[10,11,12,13,14,15,16,17,18,19,20]], axis=1, inplace=True)
    reframed= pd.DataFrame(reframed)
    return reframed
#print(reframed.head())
#reframed.iloc[0]

#def cleadData( df1,df2):
    
#    return cleanSet
#####################################################################3
def loadModel( item ):
    # later...
    # load json and create model
    from keras.models import model_from_json

    json_file = open(mypath + item+'_model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(mypath + item +'_model.h5')
    print("Loaded model from disk")
    # evaluate loaded model on test data
    #loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) #61.29
    loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #score = loaded_model.evaluate( test_X, test_y, verbose=0)
    #print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
    return loaded_model

#######################################################################
def MakePredictions( dfr):
    
    dframeclean=cleanData(dfr)
    djiframe=getData('DJI')
    djiframe=cleanData(djiframe)
    #dframe=dframe.join(djiframe, lsuffix='_left', rsuffix='_right')
    dframeclean=dframeclean.join(djiframe , rsuffix='_right')
    fullFrame=engFeatures(dframeclean)
    reframed=rescale(fullFrame)
    
    #test on entire dataframe
    test = reframed.values
    test_X, test_y = test[:, :-1], to_categorical(test[:, -1])
    test_X = numpy.reshape(test_X, (test_X.shape[0], test_X.shape[1],1))
    print(test_X.shape)
    yhat = jason_model.predict(test_X)
    from numpy import argmax
    yhat=argmax(yhat,axis=1)
    print(test_X.shape, test_y.shape,yhat.shape, dframe.shape)
   
    # reframing makes original dframe shorter
    # below code makes them all equal length by padding with zeros

    yhat=yhat.reshape(yhat.shape[0],1)
    print(dfr.shape,yhat.shape)
    nz=pd.DataFrame(np.zeros(dfr.shape[0]-yhat.shape[0]))
    yhat1=nz.append(pd.DataFrame(yhat) )
    y=nz.append( pd.DataFrame(argmax(test_y,1) ))
    print( yhat1.shape, y.shape )
    
    #put results back into dframe
    dfr['y']=y.values
    dfr['yhat']=yhat1.values
    dfr['yhathat']=np.zeros(yhat1.shape[0]) # prepare empty column
    print("Finished MakePredictions")
    return dfr
#############################################################################
## MAIN EXECUTION 
itemname = 'AAPL'
dframe = getData(itemname)
jason_model = loadModel(itemname)

df1 = MakePredictions(dframe)

#PrintReport(df1)
#df1.tail

## get all predicted values yhat and verify they are still valid when 
## at the end of the dataset. Cut dataframe in pieces and predict yhathat
predictedlist=df1.index[df1['yhat'] >0]
predictedlist[0]
#for idx in range(len(predictedlist)):
#    #cut dframe into chunkes
#    D1 = dframe[dframe.index <= predictedlist[idx]]
#    #predict every chunk from the beggining
#    df2 = MakePredictions(df1)
#    #dframe['yhathat']=d2['yhat']
#    print(df2.shape)



Read from file


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\mwnuk\\Dropbox\\Quotes\\AXP_model.json'

In [15]:
end = datetime.datetime(2017, 11, 15)
D1 = dframe[dframe.index <= end]
#D1
df2 = MakePredictions(D1)
df2
#df1['yhat'].iloc[-1]

Read from file
(710, 59, 1)
(710, 59, 1) (710, 2) (710,) (757, 9)
(728, 9) (710, 1)
(728, 1) (728, 1)
Finished MakePredictions


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,y,yhat,yhathat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-12-29,113.79,114.77,113.70,113.91,107.83,27598900,0.0,0.0,0.0
2014-12-30,113.64,113.92,112.11,112.52,106.51,29881500,0.0,0.0,0.0
2014-12-31,112.82,113.13,110.21,110.38,104.49,41403400,0.0,0.0,0.0
2015-01-02,111.39,111.44,107.35,109.33,103.49,53204600,0.0,0.0,0.0
2015-01-05,108.29,108.65,105.41,106.25,100.58,64285500,0.0,0.0,0.0
2015-01-06,106.54,107.43,104.63,106.26,100.59,65797100,0.0,0.0,0.0
2015-01-07,107.20,108.20,106.70,107.75,102.00,40105900,0.0,0.0,0.0
2015-01-08,109.23,112.15,108.70,111.89,105.92,59364500,0.0,0.0,0.0
2015-01-09,112.67,113.25,110.21,112.01,106.03,53699500,0.0,0.0,0.0
2015-01-12,112.60,112.63,108.80,109.25,103.42,49650800,0.0,0.0,0.0


In [16]:
predictedlist=df1.index[df1['y'] >0]
predictedlist[0]
for idx in range(len(predictedlist)):
    #cut dframe into chunkes
    D1 = dframe[dframe.index <= predictedlist[idx]]
    #predict every chunk from the beggining
    df2 = MakePredictions(D1)
    #dframe['yhathat']=d2['yhat']
    print(predictedlist[idx],df2['yhat'].iloc[-1])
    #print(D1.iloc[-1])

Read from file
(2, 59, 1)
(2, 59, 1) (2, 1) (2,)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


 (757, 9)
(20, 9) (2, 1)
(20, 1) (20, 1)
Finished MakePredictions
2015-01-27 00:00:00 0.0
Read from file
(3, 59, 1)
(3, 59, 1) (3, 1) (3,) (757, 9)
(21, 9) (3, 1)
(21, 1) (21, 1)
Finished MakePredictions
2015-01-28 00:00:00 0.0
Read from file
(10, 59, 1)
(10, 59, 1) (10, 2) (10,) (757, 9)
(28, 9) (10, 1)
(28, 1) (28, 1)
Finished MakePredictions
2015-02-06 00:00:00 0.0
Read from file
(11, 59, 1)
(11, 59, 1) (11, 2) (11,) (757, 9)
(29, 9) (11, 1)
(29, 1) (29, 1)
Finished MakePredictions
2015-02-09 00:00:00 0.0
Read from file
(12, 59, 1)
(12, 59, 1) (12, 2) (12,) (757, 9)
(30, 9) (12, 1)
(30, 1) (30, 1)
Finished MakePredictions
2015-02-10 00:00:00 0.0
Read from file
(13, 59, 1)
(13, 59, 1) (13, 2) (13,) (757, 9)
(31, 9) (13, 1)
(31, 1) (31, 1)
Finished MakePredictions
2015-02-11 00:00:00 0.0
Read from file
(17, 59, 1)
(17, 59, 1) (17, 2) (17,) (757, 9)
(35, 9) (17, 1)
(35, 1) (35, 1)
Finished MakePredictions
2015-02-18 00:00:00 0.0
Read from file
(61, 59, 1)
(61, 59, 1) (61, 2) (61,) (757

(413, 59, 1) (413, 2) (413,) (757, 9)
(431, 9) (413, 1)
(431, 1) (431, 1)
Finished MakePredictions
2016-09-13 00:00:00 1.0
Read from file
(414, 59, 1)
(414, 59, 1) (414, 2) (414,) (757, 9)
(432, 9) (414, 1)
(432, 1) (432, 1)
Finished MakePredictions
2016-09-14 00:00:00 0.0
Read from file
(458, 59, 1)
(458, 59, 1) (458, 2) (458,) (757, 9)
(476, 9) (458, 1)
(476, 1) (476, 1)
Finished MakePredictions
2016-11-15 00:00:00 0.0
Read from file
(459, 59, 1)
(459, 59, 1) (459, 2) (459,) (757, 9)
(477, 9) (459, 1)
(477, 1) (477, 1)
Finished MakePredictions
2016-11-16 00:00:00 0.0
Read from file
(472, 59, 1)
(472, 59, 1) (472, 2) (472,) (757, 9)
(490, 9) (472, 1)
(490, 1) (490, 1)
Finished MakePredictions
2016-12-06 00:00:00 0.0
Read from file
(507, 59, 1)
(507, 59, 1) (507, 2) (507,) (757, 9)
(525, 9) (507, 1)
(525, 1) (525, 1)
Finished MakePredictions
2017-01-27 00:00:00 0.0
Read from file
(508, 59, 1)
(508, 59, 1) (508, 2) (508,) (757, 9)
(526, 9) (508, 1)
(526, 1) (526, 1)
Finished MakePredict

In [5]:
predictedlist

DatetimeIndex(['2015-01-27', '2015-01-28', '2015-02-06', '2015-02-09',
               '2015-02-10', '2015-02-11', '2015-02-18', '2015-04-22',
               '2015-07-10', '2015-07-13', '2015-07-15', '2015-07-16',
               '2015-08-05', '2015-08-06', '2015-08-24', '2015-08-25',
               '2015-08-26', '2015-09-02', '2015-09-08', '2015-09-10',
               '2015-10-19', '2015-10-20', '2015-10-21', '2015-10-22',
               '2015-10-27', '2015-10-28', '2015-11-16', '2015-11-17',
               '2015-11-18', '2016-01-19', '2016-01-20', '2016-01-21',
               '2016-01-22', '2016-01-28', '2016-02-11', '2016-02-12',
               '2016-02-16', '2016-02-25', '2016-02-26', '2016-02-29',
               '2016-03-01', '2016-03-11', '2016-03-29', '2016-05-13',
               '2016-05-16', '2016-05-20', '2016-05-23', '2016-05-24',
               '2016-06-28', '2016-07-25', '2016-07-26', '2016-07-27',
               '2016-08-03', '2016-09-09', '2016-09-12', '2016-09-13',
      

In [6]:
# reframing makes original dframe shorter
# below code makes them all equal length by padding with zeros

yhat=yhat.reshape(yhat.shape[0],1)
print(dframe.shape,yhat.shape)
nz=pd.DataFrame(np.zeros(dframe.shape[0]-yhat.shape[0]))
yhat1=nz.append(pd.DataFrame(yhat) )
y=nz.append( pd.DataFrame(argmax(test_y,1) ))
print( yhat1.shape, y.shape )

dframe['yhat']=yhat1.values
dframe['y']=y.values
dframe.tail

NameError: name 'yhat' is not defined

In [7]:
#np.where( dframe['yhat'] > 0 )
df=dframe.index[dframe['yhat'] >0]
df[0]
for idx in range(len(df)):
    D1=dframe[dframe.index <= df[idx]]
    #yhathat=PredictLastValue(D1)
    dframe['yhat']=yhat1.values
    print(D1.shape)


NameError: name 'yhat1' is not defined

In [8]:
reframed

NameError: name 'reframed' is not defined

In [9]:
argmax(test_y,axis=1)

NameError: name 'argmax' is not defined

In [10]:
#fullFrame['Date']=pd.to_datetime(fullFrame['Date'])
df1=fullFrame[['Close','Rise']].iloc[-30:] 

df1=df1.sort_index(ascending=False)
df1['window']=pd.Series.rolling(df1['Close'],5).max()
df1['Rise1'] = (  pd.Series.rolling(df1['Close'],5).max()-df1['Close'] >df1['Close']*0.04)*1
#df['yhat'] =yhat
df1=df1.sort_index(ascending=True)
df1

NameError: name 'fullFrame' is not defined

In [11]:
fullFrame[['Close']].iloc[-30:] 
#dframe[['Close']].iloc[-30:] 

NameError: name 'fullFrame' is not defined

In [12]:
test

NameError: name 'test' is not defined