In [19]:
import pandas as pd
from pandas_datareader import data as web
from pandas import read_csv
from pandas import concat
import os.path
import datetime 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils.np_utils import to_categorical
import numpy


# Stochastic Oscilator %K
def STOK(close, low, high, n): 
 STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
 return STOK

# Stochastic Oscilator %D
def STOD(close, low, high, n):
    STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
    STOD = pd.Series.rolling(STOK, 3).mean()
    return STOD
    #STOD

    
def RSI(series, period):
    delta = series.diff().dropna()
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    u[u.index[period-1]] = np.mean( u[:period] ) #first value is sum of avg gains
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean( d[:period] ) #first value is sum of avg losses
    d = d.drop(d.index[:(period-1)])
   # rs = pd.stats.moments.ewma(u, com=period-1, adjust=False) / \
   #      pd.stats.moments.ewma(d, com=period-1, adjust=False)
    rs= pd.Series.ewm(u,com=period-1, min_periods=0,adjust=False,ignore_na=False).mean() / \
        pd.Series.ewm(d,com=period-1,min_periods=0,adjust=False,ignore_na=False).mean()
    return 100 - 100 / (1 + rs)    


def getData(item):
    start = datetime.datetime(2016, 12, 21)
    end = datetime.datetime.now()
    #print( start,end) 
    file_path='./data/'+item +'.csv'
    if not os.path.exists(file_path):
        print("Empty set")
        df =web.DataReader(item,'yahoo',start,end)  
        df=df.round(2)
        
        df.to_csv('./data/' + item + '.csv')
        #df['Date']=pd.to_datetime(df['Date']) #important for sorting
        #df.set_index("Date",inplace=True)
        df.index.name = 'Date'
    else:
        print("Read from file")  
        df = read_csv(file_path)
        df['Date']=pd.to_datetime(df['Date']) #important for sorting
        df.set_index("Date",inplace=True)
        try:
            #end = datetime.datetime.now()
            lastDate=df.index[df.shape[0]-1] #last recorded day
            d1 = lastDate #datetime.datetime.strptime(lastDate, "%Y-%m-%d")  
            #d1 = datetime.datetime.strptime(lastDate)
            #print(d1,end )
            if( d1 < end - datetime.timedelta(days=3)): # dont update on Sat or Sun
                print("Updating")  
                d2 = lastDate + datetime.timedelta(days=1)
                #print(d2,end )
                df1 =web.DataReader(item,'yahoo',d2,end)  
                updateDate=df1.index[df1.shape[0]-1]
                print(updateDate , d1)
                if (updateDate !=  d1): #yahoo gives unwanted records
                    df1=df1.round(2)
                    df=pd.concat([df,df1])
                    df.index= pd.to_datetime(df.index, format="%Y-%m-%d") # drop time 
                    df.to_csv('./data/' + item + '.csv')
                else:
                    print("No need to update")
        except: print("Exception caught")
    return df


def cleanData( df):
    # drop Close and Volume, keep Adj Close
    
    df1 = df.drop('Close', 1)
    df1 = df1.drop('Volume', 1)
    if 'y' in df1.columns:
        df1 = df1.drop('y', 1)
    if 'yhat' in df1.columns:
        df1 = df1.drop('yhat', 1)
    if 'yhathat' in df1.columns:
        df1 = df1.drop('yhathat', 1)
    df1.rename(columns={'Adj Close': 'Close', 'oldName2': 'newName2'}, inplace=True)
    return df1


def engFeatures(df):
    df['%K'] = STOK(df['Close'], df['Low'], df['High'], 14)
    df['%D'] = STOD(df['Close'], df['Low'], df['High'], 14)
    df['Avg5'] =pd.Series.rolling(df['Close'],5).mean()
    df['Avg10'] =pd.Series.rolling(df['Close'],10).mean()
    df['RSI14'] = RSI(df['Close'],14)
    df['RSI7'] = RSI(df['Close'],7)
    #df['Date']=pd.to_datetime(df['Date'])
    df.sort_index(ascending=False,inplace=True)
    df['Rise'] = (  pd.Series.rolling(df['Close'],5).max()-df['Close'] >df['Close']*0.04)*1
    df.sort_index(ascending=True,inplace=True)
    return df


# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
#from sklearn.preprocessing import MinMaxScaler

def rescale( df):
    df.dropna(inplace=True)
    values = df.values
    # ensure all data is float
    values = values.astype('float')
    #drop columns to simplify test
    timesteps=3
    features = values.shape[1]-1
    # normalize features
    scaler = MinMaxScaler()
    #scaled = scaler.fit_transform(values)
    scaled = values #scaler.fit_transform(values)
    # frame as supervised learning
    reframed = series_to_supervised(scaled,timesteps, 1)
    # drop columns we don't want to predict
    #reframed.drop(reframed.columns[[10,11,12,13,14,15,16,17,18,19,20]], axis=1, inplace=True)
    reframed= pd.DataFrame(reframed)
    return reframed
#print(reframed.head())
#reframed.iloc[0]

#def cleadData( df1,df2):
    
#    return cleanSet
#####################################################################3
def loadModel( item ):
    # later...
    # load json and create model
    from keras.models import model_from_json

    json_file = open('./data/' + item+'_model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights('./data/' + item +'_model.h5')
    print("Loaded model from disk")
    # evaluate loaded model on test data
    #loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) #61.29
    loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #score = loaded_model.evaluate( test_X, test_y, verbose=0)
    #print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
    return loaded_model

#######################################################################
def MakePredictions( dfr):
    
    dframeclean=cleanData(dfr)
    djiframe=getData('DJI')
    djiframe=cleanData(djiframe)
    #dframe=dframe.join(djiframe, lsuffix='_left', rsuffix='_right')
    dframeclean=dframeclean.join(djiframe , rsuffix='_right')
    fullFrame=engFeatures(dframeclean)
    reframed=rescale(fullFrame)
    
    #test on entire dataframe
    test = reframed.values
    test_X, test_y = test[:, :-1], to_categorical(test[:, -1])
    test_X = numpy.reshape(test_X, (test_X.shape[0], test_X.shape[1],1))
    print(test_X.shape)
    yhat = jason_model.predict(test_X)
    from numpy import argmax
    yhat=argmax(yhat,axis=1)
    print(test_X.shape, test_y.shape,yhat.shape, dframe.shape)
   
    # reframing makes original dframe shorter
    # below code makes them all equal length by padding with zeros

    yhat=yhat.reshape(yhat.shape[0],1)
    print(dfr.shape,yhat.shape)
    nz=pd.DataFrame(np.zeros(dfr.shape[0]-yhat.shape[0]))
    yhat1=nz.append(pd.DataFrame(yhat) )
    y=nz.append( pd.DataFrame(argmax(test_y,1) ))
    print( yhat1.shape, y.shape )
    
    #put results back into dframe
    dfr['y']=y.values
    dfr['yhat']=yhat1.values
    dfr['yhathat']=np.zeros(yhat1.shape[0]) # prepare empty column
    print("Finished MakePredictions")
    return dfr
#############################################################################
## MAIN EXECUTION 
itemname = 'GM'
dframe = getData(itemname)
jason_model = loadModel(itemname)

df1 = MakePredictions(dframe)

#PrintReport(df1)
#df1.tail

## get all predicted values yhat and verify they are still valid when 
## at the end of the dataset. Cut dataframe in pieces and predict yhathat
predictedlist=df1.index[df1['yhat'] >0]
predictedlist[0]
#for idx in range(len(predictedlist)):
#    #cut dframe into chunkes
#    D1 = dframe[dframe.index <= predictedlist[idx]]
#    #predict every chunk from the beggining
#    df2 = MakePredictions(df1)
#    #dframe['yhathat']=d2['yhat']
#    print(df2.shape)



Read from file
Loaded model from disk
Read from file
Updating
Exception caught
(226, 59, 1)
(226, 59, 1) (226, 2) (226,) (245, 6)
(245, 6) (226, 1)
(245, 1) (245, 1)
Finished MakePredictions


Timestamp('2017-01-25 00:00:00')

In [30]:
end = datetime.datetime(2017, 11, 15)
D1 = dframe[dframe.index <= end]
#D1
df2 = MakePredictions(D1)
df2
#df1['yhat'].iloc[-1]

Read from file
Updating
Exception caught
(210, 59, 1)
(210, 59, 1) (210, 2) (210,) (245, 9)
(228, 9) (210, 1)
(228, 1) (228, 1)
Finished MakePredictions


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,y,yhat,yhathat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-21,36.50,36.69,36.35,36.42,35.29,6669700,0.0,0.0,0.0
2016-12-22,36.29,36.40,35.63,35.69,34.59,9911700,0.0,0.0,0.0
2016-12-23,35.83,35.87,35.54,35.69,34.59,9044300,0.0,0.0,0.0
2016-12-27,35.80,35.93,35.50,35.54,34.44,6008700,0.0,0.0,0.0
2016-12-28,35.74,35.80,35.13,35.15,34.06,8451900,0.0,0.0,0.0
2016-12-29,35.25,35.48,35.12,35.14,34.05,4415300,0.0,0.0,0.0
2016-12-30,35.21,35.31,34.67,34.84,33.76,7646100,0.0,0.0,0.0
2017-01-03,34.98,35.57,34.84,35.15,34.06,10904900,0.0,0.0,0.0
2017-01-04,35.60,37.24,35.47,37.09,35.94,23388500,0.0,0.0,0.0
2017-01-05,37.01,37.05,36.07,36.39,35.26,15636700,0.0,0.0,0.0


In [27]:
predictedlist=df1.index[df1['y'] >0]
predictedlist[0]
for idx in range(len(predictedlist)):
    #cut dframe into chunkes
    D1 = dframe[dframe.index <= predictedlist[idx]]
    #predict every chunk from the beggining
    df2 = MakePredictions(D1)
    #dframe['yhathat']=d2['yhat']
    print(predictedlist[idx],df2['yhat'].iloc[-1])
    #print(D1.iloc[-1])

Read from file
Updating
Exception caught
(4, 59, 1)
(4, 59, 1) (4, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


 (4,) (245, 9)
(22, 9) (4, 1)
(22, 1) (22, 1)
Finished MakePredictions
2017-01-24 00:00:00 0.0
Read from file
Updating
Exception caught
(16, 59, 1)
(16, 59, 1) (16, 2) (16,) (245, 9)
(34, 9) (16, 1)
(34, 1) (34, 1)
Finished MakePredictions
2017-02-09 00:00:00 1.0
Read from file
Updating
Exception caught
(17, 59, 1)
(17, 59, 1) (17, 2) (17,) (245, 9)
(35, 9) (17, 1)
(35, 1) (35, 1)
Finished MakePredictions
2017-02-10 00:00:00 1.0
Read from file
Updating
Exception caught
(18, 59, 1)
(18, 59, 1) (18, 2) (18,) (245, 9)
(36, 9) (18, 1)
(36, 1) (36, 1)
Finished MakePredictions
2017-02-13 00:00:00 0.0
Read from file
Updating
Exception caught
(19, 59, 1)
(19, 59, 1) (19, 2) (19,) (245, 9)
(37, 9) (19, 1)
(37, 1) (37, 1)
Finished MakePredictions
2017-02-14 00:00:00 0.0
Read from file
Updating
Exception caught
(90, 59, 1)
(90, 59, 1) (90, 2) (90,) (245, 9)
(108, 9) (90, 1)
(108, 1) (108, 1)
Finished MakePredictions
2017-05-26 00:00:00 0.0
Read from file
Updating
Exception caught
(91, 59, 1)
(91,

In [28]:
predictedlist

DatetimeIndex(['2017-01-24', '2017-02-09', '2017-02-10', '2017-02-13',
               '2017-02-14', '2017-05-26', '2017-05-30', '2017-08-29',
               '2017-08-30', '2017-08-31', '2017-09-11', '2017-09-12',
               '2017-09-20', '2017-09-27', '2017-09-28', '2017-09-29',
               '2017-10-02', '2017-10-03', '2017-10-04', '2017-11-08',
               '2017-11-15', '2017-11-16'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [None]:
# reframing makes original dframe shorter
# below code makes them all equal length by padding with zeros

yhat=yhat.reshape(yhat.shape[0],1)
print(dframe.shape,yhat.shape)
nz=pd.DataFrame(np.zeros(dframe.shape[0]-yhat.shape[0]))
yhat1=nz.append(pd.DataFrame(yhat) )
y=nz.append( pd.DataFrame(argmax(test_y,1) ))
print( yhat1.shape, y.shape )

dframe['yhat']=yhat1.values
dframe['y']=y.values
dframe.tail

In [173]:
#np.where( dframe['yhat'] > 0 )
df=dframe.index[dframe['yhat'] >0]
df[0]
for idx in range(len(df)):
    D1=dframe[dframe.index <= df[idx]]
    #yhathat=PredictLastValue(D1)
    dframe['yhat']=yhat1.values
    print(D1.shape)


(22, 8)
(33, 8)
(34, 8)
(35, 8)
(36, 8)
(107, 8)
(108, 8)
(173, 8)
(174, 8)
(175, 8)
(181, 8)
(182, 8)
(188, 8)
(193, 8)
(194, 8)
(195, 8)
(196, 8)
(197, 8)
(198, 8)
(222, 8)
(228, 8)
(229, 8)


In [None]:
reframed

In [17]:
argmax(test_y,axis=1)

array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0], dtype=int64)

In [5]:
#fullFrame['Date']=pd.to_datetime(fullFrame['Date'])
df1=fullFrame[['Close','Rise']].iloc[-30:] 

df1=df1.sort_index(ascending=False)
df1['window']=pd.Series.rolling(df1['Close'],5).max()
df1['Rise1'] = (  pd.Series.rolling(df1['Close'],5).max()-df1['Close'] >df1['Close']*0.04)*1
#df['yhat'] =yhat
df1=df1.sort_index(ascending=True)
df1

Unnamed: 0_level_0,Close,Rise,window,Rise1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-10-20,45.61,0,46.48,0
2017-10-23,45.15,0,46.48,0
2017-10-24,46.48,0,46.48,0
2017-10-25,45.12,0,45.25,0
2017-10-26,45.25,0,45.25,0
2017-10-27,44.64,0,44.64,0
2017-10-30,43.37,0,43.37,0
2017-10-31,42.98,0,43.13,0
2017-11-01,43.13,0,43.13,0
2017-11-02,42.6,0,42.6,0


In [42]:
fullFrame[['Close']].iloc[-30:] 
#dframe[['Close']].iloc[-30:] 

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2017-10-11,45.47
2017-10-12,44.89
2017-10-13,45.88
2017-10-16,45.76
2017-10-17,45.02
2017-10-18,45.12
2017-10-19,45.35
2017-10-20,45.61
2017-10-23,45.15
2017-10-24,46.48


In [12]:
test

array([[ 38.54      ,  38.87      ,  38.54      , ...,  60.7658393 ,
         66.16377762,   1.        ],
       [ 34.88      ,  35.14      ,  34.62      , ...,  53.23391581,
         56.53293806,   1.        ],
       [ 29.01      ,  29.22      ,  28.62      , ...,  72.76774699,
         76.73209434,   0.        ],
       ..., 
       [ 29.5       ,  29.94      ,  29.5       , ...,  30.80723241,
         19.73558947,   0.        ],
       [ 31.85      ,  32.        ,  31.68      , ...,  46.98693043,
         53.31674551,   0.        ],
       [ 36.8       ,  37.04      ,  36.62      , ...,  36.79397471,
         22.05245118,   0.        ]])

In [21]:
    item='F'
    start = datetime.datetime(2017, 11, 20)
    end = datetime.datetime.now()
    #print( start,end) 
    file_path='./data/'+item +'.csv'
    df =web.DataReader(item,'yahoo',start,end)  
    print(df.dtypes)
    df.index.values

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


array(['2017-11-20T00:00:00.000000000', '2017-11-21T00:00:00.000000000',
       '2017-11-22T00:00:00.000000000', '2017-11-24T00:00:00.000000000',
       '2017-11-27T00:00:00.000000000', '2017-11-28T00:00:00.000000000',
       '2017-11-29T00:00:00.000000000', '2017-11-30T00:00:00.000000000'], dtype='datetime64[ns]')