In [18]:
import pandas as pd
from pandas_datareader import data as web
from pandas import read_csv
from pandas import concat
import os.path
import datetime 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils.np_utils import to_categorical
import numpy


# Stochastic Oscillator %K
def STOK(close, low, high, n):
    """Return the stochastic oscillator %K as a percentage.

    For every row the lowest `low` and the highest `high` over the trailing
    `n`-row window are taken, and `close` is expressed as a percentage of the
    distance from that low to that high.  Rows whose window is not yet full
    come out as NaN.
    """
    window_low = low.rolling(n).min()
    window_high = high.rolling(n).max()
    return (close - window_low) / (window_high - window_low) * 100

# Stochastic Oscillator %D
def STOD(close, low, high, n):
    """Return the stochastic oscillator %D: a 3-row rolling mean of %K.

    %K is computed here from the trailing `n`-row low/high range of the
    inputs and then smoothed with a 3-row rolling mean.  Rows for which either
    rolling window is incomplete come out as NaN.
    """
    floor = low.rolling(n).min()
    ceiling = high.rolling(n).max()
    percent_k = (close - floor) / (ceiling - floor) * 100
    return percent_k.rolling(3).mean()
    #STOD

    
def RSI(series, period):
    """Return the Relative Strength Index of `series` on a 0-100 scale.

    The row-to-row differences of `series` are split into gains and losses
    (both non-negative).  Each stream is seeded with the plain mean of its
    first `period` values, everything before the seed is discarded, and the
    rest is smoothed with an exponentially weighted mean
    (`com=period-1`, `adjust=False`).  The result therefore starts `period`
    rows after the start of a NaN-free `series`.

    Raises IndexError when `series` has too few differences to build the seed.
    """
    delta = series.diff().dropna()
    rising = delta > 0
    falling = delta < 0

    gains = delta * 0
    losses = gains.copy()
    gains[rising] = delta[rising]
    losses[falling] = -delta[falling]

    def seeded(moves):
        # The value at position `period - 1` becomes the average of the first
        # `period` moves; earlier rows are dropped so the smoothing starts
        # from that seed.
        seed_label = moves.index[period - 1]
        moves[seed_label] = np.mean(moves[:period])
        return moves.drop(moves.index[:(period - 1)])

    smoothing = dict(com=period - 1, min_periods=0, adjust=False, ignore_na=False)
    avg_gain = seeded(gains).ewm(**smoothing).mean()
    avg_loss = seeded(losses).ewm(**smoothing).mean()
    rs = avg_gain / avg_loss
    return 100 - 100 / (1 + rs)


def getData(item):
    """Return the daily price history for `item`, cached in ./data/<item>.csv.

    If no cache file exists, the history from 2016-12-21 until now is
    downloaded through `web.DataReader(item, 'yahoo', ...)`, rounded to two
    decimals and written to the cache.  Otherwise the cache is loaded and,
    when its last row is more than two days older than now, the rows after
    that date are downloaded and appended to both the frame and the cache.

    Parameters
    ----------
    item : str
        Ticker symbol; also used as the cache file name.

    Returns
    -------
    pandas.DataFrame
        Price rows indexed by a datetime index named 'Date'.

    Any error raised by the download or by file access is propagated.
    """
    start = datetime.datetime(2016, 12, 21)
    end = datetime.datetime.now()
    file_path = './data/' + item + '.csv'
    if not os.path.exists(file_path):
        print("Empty set")
        df = web.DataReader(item, 'yahoo', start, end)
        df = df.round(2)
        # Name the index before writing: the reload branch below looks up a
        # 'Date' column, so the cached header must always contain it.
        df.index.name = 'Date'
        df.to_csv(file_path)
    else:
        print("Read from file")
        df = read_csv(file_path)
        df['Date'] = pd.to_datetime(df['Date'])  # important for sorting
        df.set_index("Date", inplace=True)

        lastDate = df.index[-1]  # last recorded day
        # Two-day slack so a cache ending on Friday is not refreshed on Sat/Sun.
        if lastDate < end - datetime.timedelta(days=2):
            print("Updating")
            d2 = lastDate + datetime.timedelta(days=1)
            df1 = web.DataReader(item, 'yahoo', d2, end)
            if df1.empty:
                fresh = df1
            else:
                print(df1.index[-1], lastDate)
                # The source may return rows at or before the requested start;
                # keep only rows strictly newer than the cache so nothing is
                # appended twice.
                fresh = df1[df1.index > lastDate]
            if not fresh.empty:
                fresh = fresh.round(2)
                df = pd.concat([df, fresh])
                # Drop any time-of-day component so every row is a plain date.
                df.index = pd.to_datetime(df.index).normalize()
                df.index.name = 'Date'
                df.to_csv(file_path)
            else:
                print("No need to update")
    return df


def cleanData( df):
    """Return a copy of `df` that keeps only the adjusted close as 'Close'.

    The raw 'Close' and 'Volume' columns are removed and 'Adj Close' is
    renamed to 'Close'.  The input frame is not modified.

    Raises KeyError if 'Close' or 'Volume' is missing from `df`.
    """
    # `axis` is passed by keyword: recent pandas releases reject it as a
    # positional argument.
    trimmed = df.drop(['Close', 'Volume'], axis=1)
    return trimmed.rename(columns={'Adj Close': 'Close'})


def engFeatures(df):
    """Add indicator columns and the 'Rise' label to `df` in place and return it.

    Columns added, in this order: '%K' and '%D' (14-row stochastic oscillator),
    'Avg5' and 'Avg10' (rolling means of 'Close'), 'RSI14' and 'RSI7', and
    'Rise'.

    'Rise' is 1 when the highest 'Close' over the current row and the four
    rows that follow it (by index order) exceeds the current 'Close' by more
    than 4% of it, else 0.  It is computed by temporarily sorting the frame in
    descending index order; rows whose 5-row window is incomplete compare as
    False and so get 0.  The frame is left sorted in ascending index order.
    """
    close, low, high = df['Close'], df['Low'], df['High']
    df['%K'] = STOK(close, low, high, 14)
    df['%D'] = STOD(close, low, high, 14)
    for window in (5, 10):
        df['Avg%d' % window] = close.rolling(window).mean()
    for period in (14, 7):
        df['RSI%d' % period] = RSI(close, period)

    # Newest-first order turns the trailing window into a look-ahead window.
    df.sort_index(ascending=False, inplace=True)
    newest_first = df['Close']
    ahead_max = newest_first.rolling(5).max()
    df['Rise'] = ((ahead_max - newest_first) > newest_first * 0.04) * 1
    df.sort_index(ascending=True, inplace=True)
    return df


# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a sequence of observations as a supervised-learning table.

    Parameters
    ----------
    data : list, 1-D or 2-D array, or DataFrame
        Observations, one row per time step.  Anything `pd.DataFrame` accepts.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) used as inputs.
    n_out : int
        Number of steps (t ... t+n_out-1) used as outputs.
    dropnan : bool
        Drop the rows made incomplete by shifting.

    Returns
    -------
    pandas.DataFrame
        One block of columns per shift, named 'varK(t-i)', 'varK(t)' and
        'varK(t+i)' where K is the 1-based position of the variable.
    """
    df = pd.DataFrame(data)
    # Count variables on the constructed frame rather than on the raw input,
    # so 1-D arrays and lists of rows are labelled correctly too.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
#from sklearn.preprocessing import MinMaxScaler

def rescale( df, timesteps=3):
    """Drop incomplete rows from `df` and frame it for supervised learning.

    Despite the name, no scaling is applied: the values are passed through
    unchanged (as floats) to `series_to_supervised`.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame.  Rows containing NaN are removed from it IN PLACE.
    timesteps : int
        Number of lagged rows placed before each row; defaults to 3.

    Returns
    -------
    pandas.DataFrame
        The frame produced by `series_to_supervised(values, timesteps, 1)`.
    """
    df.dropna(inplace=True)
    # ensure all data is float
    values = df.values.astype('float')
    # frame as supervised learning
    return series_to_supervised(values, timesteps, 1)
#print(reframed.head())
#reframed.iloc[0]

def cleadData( df1,df2):
    """Placeholder with no implementation.

    The body used to return a name that is never defined, so every call
    failed with NameError.  It now fails explicitly instead.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("cleadData is a placeholder and has no implementation")
#####################################################################3

# Ticker under study; it also selects the saved model files further down.
itemname = 'GM'

# Load and clean the ticker history, then the 'DJI' history.
dframe = cleanData(getData(itemname))
djiframe = cleanData(getData('DJI'))

# Put both side by side on the date index; clashing 'DJI' column names get
# the '_right' suffix.
dframe = dframe.join(djiframe, rsuffix='_right')

# engFeatures adds its columns to `dframe` in place and returns that frame.
fullFrame = engFeatures(dframe)
reframed = rescale(fullFrame)


values = reframed.values
n_test_size = 50

# Hold out the final `n_test_size` rows of the reframed table.
test = values[values.shape[0] - n_test_size:]

# Every column but the last is an input; the last column is the class label,
# which is one-hot encoded.
test_X = test[:, :-1]
test_y = to_categorical(test[:, -1])

# reshape input to be 3D [samples, timesteps, features]
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], 1)

print(test_X.shape, test_y.shape)



# later...
# load json and create model
from keras.models import model_from_json
from numpy import argmax

# Rebuild the network architecture from its JSON description.  The context
# manager closes the file even if reading fails.
with open('./data/' + itemname +'_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights('./data/' + itemname +'_model.h5')
print("Loaded model from disk")
# evaluate loaded model on test data
loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
score = loaded_model.evaluate( test_X, test_y, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

# Turn the per-class scores into predicted class indices.
yhat = loaded_model.predict(test_X)
yhat=argmax(yhat,axis=1)
yhat


Read from file
Updating
2017-12-01 00:00:00 2017-11-06 00:00:00
Read from file
Updating


RemoteDataError: Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/DJI?period1=1512190800&period2=1512363599&interval=1d&events=history&crumb=V5qxV%5Cu002F0VyU%5Cu002F

In [16]:
#fullFrame.iloc[-10:]

In [17]:
# Collapse the one-hot rows of `test_y` back to class indices for display.
test_y.argmax(axis=1)

array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0], dtype=int64)

In [5]:
#fullFrame['Date']=pd.to_datetime(fullFrame['Date'])
# Re-derive the look-ahead label on the last 30 rows so it can be compared
# with the stored 'Rise' column.  Newest-first order makes the 5-row rolling
# window look forward in time.
df1 = fullFrame[['Close', 'Rise']].iloc[-30:].sort_index(ascending=False)
df1['window'] = df1['Close'].rolling(5).max()
df1['Rise1'] = ((df1['window'] - df1['Close']) > df1['Close'] * 0.04) * 1
df1 = df1.sort_index(ascending=True)
df1

Unnamed: 0_level_0,Close,Rise,window,Rise1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-10-20,45.61,0,46.48,0
2017-10-23,45.15,0,46.48,0
2017-10-24,46.48,0,46.48,0
2017-10-25,45.12,0,45.25,0
2017-10-26,45.25,0,45.25,0
2017-10-27,44.64,0,44.64,0
2017-10-30,43.37,0,43.37,0
2017-10-31,42.98,0,43.13,0
2017-11-01,43.13,0,43.13,0
2017-11-02,42.6,0,42.6,0


In [42]:
# Show the 'Close' column for the last 30 rows.
fullFrame[['Close']].tail(30)
#dframe[['Close']].iloc[-30:] 

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2017-10-11,45.47
2017-10-12,44.89
2017-10-13,45.88
2017-10-16,45.76
2017-10-17,45.02
2017-10-18,45.12
2017-10-19,45.35
2017-10-20,45.61
2017-10-23,45.15
2017-10-24,46.48


In [12]:
# Display the `test` array for inspection.
# NOTE(review): relies on `test` left in the kernel by the evaluation cell — confirm run order.
test

array([[ 38.54      ,  38.87      ,  38.54      , ...,  60.7658393 ,
         66.16377762,   1.        ],
       [ 34.88      ,  35.14      ,  34.62      , ...,  53.23391581,
         56.53293806,   1.        ],
       [ 29.01      ,  29.22      ,  28.62      , ...,  72.76774699,
         76.73209434,   0.        ],
       ..., 
       [ 29.5       ,  29.94      ,  29.5       , ...,  30.80723241,
         19.73558947,   0.        ],
       [ 31.85      ,  32.        ,  31.68      , ...,  46.98693043,
         53.31674551,   0.        ],
       [ 36.8       ,  37.04      ,  36.62      , ...,  36.79397471,
         22.05245118,   0.        ]])

In [21]:
    # Scratch check: download a short window for one ticker and inspect the
    # column dtypes and the raw index values that DataReader returns.
    item='F'
    start = datetime.datetime(2017, 11, 20)
    end = datetime.datetime.now()
    #print( start,end) 
    # NOTE(review): file_path is built but not used in this cell.
    file_path='./data/'+item +'.csv'
    df =web.DataReader(item,'yahoo',start,end)  
    print(df.dtypes)
    df.index.values

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


array(['2017-11-20T00:00:00.000000000', '2017-11-21T00:00:00.000000000',
       '2017-11-22T00:00:00.000000000', '2017-11-24T00:00:00.000000000',
       '2017-11-27T00:00:00.000000000', '2017-11-28T00:00:00.000000000',
       '2017-11-29T00:00:00.000000000', '2017-11-30T00:00:00.000000000'], dtype='datetime64[ns]')