In [19]:
import pandas as pd
from pandas_datareader import data as web
from pandas import read_csv
from pandas import concat
import os.path
import datetime 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.utils.np_utils import to_categorical
import numpy

mypath='C:\\Users\\mwnuk\\Dropbox\\Quotes\\'
myModelpath='C:\\Users\\mwnuk\\Dropbox\\Models\\'
settingsPath='C:\\Users\\mwnuk\\Dropbox\\Settings\\'
reportPath='C:\\Users\\mwnuk\\Dropbox\\Reports\\'

# Stochastic Oscilator %K
def STOK(close, low, high, n): 
 STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
 return STOK

# Stochastic Oscilator %D
def STOD(close, low, high, n):
    STOK = ((close - pd.Series.rolling(low, n).min()) / (pd.Series.rolling(high, n).max() - pd.Series.rolling(low, n).min())) * 100
    STOD = pd.Series.rolling(STOK, 3).mean()
    return STOD
    #STOD

    
def RSI(series, period):
    delta = series.diff().dropna()
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]
    u[u.index[period-1]] = np.mean( u[:period] ) #first value is sum of avg gains
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean( d[:period] ) #first value is sum of avg losses
    d = d.drop(d.index[:(period-1)])
   # rs = pd.stats.moments.ewma(u, com=period-1, adjust=False) / \
   #      pd.stats.moments.ewma(d, com=period-1, adjust=False)
    rs= pd.Series.ewm(u,com=period-1, min_periods=0,adjust=False,ignore_na=False).mean() / \
        pd.Series.ewm(d,com=period-1,min_periods=0,adjust=False,ignore_na=False).mean()
    return 100 - 100 / (1 + rs)    


def getData(item):
    start = datetime.datetime(2016, 12, 21)
    end = datetime.datetime.now()
    #print( start,end) 
    file_path=mypath+item +'.csv'
    if not os.path.exists(file_path):
        print("No data")
    else:
        print("Read from file ", item)  
        try:     
            df = read_csv(file_path)
            df['Date']=pd.to_datetime(df['Date']) #important for sorting
            df.set_index("Date",inplace=True)
        except: print("Exception caught")
    return df


def cleanData( df):
    # drop Close and Volume, keep Adj Close
    
    df1 = df.drop('Close', 1)
    df1 = df1.drop('Volume', 1)
    if 'y' in df1.columns:
        df1 = df1.drop('y', 1)
    if 'yhat' in df1.columns:
        df1 = df1.drop('yhat', 1)
    if 'yhathat' in df1.columns:
        df1 = df1.drop('yhathat', 1)
    df1.rename(columns={'Adj Close': 'Close', 'oldName2': 'newName2'}, inplace=True)
    return df1


def engFeatures(df):
    df['%K'] = STOK(df['Close'], df['Low'], df['High'], 14)
    df['%D'] = STOD(df['Close'], df['Low'], df['High'], 14)
    df['Avg5'] =pd.Series.rolling(df['Close'],5).mean()
    df['Avg10'] =pd.Series.rolling(df['Close'],10).mean()
    df['RSI14'] = RSI(df['Close'],14)
    df['RSI7'] = RSI(df['Close'],7)
    #df['Date']=pd.to_datetime(df['Date'])
    df.sort_index(ascending=False,inplace=True)
    df['Rise'] = (  pd.Series.rolling(df['Close'],5).max()-df['Close'] >df['Close']*0.04)*1
    df.sort_index(ascending=True,inplace=True)
    return df


# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
#from sklearn.preprocessing import MinMaxScaler

def rescale( df):
    df.dropna(inplace=True)
    values = df.values
    # ensure all data is float
    values = values.astype('float')

    timesteps=3

    features = values.shape[1]-1
    # normalize features
    scaler = MinMaxScaler()
    #scaled = scaler.fit_transform(values)
    scaled = values #scaler.fit_transform(values)
    # frame as supervised learning
    reframed = series_to_supervised(scaled,timesteps, 1)
    # drop columns we don't want to predict
    #reframed.drop(reframed.columns[[10,11,12,13,14,15,16,17,18,19,20]], axis=1, inplace=True)
    reframed= pd.DataFrame(reframed)
    return reframed
#print(reframed.head())
#reframed.iloc[0]

#def cleadData( df1,df2):
    
#    return cleanSet
#####################################################################3
def loadModel( item ):
    # later...
    # load json and create model
    from keras.models import model_from_json

    json_file = open(myModelpath + item+'_model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(myModelpath + item +'_model.h5')
    print("Loaded model ", item," from disk")
    # evaluate loaded model on test data
    #loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) #61.29
    loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #score = loaded_model.evaluate( test_X, test_y, verbose=0)
    #print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
    return loaded_model

#######################################################################
def MakePredictions( dfr):
    
    dframeclean=cleanData(dfr)
    #djiframe=getData('DJI')  # this can be loaded once
    #djiframe=cleanData(djiframe)
    #dframe=dframe.join(djiframe, lsuffix='_left', rsuffix='_right')
    dframeclean=dframeclean.join(djiframe , rsuffix='_right')
    fullFrame=engFeatures(dframeclean)
    reframed=rescale(fullFrame)
    
    #test on entire dataframe
    test = reframed.values
    test_X, test_y = test[:, :-1], to_categorical(test[:, -1])
    test_X = numpy.reshape(test_X, (test_X.shape[0], test_X.shape[1],1))
    #print(test_X.shape)
    yhat = jason_model.predict(test_X)
    from numpy import argmax
    yhat=argmax(yhat,axis=1)
    #print(test_X.shape, test_y.shape,yhat.shape, dframe.shape)
   
    # reframing makes original dframe shorter
    # below code makes them all equal length by padding with zeros

    yhat=yhat.reshape(yhat.shape[0],1)
    #print(dfr.shape,yhat.shape)
    nz=pd.DataFrame(np.zeros(dfr.shape[0]-yhat.shape[0]))
    yhat1=nz.append(pd.DataFrame(yhat) )
    y=nz.append( pd.DataFrame(argmax(test_y,1) ))
    #print( yhat1.shape, y.shape )
    
    #put results back into dframe
    dfr['y']=y.values
    dfr['yhat']=yhat1.values
    dfr['yhathat']=np.zeros(yhat1.shape[0]) # prepare empty column
    print("Finished MakePredictions" )
    return dfr
#############################################################################
#############################################################################
## MAIN EXECUTION 
file = open(reportPath + 'Report.txt','w+') 
df1 = read_csv(settingsPath + 'List1.csv', header=None)
df2 = read_csv(settingsPath + 'List2.csv', header=None)
df3 = read_csv(settingsPath + 'List3.csv', header=None)
df4 = read_csv(settingsPath + 'List4.csv', header=None)
df5 = read_csv(settingsPath + 'List5.csv', header=None)
df6 = read_csv(settingsPath + 'List6.csv', header=None)
df7 = read_csv(settingsPath + 'BetaList.csv', header=None)
#df6 = read_csv(settingsPath + 'List6 - test.csv', header=None)
df=pd.concat([df2,df3,df4])
#df=df1
#df=pd.concat([df5,df6])
#df=df7
djiframe=getData('DJI')  # this can be loaded once
djiframe=cleanData(djiframe)

file.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +  " Verified " +str(len(df.index)) + "models \n") 

for index in range (df.shape[0]):
    itemname=df.iloc[index,0]
    #print ('Current item :', itemname)
    #itemname = 'BA'
    dframe = getData(itemname)
    jason_model = loadModel(itemname)

    df1  = MakePredictions(dframe)
    
#Analyse statistics
    y_cnt=df1.index[df1['yhat'] >0].size
    yhat_cnt = df1.index[df1['y'] >0].size
    tp_cnt = df1.index[np.where((df1['yhat'] >0) & (df1['y'] >0))].size 
    if( y_cnt>0):
        percent='%3.2f'%(tp_cnt/y_cnt)
    else:
        percent=0
    print( " it predicted ", y_cnt, "out of",yhat_cnt," dates in last 3 years,and ",tp_cnt,"(",percent,"%) were right")
    
    with open(reportPath + "History.txt", "a") as text_file:
        text_file.write("%4s predicted %d out of %d events in last 3 years, and %d ( %s %%) were right \n" % (itemname,y_cnt,yhat_cnt,tp_cnt,percent)  ) 
    
    df2=df1[-2:]
    if (df2['yhat'].any() > 0):
        print('----------got it',itemname)
        file.write("got it " + itemname +"\n") 

file.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n") 
file.close() 

#PrintReport(df1)
#df1.tail

## get all predicted values yhat and verify they are still valid when 
## at the end of the dataset. Cut dataframe in pieces and predict yhathat
#predictedlist=df1.index[df1['yhat'] >0]
#predictedlist[0]




Read from file  DJI
Read from file  CVX
Loaded model  CVX  from disk
Finished MakePredictions
 it predicted  35 out of 64  dates in last 3 years,and  25 ( 0.71 %) were right
Read from file  DIS
Loaded model  DIS  from disk
Finished MakePredictions
 it predicted  14 out of 37  dates in last 3 years,and  14 ( 1.00 %) were right
Read from file  DWDP
Loaded model  DWDP  from disk
Finished MakePredictions
 it predicted  50 out of 69  dates in last 3 years,and  40 ( 0.80 %) were right
Read from file  GE
Loaded model  GE  from disk
Finished MakePredictions
 it predicted  22 out of 36  dates in last 3 years,and  21 ( 0.95 %) were right
Read from file  GS
Loaded model  GS  from disk
Finished MakePredictions
 it predicted  67 out of 77  dates in last 3 years,and  53 ( 0.79 %) were right
Read from file  HD
Loaded model  HD  from disk
Finished MakePredictions
 it predicted  29 out of 43  dates in last 3 years,and  26 ( 0.90 %) were right
----------got it HD
Read from file  IBM
Loaded model  IBM  f

len(df.index)


In [None]:
filename=mypath+"AAPL.csv"
fo = open(filename, "r")
fo.seek(-80, os.SEEK_END)
line = fo.readlines()[-1]
line

In [None]:
def getLastLine(fname, maxLineLength=80):
  fp=file(fname, "rb")
  fp.seek(-maxLineLength-1, 2) # 2 means "from the end of the file"
  return fp.readlines()[-1]

filename=mypath+"AAPL.csv"
getLastLine(filename)