In [377]:
from stockDatabase import StockDatabase
from stockplot.models import Stock, StockData
import stockDepot
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import numpy as np

%matplotlib

Using matplotlib backend: TkAgg


In [395]:
def getStockDataframe(stocklist, fromDate, toDate):
    dateslist = []
    datalist = []
    dataMACDlist = []
    emaMACD = []
    dataPPOlist = []
    emaPPO = []
    lowBollist = []
    averageBollist = []
    highBollist = []
    ema15 = []
    ema30 = []
    ema50 = []
    ema100 = []
    ema200 = []

    for i in range(0, len(stocklist)):
        # get a stock
        teststock = StockDatabase(stocklist[i])
        
        # get history for stock fromDate to Date
        step = 1
        dates, data = teststock.getStockHistory('close', fromDate, toDate, step)

        # get data for Moving Average Convergence Divergence
        dataMACD = teststock.MACD(dates, data)

        # get 9 day Exponential Moving Average for MACD
        emaMACD += teststock.ExpAverage(dates, dataMACD, 9)

        # get data for Percentage Price Oscillator
        dataPPO = teststock.PPO(dates, data)

        # get 9 day EMA for PPO
        emaPPO += teststock.ExpAverage(dates, dataPPO, 9)

        # get bollinger band
        days = 20 # bollinger band based on 20-day Simple Moving Average
        factor = 2 # factor for standard deviation, lowBol = averageBol - 2 * std
        lowBol, averageBol, highBol = teststock.Bollinger(dates, data, days, factor)

        # get exponential moving averages
        ema15 += teststock.ExpAverage(dates, data, 15)
        ema30 += teststock.ExpAverage(dates, data, 30)
        ema50 += teststock.ExpAverage(dates, data, 50)
        ema100 += teststock.ExpAverage(dates, data, 100)
        ema200+= teststock.ExpAverage(dates, data, 200)

        dateslist += dates
        datalist += data
        dataMACDlist += dataMACD
        dataPPOlist += dataPPO
        lowBollist += lowBol
        averageBollist += averageBol
        highBollist += highBol
    
    # create pandas dataframe for all data
    dfdata = {'Date': dateslist, 'Price': datalist, 'EMA 15': ema15, 'EMA 30': ema30, 'EMA 50': ema50, 'EMA 100': ema100, 'EMA 200': ema200, 'MACD': dataMACDlist, 'MACD EMA': emaMACD, 'PPO': dataPPOlist, 'PPO EMA': emaPPO, 'Bollinger low': lowBollist, 'Bollinger': averageBollist, 'Bollinger high': highBollist}
    df = pd.DataFrame(dfdata, columns=['Date', 'Price', 'EMA 15', 'EMA 30', 'EMA 50', 'EMA 100', 'EMA 200', 'MACD', 'MACD EMA', 'PPO', 'PPO EMA', 'Bollinger low', 'Bollinger', 'Bollinger high'])
    return df

### Features

- Which features can be used to predict stock price change over the next n days? I.e. does the price increase or decrease?

- mostly features independent of the stock price (almost scaled)
- PPO
- net change
- percent change
- 10, 20, 30 day volatility
- EMA: positive cross, negative cross, higher -> find times when a shorter EMA crosses a longer one
- Find when price crosses EMA or is higher than EMA
- Find when EMA of MACD crosses MACD or is higher
- same for PPO
- find when price is higher than upper bollinger band, average one or lower than the lower band


- not yet implemented: (http://cs229.stanford.edu/proj2013/DaiZhang-MachineLearningInStockPriceTrendForecasting.pdf)
    - PE ratio
    - PX volume
    - PX ebitda
    - current enterprise value
    - quick ratio
    - alpha overridable
    - alpha for beta pm
    - beta raw overridable
    - risk premium
    - IS EPS
    - corresponding S&P 500 index

- not yet implemented: (https://arxiv.org/pdf/1603.00751.pdf)
    - Book value - the net asset value of a company, calculated by total assets minus intangible assets (patents, goodwill) and liabilities.
    - Market capitalization - the market value of a company's issued share capital; it is equal to the share price times the number of shares outstanding.
    - Change of stock Net price over the one month period
    - Percentage change of Net price over the one month period
    - Dividend yield - indicates how much a company pays out in dividends each year relative to its share price.
    - Earnings per share - a portion of a company's profit divided by the number of issued shares. Earnings per share serves as an indicator of a company's profitability.
    - Earnings per share growth – the growth of earnings per share over the trailing one-year period.
    - Sales revenue turnover -
    - Net revenue - the proceeds from the sale of an asset, minus commissions, taxes, or other expenses related to the sale.
    - Net revenue growth – the growth of Net revenue over the trailing one-year period.
    - Sales growth – sales growth over the trailing one-year period.
    - Price to earnings ratio – measures company’s current share price relative to its per-share earnings.
    - Price to earnings ratio, five years average – averaged price to earnings ratio over the period of five years.
    - Price to book ratio - compares a company's current market price to its book value.
    - Price to sales ratio – ratio calculated by dividing the company's market cap by the revenue in the most recent year.
    - Dividend per share - is the total dividends paid out over an entire year divided by the number of
    - ordinary shares issued.
    - Current ratio - compares a firm's current assets to its current liabilities.
    - Quick ratio - compares the total amount of cash, marketable securities and accounts receivable to the amount of current liabilities.
    - Total debt to equity - ratio used to measure a company's financial leverage, calculated by dividing a company's total liabilities by its stockholders' equity.
    - Analyst ratio – ratio given by human analyst.
    - Revenue growth adjusted by 5 year compound annual growth ratio
    - Profit margin – a profitability ratio calculated as net income divided by revenue, or net profits divided by sales
    - Operating margin - ratio used to measure a company's pricing strategy and operating efficiency. It is a measurement of what proportion of a company's revenue is left over after paying for variable costs of production such as wages, raw materials, etc.
    - Asset turnover - the ratio of the value of a company’s sales or revenues generated relative to the value of its assets

Source: https://www.cs.princeton.edu/sites/default/files/uploads/saahil_madge.pdf
- when deciding to buy or not, use past of stock to determine accuracy of machine learning algorithm for that particular stock

Source, master thesis: http://www.diva-portal.org/smash/get/diva2:354463/FULLTEXT01.pdf

Source, MIT based on earnings reports: http://ocw.mit.edu/courses/sloan-school-of-management/15-097-prediction-machine-learning-and-statistics-spring-2012/projects/MIT15_097S12_proj2.pdf

Source, Lehman: https://www.cis.upenn.edu/~mkearns/papers/rlexec.pdf

Source, Github App: https://github.com/DMTSource/daily-stock-forecast

In [397]:
def crossing(feature1, feature2, df):
    # create feature for feature ema crossing feature
    poscross = [0] # positive cross
    negcross = [0] # negative cross
    higher = [0] # ema higher than feature
    
    feature1 = df[feature1].values
    feature2 = df[feature2].values

    for i in range (1, len(feature1)):
        # check if ema is crossing feature
        if feature1[i-1] < feature2[i-1] and feature1[i] > feature2[i]:
            poscross.append(1)
            negcross.append(0)
        elif feature1[i-1] > feature2[i-1] and feature1[i] < feature2[i]:
            poscross.append(0)
            negcross.append(1)
        else:
            poscross.append(0)
            negcross.append(0)
            
        # check if ema is higher or lower than feature
        if feature1[i] > feature2[i]:
            higher.append(1)
        else:
            higher.append(0)
    return poscross, negcross, higher

In [398]:
from math import sqrt

def createFeatures(df):
    # PPO as feature
    df['featPPO'] = stockdf['PPO']

    # price change features
    netchange = [0]
    price = df['Price'].values
    netchange += [price[i]-price[i-1] for i in range(1, len(price))]
    percentchange = [netchange[i]/price[i] * 100 for i in range(0, len(price))]

    # volatility based on price change
    sqrt254 = sqrt(254) # square root of yearly trading days
    volatility10 = [0] * 10
    volatility20 = [0] * 20
    volatility30 = [0] * 30
    volatility10 += [sqrt254 * np.std(percentchange[i-10:i]) for i in range(10, len(price))]
    volatility20 += [sqrt254 * np.std(percentchange[i-20:i]) for i in range(20, len(price))]
    volatility30 += [sqrt254 * np.std(percentchange[i-30:i]) for i in range(30, len(price))]

    # save in dataframe
    df['Net Change'] = netchange
    df['Perc Change'] = percentchange
    df['10 day Volatility'] = volatility10
    df['20 day Volatility'] = volatility20
    df['30 day Volatility'] = volatility30
    
    # check if shorter EMA crosses a longer one

    poscross, negcross, higher = crossing('EMA 15', 'EMA 30', df)
    df['EMA 15 / 30 poscross'] = poscross
    df['EMA 15 / 30 negcross'] = negcross
    df['EMA 15 / 30 higher'] = higher

    poscross, negcross, higher = crossing('EMA 15', 'EMA 50', df)
    df['EMA 15 / 50 poscross'] = poscross
    df['EMA 15 / 50 negcross'] = negcross
    df['EMA 15 / 50 higher'] = higher

    poscross, negcross, higher = crossing('EMA 15', 'EMA 100', df)
    df['EMA 15 / 100 poscross'] = poscross
    df['EMA 15 / 100 negcross'] = negcross
    df['EMA 15 / 100 higher'] = higher

    poscross, negcross, higher = crossing('EMA 15', 'EMA 200', df)
    df['EMA 15 / 200 poscross'] = poscross
    df['EMA 15 / 200 negcross'] = negcross
    df['EMA 15 / 200 higher'] = higher

    poscross, negcross, higher = crossing('EMA 30', 'EMA 50', df)
    df['EMA 30 / 50 poscross'] = poscross
    df['EMA 30 / 50 negcross'] = negcross
    df['EMA 30 / 50 higher'] = higher

    poscross, negcross, higher = crossing('EMA 30', 'EMA 100', df)
    df['EMA 30 / 100 poscross'] = poscross
    df['EMA 30 / 100 negcross'] = negcross
    df['EMA 30 / 100 higher'] = higher

    poscross, negcross, higher = crossing('EMA 30', 'EMA 200', df)
    df['EMA 30 / 200 poscross'] = poscross
    df['EMA 30 / 200 negcross'] = negcross
    df['EMA 30 / 200 higher'] = higher

    poscross, negcross, higher = crossing('EMA 50', 'EMA 100', df)
    df['EMA 50 / 100 poscross'] = poscross
    df['EMA 50 / 100 negcross'] = negcross
    df['EMA 50 / 100 higher'] = higher

    poscross, negcross, higher = crossing('EMA 50', 'EMA 200', df)
    df['EMA 50 / 200 poscross'] = poscross
    df['EMA 50 / 200 negcross'] = negcross
    df['EMA 50 / 200 higher'] = higher
    
    # check if price crosses an EMA

    poscross, negcross, higher = crossing('Price', 'EMA 15', df)
    df['Price / EMA 15 poscross'] = poscross
    df['Price / EMA 15 negcross'] = negcross
    df['Price / EMA 15 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 30', df)
    df['Price / EMA 30 poscross'] = poscross
    df['Price / EMA 30 negcross'] = negcross
    df['Price / EMA 30 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 50', df)
    df['Price / EMA 50 poscross'] = poscross
    df['Price / EMA 50 negcross'] = negcross
    df['Price / EMA 50 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 100', df)
    df['Price / EMA 100 poscross'] = poscross
    df['Price / EMA 100 negcross'] = negcross
    df['Price / EMA 100 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 200', df)
    df['Price / EMA 200 poscross'] = poscross
    df['Price / EMA 200 negcross'] = negcross
    df['Price / EMA 200 higher'] = higher
    
    # check if EMA of MACD crosses MACD
    poscross, negcross, higher = crossing('MACD EMA', 'MACD', df)
    df['MACD poscross'] = poscross
    df['MACD negcross'] = negcross
    df['MACD higher'] = higher
    
    # check if EMA of PPO crosses PPO
    poscross, negcross, higher = crossing('PPO EMA', 'PPO', df)
    df['PPO poscross'] = poscross
    df['PPO negcross'] = negcross
    df['PPO higher'] = higher
    
    # create features for bollinger
    higheraverage = [] # price higher than average bollinger band
    higherhigh = [] # price higher than upper bollinger band
    lowerlow = [] # price lower than lower bollinger band

    price = df['Price'].values
    bollinger = df['Bollinger'].values
    bollingerhigh = df['Bollinger high'].values
    bollingerlow = df['Bollinger low'].values
    for i in range (0, len(price)):
            # check if price is higher than Bollinger
            if price[i] > bollinger[i]:
                higheraverage.append(1)
            else:
                higheraverage.append(0)

            if price[i] > bollingerhigh[i]:
                higherhigh.append(1)
            else:
                higherhigh.append(0)

            if price[i] < bollingerlow[i]:
                lowerlow.append(1)
            else:
                lowerlow.append(0)

    df['Bollinger higher'] = higheraverage
    df['Bollinger higher high'] = higherhigh
    df['Bollinger lower low'] = lowerlow
    
    return df

### Prediction Feature

- What should be predicted?
- Here: is the price higher or lower the next n days? (1, 5, 10, 20, 50, 100, 200)

In [400]:
# predict if price is higher the next n days.

def predictY(df):
    higher1 = []
    higher5 = []
    higher10 = []
    higher20 = []
    higher50 = []
    higher100 = []
    higher200 = []

    price = df['Price'].values
    for i in range (0, len(price)-200):

        if price[i+1] > price[i]:
            higher1.append(1)
        else:
            higher1.append(0)

        if price[i+5] > price[i]:
            higher5.append(1)
        else:
            higher5.append(0)

        if price[i+10] > price[i]:
            higher10.append(1)
        else:
            higher10.append(0)

        if price[i+20] > price[i]:
            higher20.append(1)
        else:
            higher20.append(0)

        if price[i+50] > price[i]:
            higher50.append(1)
        else:
            higher50.append(0)

        if price[i+100] > price[i]:
            higher100.append(1)
        else:
            higher100.append(0)

        if price[i+200] > price[i]:
            higher200.append(1)
        else:
            higher200.append(0)

    for i in range (len(price)-200, len(price)):
        higher1.append(0)
        higher5.append(0)
        higher10.append(0)
        higher20.append(0)
        higher50.append(0)
        higher100.append(0)
        higher200.append(0)

    # for last day append 0
    higher.append(0)

    df['Prediction Feature 1'] = higher1
    df['Prediction Feature 5'] = higher5
    df['Prediction Feature 10'] = higher10
    df['Prediction Feature 20'] = higher20
    df['Prediction Feature 50'] = higher50
    df['Prediction Feature 100'] = higher100
    df['Prediction Feature 200'] = higher200
    
    return df

### Prediction

- Machine Learning begins here

In [402]:
# create dataframe with stocks
stocklist = []
stocks = Stock.objects.filter(source = 'Quandl')
for i in range(0, 10):#int(len(stocks)/4)):
    stocklist.append(stocks[i].sourceSymbol)

stockdf = getStockDataframe(stocklist, '2000-01-01', '2013-01-01')

# add features
stockdf = createFeatures(stockdf)

# add prediction features
stockdf = predictY(stockdf)
stockdf.head(3)

Unnamed: 0,Date,Price,EMA 15,EMA 30,EMA 50,EMA 100,EMA 200,MACD,MACD EMA,PPO,...,Bollinger higher,Bollinger higher high,Bollinger lower low,Prediction Feature 1,Prediction Feature 5,Prediction Feature 10,Prediction Feature 20,Prediction Feature 50,Prediction Feature 100,Prediction Feature 200
0,1153865000.0,14.94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,1,0,0,1,1,1,1,1,1
1,1153951000.0,14.85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,1,0,1,1,1,1,1,1,1
2,1154038000.0,14.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,1,0,1,1,1,1,1,1,1


In [403]:
cols = []
cols.append(stockdf.columns.get_loc('featPPO'))
cols.append(stockdf.columns.get_loc('Prediction Feature 1'))
cols

[14, 71]

In [404]:
# drop first 200 rows of stockdf and last 200 rows, missing values because of 200 day moving average and price prediction for next 200 days
stockdf.drop(stockdf.index[:200], inplace=True)
stockdf.drop(stockdf.index[len(stockdf)-200:], inplace=True)

In [405]:
# use random forest for prediction
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split

featurestart = stockdf.columns.get_loc('featPPO')
featureend = stockdf.columns.get_loc('Prediction Feature 1')
score = []
rows, cols = stockdf.shape

# split stock data into a training set and test set -> randomly selects 25% of data as test set
train, test = train_test_split(stockdf, test_size = 0.25)

# fit the random forest regressor
for i in range(featureend, cols):
    print(i)
    stockforest = RandomForestClassifier(n_estimators = 100)
    stockforest = stockforest.fit(train.values[0::, featurestart:featureend].astype(int), train.values[0::, i].astype(int))
    score.append(stockforest.score(test.values[0::, featurestart:featureend].astype(int), test.values[0::, i].astype(int)))
score

71
72
73
74
75
76
77


[0.52027733779624041,
 0.59791996652819701,
 0.6193478975523744,
 0.63488837751412097,
 0.65126565255073066,
 0.66244284390783303,
 0.67284301126684798]

In [538]:
# final training of models:
stockforest5 = RandomForestClassifier(n_estimators = 100)
stockforest5 = stockforest5.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+1].astype(int))
print(5)

stockforest20 = RandomForestClassifier(n_estimators = 100)
stockforest20 = stockforest20.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+3].astype(int))
print(20)

stockforest50 = RandomForestClassifier(n_estimators = 100)
stockforest50 = stockforest50.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+4].astype(int))
print(50)

stockforest100 = RandomForestClassifier(n_estimators = 100)
stockforest100 = stockforest100.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+5].astype(int))
print(100)

5
20
50
100


### Trading System

- implement simple trading system to test prediction

In [540]:
# create depot as test.
from django.contrib.auth.models import User

depotname = 'PredictionTest'
user = User.objects.get(username='oliver')
value = 10000
depot = stockDepot.createDepot(user, depotname, value)
print (depot)

oliver PredictionTest


In [547]:
# Import stocks for prediction:
today = datetime.datetime.now().strftime("%Y-%m-%d")
stockdfAccuracy = [] # used to check accuracy for each stock
stockdfDepotList = [] # used to run trading system

for i in range(0, len(stocklist)): # take second quarter for now
    print (i/len(stocklist)*100)
    
    # create dataframe to test accuracy
    stockdfTest = getStockDataframe([stocklist[i]], '2000-01-01', '2013-01-01')
    stockdfTest = createFeatures(stockdfTest)
    stockdfTest = predictY(stockdfTest)
    stockdfTest.drop(stockdfTest.index[:200], inplace=True)
    stockdfTest.drop(stockdfTest.index[len(stockdfTest)-200:], inplace=True)
    
    # assess accuracy for different prediction times
    score5 = stockforest5.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+1].astype(int))
    score20 = stockforest20.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+3].astype(int))
    score50 = stockforest50.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+4].astype(int))
    score100 = stockforest100.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+5].astype(int))
    stockdfAccuracy.append([score5, score20, score50, score100])
    
    # create dataframe for trading system
    stockdfDepot = getStockDataframe(stocklist, '2012-01-01', today)
    stockdfDepot = createFeatures(stockdfDepot)
    stockdfDepot.drop(stockdfDepot.index[:200], inplace=True)
    stockdfDepot.drop(stockdfDepot.index[len(stockdfDepot)-200:], inplace=True)
    stockdfDepotList.append(stockdfDepot)
    
    print(stockdfAccuracy[i])

0.0
[0.83348254252461951, 0.83348254252461951, 0.84601611459265891, 0.84392718591465232]
2.5


KeyboardInterrupt: 

In [544]:
def buyorsell(depot):#'probabilities, stocklist, depot):
    depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysis(depot)
    print(depotcontent_total, balance, depotvalue, available, change)
    
buyorsell(depot)

[] 10000.0 10000 10000.0 0.0


In [537]:
# go through dataframe
for i in range(0, 20): #len(stockdfDepotList[0])): # for each day
    probabilities = []
    for j in range(0, int(len(stocks)/4)): # for each stock
        if(len(stockdfDepotList[j]) < i+1):
            probabilities.append(0)
        else:
            proba5 = stockforest5.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart:featureend].astype(int))
            #proba20 = stockforest20.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart:featureend].astype(int))
            proba50 = stockforest50.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart:featureend].astype(int))
            #proba100 = stockforest100.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart:featureend].astype(int))
            # multiply by accuracies
            #proba5 = proba5[0][0] * stockdfAccuracy[j][0]
            #proba20 = proba20[0][0] * stockdfAccuracy[j][1]
            #proba50 = proba50[0][0] * stockdfAccuracy[j][2]
            #proba100 = proba100[0][0] * stockdfAccuracy[j][3]
            #probabilities.append(proba5 * proba20 * proba50 * proba100)
            probabilities.append([proba5[0][0], proba50[0][0]])
    print(probabilities)

[[0.90000000000000002, 0.90000000000000002], [0.34999999999999998, 0.34999999999999998], [0.37, 0.37], [0.49400000000000005, 0.49400000000000005], [0.52000000000000002, 0.52000000000000002], [0.46999999999999997, 0.46999999999999997], [0.66000000000000003, 0.66000000000000003], [0.85999999999999999, 0.85999999999999999], [0.78000000000000003, 0.78000000000000003], [0.44, 0.44], [0.28999999999999998, 0.28999999999999998], [0.56000000000000005, 0.56000000000000005], [0.34000000000000002, 0.34000000000000002], [0.31, 0.31], [0.81999999999999995, 0.81999999999999995], [0.28000000000000003, 0.28000000000000003], [0.58999999999999997, 0.58999999999999997], [0.65000000000000002, 0.65000000000000002], [0.73999999999999999, 0.73999999999999999], [0.63, 0.63], [0.31, 0.31], [0.56000000000000005, 0.56000000000000005], [0.81999999999999995, 0.81999999999999995], [0.33000000000000002, 0.33000000000000002], [0.73999999999999999, 0.73999999999999999], [0.48999999999999999, 0.48999999999999999], [0.53

KeyboardInterrupt: 

In [361]:
# sell stock
stockid = int(request.POST.get('stock')) # returns stock ID
depotname =  request.session['depotname']
depot = Depot.objects.get(depotname = depotname)
amount = int(request.POST.get('amount'))
fee = float(request.POST.get('fee'))
datatype = 'close'
stockDepot.sellStock(depot, stockid, amount, datatype, fee)

NameError: name 'request' is not defined

In [None]:
# buy stock
depotname =  request.session['depotname']
depot = Depot.objects.get(depotname = depotname)
stockid= request.POST.get('select_stock')
amount = int(request.POST.get('amount'))
datatype = 'close'
fee = float(request.POST.get('fees'))
stockDepot.buyStock(depot, stockid, amount, datatype, fee)