In [1]:
from stockDatabase import StockDatabase
from stockplot.models import Stock, StockData
import stockDepot
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import numpy as np

%matplotlib

Using matplotlib backend: TkAgg


In [2]:
def getStockDataframe(stocklist, fromDate, toDate):
    dateslist = []
    datalist = []
    dataMACDlist = []
    emaMACD = []
    dataPPOlist = []
    emaPPO = []
    lowBollist = []
    averageBollist = []
    highBollist = []
    ema15 = []
    ema30 = []
    ema50 = []
    ema100 = []
    ema200 = []

    for i in range(0, len(stocklist)):
        # get a stock
        teststock = StockDatabase(stocklist[i].sourceSymbol)
        
        # get history for stock fromDate to Date
        step = 1
        dates, data = teststock.getStockHistoryDate('close', fromDate, toDate, step)

        # get data for Moving Average Convergence Divergence
        dataMACD = teststock.MACD(dates, data)

        # get 9 day Exponential Moving Average for MACD
        emaMACD += teststock.ExpAverage(dates, dataMACD, 9)

        # get data for Percentage Price Oscillator
        dataPPO = teststock.PPO(dates, data)

        # get 9 day EMA for PPO
        emaPPO += teststock.ExpAverage(dates, dataPPO, 9)

        # get bollinger band
        days = 20 # bollinger band based on 20-day Simple Moving Average
        factor = 2 # factor for standard deviation, lowBol = averageBol - 2 * std
        lowBol, averageBol, highBol = teststock.Bollinger(dates, data, days, factor)

        # get exponential moving averages
        ema15 += teststock.ExpAverage(dates, data, 15)
        ema30 += teststock.ExpAverage(dates, data, 30)
        ema50 += teststock.ExpAverage(dates, data, 50)
        ema100 += teststock.ExpAverage(dates, data, 100)
        ema200+= teststock.ExpAverage(dates, data, 200)

        dateslist += dates
        datalist += data
        dataMACDlist += dataMACD
        dataPPOlist += dataPPO
        lowBollist += lowBol
        averageBollist += averageBol
        highBollist += highBol
    
    # create pandas dataframe for all data
    dfdata = {'Date': dateslist, 'Price': datalist, 'EMA 15': ema15, 'EMA 30': ema30, 'EMA 50': ema50, 'EMA 100': ema100, 'EMA 200': ema200, 'MACD': dataMACDlist, 'MACD EMA': emaMACD, 'PPO': dataPPOlist, 'PPO EMA': emaPPO, 'Bollinger low': lowBollist, 'Bollinger': averageBollist, 'Bollinger high': highBollist}
    df = pd.DataFrame(dfdata, columns=['Date', 'Price', 'EMA 15', 'EMA 30', 'EMA 50', 'EMA 100', 'EMA 200', 'MACD', 'MACD EMA', 'PPO', 'PPO EMA', 'Bollinger low', 'Bollinger', 'Bollinger high'])
    return df

### Features

- Which features can be used to predict stock price change over the next n days? I.e. does the price increase or decrease?

- mostly features independent of the stock price (almost scaled)
- PPO
- net change
- percent change
- 10, 20, 30 day volatility
- EMA: positive cross, negative cross, higher -> find times when a shorter EMA crosses a longer one
- Find when price crosses EMA or is higher than EMA
- Find when EMA of MACD crosses MACD or is higher
- same for PPO
- find when price is higher than upper bollinger band, average one or lower than the lower band


- not yet implemented: (http://cs229.stanford.edu/proj2013/DaiZhang-MachineLearningInStockPriceTrendForecasting.pdf)
    - PE ratio
    - PX volume
    - PX ebitda
    - current enterprise value
    - quick ratio
    - alpha overridable
    - alpha for beta pm
    - beta raw overridable
    - risk premium
    - IS EPS
    - corresponding S&P 500 index

- not yet implemented: (https://arxiv.org/pdf/1603.00751.pdf)
    - Book value - the net asset value of a company, calculated by total assets minus intangible assets (patents, goodwill) and liabilities.
    - Market capitalization - the market value of a company's issued share capital; it is equal to the share price times the number of shares outstanding.
    - Change of stock Net price over the one month period
    - Percentage change of Net price over the one month period
    - Dividend yield - indicates how much a company pays out in dividends each year relative to its share price.
    - Earnings per share - a portion of a company's profit divided by the number of issued shares. Earnings per share serves as an indicator of a company's profitability.
    - Earnings per share growth – the growth of earnings per share over the trailing one-year period.
    - Sales revenue turnover -
    - Net revenue - the proceeds from the sale of an asset, minus commissions, taxes, or other expenses related to the sale.
    - Net revenue growth – the growth of Net revenue over the trailing one-year period.
    - Sales growth – sales growth over the trailing one-year period.
    - Price to earnings ratio – measures company’s current share price relative to its per-share earnings.
    - Price to earnings ratio, five years average – averaged price to earnings ratio over the period of five years.
    - Price to book ratio - compares a company's current market price to its book value.
    - Price to sales ratio – ratio calculated by dividing the company's market cap by the revenue in the most recent year.
    - Dividend per share - is the total dividends paid out over an entire year divided by the number of
    - ordinary shares issued.
    - Current ratio - compares a firm's current assets to its current liabilities.
    - Quick ratio - compares the total amount of cash, marketable securities and accounts receivable to the amount of current liabilities.
    - Total debt to equity - ratio used to measure a company's financial leverage, calculated by dividing a company's total liabilities by its stockholders' equity.
    - Analyst ratio – ratio given by human analyst.
    - Revenue growth adjusted by 5 year compound annual growth ratio
    - Profit margin – a profitability ratio calculated as net income divided by revenue, or net profits divided by sales
    - Operating margin - ratio used to measure a company's pricing strategy and operating efficiency. It is a measurement of what proportion of a company's revenue is left over after paying for variable costs of production such as wages, raw materials, etc.
    - Asset turnover - the ratio of the value of a company’s sales or revenues generated relative to the value of its assets

Source: https://www.cs.princeton.edu/sites/default/files/uploads/saahil_madge.pdf
- when deciding to buy or not, use past of stock to determine accuracy of machine learning algorithm for that particular stock

Source, master thesis: http://www.diva-portal.org/smash/get/diva2:354463/FULLTEXT01.pdf

Source, MIT based on earnings reports: http://ocw.mit.edu/courses/sloan-school-of-management/15-097-prediction-machine-learning-and-statistics-spring-2012/projects/MIT15_097S12_proj2.pdf

Source, Lehman: https://www.cis.upenn.edu/~mkearns/papers/rlexec.pdf

Source, Github App: https://github.com/DMTSource/daily-stock-forecast

In [3]:
def crossing(feature1, feature2, df):
    # create feature for feature ema crossing feature
    poscross = [0] # positive cross
    negcross = [0] # negative cross
    higher = [0] # ema higher than feature
    
    feature1 = df[feature1].values
    feature2 = df[feature2].values

    for i in range (1, len(feature1)):
        # check if ema is crossing feature
        if feature1[i-1] < feature2[i-1] and feature1[i] > feature2[i]:
            poscross.append(1)
            negcross.append(0)
        elif feature1[i-1] > feature2[i-1] and feature1[i] < feature2[i]:
            poscross.append(0)
            negcross.append(1)
        else:
            poscross.append(0)
            negcross.append(0)
            
        # check if ema is higher or lower than feature
        if feature1[i] > feature2[i]:
            higher.append(1)
        else:
            higher.append(0)
    return poscross, negcross, higher

In [4]:
from math import sqrt

def createFeatures(df):
    # PPO as feature
    df['featPPO'] = stockdf['PPO']

    # price change features
    netchange = [0]
    price = df['Price'].values
    netchange += [price[i]-price[i-1] for i in range(1, len(price))]
    percentchange = [netchange[i]/price[i] * 100 for i in range(0, len(price))]

    # volatility based on price change
    sqrt254 = sqrt(254) # square root of yearly trading days
    volatility10 = [0] * 10
    volatility20 = [0] * 20
    volatility30 = [0] * 30
    volatility10 += [sqrt254 * np.std(percentchange[i-10:i]) for i in range(10, len(price))]
    volatility20 += [sqrt254 * np.std(percentchange[i-20:i]) for i in range(20, len(price))]
    volatility30 += [sqrt254 * np.std(percentchange[i-30:i]) for i in range(30, len(price))]

    # save in dataframe
    df['Net Change'] = netchange
    df['Perc Change'] = percentchange
    df['10 day Volatility'] = volatility10
    df['20 day Volatility'] = volatility20
    df['30 day Volatility'] = volatility30
    
    # check if shorter EMA crosses a longer one

    poscross, negcross, higher = crossing('EMA 15', 'EMA 30', df)
    df['EMA 15 / 30 poscross'] = poscross
    df['EMA 15 / 30 negcross'] = negcross
    df['EMA 15 / 30 higher'] = higher

    poscross, negcross, higher = crossing('EMA 15', 'EMA 50', df)
    df['EMA 15 / 50 poscross'] = poscross
    df['EMA 15 / 50 negcross'] = negcross
    df['EMA 15 / 50 higher'] = higher

    poscross, negcross, higher = crossing('EMA 15', 'EMA 100', df)
    df['EMA 15 / 100 poscross'] = poscross
    df['EMA 15 / 100 negcross'] = negcross
    df['EMA 15 / 100 higher'] = higher
    
    poscross, negcross, higher = crossing('EMA 15', 'EMA 200', df)
    df['EMA 15 / 200 poscross'] = poscross
    df['EMA 15 / 200 negcross'] = negcross
    df['EMA 15 / 200 higher'] = higher

    poscross, negcross, higher = crossing('EMA 30', 'EMA 50', df)
    df['EMA 30 / 50 poscross'] = poscross
    df['EMA 30 / 50 negcross'] = negcross
    df['EMA 30 / 50 higher'] = higher

    poscross, negcross, higher = crossing('EMA 30', 'EMA 100', df)
    df['EMA 30 / 100 poscross'] = poscross
    df['EMA 30 / 100 negcross'] = negcross
    df['EMA 30 / 100 higher'] = higher
    
    poscross, negcross, higher = crossing('EMA 30', 'EMA 200', df)
    df['EMA 30 / 200 poscross'] = poscross
    df['EMA 30 / 200 negcross'] = negcross
    df['EMA 30 / 200 higher'] = higher

    poscross, negcross, higher = crossing('EMA 50', 'EMA 100', df)
    df['EMA 50 / 100 poscross'] = poscross
    df['EMA 50 / 100 negcross'] = negcross
    df['EMA 50 / 100 higher'] = higher
    
    poscross, negcross, higher = crossing('EMA 50', 'EMA 200', df)
    df['EMA 50 / 200 poscross'] = poscross
    df['EMA 50 / 200 negcross'] = negcross
    df['EMA 50 / 200 higher'] = higher
    
    poscross, negcross, higher = crossing('EMA 100', 'EMA 200', df)
    df['EMA 100 / 200 poscross'] = poscross
    df['EMA 100 / 200 negcross'] = negcross
    df['EMA 100 / 200 higher'] = higher

    
    # check if price crosses an EMA

    poscross, negcross, higher = crossing('Price', 'EMA 15', df)
    df['Price / EMA 15 poscross'] = poscross
    df['Price / EMA 15 negcross'] = negcross
    df['Price / EMA 15 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 30', df)
    df['Price / EMA 30 poscross'] = poscross
    df['Price / EMA 30 negcross'] = negcross
    df['Price / EMA 30 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 50', df)
    df['Price / EMA 50 poscross'] = poscross
    df['Price / EMA 50 negcross'] = negcross
    df['Price / EMA 50 higher'] = higher

    poscross, negcross, higher = crossing('Price', 'EMA 100', df)
    df['Price / EMA 100 poscross'] = poscross
    df['Price / EMA 100 negcross'] = negcross
    df['Price / EMA 100 higher'] = higher
    
    poscross, negcross, higher = crossing('Price', 'EMA 200', df)
    df['Price / EMA 200 poscross'] = poscross
    df['Price / EMA 200 negcross'] = negcross
    df['Price / EMA 200 higher'] = higher
    
    # check if EMA of MACD crosses MACD
    poscross, negcross, higher = crossing('MACD EMA', 'MACD', df)
    df['MACD poscross'] = poscross
    df['MACD negcross'] = negcross
    df['MACD higher'] = higher
    
    # check if EMA of PPO crosses PPO
    poscross, negcross, higher = crossing('PPO EMA', 'PPO', df)
    df['PPO poscross'] = poscross
    df['PPO negcross'] = negcross
    df['PPO higher'] = higher
    
    # create features for bollinger
    higheraverage = [] # price higher than average bollinger band
    higherhigh = [] # price higher than upper bollinger band
    lowerlow = [] # price lower than lower bollinger band

    price = df['Price'].values
    bollinger = df['Bollinger'].values
    bollingerhigh = df['Bollinger high'].values
    bollingerlow = df['Bollinger low'].values
    for i in range (0, len(price)):
            # check if price is higher than Bollinger
            if price[i] > bollinger[i]:
                higheraverage.append(1)
            else:
                higheraverage.append(0)

            if price[i] > bollingerhigh[i]:
                higherhigh.append(1)
            else:
                higherhigh.append(0)

            if price[i] < bollingerlow[i]:
                lowerlow.append(1)
            else:
                lowerlow.append(0)

    df['Bollinger higher'] = higheraverage
    df['Bollinger higher high'] = higherhigh
    df['Bollinger lower low'] = lowerlow
    
    return df

### Prediction Features

- What should be predicted?
- Here: is the price higher or lower the next n days? (1, 5, 10, 20, 50, 100)

In [5]:
# predict if price is higher the next n days.

def predictY(df):
    #higher1 = []
    higher5 = []
    #higher10 = []
    higher20 = []
    higher50 = []

    price = df['Price'].values
    #for i in range (0, len(price)-1):
    #    if price[i+1] > price[i]:
    #        higher1.append(1)
    #    else:
    #        higher1.append(0)
            
    for i in range (0, len(price)-5):
        if price[i+5] > price[i]:
            higher5.append(1)
        else:
            higher5.append(0)

    #for i in range (0, len(price)-10):
    #    if price[i+10] > price[i]:
    #        higher10.append(1)
    #    else:
    #        higher10.append(0)

    for i in range (0, len(price)-20):
        if price[i+20] > price[i]:
            higher20.append(1)
        else:
            higher20.append(0)

    for i in range (0, len(price)-50):
        if price[i+50] > price[i]:
            higher50.append(1)
        else:
            higher50.append(0)

    # for last days append 0
    #for i in range (len(price)-1, len(price)):
    #    higher1.append(0)
    for i in range (len(price)-5, len(price)):
        higher5.append(0)
    #for i in range (len(price)-10, len(price)):
    #    higher10.append(0)
    for i in range (len(price)-20, len(price)):
        higher20.append(0)
    for i in range (len(price)-50, len(price)):
        higher50.append(0)

    #df['Prediction Feature 1'] = higher1
    df['Prediction Feature 5'] = higher5
    #df['Prediction Feature 10'] = higher10
    df['Prediction Feature 20'] = higher20
    df['Prediction Feature 50'] = higher50
    
    return df

### Prediction

- Machine Learning begins here

In [6]:
# create dataframe with stocks
stocklist = []
stocks = Stock.objects.filter(source = 'Quandl')
for i in range(0, int(len(stocks)/3)):
    stocklist.append(stocks[i])

start = datetime.datetime.strptime('2000-01-01', "%Y-%m-%d").timestamp()
end = datetime.datetime.strptime('2012-04-01', "%Y-%m-%d").timestamp()
stockdf = getStockDataframe(stocklist, start, end)

# add features
stockdf = createFeatures(stockdf)

# add prediction features
stockdf = predictY(stockdf)
stockdf.head(3)

Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too

Unnamed: 0,Date,Price,EMA 15,EMA 30,EMA 50,EMA 100,EMA 200,MACD,MACD EMA,PPO,...,MACD higher,PPO poscross,PPO negcross,PPO higher,Bollinger higher,Bollinger higher high,Bollinger lower low,Prediction Feature 5,Prediction Feature 20,Prediction Feature 50
0,1153865000.0,14.94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,1,0,1,1,1
1,1153951000.0,14.85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,1,0,1,1,1
2,1154038000.0,14.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,1,0,1,1,1


In [7]:
cols = []
cols.append(stockdf.columns.get_loc('featPPO'))
cols.append(stockdf.columns.get_loc('Prediction Feature 5'))
cols

[14, 74]

In [8]:
# drop first 200 rows of stockdf and last 200 rows, missing values because of 200 day moving average and price prediction for next 200 days
#stockdf.drop(stockdf.index[:200], inplace=True)
#stockdf.drop(stockdf.index[len(stockdf)-50:], inplace=True)

In [9]:
stockdf.tail(3)

Unnamed: 0,Date,Price,EMA 15,EMA 30,EMA 50,EMA 100,EMA 200,MACD,MACD EMA,PPO,...,MACD higher,PPO poscross,PPO negcross,PPO higher,Bollinger higher,Bollinger higher high,Bollinger lower low,Prediction Feature 5,Prediction Feature 20,Prediction Feature 50
128730,1332886000.0,2.875,2.829299,2.774152,2.710691,2.605492,2.54206,0.05494,0.045808,1.970586,...,0,0,0,0,1,0,0,0,0,0
128731,1332972000.0,2.793,2.824761,2.775368,2.713918,2.609205,2.544557,0.046887,0.046023,1.681512,...,0,0,0,0,1,0,0,0,0,0
128732,1333058000.0,2.832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,1,0,0,0,0


In [10]:
# use random forest for prediction
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split

featurestart = stockdf.columns.get_loc('featPPO')
featureend = stockdf.columns.get_loc('Prediction Feature 5')
score = []
rows, cols = stockdf.shape

# split stock data into a training set and test set -> randomly selects 25% of data as test set
train, test = train_test_split(stockdf, test_size = 0.25)

# fit the random forest regressor
for i in range(featureend, cols):
    print(i)
    stockforest = RandomForestClassifier(n_estimators = 100)
    stockforest = stockforest.fit(train.values[0::, featurestart:featureend].astype(int), train.values[0::, i].astype(int))
    score.append(stockforest.score(test.values[0::, featurestart:featureend].astype(int), test.values[0::, i].astype(int)))
score

74
75
76


[0.60222470792940597, 0.64227566492667165, 0.66868630375341787]

In [11]:
# final training of models:
featurecol = stockdf.columns.get_loc('Prediction Feature 5')
stockforest5 = RandomForestClassifier(n_estimators = 100)
stockforest5 = stockforest5.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(5)

featurecol = stockdf.columns.get_loc('Prediction Feature 20')
stockforest20 = RandomForestClassifier(n_estimators = 100)
stockforest20 = stockforest20.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(20)

featurecol = stockdf.columns.get_loc('Prediction Feature 50')
stockforest50 = RandomForestClassifier(n_estimators = 100)
stockforest50 = stockforest50.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(50)

#stockforest100 = RandomForestClassifier(n_estimators = 100)
#stockforest100 = stockforest100.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+5].astype(int))
#print(100)

5
20
50


### Trading System

- implement simple trading system to test prediction

In [12]:
# create depot as test.
from django.contrib.auth.models import User

depotname = 'PredictionTest'
user = User.objects.get(username='oliver')

stockDepot.deleteDepot(user, depotname)

value = 10000
depot = stockDepot.createDepot(user, depotname, value)
#print (depot)
depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysis(depot)
print(depotcontent_total, balance, depotvalue, available, change)

[] 10000.0 10000 10000.0 0.0


In [13]:
# continuous training of models:
def trainmodels(df):
    
    featurecol = df.columns.get_loc('Prediction Feature 5')
    stockforest5 = RandomForestClassifier(n_estimators = 100)
    stockforest5 = stockforest5.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))

    featurecol = df.columns.get_loc('Prediction Feature 20')
    stockforest20 = RandomForestClassifier(n_estimators = 100)
    stockforest20 = stockforest20.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))

    featurecol = df.columns.get_loc('Prediction Feature 50')
    stockforest50 = RandomForestClassifier(n_estimators = 100)
    stockforest50 = stockforest50.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))
    
    return stockforest5, stockforest20, stockforest50

In [14]:
# Import stocks for prediction:
# data until 2012 was used to train algorithm
# data from 2012 to 2013 will be used to calculate accuracy
#start1 = datetime.datetime.strptime('2011-05-01', "%Y-%m-%d")
#end1 = datetime.datetime.strptime('2013-04-01', "%Y-%m-%d")
# data from 2013 until today will be used for test depot / trading system


start2 = datetime.datetime.strptime('2012-05-01', "%Y-%m-%d").timestamp()
today = datetime.datetime.now().timestamp()
#stockdfAccuracy = [] # used to check accuracy for each stock
stockdfDepotList = [] # used to run trading system

for i in range(0, len(stocklist)):
    print (i/(len(stocklist)-1)*100)
    # create dataframe for trading system
    stockdfDepot = getStockDataframe([stocklist[i]], start2, today)
    stockdfDepot = createFeatures(stockdfDepot)
    stockdfDepot.drop(stockdfDepot.index[:200], inplace=True)
    stockdfDepotList.append(stockdfDepot)   
    
    
    # create dataframe to test accuracy
    #stockdfTest = getStockDataframe([stocklist[i]], start1, end1)
    #stockdfTest = createFeatures(stockdfTest)
    #stockdfTest = predictY(stockdfTest)
    #stockdfTest.drop(stockdfTest.index[:200], inplace=True)
    #stockdfTest.drop(stockdfTest.index[len(stockdfTest)-50:], inplace=True)
    # assess accuracy for different prediction times
    #score5 = stockforest5.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+1].astype(int))
    #score20 = stockforest20.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+3].astype(int))
    #score50 = stockforest50.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+4].astype(int))
    #score100 = stockforest100.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+5].astype(int))
    #stockdfAccuracy.append([score5, score20, score50])
    #print(stockdfAccuracy[i])

0.0
1.9607843137254901
3.9215686274509802
5.88235294117647
7.8431372549019605
9.803921568627452
11.76470588235294
13.725490196078432
15.686274509803921
17.647058823529413
19.607843137254903
21.568627450980394
23.52941176470588
25.49019607843137
27.450980392156865
29.411764705882355
31.372549019607842
33.33333333333333
35.294117647058826
37.254901960784316
39.21568627450981
41.17647058823529
43.13725490196079
45.09803921568628
47.05882352941176
49.01960784313725
50.98039215686274
52.94117647058824
54.90196078431373
56.86274509803921
58.82352941176471
60.78431372549019
62.745098039215684
64.70588235294117
66.66666666666666
68.62745098039215
70.58823529411765
72.54901960784314
74.50980392156863
76.47058823529412
78.43137254901961
80.3921568627451
82.35294117647058
84.31372549019608
86.27450980392157
88.23529411764706
90.19607843137256
92.15686274509804
94.11764705882352
96.07843137254902
98.0392156862745
100.0


In [15]:
#stockdfAccuracy

In [16]:
def buyorsell(depot, probabilities, date):
    depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysisDate(depot, date)

    maxperstock = depotvalue / 4 # maximum amount to be spent on one stock
    mintrans = 1000 # minimum amount to be bought for
    fee = 4.9
    
    for stockprob in sorted(probabilities, key=lambda x: x['Proba50'], reverse = True): # go through sorted list
        # sorted to have stocks with biggest probability in front
        
        stockid= stockprob['Stock'].id
        current_price = stockDepot.getStockPriceDate(stockid, 'close', date)
        
        # no trade if no price available or accuracy of prediction too low
        if current_price == 0:
            continue
        
        
        if available >= mintrans + fee and stockprob['Proba50'] >= 0.8 and stockprob['Proba20'] >= 0.8 and stockprob['Proba5'] >= 0.6: # condition for buying
            amounttobuy = int(maxperstock/current_price) # buy maximum allowed if possible
            for content in depotcontent_total: # find if stock is already bought
                if content.stock == stockprob['Stock']:
                    amounttobuy = maxperstock - int(content.current_total/current_price) # reduce maximum amount by already available
                    break
            if amounttobuy > int(mintrans/current_price) and available >= amounttobuy * current_price + fee:
                #buy
                datatype = 'high'
                stockDepot.buyStockDate(depot, stockid, amounttobuy, datatype, fee, date)
                
        if (stockprob['Proba50'] <= 0.6 and stockprob['Proba20'] <= 0.6 and stockprob['Proba5'] <= 0.5) or stockprob['Proba5'] < 0.4: # condition for selling
            amounttosell = 0
            for content in depotcontent_total: # find if stock was bought
                if content.stock == stockprob['Stock']:
                    amounttosell = int(content.current_total/current_price)
                    break
            if amounttosell > 0:
                # sell
                datatype = 'low'
                stockDepot.sellStockDate(depot, stockid, amounttosell, datatype, fee, date)

In [17]:
featurestart = stockdfDepotList[0].columns.get_loc('featPPO')

# go through dataframe
for i in range(0, len(stockdfDepotList[0])): # for each day
    probabilities = []
    date = stockdfDepotList[0].values[i, 0]
    for j in range(0, len(stocklist)): # for each stock
        
        # only trade if prediction accuracy for 20 and 50 days is >= 60%
        #if stockdfAccuracy[j][1] >= 0.6 and stockdfAccuracy[j][2] >= 0.6:
        #    trade = 1
        #else:
        #    trade = 0
        
        if(len(stockdfDepotList[j]) <= i):
            probabilities.append({'Stock': stocklist[j], 'Proba5': 0, 'Proba20': 0, 'Proba50': 0, 'Date': 0})
        else:
            proba5 = stockforest5.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart::].astype(int))
            proba20 = stockforest20.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart::].astype(int))
            proba50 = stockforest50.predict_proba(stockdfDepotList[j].values[i:i+1, featurestart::].astype(int))
            probabilities.append({'Stock': stocklist[j], 'Proba5': proba5[0][0], 'Proba20': proba20[0][0], 'Proba50': proba50[0][0], 'Date': stockdfDepotList[j].values[i,0]})
    buyorsell(depot, probabilities, date)

    depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysisDate(depot, date)
    if (i%10 == 0):
        print(depotcontent_total)
        #print(probabilities)
        print(i)

    if (i%50 == 0 and i != 0):
        # recalculate prediction models
        start = datetime.datetime.strptime('2000-01-01', "%Y-%m-%d").timestamp()
        end = date
        modeldf = getStockDataframe(stocklist, start, end)
        # add features
        modeldf = createFeatures(modeldf)
        # add prediction features
        modeldf = predictY(modeldf)

        stockforest5, stockforest20, stockforest50 = trainmodels(modeldf)
    print(balance, depotvalue, available, change)


[]
0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
9995.1 10000 7536.85 -4.9
9958.2 10000 5032.5 -41.8
9975.2 10000 5032.5 -24.8
9919.21 10000 5032.5 -80.79
9845.63 10000 7452.38 -154.37
9885.08 10000 7405.72 -114.92
9909.02 10000 7405.72 -90.98
9874.69 10000 7405.72 -125.31
[<DepotContent: oliver PredictionTest: Drägerwerk, DRW3.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest: C.A.T. Oil, O2C.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest: Amadeus Fire Ag, AAD.F, Frankfurt Stock Exchange>]
10
9857.19 10000 9818.2 -142.81
9856.5 10000 9818.2 -143.5
9857.2 10000 9818.2 -142.8
9857.9 10000 9818.2 -142.1
9857.6 10000 9818.2 -142.4
9857.6 10000 9818.2 -142.4
9857.42 10000 9818.2 -142.58
9842.5 10000 4864.89 -157.5
9864.16 10000 4864.89 -135.84
9838.19 10000 7289.99 -161.81
[<DepotContent: oliver PredictionTest: Drägerwerk, DRW3.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest: C.A.T. Oil, O2C.F, Frankfurt Stock Exchange>, <Depo

KeyboardInterrupt: 

In [549]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
