In [1]:
from stockDatabase import StockDatabase
from stockplot.models import Stock, StockData
import Analyze_ValueDB
import stockDepot
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import numpy as np

%matplotlib

Using matplotlib backend: TkAgg


### Features

- Which features can be used to predict stock price change over the next n days? I.e. does the price increase or decrease?

- mostly features independent of the stock price (almost scaled)
- PPO (Percentage Price Oscillator)
- net change
- percent change
- 10, 20, 30 day volatility
- EMA (Exponential Moving Average): positive cross, negative cross, higher -> find times when a shorter EMA crosses a longer one
- Find when price crosses EMA or is higher than EMA
- Find when EMA of MACD crosses MACD or is higher (MACD = Moving Average Convergence Divergence)
- same for PPO
- find when price is higher than upper bollinger band, average one or lower than the lower band
- Money Flow Index (MFI)
- considered overbought / very overbought if MFI > 80 / 90
- considered oversold / very oversold if MFI < 20 / 10


- not yet implemented: (http://cs229.stanford.edu/proj2013/DaiZhang-MachineLearningInStockPriceTrendForecasting.pdf)
    - PE ratio
    - PX volume
    - PX ebitda
    - current enterprise value
    - quick ratio
    - alpha overridable
    - alpha for beta pm
    - beta raw overridable
    - risk premium
    - IS EPS
    - corresponding S&P 500 index

- not yet implemented: (https://arxiv.org/pdf/1603.00751.pdf)
    - Book value - the net asset value of a company, calculated by total assets minus intangible assets (patents, goodwill) and liabilities.
    - Market capitalization - the market value of a company's issued share capital; it is equal to the share price times the number of shares outstanding.
    - Change of stock Net price over the one month period
    - Percentage change of Net price over the one month period
    - Dividend yield - indicates how much a company pays out in dividends each year relative to its share price.
    - Earnings per share - a portion of a company's profit divided by the number of issued shares. Earnings per share serves as an indicator of a company's profitability.
    - Earnings per share growth – the growth of earnings per share over the trailing one-year period.
    - Sales revenue turnover -
    - Net revenue - the proceeds from the sale of an asset, minus commissions, taxes, or other expenses related to the sale.
    - Net revenue growth – the growth of Net revenue over the trailing one-year period.
    - Sales growth – sales growth over the trailing one-year period.
    - Price to earnings ratio – measures company’s current share price relative to its per-share earnings.
    - Price to earnings ratio, five years average – averaged price to earnings ratio over the period of five years.
    - Price to book ratio - compares a company's current market price to its book value.
    - Price to sales ratio – ratio calculated by dividing the company's market cap by the revenue in the most recent year.
    - Dividend per share - is the total dividends paid out over an entire year divided by the number of
    - ordinary shares issued.
    - Current ratio - compares a firm's current assets to its current liabilities.
    - Quick ratio - compares the total amount of cash, marketable securities and accounts receivable to the amount of current liabilities.
    - Total debt to equity - ratio used to measure a company's financial leverage, calculated by dividing a company's total liabilities by its stockholders' equity.
    - Analyst ratio – ratio given by human analyst.
    - Revenue growth adjusted by 5 year compound annual growth ratio
    - Profit margin – a profitability ratio calculated as net income divided by revenue, or net profits divided by sales
    - Operating margin - ratio used to measure a company's pricing strategy and operating efficiency. It is a measurement of what proportion of a company's revenue is left over after paying for variable costs of production such as wages, raw materials, etc.
    - Asset turnover - the ratio of the value of a company’s sales or revenues generated relative to the value of its assets

Source: https://www.cs.princeton.edu/sites/default/files/uploads/saahil_madge.pdf
- when deciding to buy or not, use past of stock to determine accuracy of machine learning algorithm for that particular stock

Source, master thesis: http://www.diva-portal.org/smash/get/diva2:354463/FULLTEXT01.pdf

Source, MIT based on earnings reports: http://ocw.mit.edu/courses/sloan-school-of-management/15-097-prediction-machine-learning-and-statistics-spring-2012/projects/MIT15_097S12_proj2.pdf

Source, Lehman: https://www.cis.upenn.edu/~mkearns/papers/rlexec.pdf

Source, Github App: https://github.com/DMTSource/daily-stock-forecast

In [2]:
def crossing(feature1, feature2):
    # create feature for feature ema crossing feature
    poscross = [0] # positive cross
    negcross = [0] # negative cross
    higher = [0] # ema higher than feature

    for i in range (1, len(feature1)):
        # check if ema is crossing feature
        if feature1[i-1] < feature2[i-1] and feature1[i] > feature2[i]:
            poscross.append(1)
            negcross.append(0)
        elif feature1[i-1] > feature2[i-1] and feature1[i] < feature2[i]:
            poscross.append(0)
            negcross.append(1)
        else:
            poscross.append(0)
            negcross.append(0)
            
        # check if ema is higher or lower than feature
        if feature1[i] > feature2[i]:
            higher.append(1)
        else:
            higher.append(0)
    return poscross, negcross, higher

In [3]:
from math import sqrt

def createFeatures(df):
    
    data = df['Price'].values
    
    # price change features
    netchange = [0]
    netchange += [data[i]-data[i-1] for i in range(1, len(data))]
    percentchange = [netchange[i]/data[i] * 100 for i in range(0, len(data))]

    # volatility based on price change
    sqrt254 = sqrt(254) # square root of yearly trading days
    volatility10 = [0] * 10
    volatility20 = [0] * 20
    volatility30 = [0] * 30
    volatility10 += [sqrt254 * np.std(percentchange[i-10:i]) for i in range(10, len(data))]
    volatility20 += [sqrt254 * np.std(percentchange[i-20:i]) for i in range(20, len(data))]
    volatility30 += [sqrt254 * np.std(percentchange[i-30:i]) for i in range(30, len(data))]

    # add features to dataframe
    df['featPPO'] = df['PPO']
    df['featMFI'] = df['MFI']
    df['Netchange'] = netchange
    df['Percentage Change'] = percentchange
    df['10 day volatility'] = volatility10
    df['20 day volatility'] = volatility20
    df['30 day volatility'] = volatility30

    # check if shorter EMA crosses a longer one
    df['EMA 15 / 30 posx'], df['EMA 15 / 30 negx'], df['EMA 15 / 30 higher'] = crossing(df['EMA 15'].values, df['EMA 30'].values)

    df['EMA 15 / 50 posx'], df['EMA 15 / 50 negx'], df['EMA 15 / 50 higher'] = crossing(df['EMA 15'].values, df['EMA 50'].values)

    df['EMA 15 / 100 posx'], df['EMA 15 / 100 negx'], df['EMA 15 / 100 higher'] = crossing(df['EMA 15'].values, df['EMA 100'].values)

    df['EMA 15 / 200 posx'], df['EMA 15 / 200 negx'], df['EMA 15 / 200 higher'] = crossing(df['EMA 15'].values, df['EMA 200'].values)

    df['EMA 30 / 50 posx'], df['EMA 30 / 50 negx'], df['EMA 30 / 50 higher'] = crossing(df['EMA 30'].values, df['EMA 50'].values)

    df['EMA 30 / 100 posx'], df['EMA 30 / 100 negxx'], df['EMA 30 / 100 higher'] = crossing(df['EMA 30'].values, df['EMA 100'].values)

    df['EMA 30 / 200 posx'], df['EMA 30 / 200 negx'], df['EMA 30 / 200 higher'] = crossing(df['EMA 30'].values, df['EMA 200'].values)

    df['EMA 50 / 100 posx'], df['EMA 50 / 100 negx'], df['EMA 50 / 100 higher'] = crossing(df['EMA 50'].values, df['EMA 100'].values)

    df['EMA 50 / 200 posx'], df['EMA 50 / 200 negx'], df['EMA 50 / 200 higher'] = crossing(df['EMA 50'].values, df['EMA 200'].values)

    df['EMA 100 / 200 pox'], df['EMA 100 / 200 negx'],df['EMA 100 / 200 higher'] = crossing(df['EMA 100'].values, df['EMA 200'].values)


    # check if price crosses an EMA

    df['Price / EMA 15 posx'], df['Price / EMA 15 negx'], df['Price / EMA 15 higher'] = crossing(df['Price'].values, df['EMA 15'].values)

    df['Price / EMA 30 posx'], df['Price / EMA 30 negx'], df['Price / EMA 30 higher'] = crossing(df['Price'].values, df['EMA 30'].values)

    df['Price / EMA 50 posx'], df['Price / EMA 50 negx'], df['Price / EMA 50 higher'] = crossing(df['Price'].values, df['EMA 50'].values)

    df['Price / EMA 100 posx'], df['Price / EMA 100 negx'], df['Price / EMA 100 higher'] = crossing(df['Price'].values, df['EMA 100'].values)

    df['Price / EMA 200 posx'], df['Price / EMA 200 negx'], df['Price / EMA 200 higher'] = crossing(df['Price'].values, df['EMA 200'].values)

    # check if EMA of MACD crosses MACD
    df['MACD posx'], df['MACD negx'], df['MACD higher'] = crossing(df['MACD EMA'].values, df['MACD'].values)

    # check if EMA of PPO crosses PPO
    df['PPO posx'] , df['PPO negx'],  df['PPO higher'] = crossing(df['PPO EMA'].values, df['PPO'].values)
    
    
    
    # create features for bollinger
    higheraverage = [] # price higher than average bollinger band
    higherhigh = [] # price higher than upper bollinger band
    lowerlow = [] # price lower than lower bollinger band

    bollinger = df['Bollinger'].values
    bollingerhigh = df['Bollinger high'].values
    bollingerlow = df['Bollinger low'].values
    for i in range (0, len(data)):
            # check if price is higher than Bollinger
            if data[i] > bollinger[i]:
                higheraverage.append(1)
            else:
                higheraverage.append(0)

            if data[i] > bollingerhigh[i]:
                higherhigh.append(1)
            else:
                higherhigh.append(0)

            if data[i] < bollingerlow[i]:
                lowerlow.append(1)
            else:
                lowerlow.append(0)

    df['Bollinger higher'] = higheraverage
    df['Bollinger higher high'] = higherhigh
    df['Bollinger lower low'] = lowerlow
    
    # create features for Money Flow Index:
    strongoverbought = overbought = oversold = strongoversold = [0] * len(df)
    MFI = df['MFI'].values
    for i in range(0, len(df)):
        if MFI[i] > 90:
            strongoverbought[i] = 1
            overbought[i] = 1
        if MFI[i] > 80:
            overbought[i] = 1
        elif MFI[i] < 10:
            oversold[i] = 1
            strongoversold[i] = 1
        elif MFI[i] < 20:
            oversold[i] = 1

    df['MFI strong overbought'] = strongoverbought
    df['MFI overbought'] = overbought
    df['MFI oversold'] = oversold
    df['MFI strong oversold'] = strongoversold
    
        
    return df

### Prediction Features

- What should be predicted?
- Here: is the price higher or lower the next n days? (1, 5, 10, 20, 50, 100)

In [4]:
# predict if price is higher the next n days.

def predictY(df):
    higher5 = []
    higher20 = []
    higher50 = []

    price = df['Price'].values

    for i in range (0, len(price)-5):
        if price[i+5] > price[i]:
            higher5.append(1)
        else:
            higher5.append(0)

    for i in range (0, len(price)-20):
        if price[i+20] > price[i]:
            higher20.append(1)
        else:
            higher20.append(0)

    for i in range (0, len(price)-50):
        if price[i+50] > price[i]:
            higher50.append(1)
        else:
            higher50.append(0)

    # for last days append 0
    for i in range (len(price)-5, len(price)):
        higher5.append(0)
    for i in range (len(price)-20, len(price)):
        higher20.append(0)
    for i in range (len(price)-50, len(price)):
        higher50.append(0)

    df['Prediction Feature 5'] = higher5
    df['Prediction Feature 20'] = higher20
    df['Prediction Feature 50'] = higher50
    
    return df

### Create the Dataframe

In [9]:
def getStockDataframe(stocklist, fromDate, toDate, prediction):

    dflist = []

    for i in range(0, len(stocklist)):
        # get a stock
        teststock = StockDatabase(stocklist[i].sourceSymbol)
        
        # get history for stock fromDate to Date
        step = 1
        dates, data = teststock.getStockHistoryDate('close', fromDate, toDate, step)

        # get data for Moving Average Convergence Divergence
        dataMACD = teststock.MACD(dates, data)

        # get 9 day Exponential Moving Average for MACD
        emaMACD = teststock.ExpAverage(dates, dataMACD, 9)

        # get data for Percentage Price Oscillator
        dataPPO = teststock.PPO(dates, data)

        # get 9 day EMA for PPO
        emaPPO = teststock.ExpAverage(dates, dataPPO, 9)

        # get bollinger band
        days = 20 # bollinger band based on 20-day Simple Moving Average
        factor = 2 # factor for standard deviation, lowBol = averageBol - 2 * std
        lowBol, averageBol, highBol = teststock.Bollinger(dates, data, days, factor)

        # get exponential moving averages
        ema15 = teststock.ExpAverage(dates, data, 15)
        ema30 = teststock.ExpAverage(dates, data, 30)
        ema50 = teststock.ExpAverage(dates, data, 50)
        ema100 = teststock.ExpAverage(dates, data, 100)
        ema200 = teststock.ExpAverage(dates, data, 200)
        
        # get Money Flow Index (MFI)
        typical, volume, pos_MF, neg_MF, pos_MF14, neg_MF14, MFR14, MFI = teststock.MFI(fromDate, toDate)
        
        # get fundamentals (Markus)
        compNumber = stocklist[i].fundamentalsCompNumber
        fundamentals = []
        for date in dates:
            price = teststock.getStockPriceDate('close', date)[1]
            datestr = datetime.datetime.fromtimestamp(date).strftime('%Y-%m-%d')
            fundamentals.append(Analyze_ValueDB.fundamentals(compNumber, datestr, price))
        

        # create pandas dataframe for all date
        dfdata = {'Stock': stocklist[i], 'Date': dates, 'Price': data, 'EMA 15': ema15, 'EMA 30': ema30, 'EMA 50': ema50, 'EMA 100': ema100, 'EMA 200': ema200, 'MACD': data, 'MACD EMA': emaMACD, 'PPO': dataPPO, 'PPO EMA': emaPPO, 'Bollinger low': lowBol, 'Bollinger': averageBol, 'Bollinger high': highBol, 'MFI': MFI, 'Fundamentals': fundamentals}
        df = pd.DataFrame(dfdata, columns=['Stock', 'Date', 'Price', 'EMA 15', 'EMA 30', 'EMA 50', 'EMA 100', 'EMA 200', 'MACD', 'MACD EMA', 'PPO', 'PPO EMA', 'Bollinger low', 'Bollinger', 'Bollinger high', 'MFI', 'Fundamentals'])
        #dfdata = {'Stock': stocklist[i], 'Date': dates, 'Price': data, 'Typical': typical, 'Volume': volume, 'pos_MF': pos_MF, 'neg_MF': neg_MF, 'pos_MF14': pos_MF14, 'neg_MF14': neg_MF14,'MFR14': MFR14, 'MFI': MFI}
        #df = pd.DataFrame(dfdata, columns=['Stock', 'Date', 'Price', 'Typical', 'Volume', 'pos_MF', 'neg_MF', 'pos_MF14', 'neg_MF14', 'MFR14', 'MFI'])
        
        # numbers 250, 200, 50 to be automized...UPDATE IMPROVE
        if (len(df) > 250):
            # create features
            df = createFeatures(df)
        
            # create prediction features
            if prediction:
                df = predictY(df)

            # drop first 200 rows of df and last 50 rows, missing values because of 
            # 200 day moving average and price prediction for next 50 days
            df.drop(df.index[:200], inplace=True)
            if prediction:
                df.drop(df.index[len(df)-50:], inplace=True)

            dflist.append(df)
        #else:
            #dflist.append(df)
        
    return pd.concat(dflist)

In [10]:
# create dataframe with stocks
stocklist = []
stocks = Stock.objects.filter(source = 'Quandl')
for i in range(0, 5):#int(3*len(stocks)/4)):
    stocklist.append(stocks[i])

start = datetime.datetime.strptime('2011-01-01', "%Y-%m-%d").timestamp()
end = datetime.datetime.strptime('2011-02-01', "%Y-%m-%d").timestamp()#'2012-04-01', "%Y-%m-%d").timestamp()
stockdf = getStockDataframe(stocklist, start, end, True) # true for prediction features
stockdf.head(3)

Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!
Error. Too many days!


Unnamed: 0,Stock,Date,Price,EMA 15,EMA 30,EMA 50,EMA 100,EMA 200,MACD,MACD EMA,PPO,PPO EMA,Bollinger low,Bollinger,Bollinger high,MFI,Fundamentals
0,"Kloeckner, KCO.F, Frankfurt Stock Exchange",1294009000.0,21.64,0.0,0,0,0,0,21.64,0.0,0,0.0,0,0,0,0.0,"{'Current Ratio': 3.140021, 'Shares Outstandin..."
1,"Kloeckner, KCO.F, Frankfurt Stock Exchange",1294096000.0,21.925,0.0,0,0,0,0,21.925,0.0,0,0.0,0,0,0,0.0,"{'Current Ratio': 3.140021, 'Shares Outstandin..."
2,"Kloeckner, KCO.F, Frankfurt Stock Exchange",1294182000.0,21.4,0.0,0,0,0,0,21.4,0.0,0,0.0,0,0,0,0.0,"{'Current Ratio': 3.140021, 'Shares Outstandin..."


In [7]:
stockdf.tail(3)

Unnamed: 0,Stock,Date,Price,EMA 15,EMA 30,EMA 50,EMA 100,EMA 200,MACD,MACD EMA,...,Bollinger higher,Bollinger higher high,Bollinger lower low,MFI strong overbought,MFI overbought,MFI oversold,MFI strong oversold,Prediction Feature 5,Prediction Feature 20,Prediction Feature 50
3123,"Aixtron, AIXA.F, Frankfurt Stock Exchange",1326841000.0,12.005,11.010459,10.49873,10.465511,11.91385,15.369978,12.005,0.442816,...,1,0,0,1,1,1,1,0,1,1
3124,"Aixtron, AIXA.F, Frankfurt Stock Exchange",1326928000.0,12.38,11.181652,10.620102,10.540589,11.92308,15.340227,12.38,0.484779,...,1,1,0,1,1,1,1,0,0,1
3125,"Aixtron, AIXA.F, Frankfurt Stock Exchange",1327014000.0,12.03,11.287695,10.711063,10.598998,11.925197,15.307289,12.03,0.519219,...,1,0,0,1,1,1,1,0,1,1


In [8]:
stockdf['Stock'].unique()

array([<Stock: Klöckner, KCO.F, Frankfurt Stock Exchange>,
       <Stock: Infineon, IFX.F, Frankfurt Stock Exchange>,
       <Stock: Gea Group, G1A.F, Frankfurt Stock Exchange>,
       <Stock: H&r, 2HR.F, Frankfurt Stock Exchange>,
       <Stock: Air Berlin, AB1.F, Frankfurt Stock Exchange>,
       <Stock: Amadeus Fire Ag, AAD.F, Frankfurt Stock Exchange>,
       <Stock: Carl Zeiss Meditec, AFK.F, Frankfurt Stock Exchange>,
       <Stock: Adva Optical Networking, ADV.F, Frankfurt Stock Exchange>,
       <Stock: Allianz, ALV.F, Frankfurt Stock Exchange>,
       <Stock: Airbus Group, AIR.F, Frankfurt Stock Exchange>,
       <Stock: Bayer, BAYN.F, Frankfurt Stock Exchange>,
       <Stock: Aaeral, ARL.F, Frankfurt Stock Exchange>,
       <Stock: Bertrandt, BDT.F, Frankfurt Stock Exchange>,
       <Stock: Aixtron, AIXA.F, Frankfurt Stock Exchange>], dtype=object)

### Prediction

- Machine Learning begins here

In [9]:
print(stockdf.columns.get_loc('featPPO'))
print(stockdf.columns.get_loc('Prediction Feature 5'))

16
81


In [10]:
# use random forest for prediction
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split

featurestart = stockdf.columns.get_loc('featPPO')
featureend = stockdf.columns.get_loc('Prediction Feature 5')
score = []
rows, cols = stockdf.shape

# split stock data into a training set and test set -> randomly selects 25% of data as test set
train, test = train_test_split(stockdf, test_size = 0.25)

# fit the random forest regressor
for i in range(featureend, cols):
    print(i)
    stockforest = RandomForestClassifier(n_estimators = 100)
    stockforest = stockforest.fit(train.values[0::, featurestart:featureend], train.values[0::, i].astype(int))
    score.append(stockforest.score(test.values[0::, featurestart:featureend], test.values[0::, i].astype(int)))
score



81
82
83


[0.64867237217467633, 0.70945797673908273, 0.73633969716919023]

In [11]:
# final training of models:
featurecol = stockdf.columns.get_loc('Prediction Feature 5')
stockforest5 = RandomForestClassifier(n_estimators = 100)
stockforest5 = stockforest5.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(5)

featurecol = stockdf.columns.get_loc('Prediction Feature 20')
stockforest20 = RandomForestClassifier(n_estimators = 100)
stockforest20 = stockforest20.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(20)

featurecol = stockdf.columns.get_loc('Prediction Feature 50')
stockforest50 = RandomForestClassifier(n_estimators = 100)
stockforest50 = stockforest50.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featurecol].astype(int))
print(50)

#stockforest100 = RandomForestClassifier(n_estimators = 100)
#stockforest100 = stockforest100.fit(stockdf.values[0::, featurestart:featureend].astype(int), stockdf.values[0::, featureend+5].astype(int))
#print(100)

5
20
50


### Trading System

- implement simple trading system to test prediction

In [12]:
# create depot as test.
from django.contrib.auth.models import User

depotname = 'PredictionTest1'
user = User.objects.get(username='oliver')

try:
    stockDepot.deleteDepot(user, depotname)
except:
    pass

value = 10000
depot = stockDepot.createDepot(user, depotname, value)
#print (depot)
depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysis(depot)
print(depotcontent_total, balance, depotvalue, available, change)

[] 10000.0 10000 10000.0 0.0


In [13]:
# continuous training of models:
def trainmodels(df):
    
    featurecol = df.columns.get_loc('Prediction Feature 5')
    stockforest5 = RandomForestClassifier(n_estimators = 100)
    stockforest5 = stockforest5.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))

    featurecol = df.columns.get_loc('Prediction Feature 20')
    stockforest20 = RandomForestClassifier(n_estimators = 100)
    stockforest20 = stockforest20.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))

    featurecol = df.columns.get_loc('Prediction Feature 50')
    stockforest50 = RandomForestClassifier(n_estimators = 100)
    stockforest50 = stockforest50.fit(df.values[0::, featurestart:featureend].astype(int), df.values[0::, featurecol].astype(int))
    
    return stockforest5, stockforest20, stockforest50

In [14]:
# Import stocks for prediction:
# data until 2012 was used to train algorithm
# data from 2012 to 2013 will be used to calculate accuracy
#start1 = datetime.datetime.strptime('2011-05-01', "%Y-%m-%d")
#end1 = datetime.datetime.strptime('2013-04-01', "%Y-%m-%d")
# data from 2013 until today will be used for test depot / trading system


start2 = datetime.datetime.strptime('2011-05-01', "%Y-%m-%d").timestamp()
today = datetime.datetime.now().timestamp()
#stockdfAccuracy = [] # used to check accuracy for each stock
stockdfDepotList1 = [] # used to run trading system

for i in range(0, len(stocklist)):
    print (i/(len(stocklist)-1)*100)
    # create dataframe for trading system
    # false so that no prediction features are created
    stockdfDepot = getStockDataframe([stocklist[i]], start2, today, False) 
    stockdfDepotList1.append(stockdfDepot)   
    
    
    # create dataframe to test accuracy
    #stockdfTest = getStockDataframe([stocklist[i]], start1, end1)
    #stockdfTest = createFeatures(stockdfTest)
    #stockdfTest = predictY(stockdfTest)
    #stockdfTest.drop(stockdfTest.index[:200], inplace=True)
    #stockdfTest.drop(stockdfTest.index[len(stockdfTest)-50:], inplace=True)
    # assess accuracy for different prediction times
    #score5 = stockforest5.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+1].astype(int))
    #score20 = stockforest20.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+3].astype(int))
    #score50 = stockforest50.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+4].astype(int))
    #score100 = stockforest100.score(stockdfTest.values[0::, featurestart:featureend].astype(int), stockdfTest.values[0::, featureend+5].astype(int))
    #stockdfAccuracy.append([score5, score20, score50])
    #print(stockdfAccuracy[i])

0.0
7.142857142857142
14.285714285714285
21.428571428571427
28.57142857142857
35.714285714285715
42.857142857142854
50.0
57.14285714285714
64.28571428571429
71.42857142857143
78.57142857142857
85.71428571428571
92.85714285714286
100.0


In [15]:
#stockdfAccuracy

In [16]:
def buyorsell(depot, probabilities, date):
    depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysisDate(depot, date)

    maxperstock = depotvalue / 4 # maximum amount to be spent on one stock
    mintrans = 1000 # minimum amount to be bought for
    fee = 4.9
    
    for stockprob in sorted(probabilities, key=lambda x: x['Proba50'], reverse = True): # go through sorted list
        # sorted to have stocks with biggest probability in front (for 50 days higher / lower)
        
        stockid= stockprob['Stock'].id
        # buy at day's high
        datatype = 'high'
        current_price = stockDepot.getStockPriceDate(stockid, datatype, date)
        # no trade if no price available or accuracy of prediction too low
        if current_price == 0:
            continue
        
        if available >= mintrans + fee and stockprob['Proba50'] > 0.8 and stockprob['Proba20'] > 0.8 and stockprob['Proba5'] > 0.5 or stockprob['Proba5'] > 0.8: # condition for buying
            amounttobuy = int(maxperstock/current_price) # buy maximum allowed if possible
            for content in depotcontent_total: # find if stock is already bought
                if content.stock == stockprob['Stock']:
                    amounttobuy = maxperstock - int(content.current_total/current_price) # reduce maximum amount by already available
                    break
            if amounttobuy > int(mintrans/current_price) and available >= amounttobuy * current_price + fee:
                #buy at day's high
                print(stockprob['Stock'])
                print('bought at: ' + str(current_price))
                stockDepot.buyStockDate(depot, stockid, amounttobuy, datatype, fee, date)
                
        # sell at day's low
        datatype = 'low'
        current_price = stockDepot.getStockPriceDate(stockid, datatype, date)
        if (stockprob['Proba50'] < 0.6 and stockprob['Proba20'] < 0.6 and stockprob['Proba5'] < 0.5) or stockprob['Proba5'] < 0.4: # condition for selling
            amounttosell = 0
            for content in depotcontent_total: # find if stock was bought
                if content.stock == stockprob['Stock']:
                    amounttosell = int(content.current_total/current_price)
                    break
            if amounttosell > 0:
                # sell at day's low
                print(stockprob['Stock'])
                print('sold at: ' + str(current_price))
                stockDepot.sellStockDate(depot, stockid, amounttosell, datatype, fee, date)

In [17]:
#stockdfDepotList[8].shape == stockdfDepotList[8].dropna().shape

In [18]:
import gc
gc.collect()

# throw out stocks with missing values:
stockdfDepotList = []
for df in stockdfDepotList1:
    if (df.shape == df.dropna().shape):
        stockdfDepotList.append(df)

featurestart = stockdfDepotList[0].columns.get_loc('featPPO')

# find longest dataframe:
days = 0
index = 0
for i in range(0, len(stockdfDepotList)):
    if len(stockdfDepotList[i]) > days:
        days = len(stockdfDepotList[i])
        index = i

# go through dataframe
for i in range(0, days): # for each day
    probabilities = []
    date = stockdfDepotList[index].values[i, 1] # date from longest dataframe
    for j in range(0, len(stockdfDepotList)): # for each stock
    
        # only trade if prediction accuracy for 20 and 50 days is >= 60%
        #if stockdfAccuracy[j][1] >= 0.6 and stockdfAccuracy[j][2] >= 0.6:
        #    trade = 1
        #else:
        #    trade = 0

        indexdf = stockdfDepotList[j][stockdfDepotList[j]['Date'] == date].index.tolist()
        stock = stockdfDepotList[j]
        
        if(len(stockdfDepotList[j]) <= i or indexdf == []):
            probabilities.append({'Stock': stock['Stock'].iloc[0], 'Proba5': 0, 'Proba20': 0, 'Proba50': 0, 'Date': 0})
        else:
            indexdf = indexdf[0]
            proba5 = stockforest5.predict_proba(stock.values[indexdf:indexdf+1, featurestart::])
            proba20 = stockforest20.predict_proba(stock.values[indexdf:indexdf+1, featurestart::])
            proba50 = stockforest50.predict_proba(stock.values[indexdf:indexdf+1, featurestart::])
            probabilities.append({'Stock': stock['Stock'].iloc[0], 'Proba5': proba5[0][1], 'Proba20': proba20[0][1], 'Proba50': proba50[0][1], 'Date': stockdfDepotList[j].values[i,1]})
    buyorsell(depot, probabilities, date)

    depotcontent_total, balance, depotvalue, available, change = stockDepot.depotAnalysisDate(depot, date)
    if (i%10 == 0):
        #print(datetime.datetime.fromtimestamp(date))
        #print(stockdfDepotList[0]['Stock'].iloc[0])
        #print(stockdfDepotList[0].loc[stockdfDepotList[0]['Date'] == date, 'Price'])
        print(depotcontent_total)
        #print(probabilities)
        print(i)

    if (i%50 == 0 and i != 0):
        # recalculate prediction models
        start = datetime.datetime.strptime('2000-01-01', "%Y-%m-%d").timestamp()
        end = date
        modeldf = getStockDataframe(stocklist, start, end, True)
        gc.collect()
        stockforest5, stockforest20, stockforest50 = trainmodels(modeldf)
    print(balance, depotvalue, available, change)


[]
0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
10000.0 10000 10000.0 0.0
Aixtron, AIXA.F, Frankfurt Stock Exchange
bought at: 12.48
9995.1 10000 7541.1 -4.9
10191.1 10000 7541.1 191.1
Aixtron, AIXA.F, Frankfurt Stock Exchange
sold at: 13.421
10314.2 10000 10314.2 314.2
[<DepotContent: oliver PredictionTest1: Aixtron, AIXA.F, Frankfurt Stock Exchange>]
10
10314.2 10000 10314.2 314.2
Allianz, ALV.F, Frankfurt Stock Exchange
bought at: 91.0
10309.3 10000 7884.16 309.3
Carl Zeiss Meditec, AFK.F, Frankfurt Stock Exchange
bought at: 17.65
10311.96 10000 5421.63 311.96
10328.86 10000 5421.63 328.86
Allianz, ALV.F, Frankfurt Stock Exchange
sold at: 89.88
10325.99 10000 7886.69 325.99
10337.98 10000 7886.69 337.98
H&r, 2HR.F, Frankfurt Stock Exchange
bought at: 16.275
10356.34 10000 5439.15 356.34
Air Berlin, AB1.F, Frankfurt Stock Exchange
bought at: 2.37
10277.56 10000 2936.27 277



10282.32 10000 4832.32 282.32
10254.82 10000 4832.32 254.82
10084.82 10000 4832.32 84.82
Air Berlin, AB1.F, Frankfurt Stock Exchange
sold at: 2.06
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
10002.42 10000 10002.42 2.42
[<DepotContent: oliver PredictionTest1: Aixtron, AIXA.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Allianz, ALV.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Carl Zeiss Meditec, AFK.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: H&r, 2HR.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Air Berlin, AB1.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Bayer, BAYN.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Adva Optical Networking, ADV.F, Frankfurt Stock Exchange>, <DepotContent: oliver PredictionTest1: Klöckner, 

KeyboardInterrupt: 

In [549]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
