# Genetic Algorithms and Finance
Attempted implementation of https://drive.google.com/file/d/0B9-kA56h5JCMMW4tU25HRExWb1U/view?usp=sharing

## Data Gathering

In [239]:
import pandas as pd

# S&P 500 daily history and simple day-over-day returns
# Source: Yahoo Finance ^GSPC history, https://finance.yahoo.com/quote/%5EGSPC/history?period1=-631130400&period2=1544248800&interval=1d&filter=history&frequency=1d
SPY = pd.read_csv('SPY.csv', parse_dates = True)
previous_close = SPY["Close"].shift(1)
SPY["Returns"] = (SPY["Close"] - previous_close)/previous_close
SPY["Date"] = pd.to_datetime(SPY["Date"])
# Index by date and drop incomplete rows (the first row has no previous close, so no return)
data = SPY.set_index("Date").dropna()
print(type(data.index[0]))
data.head()

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1950-01-04,16.85,16.85,16.85,16.85,16.85,1890000,0.011405
1950-01-05,16.93,16.93,16.93,16.93,16.93,2550000,0.004748
1950-01-06,16.98,16.98,16.98,16.98,16.98,2010000,0.002953
1950-01-09,17.08,17.08,17.08,17.08,17.08,2520000,0.005889
1950-01-10,17.030001,17.030001,17.030001,17.030001,17.030001,2160000,-0.002927


In [240]:
def _alignForCross(a, b):
    """Return NaN-free, identically indexed copies of ``a`` and ``b``.

    ``a`` must be a pandas Series. ``b`` may be a Series or a scalar; a scalar
    is broadcast to a constant Series on ``a``'s index. When both are Series
    they are restricted to the labels they have in common, so the element-wise
    comparisons in crossover/crossunder never see mismatched labels.

    Neither input is modified: ``dropna`` is applied to copies, not in place.
    """
    a = a.dropna()
    if isinstance(b, pd.Series):
        a, b = a.align(b.dropna(), join = "inner")
    else:
        b = pd.Series(b, index = a.index)
    return a, b

#True on the index that a crosses over b
def crossover(a, b):
    """Boolean Series that is True where ``a`` crosses above ``b``.

    A cross-over happens at an index when ``a`` was strictly below ``b`` at the
    previous index and is strictly above ``b`` at the current one.

    Parameters: ``a`` is a Series; ``b`` is a Series or a scalar threshold.
    Returns a boolean Series on the common NaN-free index (empty if there is
    no data). The first element is always False because it has no predecessor.
    """
    a, b = _alignForCross(a, b)
    wasBelow = a.shift(1) < b.shift(1) #previous period: a was less than b
    isAbove = a > b #current period: a is greater than b
    return wasBelow & isAbove

#True on the index that a crosses under b
def crossunder(a, b):
    """Boolean Series that is True where ``a`` crosses below ``b``.

    A cross-under happens at an index when ``a`` was strictly above ``b`` at
    the previous index and is strictly below ``b`` at the current one.

    Parameters: ``a`` is a Series; ``b`` is a Series or a scalar threshold.
    Returns a boolean Series on the common NaN-free index (empty if there is
    no data). The first element is always False because it has no predecessor.
    """
    a, b = _alignForCross(a, b)
    wasAbove = a.shift(1) > b.shift(1) #previous period: a was greater than b
    isBelow = a < b #current period: a is less than b
    return wasAbove & isBelow

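Indicators to implement: SMA, MACD, Slow Stochastic, RSI, CCI,
Momentum Oscillator, Price Oscillator, Williams %R (Larry Williams), Bollinger Bands and OBV.
Slow Stochastic and the Price Oscillator are not implemented in the cells below yet.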

In [241]:
import talib
import pandas as pd
# Column shortcuts handed to the TA-Lib calls in the cells below
close, high, low, volume = data.Close, data.High, data.Low, data.Volume

# Global registry of Indicator instances; each indicator cell appends to it
indicators = []

#Class that holds the value series for a given indicator
class Indicator:
    """A named technical-indicator series.

    Attributes:
        name: label used when printing trading rules.
        value: the indicator series with NaN entries removed.
        minDate / maxDate: first and last index label of ``value`` (the
            cleaned series), or None when no non-NaN value exists.
    """
    def __init__(self, name, value):
        self.name = name
        self.value = value.dropna()
        # Take the bounds from the cleaned series so they describe the data
        # actually stored in self.value, not the NaN warm-up rows of the input.
        hasData = len(self.value) > 0
        self.minDate = self.value.index[0] if hasData else None
        self.maxDate = self.value.index[-1] if hasData else None

# Generating Indicators/Signals
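Compute the desired technical indicators. Each raw indicator series is wrapped in an `Indicator` instance and appended to the global list `indicators`; the binary buy and sell signals are derived from these series later, when a trading rule is evaluated.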

In [242]:
#SMA: 50-period simple moving average of the close
SMA50 = talib.SMA(close, timeperiod=50)
SMA = Indicator("SMA", SMA50)
indicators.append(SMA)

In [243]:
#MACD: the stored value is the MACD line minus its signal line
macd, macdsignal, macdhist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
macd_spread = macd - macdsignal
MACD = Indicator("MACD", macd_spread)
indicators.append(MACD)

In [244]:
#RSI: 14-period relative strength index of the close
rsi = talib.RSI(close, timeperiod=14)
RSI = Indicator("RSI", rsi)
indicators.append(RSI)

In [245]:
#CCI: 14-period commodity channel index
cci_values = talib.CCI(high, low, close, timeperiod=14)
CCI = Indicator("CCI", cci_values)
indicators.append(CCI)

In [246]:
#Momentum Oscillator: 9-period momentum of the close
mom_values = talib.MOM(close, timeperiod=9)
MOM = Indicator("MOM", mom_values)
indicators.append(MOM)

In [247]:
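#Price Oscillator
#TODO: not implemented yet (couldn't find it at first); TA-Lib provides it as talib.APO (absolute) and talib.PPO (percentage)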

In [248]:
#Williams %R: stored as its absolute value
willr_values = talib.WILLR(high, low, close, timeperiod=14)
Williams = Indicator("Williams %R", willr_values.abs())
indicators.append(Williams)

In [249]:
#Bollinger Bands: stored as the distance between the close and each band
upperband, middleband, lowerband = talib.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
band_gaps = (
    ("Bollinger Bands Upper", upperband - close),
    ("Bollinger Bands Lower", close - lowerband),
)
for band_name, band_gap in band_gaps:
    bollinger = Indicator(band_name, band_gap)
    indicators.append(bollinger)

In [250]:
#OBV: stored as on-balance volume minus its 20-period simple moving average
obv_values = talib.OBV(close, volume)
OBV_signal = talib.SMA(obv_values, timeperiod=20)
OBV = Indicator("OBV", obv_values - OBV_signal)
indicators.append(OBV)

In [251]:
# Trim every indicator, and the price data, to the date range they all share
minDate = max(ind.value.index[0] for ind in indicators)
print(type(indicators[0].value.index[0]))
maxDate = min(ind.value.index[-1] for ind in indicators)
print("minDate " + str(minDate))
print("maxDate " + str(maxDate))
for indicator in indicators:
    print(indicator.name)
    indicator.value = indicator.value[minDate:maxDate]

data = data[minDate:maxDate]

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
minDate 1950-03-16 00:00:00
maxDate 2018-12-07 00:00:00
SMA
MACD
RSI
CCI
MOM
Williams %R
Bollinger Bands Upper
Bollinger Bands Lower
OBV


# Genetic Algorithm
`TradingRule` is a single rule to either buy or sell a stock.

`Strategy` is composed of two trading rules: one for buying and one for selling.

`Population` is a collection of `Strategy` instances.

GeneticAlgorithm is a class to execute the genetic algorithm.

In [252]:
# Number of clauses encoded in each rule's genome; randomGenome also uses it as
# the exclusive upper bound when drawing which indicator a clause refers to.
NO_INDICATORS = 2
# Operators used to combine a clause with the signal built from earlier clauses.
BINARY_OPERATORS = ["and", "or", "xor"]
# Comparison applied between the indicator series and the clause's cutoff value.
SIGNS = ["less than", "greater than", "equal to", "cross over", "cross under"]
# Transform applied to the indicator series before the comparison.
TYPE = ["today", "yesterday", "percent change"]
# Width in bits of every encoded genome field.
PRECISION = 8
# Number of fields that make up one clause in the genome.
FEATURES = 6

# Training window, expressed as row offsets into `data`.
# NOTE(review): 252 rows per year here vs. 255 in TEST_LENGTH and in the Sharpe
# annualisation inside Strategy — confirm which trading-day count is intended.
TRAIN_LENGTH = 5*252
TRAIN_START_INDEX = 5000
TRAIN_START = data.index[TRAIN_START_INDEX]
TRAIN_END = data.index[TRAIN_START_INDEX + TRAIN_LENGTH]

# Test window starts on the same date the training window ends.
# NOTE(review): Strategy.evaluate slices data[start_date:end_date] by label, so
# that boundary date presumably belongs to both windows — confirm this is intended.
TEST_LENGTH = 3*255
TEST_START = TRAIN_END
TEST_END = data.index[TRAIN_START_INDEX + TRAIN_LENGTH + TEST_LENGTH]

# Population sizes; STARTING_POPULATION is the target size in Population.__init__.
# CARRYING_CAPACITY is not referenced in the visible part of the notebook.
STARTING_POPULATION = 10
CARRYING_CAPACITY = 2*STARTING_POPULATION

In [253]:
import bitstream
import numpy as np
def randomGenome():
    """Return a random genome as a string of '0'/'1' characters.

    The genome holds NO_INDICATORS clauses. Each clause is six fields, each
    zero-padded to PRECISION bits, in this order:

        0. which indicator (drawn from range(NO_INDICATORS))
        1. empty / unused
        2. comparison, an index into SIGNS
        3. cutoff value, 0..99
        4. series transform, an index into TYPE
        5. operator, an index into BINARY_OPERATORS

    Every clause carries its own operator field so the genome length is always
    NO_INDICATORS * 6 * PRECISION bits. (A previous `indicator != 4` check
    dropped the operator of the fifth clause, which made that clause too short
    to be decoded once NO_INDICATORS reached 5.)
    """
    genome = ''

    for _ in range(NO_INDICATORS):
        fields = [
            np.random.randint(NO_INDICATORS),          #which indicator
            np.random.randint(2),                      #empty
            np.random.randint(len(SIGNS)),             #less than, greater than, equal to, crossover, crossunder
            np.random.randint(100),                    #cutoff value
            np.random.randint(len(TYPE)),              #today's, yesterday's, percent change
            np.random.randint(len(BINARY_OPERATORS)),  #which operator
        ]
        # Fixed-width binary encoding; zfill pads on the left to PRECISION bits
        genome += ''.join(format(int(field), 'b').zfill(PRECISION) for field in fields)

    return genome

In [254]:
import copy
class TradingRule:
    """A buy or sell rule decoded from a binary genome.

    The genome is a string of '0'/'1' characters made of clauses, each
    FEATURES fields of PRECISION bits. Decoding produces:

        signal: boolean Series, True where the rule fires.
        rule:   human-readable description of the rule.

    Pass genes="random" to draw a fresh genome from randomGenome().
    """
    def __init__(self, genes):
        if genes == "random":
            self.genes = randomGenome()
        else:
            self.genes = genes
        self.evaluatePhenotype()
        return

    def _decodeField(self, clause, field):
        """Return the integer stored in field number `field` of clause number `clause`."""
        start = (clause*FEATURES + field)*PRECISION
        return int(self.genes[start:start + PRECISION], 2)

    def evaluatePhenotype(self):
        """Decode self.genes into self.signal and self.rule.

        Field layout per clause: 0 = indicator index, 1 = unused,
        2 = comparison (SIGNS), 3 = cutoff value, 4 = transform (TYPE),
        5 = operator (BINARY_OPERATORS) joining this clause to the signal built
        so far; the operator of the first clause is ignored.

        Raises ValueError if a decoded transform, comparison or operator name
        is not one this method knows how to apply.
        """
        # Default for a genome too short to hold a clause: never fire.
        signal = pd.Series(False, index = indicators[0].value.index)
        rule = ""
        for i in range(len(self.genes)//(PRECISION*FEATURES)):
            indicator = indicators[self._decodeField(i, 0)]
            sign = SIGNS[self._decodeField(i, 2)]
            value = self._decodeField(i, 3)
            todayYesterday = TYPE[self._decodeField(i, 4)]

            # Work on a copy so the shared indicator in `indicators` is never modified
            series = indicator.value.copy()
            if todayYesterday == "today":
                pass
            elif todayYesterday == "yesterday":
                # shift() returns a new Series; the result must be kept
                series = series.shift(1)
            elif todayYesterday == "percent change":
                series = 100*(series - series.shift(1))/series.shift(1)
            else:
                raise ValueError("Invalid todayYesterday")

            if sign == "less than":
                condition = series < value
            elif sign == "greater than":
                condition = series > value
            elif sign == "equal to":
                condition = series == value
            elif sign == "cross over":
                condition = crossover(series, value)
            elif sign == "cross under":
                condition = crossunder(series, value)
            else:
                raise ValueError("Invalid sign")

            if i == 0:
                signal = condition
            else:
                operator = BINARY_OPERATORS[self._decodeField(i, 5)]

                if operator == "and":
                    signal = signal & condition
                elif operator == "or":
                    signal = signal | condition
                elif operator == "xor":
                    signal = signal ^ condition
                else:
                    raise ValueError("Invalid Operator")
                rule += " " + operator + " "
            rule = rule + indicator.name + "-" + todayYesterday + " " + sign + " " + str(value)
        self.signal = signal
        self.rule = rule

In [257]:
class Strategy:
    """A buy rule and a sell rule, with a simple long-only backtest.

    Attributes set by the methods below:
        performance: DataFrame with "Daily Returns" and "Cumulative Returns",
            set by evaluate().
        fitness: annualised Sharpe ratio, set by evaluateFitness().
        OOSF: out-of-sample fitness, set by evaluateOOSF().
    """
    def __init__(self, buyRule, sellRule):
        self.buyRule = buyRule
        self.sellRule = sellRule
        return

    def print(self):
        """Print the human-readable buy and sell rules."""
        print("Buy: " + self.buyRule.rule)
        print("Sell: " + self.sellRule.rule)

    def evaluate(self, start_date, end_date):
        """Backtest the strategy on data[start_date:end_date] and store self.performance.

        The position is flat at the start. On each day the buy signal (when
        flat) or the sell signal (when holding) is read, and the decision
        determines the NEXT day's return: the market return if holding after
        the decision, otherwise 0.
        """
        window = data[start_date:end_date]
        dates = window.index
        columns = ["Daily Returns", "Cumulative Returns"]
        if len(dates) == 0:
            # Nothing to simulate; keep the expected columns so later code still works
            self.performance = pd.DataFrame(columns = columns, dtype = float)
            return

        marketReturns = window["Returns"]
        # Results are collected in plain lists and turned into a DataFrame once;
        # this avoids chained-indexing assignment, which is unreliable in pandas.
        dailyReturns = [0.0]
        cumulativeReturns = [0.0]
        holding = False
        for today, tomorrow in zip(dates[:-1], dates[1:]):
            #NOTE: my convention for the returns column is that the returns on todays date represent the returns from 24 hours ago
            # to now. Therefore, when a stock is bought, TOMORROW'S returns are set.
            if holding:
                #check for sell
                if self.sellRule.signal.loc[today]:
                    nextReturn = 0
                    holding = False
                else:
                    nextReturn = marketReturns.loc[tomorrow]
            else:
                #check for buy
                if self.buyRule.signal.loc[today]:
                    nextReturn = marketReturns.loc[tomorrow]
                    holding = True
                else:
                    nextReturn = 0

            dailyReturns.append(nextReturn)
            cumulativeReturns.append((1 + nextReturn)*(1 + cumulativeReturns[-1]) - 1)

        performance = pd.DataFrame(
            {"Daily Returns": dailyReturns, "Cumulative Returns": cumulativeReturns},
            index = dates,
            columns = columns,
            dtype = float,
        )
        self.performance = performance.dropna()
        return

    def _sharpe(self):
        """Annualised Sharpe ratio of the daily returns, or None when it is undefined.

        It is undefined when the standard deviation is 0 (no trades) or NaN
        (fewer than two observations).
        """
        daily = self.performance["Daily Returns"]
        spread = daily.std()
        if pd.isna(spread) or spread == 0:
            return None
        return (255 ** 0.5)*daily.mean()/spread

    def evaluateFitness(self):
        """Set self.fitness to the Sharpe ratio and return True; return False if it is undefined."""
        sharpe = self._sharpe()
        if sharpe is None:
            return False
        self.fitness = sharpe
        return True

    def evaluateOOSF(self):
        """Set self.OOSF to the Sharpe ratio of self.performance, or 0 if it is undefined."""
        sharpe = self._sharpe()
        self.OOSF = 0 if sharpe is None else sharpe
        return

In [261]:
class Population:
    """A collection of Strategy instances that produced a defined fitness in training."""
    def __init__(self):
        # Keep drawing random strategies until enough of them have a usable fitness
        self.members = []
        while len(self.members) < STARTING_POPULATION:
            print(len(self.members))
            candidate = Strategy(TradingRule('random'), TradingRule('random'))
            candidate.evaluate(TRAIN_START, TRAIN_END)
            if not candidate.evaluateFitness():
                print("No trades")
                continue
            self.members.append(candidate)
        return

    def getPopulationFitness(self):
        """Return the mean fitness over the members of the population."""
        fitnessByMember = {member: member.fitness for member in self.members}
        return np.mean(list(fitnessByMember.values()))
# Build the initial population on the training window and show its mean fitness
pop = Population()
pop.getPopulationFitness()

0
No trades
0
No trades
0
No trades
0
No trades
0
No trades
0
No trades
0
1
2
3
No trades
3
No trades
3
No trades
3
4
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
No trades
5
6
No trades
6
7
8
No trades
8
No trades
8
No trades
8
9
No trades
9
No trades
9


0.0874345130635362