In [None]:
'''
we will try to predict a stock's open price in a week
'''

In [1]:
'''
obtain and format data (stock information for 38 tech companies over last 3 months, ending yesterday)
'''

import requests
import random
import numpy as np

TOKEN = "pk_8d57e606cabc41aaaacc71dd142e8120" # edit if using your own IEX account
SYMBOLS = """abde,amd,googl,goog,adi,aapl,amat,asml,adsk,bidu,
    avgo,cdns,cern,chkp,csco,ctxs,fb,intc,intu,klac,
    lrcx,mxim,mchp,mu,msft,ntap,ntes,nvda,nxpi,qcom,
    swks,symc,snps,txn,vrsn,wdc,wday,xlnx"""
TYPES = "chart"
RANGE = "3m"

query = {"token": TOKEN, "symbols": SYMBOLS, "types": TYPES, "range": RANGE}
r = requests.get("https://cloud.iexapis.com/beta/stock/market/batch", params=query)

dict = r.json()

NUM_DAYS = 50 # a little less than 3 months
NUM_TRAINING_DAYS = 40

training_data = np.array([0, 0, 0, 0, 0, 0]) # python is weird
testing_data = np.array([0, 0, 0, 0, 0, 0])
    
for symbol in dict:
    for i in range(NUM_DAYS):
        openPrice = dict[symbol]["chart"][i]["open"] # open is a keyword
        high = dict[symbol]["chart"][i]["high"]
        low = dict[symbol]["chart"][i]["low"]
        close = dict[symbol]["chart"][i]["close"]
        volume = dict[symbol]["chart"][i]["volume"]
        predicted_price = dict[symbol]["chart"][i + 7]["open"]

        if i < NUM_TRAINING_DAYS:
            training_data = np.vstack([training_data, [openPrice, high, low, close, volume, predicted_price]])
        else:
            testing_data = np.vstack([training_data, [openPrice, high, low, close, volume, predicted_price]])

    
training_data = training_data[1::]
testing_data = testing_data[1::]

print(training_data)

[[1.94400000e+01 2.06800000e+01 1.90000000e+01 2.05700000e+01
  1.07157014e+08 2.04000000e+01]
 [2.11900000e+01 2.12000000e+01 1.96800000e+01 2.07500000e+01
  1.21270986e+08 1.94900000e+01]
 [2.08900000e+01 2.14400000e+01 2.00700000e+01 2.01900000e+01
  1.63944073e+08 2.03700000e+01]
 ...
 [1.26660000e+02 1.27700000e+02 1.25070000e+02 1.26640000e+02
  4.20203000e+06 1.22500000e+02]
 [1.27030000e+02 1.27260000e+02 1.19860000e+02 1.22810000e+02
  6.24916100e+06 1.23000000e+02]
 [1.22380000e+02 1.24220000e+02 1.22070000e+02 1.22950000e+02
  2.39522200e+06 1.23200000e+02]]


In [7]:
'''
genetic algorithm for stock market data
credits to MorvanZhou: https://github.com/yuanlairucisky/MorvanZhou-Evolutionary-Algorithm
'''
class Genetic_Algorithm:


    # Get stock training data
    def __init__(self, train_data, DNA_SIZE = 10, POP_SIZE = 100, CROSS_RATE = 0.75, MUTATION_RATE = 0.01, N_GENERATIONS = 200):
        #Define constants for algorithm
        self.DNA_SIZE = DNA_SIZE # Number of input variables
        self.POP_SIZE = POP_SIZE # Number of models in current generation
        self.CROSS_RATE = CROSS_RATE # Probability of crossover event
        self.MUTATION_RATE = MUTATION_RATE # Probability of a mutation occuring
        self.N_GENERATIONS = N_GENERATIONS # Number of generations in algorithm
        self.input_data = train_data[:,0:DNA_SIZE]
        self.output_data = train_data[:,DNA_SIZE:DNA_SIZE + 1]
        self.NUM_EXAMPLES = self.input_data.shape[0]

    # Find fitness for selection
    def get_fitness_one(self,calculated_out, real_out):
        reciporocal_reals = 1. / real_out
        averaging_factor = 1. / self.NUM_EXAMPLES
        fitness_array = np.absolute(1 - (averaging_factor * np.matmul(calculated_out, reciporocal_reals)))
        return fitness_array
    
    # Get predicted values of models
    def translateDNA(self,pop):
        transposed_input = np.transpose(self.input_data)
        return np.matmul(pop,transposed_input)

    # Get next generation based on fitness values
    def select(self,pop, fitness):
        fitness_array = np.transpose(fitness)
        selected_indexes = fitness_array.argsort()[-self.POP_SIZE:][::-1]
        selected_pop = pop[selected_indexes,:]
        return selected_pop

    # Crossover process
    def crossover(self,parent, pop):
        if np.random.rand() < self.CROSS_RATE: # Ensures crossover happens at crossover rate
            i_ = np.random.randint(0, self.POP_SIZE, size=1) # Select another individual from pop
            cross_points = np.random.randint(0, 2, size=self.DNA_SIZE).astype(np.bool) # Choose crossover points
            parent[cross_points] = pop[i_, cross_points] # Mating and produce one child
        return parent


    def mutate(self,pop): # Mutation Process
        for child in pop:
            for point in range(self.DNA_SIZE):
                if np.random.rand() < self.MUTATION_RATE: # Ensures mutation happens at mutation rate
                    if np.random.rand() < 0.5: # TODO: make a parameter
                        child[point] += 0.5
                    else:
                        child[point] -= 0.5
        return pop


In [8]:
'''
generate model
'''

pop = np.random.randint(2, size=(100,arr.shape[1] - 1)) # initialize initial population

ga = Genetic_Algorithm(arr, DNA_SIZE = arr.shape[1] - 1)

for i in range(0,ga.N_GENERATIONS):
    calculated_results = ga.translateDNA(pop)
    fitness = ga.get_fitness_one(calculated_results,ga.output_data)
    fitness_array = np.transpose(fitness)
    print("Most fit model:",pop[np.argmax(fitness),:])
    pop = ga.select(pop,fitness)
    pop = ga.mutate(pop)
    pop_copy = pop.copy()
    for i in range(0,pop.shape[0]):
        pop[i,:] = ga.crossover(pop[i,:],pop_copy)
        

Most fit model: [1 1 1 1 1 0]


IndexError: boolean index did not match indexed array along dimension 1; dimension is 100 but corresponding boolean dimension is 6

In [5]:
'''
apply model to real-time stock information
'''

import requests
import random
import numpy as np

TOKEN = "pk_8d57e606cabc41aaaacc71dd142e8120" # edit if using your own IEX account
SYMBOLS = """abde,amd,googl,goog,adi,aapl,amat,asml,adsk,bidu,
    avgo,cdns,cern,chkp,csco,ctxs,fb,intc,intu,klac,
    lrcx,mxim,mchp,mu,msft,ntap,ntes,nvda,nxpi,qcom,
    swks,symc,snps,txn,vrsn,wdc,wday,xlnx"""
TYPES = "chart"
RANGE = "1d"

query = {"token": TOKEN, "symbols": SYMBOLS, "types": TYPES, "range": RANGE}
r = requests.get("https://cloud.iexapis.com/beta/stock/market/batch", params=query)

realtime_prices = r.json()

print(realtime_prices)

{'AMD': {'chart': [{'date': '20190405', 'minute': '09:30', 'label': '09:30 AM', 'high': 29.675, 'low': 29.49, 'average': 29.612, 'volume': 7605, 'notional': 225197.665, 'numberOfTrades': 45, 'marketHigh': 29.69, 'marketLow': 29.47, 'marketAverage': 29.602, 'marketVolume': 1998055, 'marketNotional': 59146040.2189, 'marketNumberOfTrades': 2869, 'open': 29.575, 'close': 29.545, 'marketOpen': 29.64, 'marketClose': 29.555, 'changeOverTime': 0, 'marketChangeOverTime': 0}, {'date': '20190405', 'minute': '09:31', 'label': '09:31 AM', 'high': 29.53, 'low': 29.36, 'average': 29.488, 'volume': 7302, 'notional': 215323.78, 'numberOfTrades': 32, 'marketHigh': 29.555, 'marketLow': 29.34, 'marketAverage': 29.431, 'marketVolume': 1042507, 'marketNotional': 30682020.3764, 'marketNumberOfTrades': 2088, 'open': 29.53, 'close': 29.36, 'marketOpen': 29.55, 'marketClose': 29.355, 'changeOverTime': -0.0041874915574766575, 'marketChangeOverTime': -0.005776636713735538}, {'date': '20190405', 'minute': '09:32',