In [2]:
##########################################
############                  ############
############    MUST HAVES    ############
############                  ############
##########################################

print("Must Haves")


Must Haves


In [1]:
# Read CSV
# This will read in the CSVs that come from the alpha vantage website, where each CSV
# is from a separate stock symbol
import csv  

def open_csv(path_to_csv):
    data = []
    with open(path_to_csv) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            if row[0] != '':
                data.append(row)
    return data


In [2]:
# Access API
from myapikey import APIkey
from alpha_vantage.timeseries import TimeSeries


In [3]:
# must provide an api key (they are free from the alpha vantage website
# this determines how much data you can get in how much time.
def get_intraday(stock):
    ts = TimeSeries(key=APIkey, output_format='pandas', indexing_type='integer')
    return ts.get_intraday(symbol=stock, interval='1min', outputsize='full')

# provide an api key to access the data
def get_daily(stock):
    ts = TimeSeries(key=APIkey, output_format='pandas')
    data, meta_data = ts.get_daily(symbol=stock, outputsize='full')
    return data, meta_data


In [18]:
# Select stocks based on ticker symbol
stock_names = ["AAOI", "AAP", "AAPL", "ABMD", "ABT", "ACET", "ADIL", "AETI", "AFL", "AKAM", "ALGN", "ALQA", "AMD", 
"AMG", "AMRH", "AMZN", "ANY", "APVO", "AVGR", "AXSM", "BAC", "BIIB", "BLIN", "BLK", "BLRX", "BSQR", "BSX", "BTAI", 
"CGA", "CGC", "CLIR", "CMCSA", "CMG", "COP", "COTY", "COUP", "CPSH", "CSCO", "CTK", "CTRV", "CVX", "D", "DIS", 
"DUK", "EXC", "EYES", "FARO", "FB", "FCEL", "FDS", "FLKS", "FRAN", "FTNT", "GE", "GM", "GNMX", "GOOGL", "HMC", "HOV", 
"HSGX", "IGLD", "INOD", "IVZ", "JAGX", "JNJ", "JPM", "JT", "KEYS", "KOSS", "LB", "LLY", "LPTH", "M", "MDT", "MSFT", 
"MYSZ", "NDAQ", "NEE", "NFLX", "NKE", "NRG", "NVDA", "NVFY", "ORLY", "OSS", "OXBR", "PFE", "PIR", "PRGO", "QBAK", 
"QUIK", "RAD", "RBCN","RHT", "RL", "SBUX", "SESN", "SFUN", "SGMA", "SMAR", "SO", "STZ", "SYK", "T", "TNK", "TOPS", 
"TRHC", "TRIL", "TRIP", "TRNX", "TSLA", "TSN", "TTWO", "TWMC", "TWTR", "UA", "UAA", "UPL", "USB", "UXIN", "V", 
"VRSN", "VTVT", "VZ", "WDC", "WFC", "WFT", "WMT", "XOM", "ZTS"]

# at least one of these is not a real stock recognized by alpha vantage.
# I must error check these stocks.
print(len(stock_names))


130


In [20]:
# Store Stock information as a data set
stock_data = batch_get_daily(stock_names)


In [5]:
# print(stock_data[0][0])
import pickle


In [21]:
f = open('store_stocks_new.pckl', 'wb') # rename and remove '_new' to load it
pickle.dump(stock_data, f)
f.close()


In [9]:
f = open('store_stocks.pckl', 'rb')
stock_data = pickle.load(f)
f.close()


In [25]:
# Format dataset with classification (up/down)
import numpy as np

def add_classification(stocks):
    # up = 1, down = 0
    new_stocks = []
    # column 4 is the end of day price
    last_price = stocks[len(stocks) - 1][4]
    # the very last price will always be up. Perhaps
    # this makes my AI optimistic?
    new_stocks.append(np.insert(stocks[len(stocks) - 1], 5, 1)) # up
    for s in reversed(list(range(len(stocks) - 1))):
        if (stocks[s][4] - last_price) > 0:
            new_stocks.append(np.insert(stocks[s], 5, 1)) # up
        else:
            new_stocks.append(np.insert(stocks[s], 5, 0)) # down
        last_price = stocks[s][4]
    return new_stocks

# accepts an array of all data from one stock
# produces chunks to be made into tensors
# run this once on each stock and save the resulting array (of arrays)
def split_sample(data):
    sample = []
    for start in range(0, len(data) - 50, 50):
        sample.append(data[start:start+50])
    return sample

# takes the results of batch_get_daily and prepares everything
def prepare(group_of_stocks):
    prepared_data_set = []
    for stock in group_of_stocks:
        prepared_data_set.append(split_sample(add_classification(stock[0].to_numpy())))
    return prepared_data_set

def prep_part_two(group_of_stocks):
    new_set = []
    for stock in group_of_stocks:
        for collection in stock:
            classification = collection[0][5]
            training_data = collection[1:]
            new_set.append([training_data, classification])
    return new_set

# prepped_stock[n][m][0][5] = the classification!
# prepped_stock[n][m][X > 0] = the data sets!


In [22]:
# an example of how to use this data and functions
# print(split_sample(add_classification(stock_data[0][0].to_numpy()))[0])

# results of preparing the stocks
prepped_stocks = prepare(stock_data)


In [2]:
# pickle the prepped stocks!
import pickle


In [23]:
f = open('store_prepped_stocks_new.pckl', 'wb') # rename and remove '_new' to load it
pickle.dump(prepped_stocks, f)
f.close()


In [13]:
f = open('store_prepped_stocks.pckl', 'rb')
prepped_stocks = pickle.load(f)
f.close()

print(len(prepped_stocks))

130


In [24]:
# print(len(prepped_stocks))
# 130
# print(len(prepped_stocks[0]))
# 28
# print(len(prepped_stocks[0][0]))
# 50
# print(len(prepped_stocks[0][0][0]))
# 6

# print(len(prepped_stocks[3]))
# 107

# each stock contains a (small to large) number of sets of 50 lines of info about a stock
# each line contains 6 data points, with the final data point being 1 or 0
# where 1 is up from the previous day and 0 is down from the previous day

# prepped_stock[n][m][0][5] = the classification!
# prepped_stock[n][m][X > 0] = the data sets!

# I want to make a list of each of the 50 training bits, but I want only 49 of the bits
# so the 1st bit will be the classification. The 50 training bits should actually be 
# 49*6 bits of info in one array. Then the second member of the array containing that data
# will be 0 or 1. Then it will be a list of everything!


28


In [26]:
# prepped stocks part 2
data_set = prep_part_two(prepped_stocks)

# now the data should be one array filled with pairs of 49 days worth of 
# data with the classification of the next day.

# TODO: so I need to split this into x_train, y_train and x_test, y_test

print(len(data_set))


9795


In [None]:
# save my work
import pickle


In [27]:
f = open('store_data_set_new.pckl', 'wb') # rename and remove '_new' to load it
pickle.dump(data_set, f)
f.close()


In [None]:
f = open('store_data_set.pckl', 'rb')
data_set = pickle.load(f)
f.close()

print(len(prepped_stocks))



In [9]:
# (3 must-haves in one cell)
# Train
# Hidden Nodes
# Dropout
import tensorflow as tf

# example training
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)


NameError: name 'x_train' is not defined

In [None]:
# Test
model.evaluate(x_test, y_test)


In [None]:
# Real Life Prediction


In [None]:
# Print results of prediction


In [None]:
##########################################
#######                           ########
#######    WOULD LIKE TO HAVES    ########
#######                           ########
##########################################

print("would like to haves")


In [None]:
# Save data as CSV



In [14]:
# Wait for API (free) access limit
import time

def batch_get_daily(stock_list):
    count = 0
    data = []
    for stock in stock_list:
        if count == 5:
            time.sleep(65) # 65 seconds just in case the timing on the server or here isn't perfect
            count = 0
        data.append(get_daily(stock))
        count += 1
    return data

# for 130 stocks this should take at least half an hour


In [14]:
# Save results of Training
# https://www.tensorflow.org/guide/keras#entire_model
model.save_weights('verdurouMKI.h5')


NameError: name 'model' is not defined

In [None]:
# Load Results from Previous Training
model = tf.keras.models.load_model('verdurouMKI.h5')


In [22]:
# Scrape HTML
