In [5]:
import sys
sys.path.append('./../../src')

# python's shit
from os import path
from datetime import date, datetime, timedelta
import time
import calendar

# someone elses shit
import finnhub
import pandas

# my shit
import config
from lib.stonk_jar import StonkJar

In [6]:
finnhub_client = finnhub.Client(api_key=config.api_keys['finnhub']['sandbox'])
ticker = 'AAPL'
jar = StonkJar(ticker)

In [7]:
# Time junk, just for testing?
now = time.time()
today = date.today()
yesterday = today + timedelta(days = -1)

In [8]:
def recommendation_trends_by_date(ticker, date):
    first_of_month = date.replace(day = 1)
    recommendations = jar.pickle_back(
        "{0}_recommendations.pkl".format(ticker),
        finnhub_client.recommendation_trends,
        ticker)
    recommendation = [r for r in recommendations if datetime.strptime(r['period'], '%Y-%m-%d').date() == first_of_month]
    if (len(recommendation) > 0):
        return recommendation[0]
    return {}

In [9]:
def company_earnings_by_date(ticker, date):
    reporting_period = timedelta(days = 90)
    earnings = jar.pickle_back(
        "{0}_earnings.pkl".format(ticker),
        finnhub_client.company_earnings,
        ticker)
    earning = [r for r in earnings if datetime.strptime(r['period'], '%Y-%m-%d').date() + reporting_period > date]
    if (len(earning) > 0):
        return earning[0]
    return {}

Build a base data frame from the stock's low, open, close, high and volume for the given time period

In [10]:
def stock_candles_by_date(ticker, date):
    first_of_month_ts = int(datetime.combine(date.replace(day = 1), datetime.min.time()).timestamp())
    next_month = date.replace(day = 28) + timedelta(days = 4)
    last_of_month = next_month - timedelta(days = next_month.day)
    last_of_month_ts = datetime.combine(last_of_month, datetime.max.time()).timestamp()
    earliest = int(min(time.time(), last_of_month_ts))
    candles = finnhub_client.stock_candles('AAPL', '60', first_of_month_ts, earliest)
    return zip(candles['t'], candles['l'], candles['o'], candles['c'], candles['h'], candles['v'])

Construct a set of data frames that each hold:
- timestamp
- low
- open
- close
- high
- volume
- eps_actual (the actual EPS of the last relevant reporting period of the data frame's time period)
- eps_estimate (the estimated EPS, same as above)
- rec_strong_sell
- rec_sell
- rec_hold
- rec_buy
- rec_strong_sell (this and the above 4 fields are proportions of each rec cat of the total [0 - 1])

In [11]:
def construct_data_frames(ticker, date):
    trend = recommendation_trends_by_date(ticker, date)
    trend_total = trend['strongSell'] + trend['sell'] + trend['hold'] + trend['buy'] + trend['strongBuy']
    earnings = company_earnings_by_date(ticker, date)
    static_data = []
    if 'actual' in earnings and 'estimate' in earnings:
        static_data = static_data + [earnings['actual'], earnings['estimate']]
    else:
        static_data = static_data + ['?', '?']
    static_data = static_data + [
        trend['strongSell'] / trend_total, trend['sell'] / trend_total,
        trend['hold'] / trend_total,
        trend['buy'] / trend_total, trend['strongBuy'] / trend_total
    ]
    base_frames = stock_candles_by_date(ticker, date)
    return [list(x) + static_data for x in base_frames]

Go through the calendar and fetch all the historical data on this ticker that we have access to.
Let's start with 3 months.

In [25]:
def get_historical_data(ticker, days = 90):
    # if this historical pickle file exists, just return it
    historical_pickle_name = "{0}.technical.historical.df.pkl".format(ticker)
    if jar.pickle_exists(historical_pickle_name):
        return jar.read_pickle_dataframe(historical_pickle_name)
    # if it doesn't, build it
    historical_data = pandas.DataFrame()
    today = date.today()
    x_days_ago = today + timedelta(days = -1 * days)
    current_date = x_days_ago
    while current_date < today:
        # look for pickle file for this days data for this day's ticker
        pickle_name = "{0}-{1}.technical.df.pkl".format(ticker, current_date.strftime("%m-%d-%Y"))
        if jar.pickle_exists(pickle_name):
            data = jar.read_pickle_dataframe(pickle_name)
        else:
            data = construct_data_frames(ticker, current_date)
            # pickle this day's data to cut down on API requests
            df = pandas.DataFrame.from_records(data)
            jar.write_pickle_dataframe(pickle_name, df)
            time.sleep(2) # sleep for 2 seconds so we don't hit the API limit
        historical_data = historical_data.append(data)
        current_date = current_date + timedelta(days = 1)
    # label & type the data frame
    historical_data.columns = ['ts', 'o', 'l', 'h', 'c', 'v', 'e_a', 'e_e', 'r_ss', 'r_s', 'r_h', 'r_b', 'r_sb']
    historical_data['ts'] = pandas.to_datetime(historical_data['ts'], unit = 's')
    historical_data.index.name = 'ts'
    # pickle this historical data
    jar.write_pickle_dataframe(historical_pickle_name, historical_data)
    return historical_data

In [26]:
df = get_historical_data(ticker)

In [30]:
df.describe()

Unnamed: 0,o,l,h,c,v,e_a,e_e,r_ss,r_s,r_h,r_b,r_sb
count,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0,63766.0
mean,198.20254,198.798937,198.875466,199.594132,50.097654,1.68,1.555857,0.015054,0.015054,0.178831,0.507775,0.283285
std,52.514127,52.672144,52.690478,52.882832,28.562268,2.220463e-16,0.0,0.011332,0.011332,0.018763,0.033206,0.00662
min,120.317204,120.679241,120.739702,121.161958,1.0,1.68,1.555857,0.0,0.0,0.158416,0.45283,0.278302
25%,153.498959,153.960842,154.015316,154.576685,25.0,1.68,1.555857,0.0,0.0,0.158416,0.476415,0.278302
50%,190.898874,191.473294,191.530295,192.239187,50.0,1.68,1.555857,0.023585,0.023585,0.174528,0.5,0.278302
75%,236.610098,237.322064,237.4199,238.271352,75.0,1.68,1.555857,0.023585,0.023585,0.198113,0.549505,0.292079
max,366.128118,367.229807,366.678963,368.698727,99.0,1.68,1.555857,0.023585,0.023585,0.221698,0.549505,0.292079


In [31]:
df.sample()

Unnamed: 0_level_0,ts,o,l,h,c,v,e_a,e_e,r_ss,r_s,r_h,r_b,r_sb
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
573,2021-02-25 03:00:00,232.132192,232.830684,233.366194,233.762006,27,1.68,1.555857,0.023585,0.023585,0.174528,0.5,0.278302


Now that we have raw hr resolution tick data, split it up into data 'complete' frames for training/validating. Each frame will have 10 ticks of data, 9 training ticks and the last verification ticks. This set of data will be randomized, split into a training set and a validation set, and then used to train the network.