Build a base data frame from the stock's low, open, close, high and volume for the given time period

Construct a set of data frames that each hold:
- timestamp
- low
- open
- close
- high
- volume
- eps_actual (the actual EPS of the last relevant reporting period of the data frame's time period)
- eps_estimate (the estimated EPS, same as above)
- rec_strong_sell
- rec_sell
- rec_hold
- rec_buy
- rec_strong_sell (this and the above 4 fields are proportions of each rec cat of the total [0 - 1])

In [26]:
def construct_data_frames(ticker, date):
    trend = recommendation_trends_by_date(ticker, date)
    trend = { **trend, **{
        'strongSell': 0,
        'sell': 0,
        'hold': 0,
        'buy': 0,
        'strongBuy': 0
    } }
    trend_total = trend['strongSell'] + trend['sell'] + trend['hold'] + trend['buy'] + trend['strongBuy']
    trend_total = 1 if trend_total == 0 else trend_total
    earnings = company_earnings_by_date(ticker, date)
    static_data = []
    if 'actual' in earnings and 'estimate' in earnings:
        static_data = static_data + [earnings['actual'], earnings['estimate']]
    else:
        static_data = static_data + [0, 0]
    static_data = static_data + [
        trend['strongSell'] / trend_total,
        trend['sell'] / trend_total,
        trend['hold'] / trend_total,
        trend['buy'] / trend_total,
        trend['strongBuy'] / trend_total
    ]
    base_frames = stock_candles_by_date(ticker, date)
    return [list(x) + static_data for x in base_frames]

Go through the calendar and fetch all the historical data on this ticker that we have access to.
Let's start with 3 months.

In [22]:
def get_historical_data(ticker, days = 90):
    # if this historical pickle file exists, just return it
    historical_pickle_name = "{0}.technical.{1}.historical.df.pkl".format(ticker, time_period)
    if jar.pickle_exists(historical_pickle_name):
        return jar.read_pickle_dataframe(historical_pickle_name)
    # if it doesn't, build it
    historical_data = pandas.DataFrame()
    today = date.today()
    x_days_ago = today + timedelta(days = -1 * days)
    current_date = x_days_ago
    while current_date < today:
        # look for pickle file for this days data for this day's ticker
        pickle_name = "{0}-{1}.{2}.technical.df.pkl".format(ticker, current_date.strftime("%m-%d-%Y"), time_period)
        if jar.pickle_exists(pickle_name):
            data = jar.read_pickle_dataframe(pickle_name)
        else:
            data = pandas.DataFrame.from_records(
                construct_data_frames(ticker, current_date))
            # pickle this day's data to cut down on API requests
            jar.write_pickle_dataframe(pickle_name, data)
            time.sleep(2) # sleep for 2 seconds so we don't hit the API limit
        historical_data = historical_data.append(data)
        current_date = current_date + timedelta(days = 1)
    # label & type the data frame
    print(historical_data.head())
    historical_data.columns = ['ts', 'o', 'l', 'h', 'c', 'v', 'e_a', 'e_e', 'r_ss', 'r_s', 'r_h', 'r_b', 'r_sb']
    historical_data['ts'] = pandas.to_datetime(historical_data['ts'], unit = 's')
    historical_data.index.name = 'ts'
    # pickle this historical data
    jar.write_pickle_dataframe(historical_pickle_name, historical_data)
    return historical_data

In [23]:
df = get_historical_data(ticker)

df.index = df['ts']
df.sort_index()
df.drop_duplicates()

  0     1         2    3    4    5    6     7         8    9    10   11   12  \
0  s -0.53 -0.857116  0.0  0.0  0.0  0.0  0.00       NaN  NaN  NaN  NaN  NaN   
0  c     h         l    o    s    t    v -0.53 -0.857116  0.0  0.0  0.0  0.0   
0  c     h         l    o    s    t    v -0.53 -0.857116  0.0  0.0  0.0  0.0   
0  c     h         l    o    s    t    v -0.53 -0.857116  0.0  0.0  0.0  0.0   
0  c     h         l    o    s    t    v -0.53 -0.857116  0.0  0.0  0.0  0.0   

    13  
0  NaN  
0  0.0  
0  0.0  
0  0.0  
0  0.0  


ValueError: Length mismatch: Expected axis has 14 elements, new values have 13 elements

In [137]:
df.describe()

Unnamed: 0,o,l,h,c,v,e_a,e_e,r_ss,r_s,r_h,r_b,r_sb
count,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0,1794.0
mean,128.691088,130.574125,130.510833,132.123825,107256800.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
std,5.532129,5.827971,5.596367,5.705862,30313950.0,2.221065e-16,0.0,0.0,0.0,0.0,0.0,0.0
min,117.57,120.5,118.899,121.67,54930060.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
25%,123.449,124.94,126.6,127.93,86939790.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
50%,128.5,129.2,130.89,131.685,98390560.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
75%,133.59,135.58,134.87,136.31,121047300.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
max,141.37,143.6,143.16,145.09,192541500.0,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0


In [147]:
df.tail(15)

Unnamed: 0_level_0,ts,o,l,h,c,v,e_a,e_e,r_ss,r_s,r_h,r_b,r_sb
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-03-01 00:00:00,2021-03-01 00:00:00,122.79,123.75,127.79,127.93,116307892,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-02 00:00:00,2021-03-02 00:00:00,125.01,128.41,125.12,128.72,102260945,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-03 00:00:00,2021-03-03 00:00:00,121.84,124.81,122.06,125.71,112966340,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-04 00:00:00,2021-03-04 00:00:00,118.62,121.75,120.13,123.6,177275300,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-05 14:30:00,2021-03-05 14:30:00,117.57,120.98,118.899,121.67,71996083,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-01 00:00:00,2021-03-01 00:00:00,122.79,123.75,127.79,127.93,116307892,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-02 00:00:00,2021-03-02 00:00:00,125.01,128.41,125.12,128.72,102260945,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-03 00:00:00,2021-03-03 00:00:00,121.84,124.81,122.06,125.71,112966340,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-04 00:00:00,2021-03-04 00:00:00,118.62,121.75,120.13,123.6,177275300,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
2021-03-05 14:30:00,2021-03-05 14:30:00,117.57,120.98,118.95,121.67,72024122,-0.53,-0.857116,0.0,0.0,0.0,0.0,0.0
