In [14]:
from IPython.display import clear_output
import pickle
# general packages
import time
import datetime
import pandas as pd
import numpy as np
from math import modf
import glob
import sys
import os

In [15]:
# Read in the models and their features
# NOTE: unpickling runs arbitrary code stored in the file -- only load model files you trust.
# The context manager closes the file handle as soon as the model is deserialised.
with open('models/model_1.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [16]:
# read in the feature lists for the models above for our predictions
# (one feature name per line, in the column order handed to the model)
with open('models/model_1_features.txt', 'r') as features_file:
    # strip only the line terminator: slicing off the last character would truncate the
    # final name when the file has no trailing newline; blank lines are skipped so they
    # cannot turn into an empty-named feature
    features = [name for name in (line.rstrip('\n') for line in features_file) if name]

# Create functions for preparing entries for model predictions

In [17]:
def print_bold(_str):
    '''Print '_str' wrapped in the ANSI escape codes that switch bold on and back off.'''
    bold_on, bold_off = "\033[1m", "\033[0m"
    print("".join((bold_on, _str, bold_off)))
    

def get_event_info(base_event, quote_event, level=None, outcome=None):
    '''
    Summarise the event strings of the base and quote currency.

    Event strings understood here:
        - 'none'                          -> contributes nothing
        - 'multiple_<impact>_<result>'    -> counted as two events of that impact/result
        - '<impact>_<result>__<impact>_<result>...' -> one event per '__'-separated item
      where <impact> is 'high', 'medium' or 'low'; items with any other impact are ignored.

    - level - 'high', 'medium' or 'low'; used only when 'outcome' is None
    - outcome - if None, return the number of events collected for 'level'
        - otherwise return 1 when the impact-weighted result (high x3, medium x2, low x1)
          agrees with 'outcome' ('positive', 'negative' or 'neutral') and 0 when it does not
        - results 'better'/'right'/'positive' score +1, 'worse'/'wrong'/'negative' score -1,
          anything else scores nothing
        - when both events are 'none' the answer is always 0
    '''
    by_impact = {'high': [], 'medium': [], 'low': []}

    for raw in (base_event, quote_event):
        if raw == 'none':
            # no event on this side -- nothing to collect
            continue
        if 'multiple' in raw:
            # several identical events: record the same result twice
            parts = raw.split('_')
            impact = parts[1]
            if impact in by_impact:
                by_impact[impact].extend([parts[2]] * 2)
            continue
        for chunk in raw.split('__'):
            pieces = chunk.split('_')
            if pieces[0] in by_impact:
                by_impact[pieces[0]].append(pieces[1])

    # counting mode
    if outcome is None:
        return len(by_impact[level])

    # outcome mode -- two 'none' events never match any outcome
    if (base_event, quote_event) == ('none', 'none'):
        return 0

    favourable = {'better', 'right', 'positive'}
    adverse = {'worse', 'wrong', 'negative'}
    weights = {'high': 3, 'medium': 2, 'low': 1}

    weighted = 0
    for impact, weight in weights.items():
        signed = sum((res in favourable) - (res in adverse) for res in by_impact[impact])
        weighted += weight * signed

    if weighted > 0:
        verdict = 'positive'
    elif weighted < 0:
        verdict = 'negative'
    else:
        verdict = 'neutral'
    return 1 if outcome == verdict else 0


def _event_feature(frame, base_col, quote_col, **kwargs):
    '''Evaluate get_event_info row by row for one base/quote column pair.'''
    return frame.apply(lambda row: get_event_info(row[base_col], row[quote_col], **kwargs), axis=1)


def prepare_entry(entry, model_features):
    '''
    Turn raw to-predict rows into the feature frame handed to the model.

    - entry - DataFrame holding the raw price columns (Open/High/Low/Close, wick_high,
        wick_low, spread, bid_diff, ask_diff, ...) and the 'ind__*' indicator / event
        columns referenced below; NA values are assumed to be handled already
    - model_features - ordered list of the column names to return

    Returns a new DataFrame whose columns are exactly 'model_features', in that order.
    Features that cannot be derived from 'entry' are filled with 0 and rows whose
    'ind__base_strength' equals 0 are removed. The frame passed in is not modified.
    '''
    # work on a copy so the caller's frame is not silently widened with helper columns
    entry = entry.copy()

    # raw OBJECT-typed base/quote columns are only inputs to the engineered features below
    to_drop = [col for col in entry.columns
               if entry[col].dtype == 'O' and ('__base_' in col or '__quote_' in col)]

    # 'none' (no upcoming event) is replaced by a numeric sentinel
    # NOTE(review): 2088 is presumably an upper bound in minutes -- confirm where it comes from
    no_event_mins = 2088
    entry['ind__base_mins2next'] = entry['ind__base_mins_to_next'].apply(
        lambda x: no_event_mins if x == 'none' else float(x))
    entry['ind__quote_mins2next'] = entry['ind__quote_mins_to_next'].apply(
        lambda x: no_event_mins if x == 'none' else float(x))

    # binary indicators comparing the base/quote time-to-next-event (sentinel rows are always 0)
    # NOTE(review): '>=' flags the side whose event is further away (or tied), although the
    # intent reads as "has the next event coming up"; left unchanged because the features
    # must match whatever the saved model was fit on -- confirm before altering
    base_mins = entry['ind__base_mins2next']
    quote_mins = entry['ind__quote_mins2next']
    entry['ind__base_nextevent'] = ((base_mins >= quote_mins) & (base_mins != no_event_mins)).astype(int)
    entry['ind__quote_nextevent'] = ((quote_mins >= base_mins) & (quote_mins != no_event_mins)).astype(int)

    # event-derived features: (column suffix, get_event_info argument)
    levels = (('high', 'high'), ('med', 'medium'), ('low', 'low'))
    outcomes = (('pos', 'positive'), ('neg', 'negative'), ('neut', 'neutral'))
    # (source column stem, prefix of the per-level count features, prefix of the outcome features)
    event_sources = (
        ('pastimpact', 'ind__pastimp_', 'ind__past_actual_'),   # past event impacts
        ('foreimpact', 'ind__nextimp_', 'ind__next_fore_'),     # forecast of event impacts
        ('ffalert', 'ind__ffalert_', None),                     # FF Alert counts for next events
        ('pastevent_sent', None, 'ind__pastimp_sent_'),         # sentiment of past events
        ('nextevent_sent', None, 'ind__nextimp_sent_'),         # sentiment of next events
    )
    for stem, level_prefix, outcome_prefix in event_sources:
        base_col, quote_col = f'ind__base_{stem}', f'ind__quote_{stem}'
        if level_prefix is not None:
            for suffix, level in levels:
                entry[level_prefix + suffix] = _event_feature(entry, base_col, quote_col, level=level)
        if outcome_prefix is not None:
            for suffix, outcome in outcomes:
                entry[outcome_prefix + suffix] = _event_feature(entry, base_col, quote_col, outcome=outcome)

    # the raw object columns have served their purpose
    entry = entry.drop(to_drop, axis=1)

    # force Low / High to be the true extremes of the candle;
    # High is computed after Low has been corrected, so a bad Low cannot win the max
    price_cols = ['Low', 'High', 'Open', 'Close']
    entry['Low'] = entry[price_cols].min(axis=1)
    entry['High'] = entry[price_cols].max(axis=1)

    # set negative values to 0 where wick height was taken
    entry['wick_high'] = entry['wick_high'].clip(lower=0)
    entry['wick_low'] = entry['wick_low'].clip(lower=0)

    # convert features to scale over their exchange rates to normalize the rows
    # wicks are expressed as a fraction of the candle range
    # (a zero-range candle yields inf/NaN here, exactly as a plain division would)
    candle_range = entry['High'] - entry['Low']
    entry['wick_low_pct'] = entry['wick_low'] / candle_range
    entry['wick_high_pct'] = entry['wick_high'] / candle_range
    entry['ind__midbb_pct'] = 10000 * (entry['Open'] - entry['ind__mid_bb']) / entry['Open']
    entry['ind__bbdif_pct'] = 10000 * entry['ind__bb_dif'] / entry['ind__low_bb']
    entry['spread_pct'] = 10000 * entry['spread'] / entry['Close']
    # only the ask side is kept: the bid side is implied by it, keeping both risks a singularity
    entry['askdif_pct'] = entry['ask_diff'] / entry['spread']
    entry['ind__bbslope_pct'] = 10000 * entry['ind__midbb_slope'] / entry['Close']
    # np.polyfit calculates the residuals as sum of squared errors -- therefore divide by Close**2 to normalize
    entry['ind__trendresids_pct'] = 10000 * entry['ind__trend_residuals'] / entry['Close'] ** 2

    # drop remaining features that separate symbols by rate
    drop_other = ['Close_Low_avg', 'Close_High_avg', 'ind__up_bb', 'ind__low_bb', 'Close', 'Open', 'Low', 'High',
                  'wick_high', 'wick_low', 'ind__bb_dif', 'spread', 'bid_diff', 'ask_diff', 'ind__midbb_slope',
                  'ind__mid_bb', 'ind__trend_residuals']
    entry = entry.drop(drop_other, axis=1)

    # filter out where we did not pull currency strengths
    entry = entry[entry['ind__base_strength'] != 0]

    # keep exactly the requested features, in order; any that are absent (mainly symbol
    # columns) are created and filled with 0. reindex builds a fresh frame, so nothing is
    # assigned into the filtered slice above
    return entry.reindex(columns=model_features, fill_value=0)

    
def predictions(model, features):
    '''
    Re-read the 'to_predict*' CSV files and, if any of them has a new first timestamp,
    clear the cell output and print a fresh prediction for every file.

    - model - fitted estimator exposing predict() and predict_proba()
    - features - ordered feature names passed on to prepare_entry()

    Uses the global 'time_dict' (file position as a string -> last seen first index value)
    to remember what has already been shown. Returns None; all results are printed.
    '''
    global time_dict
    pattern = '../data/full_data/updated_apicall_data/predict/to_predict*'

    # read every file once; sorted() keeps the position -> file mapping stable between
    # calls, since glob returns paths in arbitrary order
    watched = []
    for i, file in enumerate(sorted(glob.glob(pattern))):
        entry = pd.read_csv(file, index_col='date')
        if len(entry) > 0:  # a file without rows has no timestamp to compare
            watched.append((str(i), entry))

    # .get() tolerates more files than keys were set up for in time_dict
    if not any(time_dict.get(key) != entry.index[0] for key, entry in watched):
        return

    clear_output(wait=True)
    print(datetime.datetime.now())
    for key, entry in watched:
        # remember the timestamp of THIS file (not of the previously read one)
        time_dict[key] = entry.index[0]
        sym = entry.symbol.iloc[0]
        print_bold(f'Last update: {sym} -- {entry.index[0]}')
        print('---------------------------------------------')
        # predict with all models
        ent = prepare_entry(entry, features)
        if len(ent) == 0:
            # prepare_entry removes rows without currency strengths; nothing left to score
            print('no usable rows -- prediction skipped')
            print('\n')
            continue
        pred = model.predict(ent)[0]
        res = 'call' if pred == 1 else ('put' if pred == 0 else 'small move.')
        # NOTE(review): indexing by int(pred) assumes the class labels equal their
        # positions in predict_proba's output -- confirm against model.classes_
        pred_prob = model.predict_proba(ent)[0][int(pred)]
        print(res, pred_prob)
        print('\n')

In [None]:
collect = []
last = datetime.datetime.now()
# manual dict set up -- depends on number of tickers being watched
# (file position as a string -> first index value last seen for that file)
time_dict = {'0': 0, '1': 0, '2': 0}
# poll until the kernel is interrupted; predictions() only prints when a file has changed
while True:
    # predictions() requires the loaded model and its feature list
    predictions(model, features)
    # short pause so the loop does not spin a CPU core re-reading the CSV files
    time.sleep(1)