In [1]:
#####################
###### Imports ######
#####################

import os
import re
import sys
from configparser import ConfigParser

# NumExpr reads these variables when it is first imported, so they are set before any
# third-party import that may pull it in.
# NOTE(review): numerapi / plotly.express presumably import pandas, which loads numexpr
# when it is installed — confirm for this environment.
os.environ['NUMEXPR_MAX_THREADS'] = '32'
os.environ['NUMEXPR_NUM_THREADS'] = '16'

# The relative paths and project imports below expect the working directory to be the
# 'trading' directory; step up three levels when the notebook is started elsewhere.
if not os.getcwd().endswith('trading'):
    os.chdir('../../..') # local machine
assert os.getcwd().endswith('trading'), 'Wrong path!'

import numerapi
import plotly.express as px

# Make the project packages importable; skip the append when the cell is re-run so
# sys.path does not accumulate duplicate entries.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())
from dev.scripts.ML_utils import * # run if on local machine
from dev.scripts.trading_utils import * # run if on local machine
from numerai.dev.scripts.numerai_utils import *
from numerai.dev.configs.submit_numerai_model_cfg import *

# Render floats with five decimal places in DataFrame output.
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# ConfigParser.read() silently skips files it cannot open and returns the list of files
# it parsed, so fail here with a clear message instead of a later KeyError on 'KEYS'.
config = ConfigParser()
if not config.read('numerai/numerai_keys.ini'):
    raise FileNotFoundError('Could not read numerai/numerai_keys.ini (cwd: %s)' % os.getcwd())

# Connect to the Numerai API
napi = numerapi.SignalsAPI(config['KEYS']['NUMERAI_PUBLIC_KEY'], config['KEYS']['NUMERAI_SECRET_KEY'])

### Load in the data created from build_numerai_dataset.py ###

# Pick the pandas reader from the build file's extension.
if MOST_RECENT_DF_BUILD_FILEPATH.endswith('feather'):
    df_numerai = pd.read_feather(MOST_RECENT_DF_BUILD_FILEPATH)
elif MOST_RECENT_DF_BUILD_FILEPATH.endswith(('pq', 'parquet')):
    df_numerai = pd.read_parquet(MOST_RECENT_DF_BUILD_FILEPATH)
elif MOST_RECENT_DF_BUILD_FILEPATH.endswith('csv'):
    df_numerai = pd.read_csv(MOST_RECENT_DF_BUILD_FILEPATH)
else:
    # Without this branch df_numerai would stay undefined and the next line would
    # fail with an unhelpful NameError.
    raise ValueError('Unsupported file type for MOST_RECENT_DF_BUILD_FILEPATH: '
                     + str(MOST_RECENT_DF_BUILD_FILEPATH))
df_numerai.tail()

Unnamed: 0,date,yahoo_ticker,adj_close_1d,close_1d,high_1d,low_1d,open_1d,volume_1d,adj_close_1h_0,adj_close_1h_1,...,open_1d_ewm_mean,high_1d_ewm_mean,low_1d_ewm_mean,adj_close_1d_ewm_mean,volume_1d_ewm_mean,prev1_target_ewm_mean,prev1_target_HL5_ewm_mean,move_1d_iar,high_move_1d_iar,low_move_1d_iar
23081734,2021-04-16,ZUMZ,45.7,45.7,46.59,45.63,46.59,132400.0,,,...,46.26362,46.61275,45.46328,45.79586,146302.46875,0.99692,2.80506,-0.89,2016.37781,-2086.4021
23081735,2021-04-16,ZUO,17.33,17.33,17.53,17.15,17.51,687600.0,,,...,17.50032,17.63955,17.16549,17.38101,907516.5625,0.99795,1.89151,-0.31,318.07419,-331.53607
23081736,2021-04-16,ZURN.SW,381.60001,381.60001,381.60001,378.10001,379.60001,577428.0,,,...,379.64581,381.55566,377.83459,380.63611,601887.125,0.49898,1.92317,2.0,18746.17383,-22118.51953
23081737,2021-04-16,ZYXI,15.36,15.36,15.5,15.02,15.49,160300.0,,,...,15.45345,15.50563,15.03667,15.33261,175951.67188,0.74898,1.36688,-0.28998,385.58026,-393.11981
23081738,2021-04-16,ZZZ.TO,32.18,32.18,32.3,31.19,31.36,81000.0,,,...,31.2428,31.95944,31.0424,31.8477,67180.23438,0.25103,1.89848,1.25,433.07465,-452.00995


In [2]:
# Distinct (bloomberg_ticker, yahoo_ticker) pairs from the build data, without rows
# where either ticker is missing.
ticker_pairs = df_numerai[['bloomberg_ticker', 'yahoo_ticker']].drop_duplicates().dropna()

# Lookup keyed by Bloomberg ticker, giving the matching Yahoo ticker.
yahoo_bloomberg_map = {
    bloomberg: yahoo
    for bloomberg, yahoo in zip(ticker_pairs['bloomberg_ticker'], ticker_pairs['yahoo_ticker'])
}

In [3]:
# Read the live ticker file and mark every row as live data.
df_numerai_live = pd.read_csv(NUMERAI_LIVE_FILEPATH).assign(data_type='live')

# Translate each Bloomberg ticker to its Yahoo ticker; tickers absent from the map become NaN.
df_numerai_live = df_numerai_live.assign(
    yahoo_ticker=df_numerai_live['bloomberg_ticker'].map(yahoo_bloomberg_map)
)
df_numerai_live.head()

Unnamed: 0,bloomberg_ticker,data_type,yahoo_ticker
0,SVW AU,live,SVW.AX
1,GEM AU,live,GEM.AX
2,VOC AU,live,VOC.AX
3,AZJ AU,live,AZJ.AX
4,NXT AU,live,NXT.AX


In [4]:
# Print the (rows, columns) shape of the live frame, then display its last two rows.
print(df_numerai_live.shape)
df_numerai_live.tail(2)

(5421, 3)


Unnamed: 0,bloomberg_ticker,data_type,yahoo_ticker
5419,IMAX US,live,IMAX
5420,LULU US,live,LULU


In [5]:
# Sorted unique Bloomberg tickers present in the build dataset but not in the live file.
np.setdiff1d(df_numerai['bloomberg_ticker'].tolist(), df_numerai_live['bloomberg_ticker'].tolist())

array(['000070 KS', '000640 KS', '000670 KS', ..., 'ZU US', 'ZVO US',
       'ZZ US'], dtype='<U11')

In [6]:
# Sorted unique Bloomberg tickers present in the live file but not in the build dataset.
np.setdiff1d(df_numerai_live['bloomberg_ticker'].tolist(), df_numerai['bloomberg_ticker'].tolist())

array(['6556 JP', 'AOF GR', 'AT CN', 'BBCP US'], dtype='<U11')

In [7]:
# Count of missing values in each column of the live frame.
df_numerai_live.isna().sum()

bloomberg_ticker     0
data_type            0
yahoo_ticker        70
dtype: int64

### Load most recent dates / data

In [8]:
# Keep only usable Yahoo tickers: drop missing values and placeholder strings.
# Comparing the stripped, lower-cased text also rejects whitespace-only entries and
# avoids calling len() on a non-string value, which would raise TypeError.
_PLACEHOLDER_TICKERS = {'', 'nan', 'null'}
valid_yahoo_tickers = [
    ticker for ticker in df_numerai_live['yahoo_ticker']
    if not pd.isnull(ticker) and str(ticker).strip().lower() not in _PLACEHOLDER_TICKERS
]

In [9]:
from collections import Counter

# Fail early if any Yahoo ticker appears more than once; the message lists the offenders.
duplicated_yahoo_tickers = [ticker for ticker, count in Counter(valid_yahoo_tickers).items() if count > 1]
assert not duplicated_yahoo_tickers, 'Duplicate yahoo tickers: %s' % duplicated_yahoo_tickers

In [None]:
# Download data for the valid tickers, passing the newest date in df_numerai as the
# yfinance 'start' parameter and turning the progress bar off.
latest_build_date = df_numerai['date'].max()
download_params = {'start': latest_build_date, 'progress': False}
df_yahoo_recent = download_yfinance_data(
    tickers=valid_yahoo_tickers,
    n_chunks=1, # most accurate download
    yfinance_params=download_params,
)
df_yahoo_recent.head()

*** yfinance params threads set to False! ***


In [11]:
# NOTE(review): this rebinds df_yahoo_recent (also assigned by the download cell) to a
# saved feather file read from a machine-specific absolute path — confirm this is
# intended, and consider a configurable path so the notebook runs on other machines.
df_yahoo_recent = pd.read_feather('/media/melgazar9/HDD_10TB/trading/data/numerai/datasets/build_dataset_dfs/df_yahoo_test.feather')
df_yahoo_recent.tail(2)

Unnamed: 0,date,ticker,adj_close_1d,close_1d,high_1d,low_1d,open_1d,volume_1d,adj_close_1h_0,adj_close_1h_1,...,volume_1h_14,volume_1h_15,volume_1h_16,volume_1h_17,volume_1h_18,volume_1h_19,volume_1h_20,volume_1h_21,volume_1h_22,volume_1h_23
210324,2021-04-26,PTT.BK,,,,,,,,,...,,,,,,,,,,
210325,2021-04-26,STGT.BK,,,,,,,,,...,,,,,,,,,,


In [17]:
# Load the saved model object; the context manager closes the file handle afterwards.
# NOTE(review): dill, like pickle, can execute arbitrary code while loading — only load
# files from a trusted source.
with open('/media/melgazar9/HDD_10TB/trading/objects/XGBRegressor_2021-04-25_23_42_00.pkl', 'rb') as model_file:
    obj = dill.load(model_file)

In [23]:
# Model input features that are not columns of df_yahoo_recent.
[feature for feature in obj['input_features'] if feature not in df_yahoo_recent.columns]

['prev_close_pct_chg_1d_pct_change',
 'prev3_volume_1d',
 'move_pct_1d_diff_pct_change',
 'low_minus_close_1d_pct_change',
 'prev5_target_HL5',
 'high_minus_prev_high_1d_diff',
 'prev4_target',
 'prev4_target_HL3',
 'prev1_volume_1d',
 'open_minus_prev_close_1d',
 'prev1_target_HL5_ewm_mean',
 'high_minus_prev_close_1d_diff_pct_change',
 'low_minus_prev_high_1d_diff',
 'low_minus_close_pct_1d_pct_change',
 'prev4_volume_1d',
 'prev_close_minus_low_1d_diff_pct_change',
 'high_minus_close_1d',
 'high_minus_prev_low_1d',
 'low_minus_close_1d_diff',
 'prev_close_pct_chg_1d_diff',
 'prev2_move_1d',
 'high_minus_close_1d_diff_pct_change',
 'prev5_move_1d',
 'prev4_adj_close_1d',
 'low_minus_prev_high_1d_diff_pct_change',
 'move_pct_1d_diff',
 'range_1d',
 'prev1_target_HL3',
 'prev5_target_HL3',
 'high_minus_close_1d_pct_change',
 'high_minus_prev_close_1d_diff',
 'prev1_target_ewm_mean',
 'prev2_target_HL3',
 'prev3_target_HL3',
 'high_move_1d_pct_change',
 'prev_close_pct_chg_1d_diff_pct_c

In [21]:
# NOTE(review): per the recorded output this raises KeyError because none of the
# model's input features are columns of df_yahoo_recent; presumably the feature
# engineering used for the build dataset has to be applied first — confirm.
df_yahoo_transformed = obj['feature_transformer'].transform(df_yahoo_recent[obj['input_features']])

KeyError: "None of [Index(['prev_close_pct_chg_1d_pct_change', 'prev3_volume_1d',\n       'move_pct_1d_diff_pct_change', 'low_minus_close_1d_pct_change',\n       'prev5_target_HL5', 'high_minus_prev_high_1d_diff', 'prev4_target',\n       'prev4_target_HL3', 'prev1_volume_1d', 'open_minus_prev_close_1d',\n       ...\n       'high_minus_close_pct_1d', 'prev2_adj_close_1d', 'high_move_1d',\n       'high_minus_prev_close_1d_pct_change', 'prev1_adj_close_1d',\n       'high_move_pct_1d_pct_change', 'low_move_1d_diff', 'move_1d_diff',\n       'low_minus_prev_low_1d_diff', 'low_move_pct_1d_diff_pct_change'],\n      dtype='object', length=110)] are in the [columns]"

In [None]:
# Save the recent Yahoo data under a minute-resolution timestamp, e.g.
# df_yahoo_recent_2021-04-25_23_42_00.feather.
# The suffix is built with strftime: datetime.replace() only accepts date/time fields,
# so calling .replace(' ', '_') on the datetime object itself raised TypeError.
save_timestamp = datetime.datetime.today().replace(second=0, microsecond=0)
df_yahoo_recent.to_feather('/media/melgazar9/HDD_10TB/trading/data/numerai/datasets/live_data/df_yahoo_recent_'
                           + save_timestamp.strftime('%Y-%m-%d_%H_%M_%S')
                           + '.feather')

In [6]:
# Tag every row of the recent Yahoo data as live.
df_yahoo_recent.loc[:, "data_type"] = "live"

2021-04-24 14:54:40,932 ERROR numerapi.base_api: invalid tournament


ValueError: invalid tournament