In [1]:
#####################
###### Imports ######
#####################

import os
from configparser import ConfigParser
import sys
import re
if not os.getcwd().endswith('trading'): os.chdir('../../..') # local machine
assert os.getcwd().endswith('trading'), 'Wrong path!'
import numerapi
import plotly.express as px


os.environ['NUMEXPR_MAX_THREADS'] = '32'
os.environ['NUMEXPR_NUM_THREADS'] = '16'

sys.path.append(os.getcwd())
from dev.scripts.ML_utils import * # run if on local machine
from dev.scripts.trading_utils import * # run if on local machine
from numerai.dev.scripts.numerai_utils import *
from numerai.dev.configs.prep_and_train_cfg import *


pd.set_option('display.float_format', lambda x: '%.5f' % x)
config = ConfigParser()
config.read('numerai/numerai_keys.ini')

# Connect to the Numerai API
napi = numerapi.SignalsAPI(config['KEYS']['NUMERAI_PUBLIC_KEY'], config['KEYS']['NUMERAI_SECRET_KEY'])


### Load in the data created from build_numerai_dataset.py ###

if LOAD_DATA_FILEPATH.endswith('feather'):
    df_numerai = pd.read_feather(LOAD_DATA_FILEPATH)
elif LOAD_DATA_FILEPATH.endswith('pq') or LOAD_DATA_FILEPATH.endswith('parquet'):
    df_numerai = pd.read_parquet(LOAD_DATA_FILEPATH)
elif LOAD_DATA_FILEPATH.endswith('csv'):
    df_numerai = pd.read_csv(LOAD_DATA_FILEPATH)

### dropnas ###

if START_DATE:
    df_numerai = df_numerai[df_numerai[DATE_COL] >= START_DATE]

In [3]:
df_numerai.tail()

Unnamed: 0,date,yahoo_ticker,adj_close_1d,close_1d,high_1d,low_1d,open_1d,volume_1d,adj_close_1h_0,adj_close_1h_1,...,volume_1h_18,volume_1h_19,volume_1h_20,volume_1h_21,volume_1h_22,volume_1h_23,bloomberg_ticker,friday_date,data_type,target
25243537,2021-04-09,,,,,,,,,,...,,,,,,,VERO US,20210409.0,validation,0.25
25243538,2021-04-09,,,,,,,,,,...,,,,,,,VESTL TI,20210409.0,validation,0.0
25243539,2021-04-09,,,,,,,,,,...,,,,,,,VITRO MK,20210409.0,validation,0.75
25243540,2021-04-09,,,,,,,,,,...,,,,,,,XCUR US,20210409.0,validation,0.0
25243541,2021-04-09,,,,,,,,,,...,,,,,,,YNS MK,20210409.0,validation,0.25


In [4]:
data_cleaner = Pipeline(**DATA_CLEANER_PARAMS).fit(df_numerai.tail(10000))

In [5]:
data_cleaner.transform(df_numerai.head(1000))

### save model

In [79]:
if SAVE_MODEL:
    dill.dump(model_dict['model'], open(MODEL_OUTPATH + \
                                        type(model_dict['model']).__name__ + '_' + \
                                        str(datetime.datetime.today()\
                                            .replace(second=0, microsecond=0))\
                                            .replace(' ', '_')\
                                            .replace(':', '_') + '.pkl',\
                                            'wb'))

### submit model sandbox

In [45]:
df_val = pd.read_csv(napi.download_validation_data())
df_val = df_val[df_val['data_type'] == 'validation'].rename(columns={'bloomberg_ticker': 'ticker'})
print(df_val.shape)
df_val.head(2)

(2161626, 4)


Unnamed: 0,ticker,friday_date,data_type,target
2148958,000060 KS,20130104,validation,1.0
2148959,000080 KS,20130104,validation,0.75


In [41]:
df_to_submit = model_dict['df_pred'][\
                                     model_dict['df_pred']['data_type'] == 'validation'][\
                                                                                         ['bloomberg_ticker', 'friday_date', 'data_type', 'target', pred_colname]]\
                                                                                            .rename(columns={pred_colname: 'signal',
                                                                                                             'bloomberg_ticker': 'ticker'})
df_to_submit = df_to_submit[['ticker', 'signal', 'friday_date', 'data_type']]
print(df_to_submit.shape)
df_to_submit.head(2)

(2156196, 4)


Unnamed: 0,ticker,signal,friday_date,data_type
2148958,000060 KS,0.47222,20130104.0,validation
2148959,000080 KS,0.58933,20130104.0,validation


In [46]:
df_to_submit = pd.merge(df_to_submit,
                        df_val[['friday_date', 'ticker']], on=['friday_date', 'ticker'])
print(df_to_submit.shape)
df_to_submit.head(2)

(2156196, 4)


Unnamed: 0,ticker,signal,friday_date,data_type
0,000060 KS,0.47222,20130104.0,validation
1,000080 KS,0.58933,20130104.0,validation


'852fd5ba-3e09-4d50-b89a-4b91485aef7c'

In [52]:
df_to_submit.tail(2)

Unnamed: 0,ticker,signal,friday_date,data_type
2156194,ZYXI US,0.52891,20210402.0,validation
2156195,ZZZ CN,0.51358,20210402.0,validation


In [57]:
df_to_submit.to_csv('/media/melgazar9/HDD_10TB/trading/data/numerai/submissions/df_to_submit_example.csv')

In [58]:
# test submition
napi.upload_predictions('/media/melgazar9/HDD_10TB/trading/data/numerai/submissions/df_to_submit_example.csv',
                        model_id=list(napi.get_models().values())[0])

2021-04-24 12:39:21,783 INFO numerapi.base_api: uploading predictions...
2021-04-24 12:40:42,062 ERROR numerapi.base_api: headers must be signal, and bloomberg_ticker, cusip or sedol


ValueError: headers must be signal, and bloomberg_ticker, cusip or sedol