In [1]:
import pandas as pd
from xgboost import XGBRegressor
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.utils.validation import check_is_fitted
from sklearn.base import TransformerMixin
from sklearn.preprocessing import MinMaxScaler, StandardScaler, FunctionTransformer, OneHotEncoder
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from category_encoders.target_encoder import TargetEncoder
from skimpy import clean_columns

import os
from configparser import ConfigParser
import numerapi

  from pandas import MultiIndex, Int64Index


In [2]:
# Location of the INI file holding the Numerai API keys. Defaults to the
# original hard-coded path; set the NUMERAI_KEYS_PATH environment variable to
# point at a different file without editing the notebook.
keys_path = os.environ.get(
    'NUMERAI_KEYS_PATH',
    '/home/melgazar9/scripts/github/trading/numerai_project/numerai_keys.ini',
)

config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here instead of letting the lookup
# below die with an unhelpful KeyError: 'KEYS'.
if not config.read(keys_path):
    raise FileNotFoundError(f"Could not read Numerai keys file: {keys_path}")

# API client built from the public/secret key pair in the [KEYS] section.
napi = numerapi.NumerAPI(
    config['KEYS']['NUMERAI_PUBLIC_KEY'],
    config['KEYS']['NUMERAI_SECRET_KEY'],
)

In [32]:
# Show the dataset file names reported by the Numerai API client.
napi.list_datasets()

['example_predictions.csv',
 'example_predictions.parquet',
 'example_validation_predictions.csv',
 'example_validation_predictions.parquet',
 'features.json',
 'numerai_datasets.zip',
 'numerai_live_data.csv',
 'numerai_live_data.parquet',
 'numerai_live_data_int8.csv',
 'numerai_live_data_int8.parquet',
 'numerai_tournament_data.csv',
 'numerai_tournament_data.parquet',
 'numerai_tournament_data_int8.csv',
 'numerai_tournament_data_int8.parquet',
 'numerai_training_data.csv',
 'numerai_training_data.parquet',
 'numerai_training_data_int8.csv',
 'numerai_training_data_int8.parquet',
 'numerai_validation_data.csv',
 'numerai_validation_data.parquet',
 'numerai_validation_data_int8.csv',
 'numerai_validation_data_int8.parquet']

In [36]:
# Valid `data_type` inputs are:
# ['live', 'training', 'validation', 'test', 'max_test_era',
#  'tournament', 'tournament_ids', 'example_predictions']

# Look up the URL of the latest training data; the cells below pass the same
# kind of URL straight to pd.read_csv, one cell per data type.
napi.get_latest_data_url(data_type='training')

'https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_training_data.csv'

In [37]:
# Read the latest training CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='training'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n000315175b67977,era1,train,0.00,0.50,0.25,0.00,0.50,0.25,0.25,...,1.00,1.00,0.75,0.50,0.75,0.50,1.00,0.50,0.75,0.50
1,n0014af834a96cdd,era1,train,0.00,0.00,0.00,0.25,0.50,0.00,0.00,...,1.00,1.00,0.00,0.00,0.75,0.25,0.00,0.25,1.00,0.25
2,n001c93979ac41d4,era1,train,0.25,0.50,0.25,0.25,1.00,0.75,0.75,...,0.25,0.50,0.00,0.00,0.50,1.00,0.00,0.25,0.75,0.25
3,n0034e4143f22a13,era1,train,1.00,0.00,0.00,0.50,0.50,0.25,0.25,...,1.00,1.00,0.75,0.75,1.00,1.00,0.75,1.00,1.00,0.25
4,n00679d1a636062f,era1,train,0.25,0.25,0.25,0.25,0.00,0.25,0.50,...,0.75,0.75,0.25,0.50,0.75,0.00,0.50,0.25,0.75,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501803,nff6a8a8feaeeb52,era120,train,0.50,0.50,0.25,0.00,0.00,0.50,0.75,...,0.50,0.50,0.75,0.50,0.50,0.75,0.25,0.25,0.25,0.50
501804,nff6af62a0996372,era120,train,1.00,0.00,0.00,1.00,0.50,0.75,0.75,...,1.00,1.00,1.00,1.00,1.00,0.00,0.75,1.00,1.00,0.75
501805,nff9288983b8c040,era120,train,0.75,0.50,0.50,0.50,0.25,0.50,0.25,...,1.00,0.75,0.25,1.00,1.00,1.00,0.25,0.00,0.00,0.25
501806,nffaab4e1cacc4b1,era120,train,0.25,0.25,0.25,0.50,0.00,1.00,1.00,...,0.75,0.75,0.75,0.75,0.75,0.50,0.50,0.25,0.75,0.50


In [39]:
# Read the latest validation CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='validation'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n0003aa52cab36c2,era121,validation,0.25,0.75,0.50,0.50,0.00,0.75,0.50,...,0.75,0.75,1.00,0.75,0.50,0.50,1.00,0.00,0.00,0.25
1,n000920ed083903f,era121,validation,0.75,0.50,0.75,1.00,0.50,0.00,0.00,...,0.50,0.50,0.75,1.00,0.75,0.50,0.50,0.50,0.50,0.50
2,n0038e640522c4a6,era121,validation,1.00,0.00,0.00,1.00,1.00,1.00,1.00,...,0.00,0.00,0.50,0.25,0.00,0.00,0.50,0.50,0.00,1.00
3,n004ac94a87dc54b,era121,validation,0.75,1.00,1.00,0.50,0.00,0.00,0.00,...,0.00,0.00,0.00,0.25,0.00,0.00,0.00,0.25,0.25,0.50
4,n0052fe97ea0c05f,era121,validation,0.25,0.50,0.50,0.25,1.00,0.50,0.50,...,0.50,0.75,0.00,0.00,0.75,1.00,0.00,0.25,1.00,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137774,nffb61f786fe2a96,era212,validation,0.00,0.50,0.75,0.00,0.50,0.50,0.50,...,1.00,1.00,1.00,1.00,1.00,0.75,0.75,0.75,1.00,0.50
137775,nffbe101615ad597,era212,validation,0.75,0.75,0.75,0.75,0.25,0.25,0.25,...,0.75,0.75,0.25,0.25,0.75,0.75,0.25,0.50,0.75,0.25
137776,nffc1dc801a3318a,era212,validation,0.25,1.00,0.50,0.00,0.00,0.25,0.50,...,0.25,0.50,0.00,0.00,0.25,0.25,0.00,0.75,0.75,0.50
137777,nffc376c3127112d,era212,validation,0.00,0.75,0.75,0.00,0.75,0.50,0.75,...,0.50,0.50,0.75,0.75,0.25,0.25,0.75,0.50,0.50,1.00


In [40]:
# Read the latest test CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='test'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n000101811a8a843,era575,test,0.25,0.50,0.75,0.25,0.50,0.25,0.50,...,0.25,0.25,0.25,0.25,0.25,0.50,0.25,0.00,0.00,
1,n001e1318d5072ac,era575,test,0.50,0.50,0.50,0.75,0.75,0.25,0.00,...,0.00,0.00,0.75,0.75,0.00,0.00,0.75,0.50,0.25,
2,n002a9c5ab785cbb,era575,test,0.25,0.00,0.25,0.00,0.00,1.00,0.75,...,0.75,0.75,0.25,0.50,0.50,0.25,0.25,0.25,0.75,
3,n002ccf6d0e8c5ad,era575,test,0.00,0.00,0.00,0.75,0.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,0.50,1.00,0.50,0.75,
4,n0051ab821295c29,era575,test,0.75,1.00,1.00,0.00,1.00,1.00,1.00,...,0.25,0.25,0.50,0.50,0.25,0.75,0.75,0.00,0.75,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1812994,nffce522d3bc7edd,era1002,test,0.50,0.50,0.75,0.50,0.25,0.25,0.50,...,0.75,1.00,0.00,0.00,1.00,0.75,0.00,0.25,1.00,
1812995,nffd4817c95d5eda,era1002,test,1.00,0.00,0.00,1.00,0.00,0.75,0.75,...,0.25,0.00,0.25,0.25,0.00,0.75,0.00,0.50,0.00,
1812996,nffea88143e1d739,era1002,test,1.00,0.25,0.25,1.00,0.50,0.75,0.75,...,1.00,1.00,0.00,0.00,1.00,0.50,0.00,0.75,0.75,
1812997,nfff730790b4b9db,era1002,test,0.50,1.00,1.00,0.25,0.25,0.75,0.75,...,0.75,0.75,0.50,1.00,1.00,1.00,0.75,0.00,1.00,


In [38]:
# Read the latest live CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='live'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n0000f7bf863c606,eraX,live,0.00,0.50,0.75,0.00,0.00,0.75,0.75,...,0.25,0.25,0.50,0.50,0.25,0.00,0.50,0.25,0.50,
1,n00054b3868cab8e,eraX,live,0.75,0.00,0.00,1.00,0.25,0.25,0.00,...,0.00,0.00,0.25,0.25,0.00,0.00,0.25,0.00,0.00,
2,n00076b4e043eda1,eraX,live,0.50,1.00,1.00,0.50,0.25,0.50,0.50,...,0.25,0.25,0.50,0.50,0.00,0.00,0.50,0.50,0.50,
3,n0014a12880d7377,eraX,live,1.00,0.00,0.00,1.00,0.75,0.50,0.50,...,0.00,0.00,0.00,0.00,0.00,0.50,0.00,0.00,0.00,
4,n0015f151dbf590d,eraX,live,1.00,0.00,0.00,1.00,0.25,0.00,0.00,...,0.25,0.25,0.00,0.00,0.00,0.00,0.25,0.25,0.25,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5326,nffc86a7a59ab167,eraX,live,0.00,0.50,0.50,0.00,0.25,0.75,0.75,...,0.75,0.50,1.00,1.00,0.50,0.25,1.00,0.75,0.75,
5327,nffce9953e162d85,eraX,live,0.50,0.25,0.00,0.75,0.75,0.50,0.25,...,1.00,0.50,0.50,0.50,0.50,0.25,0.00,0.75,0.50,
5328,nffd7d1f3fa82d5a,eraX,live,0.50,0.25,0.50,0.00,0.00,1.00,1.00,...,0.25,0.25,0.00,0.00,0.50,1.00,0.00,0.00,0.00,
5329,nffe718d3439b2d2,eraX,live,0.50,0.00,0.00,0.00,0.25,0.00,0.00,...,0.75,0.50,0.25,0.25,0.50,0.50,0.00,0.00,0.75,


In [41]:
# Read the latest 'max_test_era' CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='max_test_era'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n0002dc49b94b68c,era1002,test,0.25,1.00,1.00,0.50,0.25,0.75,0.75,...,0.75,0.25,0.75,0.75,0.25,0.00,0.50,0.00,0.00,
1,n00037a95061ee07,era1002,test,0.00,1.00,0.75,0.00,0.25,0.25,0.25,...,1.00,0.75,1.00,1.00,0.75,0.50,1.00,0.00,1.00,
2,n000438b626047f6,era1002,test,0.25,0.50,1.00,0.25,0.75,0.00,0.25,...,0.25,0.50,0.50,0.50,0.50,0.25,0.50,0.75,0.25,
3,n0005e45063ab844,era1002,test,0.75,0.25,0.25,0.50,0.25,0.75,1.00,...,0.00,0.00,1.00,1.00,0.00,0.00,1.00,1.00,0.00,
4,n000a50269d04dc2,era1002,test,0.50,0.00,0.00,1.00,1.00,1.00,1.00,...,0.00,0.00,0.50,0.50,0.00,0.00,0.25,0.50,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5302,nffce522d3bc7edd,era1002,test,0.50,0.50,0.75,0.50,0.25,0.25,0.50,...,0.75,1.00,0.00,0.00,1.00,0.75,0.00,0.25,1.00,
5303,nffd4817c95d5eda,era1002,test,1.00,0.00,0.00,1.00,0.00,0.75,0.75,...,0.25,0.00,0.25,0.25,0.00,0.75,0.00,0.50,0.00,
5304,nffea88143e1d739,era1002,test,1.00,0.25,0.25,1.00,0.50,0.75,0.75,...,1.00,1.00,0.00,0.00,1.00,0.50,0.00,0.75,0.75,
5305,nfff730790b4b9db,era1002,test,0.50,1.00,1.00,0.25,0.25,0.75,0.75,...,0.75,0.75,0.50,1.00,1.00,1.00,0.75,0.00,1.00,


In [42]:
# Read the latest tournament CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='tournament'),
)

Unnamed: 0,id,era,data_type,feature_intelligence1,feature_intelligence2,feature_intelligence3,feature_intelligence4,feature_intelligence5,feature_intelligence6,feature_intelligence7,...,feature_wisdom38,feature_wisdom39,feature_wisdom40,feature_wisdom41,feature_wisdom42,feature_wisdom43,feature_wisdom44,feature_wisdom45,feature_wisdom46,target
0,n0003aa52cab36c2,era121,validation,0.25,0.75,0.50,0.50,0.00,0.75,0.50,...,0.75,0.75,1.00,0.75,0.50,0.50,1.00,0.00,0.00,0.25
1,n000920ed083903f,era121,validation,0.75,0.50,0.75,1.00,0.50,0.00,0.00,...,0.50,0.50,0.75,1.00,0.75,0.50,0.50,0.50,0.50,0.50
2,n0038e640522c4a6,era121,validation,1.00,0.00,0.00,1.00,1.00,1.00,1.00,...,0.00,0.00,0.50,0.25,0.00,0.00,0.50,0.50,0.00,1.00
3,n004ac94a87dc54b,era121,validation,0.75,1.00,1.00,0.50,0.00,0.00,0.00,...,0.00,0.00,0.00,0.25,0.00,0.00,0.00,0.25,0.25,0.50
4,n0052fe97ea0c05f,era121,validation,0.25,0.50,0.50,0.25,1.00,0.50,0.50,...,0.50,0.75,0.00,0.00,0.75,1.00,0.00,0.25,1.00,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1956104,nffc86a7a59ab167,eraX,live,0.00,0.50,0.50,0.00,0.25,0.75,0.75,...,0.75,0.50,1.00,1.00,0.50,0.25,1.00,0.75,0.75,
1956105,nffce9953e162d85,eraX,live,0.50,0.25,0.00,0.75,0.75,0.50,0.25,...,1.00,0.50,0.50,0.50,0.50,0.25,0.00,0.75,0.50,
1956106,nffd7d1f3fa82d5a,eraX,live,0.50,0.25,0.50,0.00,0.00,1.00,1.00,...,0.25,0.25,0.00,0.00,0.50,1.00,0.00,0.00,0.00,
1956107,nffe718d3439b2d2,eraX,live,0.50,0.00,0.00,0.00,0.25,0.00,0.00,...,0.75,0.50,0.25,0.25,0.50,0.50,0.00,0.00,0.75,


In [43]:
# Read the latest tournament-ids CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='tournament_ids'),
)

Unnamed: 0,id
0,n0003aa52cab36c2
1,n000920ed083903f
2,n0038e640522c4a6
3,n004ac94a87dc54b
4,n0052fe97ea0c05f
...,...
1956104,nffc86a7a59ab167
1956105,nffce9953e162d85
1956106,nffd7d1f3fa82d5a
1956107,nffe718d3439b2d2


In [44]:
# Read the latest example-predictions CSV directly from its URL and display the frame.
pd.read_csv(
    filepath_or_buffer=napi.get_latest_data_url(data_type='example_predictions'),
)

Unnamed: 0,id,prediction
0,n0003aa52cab36c2,0.489186
1,n000920ed083903f,0.491093
2,n0038e640522c4a6,0.532746
3,n004ac94a87dc54b,0.507171
4,n0052fe97ea0c05f,0.503833
...,...,...
1956104,nffc86a7a59ab167,0.499157
1956105,nffce9953e162d85,0.513304
1956106,nffd7d1f3fa82d5a,0.482521
1956107,nffe718d3439b2d2,0.508860


### submission

In [None]:
# Destination of the submission CSV. Directory and file name are kept as
# separate variables so either can be changed on its own.
output_dir = '/media/melgazar9/HDD_10TB/trading/data/numerai/submissions/'
output_filename = 'numerai_signals_submission.csv'
# os.path.join only inserts a separator when one is needed, so the resulting
# path stays valid even if output_dir is later edited to drop its trailing slash.
output_loc = os.path.join(output_dir, output_filename)