# 1. Dependencies

In [None]:
# Install Dependencies
!pip install pandas sklearn numerapi

In [None]:
# Import Dependencies
import os

import pandas as pd
import numerapi
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint, LambdaCallback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import RMSprop, Adam
from sklearn.model_selection import train_test_split, GroupKFold, GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.utils import to_categorical

# Numer.ai API setup

In [None]:
# Get your API keys and model_id from https://numer.ai/submit
# Credentials are read from environment variables so they are never saved in the notebook.
# NOTE(review): a key pair was previously hardcoded in this cell; treat it as leaked and revoke it.
_missing_vars = [
    name
    for name in ("NUMERAI_PUBLIC_ID", "NUMERAI_SECRET_KEY", "NUMERAI_MODEL_ID")
    if not os.environ.get(name)
]
if _missing_vars:
    raise RuntimeError(
        "Set these environment variables before running this cell: " + ", ".join(_missing_vars)
    )

public_id = os.environ["NUMERAI_PUBLIC_ID"]
secret_key = os.environ["NUMERAI_SECRET_KEY"]
model_id = os.environ["NUMERAI_MODEL_ID"]
napi = numerapi.NumerAPI(public_id=public_id, secret_key=secret_key)

# 2. Download Data Sets

### Datasets 
*   `trainingData` is used to train the model
*   `tournamentData` contains the `validation`, `test`, and `live` rows; the `validation` rows are used to evaluate the model

### Column descriptions
*   id: a randomized id that corresponds to a stock 
*   era: a period of time
*   data_type: either `train`, `validation`, `test`, or `live` 
*   feature_*: abstract financial features of the stock 
*   target: abstract measure of stock performance


In [69]:
# Fetch the latest Numer.ai training set straight from the public bucket (about 30 secs)
trainingData = pd.read_csv(
    "https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_training_data.csv.xz",
    header=0,
)

In [70]:
# Fetch the latest Numer.ai tournament set straight from the public bucket (about 30 secs)
tournamentData = pd.read_csv(
    "https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_tournament_data.csv.xz",
    header=0,
)

# 3. Explore The Dataset

In [68]:
# Each split is taken as an explicit copy: later cells add columns to these frames,
# and assigning into a boolean-mask slice raises pandas' SettingWithCopyWarning.

# Select Validation Data Out of Tournament Dataset
validationData = tournamentData[tournamentData.data_type=='validation'].copy()

# Select Test Data Out of Tournament Dataset
testData = tournamentData[tournamentData.data_type=='test'].copy()

# Select Live Data Out of Tournament Dataset
liveData = tournamentData[tournamentData.data_type=='live'].copy()

In [None]:
# Preview the first five rows of the training set
trainingData.head(n=5)

In [None]:
# List the distinct eras and data types present in the training set
print(
    f'UNIQUE ERAS: {trainingData.era.unique()}\n',
    f'UNIQUE DATA TYPE: {trainingData.data_type.unique()}',
    sep='\n',
)

In [None]:
# Preview the first five rows of the tournament set
tournamentData.head(n=5)

In [67]:
# List the distinct data types and eras present in the tournament set
print(
    f'UNIQUE DATA TYPE: {tournamentData.data_type.unique()}\n',
    f'UNIQUE ERAS: {tournamentData.era.unique()}',
    sep='\n',
)

UNIQUE DATA TYPE: ['validation' 'test' 'live']

UNIQUE ERAS: ['era121' 'era122' 'era123' 'era124' 'era125' 'era126' 'era127' 'era128'
 'era129' 'era130' 'era131' 'era132' 'era575' 'era576' 'era577' 'era578'
 'era579' 'era580' 'era581' 'era582' 'era583' 'era584' 'era585' 'era586'
 'era587' 'era588' 'era589' 'era590' 'era591' 'era592' 'era593' 'era594'
 'era595' 'era596' 'era597' 'era598' 'era599' 'era600' 'era601' 'era602'
 'era603' 'era604' 'era605' 'era606' 'era607' 'era608' 'era609' 'era610'
 'era611' 'era612' 'era613' 'era614' 'era615' 'era616' 'era617' 'era618'
 'era619' 'era620' 'era621' 'era622' 'era623' 'era624' 'era625' 'era626'
 'era627' 'era628' 'era629' 'era630' 'era631' 'era632' 'era633' 'era634'
 'era635' 'era636' 'era637' 'era638' 'era639' 'era640' 'era641' 'era642'
 'era643' 'era644' 'era645' 'era646' 'era647' 'era648' 'era649' 'era650'
 'era651' 'era652' 'era653' 'era654' 'era655' 'era656' 'era657' 'era658'
 'era659' 'era660' 'era661' 'era662' 'era663' 'era664' 'era665'

In [64]:
# Show the distinct target values found in each data split, one line per split
print(
    f"UNIQUE TRAINING TARGETS: {trainingData.target[trainingData.data_type=='train'].unique()}",
    f"UNIQUE VALIDATION TARGETS: {tournamentData.target[tournamentData.data_type=='validation'].unique()}",
    f"UNIQUE TEST TARGETS: {tournamentData.target[tournamentData.data_type=='test'].unique()}",
    f"UNIQUE LIVE TARGETS: {tournamentData.target[tournamentData.data_type=='live'].unique()}",
    sep="\n",
)

UNIQUE TRAINING TARGETS: [0.5  0.25 0.75 0.   1.  ]
UNIQUE VALIDATION TARGETS: [0.25 0.5  1.   0.75 0.  ]
UNIQUE TEST TARGETS: [nan]
UNIQUE LIVE TARGETS: [nan]


# Feature Engineering

In [None]:
def get_group_stats(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create features by calculating statistical moments for each group.

    For every feature group, the row-wise mean, standard deviation and skew of
    the group's columns are written to ``feature_<group>_mean``,
    ``feature_<group>_std`` and ``feature_<group>_skew``.

    The frame is modified in place and also returned. Statistic columns left by
    an earlier call are excluded from the inputs, so calling the function again
    on the same frame (e.g. re-running the cell) reproduces the same values
    instead of folding the old statistics into the new ones.

    :param df: Pandas DataFrame containing all features
    :return: The same DataFrame object, with the statistic columns added
    """
    stat_suffixes = ("mean", "std", "skew")
    for group in ["intelligence", "wisdom", "charisma", "dexterity", "strength", "constitution"]:
        # Names this function itself writes; they also contain the group name,
        # so they must be filtered out of the input columns explicitly.
        derived = {f"feature_{group}_{suffix}" for suffix in stat_suffixes}
        cols = [col for col in df.columns if group in col and col not in derived]
        df[f"feature_{group}_mean"] = df[cols].mean(axis=1)
        df[f"feature_{group}_std"] = df[cols].std(axis=1)
        df[f"feature_{group}_skew"] = df[cols].skew(axis=1)
    return df

# Add the group statistics to each frame, in the same order as before.
# NOTE(review): testEng is built from the full tournamentData rather than testData - confirm this is intended.
trainEng, valEng, testEng = map(
    get_group_stats,
    (trainingData, validationData, tournamentData),
)

# Train Model

In [None]:
# Define the Keras model
def build_model(neurons=200,dropout=0.2):
    """
    Assemble and compile a feed-forward network with one hidden layer.

    Layer stack: Dense(relu, no bias) -> BatchNormalization -> Dropout -> Dense(1, sigmoid).

    :param neurons: Number of units in the hidden Dense layer
    :param dropout: Rate passed to the Dropout layer
    :return: The compiled Sequential model
    """
    network = Sequential([
        Dense(neurons, activation='relu',kernel_initializer='glorot_uniform', use_bias=False),
        BatchNormalization(),
        Dropout(dropout),
        Dense(1, activation='sigmoid', kernel_initializer='glorot_normal'),
    ])
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['acc','mse','mae'],
    )
    return network

# Wrap the Keras builder for scikit-learn; build_fn must point at build_model
# (the previous reference, create_model, is not defined in this notebook).
model = KerasClassifier(build_fn=build_model, epochs=8, batch_size=128, verbose=0)

# Validation

# Generate Predictions