In [1]:
# Numerai API
from numerapi import NumerAPI

# data
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# stats
from scipy.stats import spearmanr
from sklearn.metrics import r2_score, mean_squared_error

# machine learning models
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# other
import gc
import json
from tqdm import trange
from itertools import product
import functools
import random
from timeit import default_timer
import re
import time
from pprint import pprint
from copy import deepcopy
from varname import nameof
from datetime import datetime

# save variables
import pickle
import joblib

# my utils
from utils import *

In [2]:
# Column selection. FEATURES_L, ERA and Y_COLS are not defined in this
# notebook; presumably they arrive via `from utils import *` -- confirm.
X_COLS = FEATURES_L
COLUMNS = [ERA] + X_COLS + Y_COLS
Y_ALT = 'target_paul_v4_20'

# Read only the columns we need, then make the era column a plain int32 so it
# can be compared against the integer range built below.
df = pd.read_parquet('data/train.parquet', columns=COLUMNS)
df[ERA] = df[ERA].astype('int32')
eras = df[ERA]
e0 = eras.min()
e1 = eras.max() + 1

# Keep every 4th era, starting from the first era present in the file.
df = df[df[ERA].isin(np.arange(e0, e1, 4))]

# Per-column flag: does this column contain at least one missing value
# (recorded before the fill below)?
dfnan = df.isna().any()

# Replace missing values with 0.5. `fillna` returns a new frame, so this avoids
# a masked in-place write on the filtered frame (SettingWithCopyWarning) and
# does not push the whole DataFrame through `np.isnan`, which raises on any
# non-numeric column.
# NOTE(review): 0.5 is presumably the neutral midpoint of the value range --
# confirm.
df = df.fillna(0.5)

# Hyper-parameters handed to LGBMRegressor as keyword arguments.
params_lgbm = dict(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=5,
    num_leaves=2 ** 5,  # 32 leaves, the most a depth-5 binary tree can hold
    colsample_bytree=0.1,
    device='gpu',
)

# Hyper-parameters handed to XGBRegressor as keyword arguments; the shared
# settings mirror params_lgbm so the two libraries are timed like-for-like.
# NOTE(review): XGBoost >= 2.0 deprecates `gpu_id` and `tree_method='gpu_hist'`
# in favour of `device='cuda'` with `tree_method='hist'` -- confirm which
# XGBoost version this notebook is pinned to.
params_xgb = dict(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=5,
    max_leaves=2 ** 5,  # 32 leaves, the most a depth-5 binary tree can hold
    colsample_bytree=0.1,
    gpu_id=0,
    tree_method='gpu_hist',
)

# Inputs for the single-model timing runs: the feature columns, and the
# column(s) selected by Y_TRUE as the label.
X, y = df[X_COLS], df[Y_TRUE]

In [None]:
# LightGBM benchmark: wall-clock seconds for one fit and one in-sample predict.
# The estimator is constructed outside the timed region.
lgbm = LGBMRegressor(**params_lgbm)

fit_started = default_timer()
lgbm.fit(X, y)
t_lgbm_fit = default_timer() - fit_started

# Predict on the same rows the model was trained on; only the elapsed time is
# used by the reporting cell.
predict_started = default_timer()
y_pred = lgbm.predict(X)
t_lgbm_pred = default_timer() - predict_started

In [None]:
# XGBoost benchmark: wall-clock seconds for one fit and one in-sample predict.
# The estimator is constructed outside the timed region.
xgb = XGBRegressor(**params_xgb)

fit_started = default_timer()
xgb.fit(X, y)
t_xgb_fit = default_timer() - fit_started

# Predict on the same rows the model was trained on; note this rebinds y_pred.
predict_started = default_timer()
y_pred = xgb.predict(X)
t_xgb_pred = default_timer() - predict_started

In [None]:
# Emit the four timings (seconds), one per line, in the order:
# LightGBM fit, LightGBM predict, XGBoost fit, XGBoost predict.
print(t_lgbm_fit, t_lgbm_pred, t_xgb_fit, t_xgb_pred, sep='\n')

In [3]:
# Re-select the features and switch the label from Y_TRUE to the full Y_COLS
# selection for the multi-target run.
X, y = df[X_COLS], df[Y_COLS]

# Drop the name `df`. After this the cell cannot be re-run without first
# re-executing the data-loading cell.
del df

In [5]:
# Wrap a fresh LightGBM regressor in MultiTargetTrainer (not defined in this
# notebook; presumably from `utils`) and time a single fit against all Y_COLS.
# NOTE(review): the "8jobs" suffix suggests the trainer runs 8 parallel jobs --
# confirm against MultiTargetTrainer's defaults.
model_8jobs = MultiTargetTrainer(LGBMRegressor(**params_lgbm))

fit_started = default_timer()
model_8jobs.fit(X, y)
t_8jobs = default_timer() - fit_started