In [1]:
from glob import glob
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import stats
import lightgbm as lgb
import xgboost as xgb

import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(2)

***
## load models

In [2]:
files = glob("../input/ump-artifacts/lgbm-gbrt/lgbm-seed*.txt")
display(files)

boosters_lgbm_gbrt = [lgb.Booster(model_file=fn) for fn in files]
display(boosters_lgbm_gbrt)

['../input/ump-artifacts/lgbm-gbrt/lgbm-seed2.txt',
 '../input/ump-artifacts/lgbm-gbrt/lgbm-seed19.txt',
 '../input/ump-artifacts/lgbm-gbrt/lgbm-seed23.txt',
 '../input/ump-artifacts/lgbm-gbrt/lgbm-seed7.txt',
 '../input/ump-artifacts/lgbm-gbrt/lgbm-seed11.txt']

[<lightgbm.basic.Booster at 0x7ff554a6b210>,
 <lightgbm.basic.Booster at 0x7ff50d490d50>,
 <lightgbm.basic.Booster at 0x7ff554a6b3d0>,
 <lightgbm.basic.Booster at 0x7ff554a6b510>,
 <lightgbm.basic.Booster at 0x7ff554a4b2d0>]

In [3]:
files = glob("../input/ump-train-lgbm-xtrees/lgbm-seed*.txt")
display(files)

boosters_lgbm_xtrees = [lgb.Booster(model_file=fn) for fn in files]
display(boosters_lgbm_xtrees)

['../input/ump-train-lgbm-xtrees/lgbm-seed2.txt',
 '../input/ump-train-lgbm-xtrees/lgbm-seed19.txt',
 '../input/ump-train-lgbm-xtrees/lgbm-seed23.txt',
 '../input/ump-train-lgbm-xtrees/lgbm-seed7.txt',
 '../input/ump-train-lgbm-xtrees/lgbm-seed11.txt']

[<lightgbm.basic.Booster at 0x7ff554a4bc90>,
 <lightgbm.basic.Booster at 0x7ff554a6b9d0>,
 <lightgbm.basic.Booster at 0x7ff50d490850>,
 <lightgbm.basic.Booster at 0x7ff554a4b710>,
 <lightgbm.basic.Booster at 0x7ff554a4be90>]

In [4]:
files = glob("../input/ump-artifacts/lgbm-dart/lgbm-seed*.txt")
display(files)

boosters_lgbm_dart = [lgb.Booster(model_file=fn) for fn in files]
display(boosters_lgbm_dart)

['../input/ump-artifacts/lgbm-dart/lgbm-seed2.txt',
 '../input/ump-artifacts/lgbm-dart/lgbm-seed5.txt',
 '../input/ump-artifacts/lgbm-dart/lgbm-seed3.txt',
 '../input/ump-artifacts/lgbm-dart/lgbm-seed7.txt',
 '../input/ump-artifacts/lgbm-dart/lgbm-seed11.txt']

[<lightgbm.basic.Booster at 0x7ff50d36bc90>,
 <lightgbm.basic.Booster at 0x7ff554a4b590>,
 <lightgbm.basic.Booster at 0x7ff50d2b6bd0>,
 <lightgbm.basic.Booster at 0x7ff50d36bcd0>,
 <lightgbm.basic.Booster at 0x7ff50d36b210>]

In [5]:
files = glob("../input/ump-artifacts/lgbm-linear/lgbm-seed*.txt")
display(files)

boosters_lgbm_linear = [lgb.Booster(model_file=fn) for fn in files]
display(boosters_lgbm_linear)

['../input/ump-artifacts/lgbm-linear/lgbm-seed2.txt',
 '../input/ump-artifacts/lgbm-linear/lgbm-seed19.txt',
 '../input/ump-artifacts/lgbm-linear/lgbm-seed23.txt',
 '../input/ump-artifacts/lgbm-linear/lgbm-seed7.txt',
 '../input/ump-artifacts/lgbm-linear/lgbm-seed11.txt']

[<lightgbm.basic.Booster at 0x7ff50d251350>,
 <lightgbm.basic.Booster at 0x7ff50d23eed0>,
 <lightgbm.basic.Booster at 0x7ff50d2513d0>,
 <lightgbm.basic.Booster at 0x7ff50d251790>,
 <lightgbm.basic.Booster at 0x7ff50d251690>]

In [6]:
files = glob("../input/ump-artifacts/lgbm-linear-dart/lgbm-seed*.txt")
display(files)

boosters_lgbm_linear_dart = [lgb.Booster(model_file=fn) for fn in files]
display(boosters_lgbm_linear)

['../input/ump-artifacts/lgbm-linear-dart/lgbm-seed2.txt',
 '../input/ump-artifacts/lgbm-linear-dart/lgbm-seed19.txt',
 '../input/ump-artifacts/lgbm-linear-dart/lgbm-seed23.txt',
 '../input/ump-artifacts/lgbm-linear-dart/lgbm-seed7.txt',
 '../input/ump-artifacts/lgbm-linear-dart/lgbm-seed11.txt']

[<lightgbm.basic.Booster at 0x7ff50d251350>,
 <lightgbm.basic.Booster at 0x7ff50d23eed0>,
 <lightgbm.basic.Booster at 0x7ff50d2513d0>,
 <lightgbm.basic.Booster at 0x7ff50d251790>,
 <lightgbm.basic.Booster at 0x7ff50d251690>]

In [7]:
files = glob("../input/ump-artifacts/xgboost-gbrt/xgb-seed*.json")
display(files)

boosters_xgb_gbrt = list()
for fn in files:
    _model = xgb.Booster()
    _model.load_model(fn)
    boosters_xgb_gbrt.append(_model)
display(boosters_xgb_gbrt)

['../input/ump-artifacts/xgboost-gbrt/xgb-seed11.json',
 '../input/ump-artifacts/xgboost-gbrt/xgb-seed2.json',
 '../input/ump-artifacts/xgboost-gbrt/xgb-seed23.json',
 '../input/ump-artifacts/xgboost-gbrt/xgb-seed19.json',
 '../input/ump-artifacts/xgboost-gbrt/xgb-seed7.json']

[<xgboost.core.Booster at 0x7ff50d598e90>,
 <xgboost.core.Booster at 0x7ff50d23eb90>,
 <xgboost.core.Booster at 0x7ff50d23ed90>,
 <xgboost.core.Booster at 0x7ff50d251c10>,
 <xgboost.core.Booster at 0x7ff50d2518d0>]

***
## inference

In [8]:
import ubiquant
env = ubiquant.make_env()  
iter_test = env.iter_test()

In [9]:
def predict(boosters, dataframe, backend="lightgbm"):
    features = [f"f_{i}" for i in range(300)]
    if backend == "lightgbm":
        preds = [model.predict(dataframe[features]) for model in boosters]
    elif backend == "xgboost":
        dmatrix = xgb.DMatrix(data=dataframe[features])
        preds = [model.predict(dmatrix) for model in boosters]
    elif backend == "catboost":
        pass
    return np.mean(preds, axis=0)

In [10]:
for (test_df, sample_prediction_df) in iter_test:  
    preds = [
        #predict([boosters_lgbm_gbrt[3],], test_df, backend="lightgbm"),
        #predict(boosters_lgbm_xtrees, test_df, backend="lightgbm"),
        #predict(boosters_lgbm_dart, test_df, backend="lightgbm"),
        #predict(boosters_lgbm_linear, test_df, backend="lightgbm"),
        predict(boosters_lgbm_linear_dart, test_df, backend="lightgbm"),
        #predict([boosters_xgb_gbrt], test_df, backend="xgboost")
    ]
    
    sample_prediction_df['target'] = np.mean(preds, axis=0)
    env.predict(sample_prediction_df) 
    display(sample_prediction_df)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


Unnamed: 0,row_id,target
0,1220_1,-0.150352
1,1220_2,-0.087205


Unnamed: 0,row_id,target
0,1221_0,-0.117456
1,1221_1,-0.135035
2,1221_2,-0.127813


Unnamed: 0,row_id,target
0,1222_0,-0.07497
1,1222_1,-0.10565
2,1222_2,-0.116976


Unnamed: 0,row_id,target
0,1223_0,-0.099613


***