# Introduction
This notebook performs EDA and trains a TabNet model.  
RAPIDS (cuDF) is a GPU DataFrame library for loading, aggregating, filtering, and otherwise manipulating data.  (<a href='https://docs.rapids.ai/api/cudf/stable/'>Ref</a>)  <br>
  
ver.14: cuDF dataframe is used in EDA.  
ver.18: Remove outliers (public score 0.135->0.137)

# Packages

In [None]:
!pip -q install ../input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl

from pytorch_tabnet.tab_model import TabNetRegressor
from pytorch_tabnet.metrics import Metric

In [None]:
import copy
import gc
import glob
import os
import pickle
import random

import argparse
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm.auto import tqdm


# Parameters

In [None]:
# Paths
INPUT_PATH = '../input/ubiquant-market-prediction'
TRAIN_PICKLE = '../input/fast-read-data-ubiquant/train_reduced.pkl' # train.csv reduced memory usage

# Train parameters
# When DEBUG is True, the training data is subsampled (see `reduced_sampling`),
# `max_epochs` is lowered, and the fold loop stops after the first fold.
DEBUG = False
args = argparse.Namespace(
    seed = 2022,                  # shared by the RNG seeding, the holdout split and TabNet
    patience = 20,                # passed to TabNet `fit` as `patience`
    batch_size = 1024*20,
    virtual_batch_size = 128*20,
    drop_last = True,
    reduced_sampling = None if not DEBUG else 0.10, # if DEBUG, 10% samples are used.
    max_epochs = 200 if not DEBUG else 5,
    n_folds = 5, # if DEBUG, only one fold is calculated.
    n_steps = 2, # equals to the number of masks
    n_workers = 2,
    n_bins = 16                   # number of `time_span` bins used for stratification
)

# Random seed
def seed_everything(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    # The hash seed of the running interpreter is fixed at start-up, so this
    # only influences Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The model is trained with PyTorch, so its generators must be seeded too.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # safe to call when CUDA is unavailable

# Seed the random number generators once, before any sampling or training.
seed_everything(args.seed)

# Matplotlib style: ggplot theme with a dashed, semi-transparent gray grid.
plt.style.use('ggplot')
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.color'] = 'gray'
plt.rcParams['grid.alpha'] = 0.5
plt.rcParams['grid.linestyle'] = '--'

# Input data

In [None]:
%%time
# Load the training table from the pickle referenced by TRAIN_PICKLE.
# NOTE: unpickling executes arbitrary code; only load pickle files you trust.
df_train = pd.read_pickle(TRAIN_PICKLE)
display(df_train.head())

In [None]:
# Transform from pd.DataFrame to cudf.DataFrame
# NOTE(review): `cudf` is imported here rather than in the import cell at the
# top of the notebook; later cells depend on this cell having been run.
import cudf
print('RAPIDS version',cudf.__version__)

# `df_train` (pandas) is kept alongside the cuDF copy; later cells still read it.
cudf_train = cudf.DataFrame.from_pandas(df_train)
display(cudf_train.head())

# EDA

In [None]:
class EDA():
    """Quick exploratory summaries and plots for the training table.

    The plotting methods call ``.to_arrow()`` on columns, so they expect a
    cuDF DataFrame. Feature columns are taken to start at positional column 4
    (``iloc[:, 4:]``); the frame must also contain the ``investment_id``,
    ``time_id`` and ``target`` columns.
    """
    
    def __init__(self, df):
        """Keep a reference to ``df`` and set the colour used for histograms."""
        self.df = df
        self.bin_color = '#66bd63'
        
    def base_info(self):
        """Print record and ID counts plus the largest/smallest feature value."""
        print(f'# of records: {len(self.df)}')
        print(f'# of unique investment_id: {len(self.df["investment_id"].unique())}')
        print(f'# of unique time_id: {len(self.df["time_id"].unique())}')
        print(f'# of record with "target==0": {len(self.df[self.df["target"]==0])}')
        # Per-feature extremes; sorting them exposes which feature holds the
        # overall maximum / minimum.
        features_max = self.df.iloc[:, 4:].max()
        features_min = self.df.iloc[:, 4:].min()
        max_value   = features_max.max()
        max_feature = features_max.sort_values().index[-1]
        min_value   = features_min.min()
        min_feature = features_min.sort_values().index[0]
        print(f'Max feature value: {max_value:.2f} (in {max_feature})')
        print(f'Min feature value: {min_value:.2f} (in {min_feature})')
            
    def plot_histgram(self, n_bins=100):
        """Draw three histograms side by side.

        Panels: the raw target, the mean target per ``investment_id`` and the
        number of ``time_id`` records per ``investment_id``.

        Args:
            n_bins: Number of histogram bins used in every panel.
        """
        fig, ax = plt.subplots(1, 3, figsize=(20,4))
        
        ax[0].set_xlabel('target')
        ax[0].hist(self.df['target'].to_arrow(), bins=n_bins, color=self.bin_color)
        # .to_arrow() is required for a cudf dataframe or series

        ax[1].set_xlabel('target.mean group by investment_id')
        target_mean = self.df[['investment_id','target']]\
                     .groupby('investment_id', as_index=False).agg({'target':'mean'})\
                     ['target'].to_arrow()
        ax[1].hist(target_mean, bins=n_bins, color=self.bin_color)

        ax[2].set_xlabel('time_id.count group by investment_id')
        time_id_count = self.df[['investment_id','time_id']]\
                       .groupby('investment_id', as_index=False).agg({'time_id':'count'})\
                       ['time_id'].to_arrow()
        ax[2].hist(time_id_count, bins=n_bins, color=self.bin_color)
                
    def plot_features_mean_and_std(self):
        """Plot mean (left axis) and standard deviation (right axis) per feature.

        Three stacked panels are drawn, each covering 100 consecutive columns
        starting at positional column 4.
        """
        fig, ax1 = plt.subplots(3, 1, figsize=(19,8)) # ax1: mean
        for i in range(3):
            # Sampling data per 100 features
            col_s = 4 + i*100
            col_e = col_s + 100
            labels = self.df.columns[col_s:col_e] 
            # Plot Mean (each value is divided by the row count before summing)
            mean = (self.df.iloc[:,col_s:col_e]/len(self.df)).sum(axis=0)
            ax1[i].plot(mean.to_arrow(), color='red', label='Mean')
            ax1[i].set_xticks(np.arange(100))
            ax1[i].set_xticklabels(labels=labels, fontsize=8, rotation=90)
            ax1[i].tick_params(axis='y', color='red', labelcolor='red')
            ax1[i].set_xlim(-1, 100)
            ax1[i].set_ylim(-0.6, 0.6)
            ax1[i].set_ylabel('Mean', color='red')
            # Plot standard deviation (population form: squared deviations / N)
            std = np.sqrt(((mean - self.df.iloc[:,col_s:col_e])**2/len(self.df)).sum(axis=0))
            ax2 = ax1[i].twinx() # ax2: std
            ax2.plot(std.to_arrow(), color='blue', label='Standard Deviation')
            ax2.set_xticks(np.arange(100))
            ax2.set_xticklabels(labels=labels, fontsize=8, rotation=90)
            ax2.tick_params(axis='y', color='blue', labelcolor='blue')
            ax2.set_xlim(-1, 100)
            ax2.set_ylim(0, 1.2)
            ax2.set_ylabel('Standard Deviation', color='blue')
            # Legend: merge the handles of both y-axes into a single box
            handle1, label1 = ax1[i].get_legend_handles_labels()
            handle2, label2 = ax2.get_legend_handles_labels()
            ax1[i].legend([handle2[0],handle1[0]], [label2[0],label1[0]], loc=4)
        
        plt.subplots_adjust(hspace=0.4)    
        plt.show()

In [None]:
# Run the EDA helpers on the GPU copy of the training data.
eda = EDA(cudf_train)
eda.base_info()

### Some records have "target==0". If you use a loss function like RMSPE, the value becomes inf.

In [None]:
# Histograms of the target, the per-investment mean target and the per-investment record count.
eda.plot_histgram()

In [None]:
# Mean and standard deviation of every feature, 100 features per panel.
eda.plot_features_mean_and_std()

### From the figures above, almost all features have mean ≃ 0.0 and std ≃ 1.0, except for a few:
Small std  : f_124, f_170, f_175, f_272  
Small mean : f_170, f_175  
Large mean : f_41, f_182, f_246  

### Investigate these distributions as follows

In [None]:
def plot_distribution(df, features, title=None, bin_min=-100, bin_max=100, n_bins=100):
    """Plot log-scale histograms of up to four features in one row.

    Each panel is annotated with the feature's mean and (population) standard
    deviation, both computed over the rows of ``df``.

    Args:
        df: Frame holding the feature columns; columns must support
            ``.to_arrow()`` (cuDF).
        features: Column names to plot; the figure has four axes, so at most
            four names are supported.
        title: Row label drawn to the left of the first panel.
        bin_min: Lower histogram / x-axis limit.
        bin_max: Upper histogram / x-axis limit.
        n_bins: Number of bin edges passed to ``np.linspace``.
    """
    fig, ax = plt.subplots(1, 4, figsize=(14, 3))
    bins = np.linspace(bin_min, bin_max, n_bins)
    n_rows = len(df)
    for i, feature in enumerate(features):
        column = df[feature]

        mean = (column/n_rows).sum(axis=0)
        # Normalise by the frame being plotted rather than by the global
        # `df_train`, so the statistic stays correct for any `df`.
        std = np.sqrt(((mean - column)**2/n_rows).sum(axis=0))
                
        ax[i].hist(column.to_arrow(), bins=bins, color='#66bd63', label=feature)
        ax[i].set_xlim(bin_min, bin_max)
        ax[i].set_ylim(1, 10**6)
        ax[i].set_yscale('log')
        ax[i].text(0.99, 0.99, f'mean: {mean:.3f}',
                   va='top', ha='right', transform=ax[i].transAxes)
        ax[i].text(0.99, 0.89, f'std : {std:.3f}',
                   va='top', ha='right', transform=ax[i].transAxes)
        ax[i].set_title(feature)
    
    # Row label, placed in axes coordinates to the left of the first panel.
    ax[0].text(-0.5, 0.5, f'{title}:', fontsize=14,
               va='top', ha='right', transform=ax[0].transAxes)
    plt.show()

In [None]:
# Feature groups to inspect; the dict key doubles as the row label of each figure.
sample_features = {}
sample_features['Reference   '] = ['f_1', 'f_2', 'f_3', 'f_4']
sample_features['Small std   '] = ['f_124', 'f_170', 'f_175', 'f_272']
sample_features['Small mean']   = ['f_170', 'f_175'] 
sample_features['Large mean']   = ['f_41', 'f_182', 'f_246'] 

# Wide view: histogram range -100 .. 100.
for k, v in sample_features.items():
    plot_distribution(cudf_train, features=v, title=k, bin_min=-100, bin_max=100, n_bins=100)

## Change the range of the histograms to between -10 and 10.

In [None]:
# Zoomed view of the same feature groups: histogram range -10 .. 10.
for k, v in sample_features.items():
    plot_distribution(cudf_train, features=v, title=k, bin_min=-10, bin_max=10, n_bins=100)

### From above,
- Most of the features are not normally distributed, and some are multimodal.
- Some distributions contain singular peaks. In particular, f_124 looks like a set of delta functions.

In [None]:
# Drop the EDA helper, release PyTorch's cached CUDA memory and run the garbage collector.
del eda
torch.cuda.empty_cache()
_ = gc.collect()

## Summary

In [None]:
# Descriptive statistics of every numeric column, moved to pandas for display.
summary = cudf_train.describe().to_pandas()
# Names of the 300 feature columns f_0 .. f_299 (reused by later cells).
f_xx = [f'f_{i}' for i in range(300)]  
# Row-wise mean / std of each statistic across all 300 features.
# The labels now state the range that is actually aggregated (f_0 .. f_299).
summary.insert(3, 'f_0-f_299_mean', summary[f_xx].mean(axis='columns'))
summary.insert(4, 'f_0-f_299_std',  summary[f_xx].std(axis='columns'))
# Extra row holding |mean| so columns can be ranked by distance from zero.
summary.loc['abs_mean'] = abs(summary.loc['mean'])
display(summary)

In [None]:
# Rank the columns (axis=1) by the value in the 'abs_mean' row and show only that row;
# the two ID columns are dropped from the display.
print('Sorted by abs_mean (descending)')
display(summary.sort_values(by='abs_mean', axis=1, ascending=False).\
        loc[['abs_mean'],:].drop(['investment_id', 'time_id'], axis=1))
      
# Same ranking, using the 'std' row.
print('Sorted from large std (descending)')
display(summary.sort_values(by='std', axis=1, ascending=False).\
        loc[['std'],:].drop(['investment_id', 'time_id'], axis=1))

In [None]:
# Scatter the target against hand-picked features: one figure for the features
# with a large absolute mean and one for the features with a large std.
scatter_groups = [
    ('Large mean (absolute values)', ['f_246', 'f_175', 'f_41', 'f_182']),
    ('Large  std', ['f_9', 'f_294', 'f_18', 'f_176']),
]
for group_title, group_features in scatter_groups:
    fig, ax = plt.subplots(1, 4, figsize=(28, 7))
    fig.suptitle(group_title, fontsize=24)
    for i, f in enumerate(group_features):
        ax[i].scatter(cudf_train[f].to_arrow(), cudf_train['target'].to_arrow(), alpha=0.5)
        ax[i].set_xlabel(f)
        ax[i].set_ylabel('target')
        ax[i].set_title(f)
    # Leave room at the top of the figure for the suptitle.
    fig.tight_layout(rect=[0,0,1,0.96])
    # Call show() so the figure is rendered explicitly (the second figure
    # previously referenced `plt.show` without calling it).
    plt.show()

#### Some data exist far from the center of distribution. These points will be removed as follows.

# Preprocessing

### Define outliers
A record is treated as an outlier if any of its features lies outside mean ± 50*std, or outside mean ± 20*std when fewer than 20 records of that feature do so.   

In [None]:
def get_outliers(df, hard_sigma=50, soft_sigma=20, max_soft_count=20):
    """Collect the index labels of records holding extreme feature values.

    For every feature in the notebook-level list ``f_xx``, using the mean and
    std stored in the notebook-level ``summary`` table:

    * rows further than ``hard_sigma`` standard deviations from the mean are
      always flagged;
    * otherwise, rows further than ``soft_sigma`` standard deviations are
      flagged only when there are fewer than ``max_soft_count`` of them.

    Args:
        df: Frame to scan; its index must support ``.to_arrow()`` (cuDF).
            If the argument has no ``columns`` attribute (the notebook's call
            site passes the ``cudf`` module), the global ``cudf_train`` is
            scanned instead, which is the frame this function always read
            before.
        hard_sigma: Multiple of std beyond which a row is always flagged.
        soft_sigma: Multiple of std used for the conditional rule.
        max_soft_count: The conditional rule applies only below this row count.

    Returns:
        pandas DataFrame with columns ``idx`` (index label of the row) and
        ``features`` (the first feature that flagged it), one row per ``idx``.
    """
    frame = df if hasattr(df, 'columns') else cudf_train

    outliers = []
    features = []
    for f in f_xx:
        mean = summary.loc['mean', f]
        std  = summary.loc['std', f]
        values = frame[f]

        flagged = frame[(values > mean + hard_sigma*std) | (values < mean - hard_sigma*std)]
        if len(flagged) == 0:
            # Evaluate the looser threshold only when the strict one found nothing.
            flagged = frame[(values > mean + soft_sigma*std) | (values < mean - soft_sigma*std)]
            if not (0 < len(flagged) < max_soft_count):
                continue

        flagged_idx = flagged.index.to_arrow().to_pylist()
        outliers.extend(flagged_idx)
        features.extend([f] * len(flagged_idx))
    
    df_outliers = pd.DataFrame({'idx': outliers, 'features': features})
    # A row can be extreme in several features; keep its first occurrence only.
    df_outliers = df_outliers.drop_duplicates(subset='idx')
    
    return df_outliers

In [None]:
# Pass the training frame itself (the `cudf` module was passed here before).
df_outliers = get_outliers(cudf_train)
print('Number of outliers:', len(df_outliers))
# Features responsible for the largest number of flagged records.
display(df_outliers.groupby('features').agg('count')\
        .sort_values('idx', ascending=False).head())

### Remove the outliers

In [None]:
# Drop the flagged records from the training frame.
# NOTE(review): `df_outliers['idx']` holds index labels but is used here to index
# `cudf_train.index` by position; this presumably relies on the frame still having
# its default 0..n-1 index - confirm. The cell also rebinds `cudf_train`, so
# re-running it is not idempotent.
cudf_train = cudf_train.drop(cudf_train.index[df_outliers['idx']])
print(f'len(cudf_train): {len(cudf_train)} ({len(df_outliers)} outliers are removed.)')

In [None]:
# The summary table and the outlier list are no longer needed; free them.
# After this cell `summary` is undefined, so `get_outliers` cannot be called again
# without re-running the summary cell.
del summary, df_outliers
torch.cuda.empty_cache()
_ = gc.collect()

### KFold

In [None]:
def add_fold_column(df):
    """Set aside a holdout sample and assign stratified fold numbers.

    Steps:
      1. Compute ``time_span`` (max ``time_id`` minus min ``time_id``) for each
         ``investment_id`` and merge it onto ``df``.
      2. Split the rows with ``train_test_split``, stratified on ``time_span``
         cut into ``args.n_bins`` bins. Only the training part is kept; the
         other part is reported by size and not returned.
      3. Assign a ``fold`` column (0 .. ``args.n_folds`` - 1) with
         ``StratifiedKFold`` on the same binned ``time_span``.

    Args:
        df: cuDF DataFrame with ``time_id`` and ``investment_id`` columns.

    Returns:
        The retained rows, sorted by ``time_id`` and ``investment_id`` with a
        fresh index, with ``time_span`` and ``fold`` columns added.
    """
    # Create 'time_span' column, which is used for stratified KFold.
    df_time = (df.loc[:,['time_id', 'investment_id']]
               .groupby('investment_id')
               .agg({'time_id': ['min', 'max']})
               .reset_index())
    df_time['time_span'] = df_time['time_id']['max'] - df_time['time_id']['min']
    display(df_time.head())
    
    # Merge 'time_span' to df
    # The aggregation produced two-level column labels; the round trip through
    # pandas drops the second level and then removes the 'time_id' columns,
    # leaving 'investment_id' and 'time_span'.
    df_time = pd.DataFrame(df_time.to_pandas()
                           .droplevel(level=1, axis=1)
                           .drop('time_id' ,axis=1))
    df_time = cudf.DataFrame.from_pandas(df_time)
    df = df.merge(df_time, on=['investment_id'])
   
    # Holdout
    # Bin 'time_span' into integer codes and use them as stratification labels.
    _target = cudf.cut(df['time_span'], args.n_bins, labels=False)
    _train, _valid = train_test_split(_target,
                                      stratify=_target.values.get(),
                                      random_state=args.seed)
    print(f'Number of holdout records: {len(_valid)}')
    # Keep only the training part of the split.
    # NOTE(review): the index labels of `_train` are used as row positions here;
    # presumably the merged frame has a default 0..n-1 index - confirm.
    df = df.iloc[_train.index].sort_values(by=['time_id', 'investment_id'])\
           .reset_index(drop=True)
    
    # StratifiedKFold
    df["fold"] = -1
    # Re-bin after the holdout so labels line up with the re-indexed frame.
    _target = cudf.cut(df['time_span'], args.n_bins, labels=False) 
    skfold = StratifiedKFold(n_splits=args.n_folds)
    for fold, (train_idx, valid_idx) in enumerate(skfold.split(_target, _target.values.get())):
        # Each row receives the number of the fold in which it is validation data.
        df.loc[valid_idx, 'fold'] = fold
       
    return df

In [None]:
# Replace the training frame with the holdout-reduced, fold-labelled version.
# The cell rebinds `cudf_train`, so running it twice applies the holdout twice.
cudf_train = add_fold_column(cudf_train)
display(cudf_train.head())

### Create features
ref. https://www.kaggle.com/valleyzw/ubiquant-lgbm-baseline

In [None]:
def create_features(df):
    """Add pairwise-sum features and drop a fixed set of raw features.

    Note that the sum columns are written into the frame passed in; the
    dropped columns are removed only from the returned frame.

    Args:
        df: Frame with the columns ``f_0`` .. ``f_299``.

    Returns:
        Tuple ``(frame, features)``: the frame without the dropped columns,
        and the alphabetically sorted list of model input column names
        (remaining ``f_*`` columns, ``time_id``, ``investment_id`` and the
        sum columns).
    """
    # Each name 'a-b' yields a new column holding df['a'] + df['b'].
    pair_sums = ['f_231-f_250', 'f_118-f_280', 'f_155-f_297','f_25-f_237',
                 'f_179-f_265', 'f_119-f_270', 'f_71-f_197', 'f_21-f_65']
    removed = ['f_148', 'f_72', 'f_49', 'f_205', 'f_228', 'f_97', 'f_262', 'f_258']

    for pair in pair_sums:
        left, right = pair.split('-')
        df[pair] = df[left] + df[right]

    candidates = {f'f_{i}' for i in range(300)}
    candidates |= {'time_id', 'investment_id'}
    candidates |= set(pair_sums)
    features = sorted(candidates - set(removed))

    return df.drop(removed, axis=1), features

In [None]:
# Build the model inputs; `features` is the sorted list of input column names.
cudf_train, features = create_features(cudf_train)
# 'time_span' was only needed for the stratified split.
cudf_train = cudf_train.drop(['time_span'], axis=1)
print('len(features):', len(features))
print(features)
display(cudf_train.head())

# Training

In [None]:
# Metric
class PearsonCorrelation(Metric):
    """Pearson correlation coefficient as a pytorch-tabnet evaluation metric.

    ``_name`` is the metric name and ``_maximize = True`` marks larger values
    as better.
    """

    def __init__(self):
        self._name = 'pearson_corr'
        self._maximize = True

    def __call__(self, x, y):
        """Return the Pearson correlation between ``x`` and ``y``.

        Args:
            x: Array of values; singleton dimensions are squeezed away.
            y: Array of values with the same number of elements as ``x``.

        Returns:
            The correlation coefficient, or ``0.0`` when either input has zero
            variance (the coefficient is undefined there and would otherwise
            be NaN).
        """
        x = np.asarray(x, dtype=np.float64).squeeze()
        y = np.asarray(y, dtype=np.float64).squeeze()
        x_diff = x - np.mean(x)
        y_diff = y - np.mean(y)
        # np.sum is vectorised; the builtin sum() iterates element by element.
        denominator = np.sqrt(np.sum(x_diff**2))*np.sqrt(np.sum(y_diff**2))
        if denominator == 0:
            return 0.0
        return np.dot(x_diff, y_diff)/denominator

# Train run
def train(df_train, features, args, debug=DEBUG):
    """Train one TabNetRegressor per fold and collect the results.

    Each fold's model is saved to ``./tabnet_fold{fold}``.

    Args:
        df_train: cuDF DataFrame containing the ``row_id``, ``fold`` and
            ``target`` columns and every column named in ``features``.
        features: Ordered list of input column names; ``investment_id`` is
            treated as the only categorical feature.
        args: Namespace providing ``seed``, ``n_steps``, ``n_folds``,
            ``max_epochs``, ``patience``, ``batch_size``,
            ``virtual_batch_size``, ``n_workers``, ``drop_last`` and, in debug
            mode, ``reduced_sampling``.
        debug: When true, train on a random subsample of the rows and stop
            after the first fold.

    Returns:
        dict with the keys ``histories`` (training history per fold),
        ``feature_importances`` (one importance column per fold) and
        ``oof_predictions`` (``row_id`` plus the out-of-fold ``pred``).
    """
    
    # Tabnet parameters    
    tabnet_params = dict(
                cat_idxs = [i for i, f in enumerate(features) if f in ['investment_id']],
                cat_emb_dim = 1,
                n_d = 16,
                n_a = 16,
                n_steps = args.n_steps,
                gamma = 2,
                n_independent = 2,
                n_shared = 2,
                lambda_sparse = 0,
                optimizer_fn = Adam,
                optimizer_params = dict(lr = (2e-2)),
                mask_type = 'entmax',
                scheduler_params = dict(T_0=200, T_mult=1, eta_min=1e-4, 
                                        last_epoch=-1, verbose=False),
                scheduler_fn = CosineAnnealingWarmRestarts,
                seed = args.seed,
                verbose = 10
            )
    
    if debug: # Reduced sampling
        print('Run as DEBUG')
        n_samples = len(df_train)
        sample_idx = df_train.sample(int(n_samples*args.reduced_sampling), random_state=args.seed).index
        df_train = df_train.iloc[sample_idx].reset_index(drop=True)
        print('len(df_train):', len(df_train))
        del sample_idx
        _ = gc.collect()
    
    ###### Outputs ######
    histories = {}
    oof_predictions = pd.DataFrame({'row_id': df_train['row_id'].to_pandas()}) # predictions for validation data
    feature_importances = pd.DataFrame({'features': features})
    ####################

    for fold in tqdm(range(args.n_folds)):

        print(f"\n{'>'*15} Fold {fold} {'<'*15}")
        
        # Prepare train and valid data: rows of this fold are held out for
        # validation, all other rows are used for fitting.
        train_idx = df_train['fold']!=fold
        valid_idx = df_train['fold']==fold
        # `.values.get()` copies the GPU arrays to host NumPy arrays.
        X_train = df_train.loc[train_idx, features].values.get()
        X_valid = df_train.loc[valid_idx, features].values.get()
        y_train = df_train.loc[train_idx,'target'].values.get().reshape(-1,1)
        y_valid = df_train.loc[valid_idx,'target'].values.get().reshape(-1,1)
        print(f'len(X_train): {len(X_train)}, len(X_valid): {len(X_valid)},')
        
        # Model initialize
        model = TabNetRegressor(**tabnet_params)
        
        # Train
        model.fit(
            X_train, y_train,
            eval_set = [(X_valid, y_valid)],
            max_epochs = args.max_epochs,
            patience = args.patience,
            batch_size = args.batch_size, 
            virtual_batch_size = args.virtual_batch_size,
            num_workers = args.n_workers,
            drop_last = args.drop_last,
            eval_metric = [PearsonCorrelation],
            loss_fn = torch.nn.MSELoss() # torch.nn.L1Loss()
        )

        # Save model
        model.save_model(f'./tabnet_fold{fold}')
        # Scores
        histories[f'fold{fold}'] = model.history
        # Predict for validation data (stored as float16 to save memory)
        oof_predictions.loc[valid_idx.values.get(), 'pred'] = model.predict(X_valid).astype('float16')
        # Feature importances
        feature_importances[f'importance_fold{fold}'] = model.feature_importances_

        # Free the fold's arrays and model before the next fold starts.
        del model, X_train, X_valid, y_train, y_valid
        torch.cuda.empty_cache()
        _ = gc.collect()
        
        if debug:
            # The loop runs over folds, so debug mode ends after the first fold.
            print('Debug: stop calculation at first fold')
            break
        
    outputs = dict(
        histories = histories,
        feature_importances = feature_importances,
        oof_predictions = oof_predictions,
    )
    
    return outputs

In [None]:
# Run the cross-validated training and list what was collected.
outputs = train(cudf_train, features, args)
print('outputs:')
for k, v in outputs.items():
    print(f'  {k}: {type(v)}')

# Evaluation

## Score

In [None]:
def plot_scores(histories, metrics):
    """Plot the per-epoch curve of each metric, one panel per metric.

    Args:
        histories: Mapping from fold label (e.g. ``'fold0'``) to an object
            indexable by metric name that yields the per-epoch values.
        metrics: Metric names to plot; names containing ``'val'`` are drawn
            with solid lines, all others dashed.

    If a metric has no recorded values for some fold, a message is printed and
    the function returns without showing the figure.
    """
    
    # squeeze=False keeps `ax` two-dimensional, so a single metric works too.
    fig, ax = plt.subplots(len(metrics), 1, figsize=(12,4*len(metrics)), squeeze=False)
    ax = ax[:, 0]
    for i, metric in enumerate(metrics):
        # Set linestyle
        linestyle = 'solid' if 'val' in metric else 'dashed'

        for fold, history in histories.items():
            
            if len(history[metric])==0:
                # `fold` already carries the 'fold' prefix (e.g. 'fold0').
                print(f'{metric} is not in history. ({fold})')
                return
                
            ax[i].plot(history[metric], label=metric + f' {fold}', linestyle=linestyle)
            ax[i].set_xlabel('Epoch', fontsize=14)
            ax[i].set_ylabel(metric, fontsize=14)
            ax[i].legend()
            
    plt.show()

In [None]:
# Training loss and validation Pearson correlation per epoch, for every fold.
plot_scores(outputs['histories'], metrics=['loss', 'val_0_pearson_corr'])

## Feature importance (Top50)

In [None]:
def sort_by_importance(df):
    """Return a copy of ``df`` ranked by mean importance across folds.

    Args:
        df: Frame with a ``features`` column; every other column is averaged
            row-wise.

    Returns:
        New frame with an extra ``mean_importance`` column, sorted by it in
        descending order. ``df`` itself is not modified.
    """
    fold_mean = df.drop('features', axis=1).mean(axis=1)
    ranked = df.assign(mean_importance=fold_mean)
    return ranked.sort_values(by='mean_importance', ascending=False)

def plot_feature_importance_top50(df):
    """Draw a horizontal bar chart of the 50 most important features.

    Args:
        df: Feature-importance frame accepted by ``sort_by_importance``.
    """
    top50 = sort_by_importance(df).head(50)
    plt.figure(figsize=(10,14))
    sns.barplot(x=top50['mean_importance'], y=top50['features'], palette='autumn')
    plt.title('Feature Importance Top50 (mean by folds)')
    plt.ylabel('Features')
    plt.xlabel('')
    plt.show()

In [None]:
# Show the five most important features, then plot the top 50.
display(sort_by_importance(outputs['feature_importances']).head())
plot_feature_importance_top50(outputs['feature_importances'])

## Mask

In [None]:
def plot_mask(masks, fold):
    """Plot the attention masks of one fold as heatmaps, one panel per mask.

    Args:
        masks: Mapping from mask number (0-based, used as panel index) to a
            2-D array; only its first 100 rows are drawn.
        fold: Label shown in the panel titles.

    Reads the notebook-level ``args.n_steps`` (number of panels) and
    ``outputs['feature_importances']`` (to choose the labelled columns).
    """
    # squeeze=False keeps `ax` indexable even when there is a single panel.
    fig, ax = plt.subplots(args.n_steps, 1, figsize=(18, 3*args.n_steps), squeeze=False)
    ax = ax[:, 0]

    # Top 5 features by mean importance. The index of the importance frame is
    # the feature's position in the `features` list, so the tick label is taken
    # from its 'features' column rather than from `df_train.columns`, whose
    # column order is unrelated to the model inputs.
    top_features = sort_by_importance(outputs['feature_importances'])[:5]
    x_ticks = top_features.index
    x_labels = top_features['features']

    for i, mask in masks.items():
        ax[i].set_title(f'Mask{i+1} ({fold})')
        sns.heatmap(mask[:100, :], ax=ax[i], cmap='inferno') # sampling first 100 records
        ax[i].set_xticks(x_ticks)
        ax[i].set_xticklabels(labels=x_labels, fontsize=7, rotation=90)
        ax[i].xaxis.grid(True, which='minor')
        ax[i].xaxis.set_minor_locator(AutoMinorLocator())
        ax[i].set_ylabel('rows (first 100 records)')
        
    plt.subplots_adjust(hspace=0.5)    
    plt.show()

In [None]:
# for fold, masks in outputs['masks'].items():
#     plot_mask(masks, fold)

## Explain matrix

In [None]:
def plot_explain_matrices(explain_matrix, fold):
    """Plot the first 100 rows of one fold's explain matrix as a heatmap.

    Args:
        explain_matrix: 2-D array; only its first 100 rows are drawn.
        fold: Label shown in the title.

    Reads the notebook-level ``outputs['feature_importances']`` to choose the
    labelled columns.
    """
    fig, ax = plt.subplots(1, 1, figsize=(18, 2.5))
    ax.set_title(f'Explain Matrix ({fold})')
    sns.heatmap(explain_matrix[:100], ax=ax, cmap='inferno')
    # Top 5 features by mean importance. The index of the importance frame is
    # the feature's position in the `features` list, so the tick label is taken
    # from its 'features' column rather than from `df_train.columns`, whose
    # column order is unrelated to the model inputs.
    top_features = sort_by_importance(outputs['feature_importances'])[:5]
    x_ticks = top_features.index
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(labels=top_features['features'], fontsize=7, rotation=90)
    ax.xaxis.grid(True, which='minor')
    ax.xaxis.set_minor_locator(AutoMinorLocator())
    ax.set_ylabel('rows (first 100 records)')
    plt.show()

In [None]:
# for fold, explain_matrix in outputs['explain_matrices'].items():
#     plot_explain_matrices(explain_matrix, fold)

## True-Prediction correlation

In [None]:
def plot_pred_true(pred, true):
    """Scatter predictions against true targets with a dashed y = x guide.

    Both axes are fixed to the range -10 .. 10.

    Args:
        pred: Predicted values (x-axis).
        true: True target values (y-axis).
    """
    lower, upper = -10, 10
    fig, ax = plt.subplots(1, 1, figsize=(5,5))
    ax.scatter(pred, true, s=2)
    # Diagonal on which a perfect prediction would lie.
    ax.plot([lower, upper], [lower, upper], c='gray', linestyle='dashed')
    ax.set(xlabel='Prediction', ylabel='True target',
           xlim=(lower, upper), ylim=(lower, upper))
    plt.show()

In [None]:
# Keep the rows that received an out-of-fold prediction, ordered by row_id.
df_corr = outputs['oof_predictions'].dropna().sort_values(by='row_id').reset_index(drop=True)
# Look the true target up by row_id in the pandas training frame loaded at the top.
df_corr = df_corr.merge(df_train.loc[:, ['row_id', 'target']], on='row_id', how='left')
plot_pred_true(df_corr['pred'], df_corr['target'])

# Submission

In [None]:
def predict(models, df_test, features):
    """Average the predictions of several fitted models for one test frame.

    Args:
        models: Iterable of objects exposing ``predict(array)``.
        df_test: Frame containing every column named in ``features``.
        features: Column names, in the order the models expect them.

    Returns:
        Element-wise mean of the models' prediction arrays.
    """
    fold_predictions = [model.predict(df_test[features].values) for model in models]
    return np.stack(fold_predictions).mean(axis=0)

def load_trained_models():
    """Load every saved fold model matching ``./tabnet_fold*.zip``.

    Reads the notebook-level ``features`` and ``args`` to build the
    constructor parameters, which mirror the ones used in ``train``.

    Returns:
        List of ``TabNetRegressor`` instances, one per archive, ordered by
        file name. The list is empty when no archive is found.
    """
    
    tabnet_params = dict(
                cat_idxs = [i for i, f in enumerate(features) if f in ['investment_id']],
                cat_emb_dim = 1,
                n_d = 16,
                n_a = 16,
                n_steps = args.n_steps,
                gamma = 2,
                n_independent = 2,
                n_shared = 2,
                lambda_sparse = 0,
                optimizer_fn = Adam,
                optimizer_params = dict(lr = (2e-2)),
                mask_type = 'entmax',
                scheduler_params = dict(T_0=200, T_mult=1, eta_min=1e-4, 
                                        last_epoch=-1, verbose=False),
                scheduler_fn = CosineAnnealingWarmRestarts,
                seed = args.seed,
                verbose = 10
            )
    
    models = []
    # glob returns paths in arbitrary order; sorting keeps the ensemble order
    # (and so the floating-point averaging order) the same on every run.
    for model_path in sorted(glob.glob('./tabnet_fold*.zip')):
        # A fresh instance per archive avoids deep-copying a shared model
        # after each load.
        model = TabNetRegressor(**tabnet_params)
        model.load_model(model_path)
        models.append(model)
    
    return models

In [None]:
# Load model
models = load_trained_models()

# Make submission
# NOTE(review): `ubiquant` is imported here rather than in the import cell;
# presumably it is only available inside the competition environment - confirm.
import ubiquant
env = ubiquant.make_env()
iter_test = env.iter_test()
for (df_test, df_submission) in iter_test:
    # Extract 'time_id' from 'row_id' (the digits before the first underscore)
    df_test['time_id'] = df_test.row_id.str.extract(r'(\d+)_.*').astype(np.uint16)
    # Create features same as df_train
    # This rebinds the notebook-level `features` list on every iteration.
    df_test, features = create_features(df_test)
    # Average of the fold models' predictions for this batch.
    df_submission['target'] = predict(models, df_test, features)
    env.predict(df_submission) 

## Please upvote if this was useful for you