# 1D-CNN inference using lag features

This code runs inference on the test data using lag features.  
The lag features are updated on the fly as each test batch is processed.

For more information on the lag feature, please see [this discussion](https://www.kaggle.com/c/ubiquant-market-prediction/discussion/303147).

reference
- training code: https://www.kaggle.com/takamichitoda/ump-train-1dcnn-by-using-lag-features
- last features dict: https://www.kaggle.com/takamichitoda/ump-make-last-features-dict

update:
- Version 6: TimeSeriesSplit, use past average value features
- Version 7: lag features
- Version 9: past average value features

In [None]:
import gc
import pickle
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import ubiquant

# Report whether TensorFlow can see a GPU. The "found" message is printed only
# when a device name containing "GPU" was actually returned, so a missing GPU
# no longer produces both messages.
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
else:
    print('Found GPU at: {}'.format(device_name))

In [None]:
class GCF:
    """Notebook-wide constants: input artifact locations and data dimensions."""

    # Directory holding the trained model file(s).
    MODEL_ROOT = "/kaggle/input/k/takamichitoda/ump-train-1dcnn-by-using-lag-features"
    # Pickled scaler applied to the raw feature columns.
    SCALER_PATH = "/kaggle/input/ump-npy-dataset/std_scaler.pkl"
    # Pickled dict of last-seen features (only referenced by disabled code).
    LAST_FEATURES_DIC = '/kaggle/input/ump-make-last-features-dict/last_features_dic.pkl'
    # Directory holding the running-average artifacts.
    AVG_FEATURES = "/kaggle/input/ump-agg-average-value-features"

    N_FOLDS = 5
    # Feature column names "f_0" .. "f_299".
    FEAT_COLS = list(map("f_{}".format, range(300)))

In [None]:
# Disabled alternative: load one model per fold (GCF.N_FOLDS files named
# ump_1dcnn_f{fold}.h5) so their predictions can be averaged.
#models = []
#for fold in range(GCF.N_FOLDS):
#    model = tf.keras.models.load_model(f"{GCF.MODEL_ROOT}/ump_1dcnn_f{fold}.h5", compile=False)
#    models.append(model)
#models[0].summary()

# Active path: load a single model file. compile=False is passed because the
# model is only used for predict() in this notebook.
# NOTE(review): the file name suggests it was trained on all training data — confirm.
model = tf.keras.models.load_model(f"{GCF.MODEL_ROOT}/ump_1dcnn_all_train.h5", compile=False)

In [None]:
# Load the pickled scaler that is later applied to the raw feature columns.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(GCF.SCALER_PATH, "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
scaler

In [None]:
# Load the running-average artifacts. Each file is opened in a `with` block so
# its handle is closed as soon as the object has been unpickled.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.

# Number of rows seen so far, looked up by investment_id.
with open(f"{GCF.AVG_FEATURES}/investment_id_counts.pkl", "rb") as counts_file:
    investment_id_counts = pickle.load(counts_file)

# Latest running average of the features, looked up by investment_id.
with open(f"{GCF.AVG_FEATURES}/last_avg.pkl", "rb") as last_avg_file:
    last_avg = pickle.load(last_avg_file)

# Scaler applied to the averaged features before they are fed to the model.
with open(f"{GCF.AVG_FEATURES}/agg_avg_std_scaler.pkl", "rb") as avg_scaler_file:
    avg_scaler = pickle.load(avg_scaler_file)

def get_avg_features(investment_id):
    """Return the stored running-average features for *investment_id*.

    Looks the id up in the module-level ``last_avg`` mapping. When the id is
    not present, a fresh list of 300 zeros is returned instead.
    """
    try:
        features = last_avg[investment_id]
    except KeyError:
        # Unknown id: no history yet, so fall back to an all-zero vector.
        features = [0] * 300
    return features

def get_count(investment_id):
    """Return how many rows have been recorded for *investment_id*.

    Looks the id up in the module-level ``investment_id_counts`` mapping and
    returns 0 when the id is not present.
    """
    try:
        seen = investment_id_counts[investment_id]
    except KeyError:
        # Unknown id: nothing has been counted for it yet.
        seen = 0
    return seen

In [None]:
"""
last_features_dic = pickle.load(open(GCF.LAST_FEATURES_DIC, 'rb'))
def get_last_features(investment_id):
    try:
        return last_features_dic[investment_id]
    except KeyError:
        return [0 for _ in range(300)]
"""

In [None]:
%%time
# Stream the test set through the competition API: predict one batch at a
# time, then fold that batch into the per-investment running averages so the
# next batch sees up-to-date history.
env = ubiquant.make_env()
iter_test = env.iter_test()
for (test_df, sample_prediction_df) in iter_test:
    investment_ids = test_df['investment_id']

    # Scaled raw features for the current batch.
    x_org = scaler.transform(test_df[GCF.FEAT_COLS].values)

    # Running mean that includes the current row:
    #   (previous mean * previous count + current features) / (previous count + 1)
    # Ids without history contribute a zero mean with count 0, so their new
    # mean is simply the current feature row.
    prev_avg = np.array(investment_ids.map(get_avg_features).tolist())
    prev_count = investment_ids.map(get_count).values
    count_col = prev_count[:, np.newaxis]  # column vector for row-wise broadcasting
    now_avg = (prev_avg * count_col + test_df[GCF.FEAT_COLS].values) / (count_col + 1)
    _now_avg = avg_scaler.transform(now_avg)

    # Model input: scaled raw features followed by scaled running averages.
    x = np.concatenate((x_org, _now_avg), axis=1)

    with tf.device('/GPU:0'):
        pred_avg = model.predict(x)

    sample_prediction_df['target'] = pred_avg
    env.predict(sample_prediction_df)  # register this batch's predictions

    # Persist the unscaled running averages and bump the row counts.
    for inv_id, avg_row in zip(investment_ids.tolist(), now_avg):
        last_avg[inv_id] = list(avg_row)
        try:
            investment_id_counts[inv_id] += 1
        except KeyError:
            investment_id_counts[inv_id] = 1

In [None]:
# Bare expression: in a notebook this renders the prediction frame left over
# from the final loop iteration as the cell output, for a quick visual check.
sample_prediction_df