# INPUT

In [None]:
import os, gc, warnings, random, datetime, traceback, gresearch_crypto
warnings.filterwarnings('ignore')

import pandas as pd, numpy as np

import numpy.polynomial.hermite as Herm
import math

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.model_selection import KFold

In [None]:
# Input files of the competition dataset (Kaggle mount points).
TRAIN_CSV         = '/kaggle/input/g-research-crypto-forecasting/train.csv'
ASSET_DETAILS_CSV = '/kaggle/input/g-research-crypto-forecasting/asset_details.csv'
EXAMPLE_TEST      = '/kaggle/input/g-research-crypto-forecasting/example_test.csv'

# All three files are read eagerly; asset details are ordered by Asset_ID.
df_train          = pd.read_csv(TRAIN_CSV)
df_test           = pd.read_csv(EXAMPLE_TEST)
df_asset_details  = pd.read_csv(ASSET_DETAILS_CSV).sort_values('Asset_ID')

# Directory prefixes for the per-asset, per-fold weight files. The training and
# loading helpers append '/id_<asset_id>fold-<fold>.hdf5' to these prefixes.
url_save_weight   = './'
url_load_weight   = '../input/3f-weight-mlp512'

In [None]:
def fix_all_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Parameters
    ----------
    seed : int
        Value used for Python's hash seed, the `random` module, NumPy's global
        generator and TensorFlow's global generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    # The models are built with TensorFlow; without this call weight
    # initialisation, Dropout and GaussianNoise stay unseeded.
    tf.random.set_seed(seed)

In [None]:
# Preferred accelerator; the strategy cell below switches this to "GPU" when no TPU is found.
device = "TPU" 
#device = "GPU" 

# Global seed, applied right away through fix_all_seeds.
seed = 42
fix_all_seeds(seed)

# Run switches. Appending "& False" to a value (see the trailing comments) turns it off.
remove_op_test_overlapping_data = True #& False  -> selects the validation cut-off date
visualization                   = True #& False  -> enables the visualization cell
train_models                    = True #& False  -> per the original note: False means load saved models
test_baseline_model             = True #& False
fine_tuning                     = True #& False  -> per the original note: used together with train_models, after pre-training 10 epochs
tuning_id                       = [3,12,13]      # asset ids that are actually re-fitted when fine tuning

In [None]:
# Resolve the accelerator and the matching tf.distribute strategy.
# Every failure path (no TPU found, TPU initialisation error) falls back to the
# default strategy, so `strategy` is always defined for the cells below.
strategy = None
if device == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        tpu = None
    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # Report the cause instead of swallowing it, then fall back.
            print("failed to initialize TPU:", err)
            strategy = None
            device = "GPU"
    else:
        device = "GPU"

if strategy is None:
    strategy = tf.distribute.get_strategy()
if device == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync

In [None]:
# Create the competition environment and obtain its test iterator.
# NOTE(review): presumably make_env() may only be called once per kernel session - confirm.
env       = gresearch_crypto.make_env()
iter_test = env.iter_test()

In [None]:
# CV PARAMS
# These four values are passed to PurgedGroupTimeSeriesSplit as n_splits,
# group_gap, max_test_group_size and max_train_group_size; the groups built by
# the training helpers are calendar days.
FOLDS                = 3
GROUP_GAP            = 180
MAX_TEST_GROUP_SIZE  = 270
MAX_TRAIN_GROUP_SIZE = 450

# BATCH SIZE AND EPOCHS (one entry per fold)
# NOTE(review): BATCH_SIZES is only printed and fed to get_lr_callback in the
# visible training helpers; it is not passed to model.fit as batch_size.
BATCH_SIZES = [2**9] * FOLDS  
EPOCHS      = [3] * FOLDS

# WHICH NETWORK ARCHITECTURE TO USE?
# UNITS_NETWORK: hidden units of the encoder / auto-encoder head, one entry per fold.
# UNITS_NETWORK_MLP: widths of the successive MLP blocks (same for every fold).
UNITS_NETWORK     = [2**7] * FOLDS
UNITS_NETWORK_MLP = [2**6, 2**7, 2**8]  

# USE VERBOSE=0 for silent, VERBOSE=1 for interactive, VERBOSE=2 for commit
VERBOSE = 2

In [None]:
# Hold out the most recent part of the training data as `df_valid`.
# The flag only selects the cut-off date; the slicing logic is shared.
# NOTE(review): `df_train` itself is left unfiltered, so it still contains the
# rows copied into `df_valid` - confirm this is intended.
valid_cutoff  = '2021-09-20 00:00:00' if remove_op_test_overlapping_data else '2021-02-23 00:00:00'
# Compare on a standalone datetime Series so no temporary column has to be
# added to (and dropped from) the large training frame.
row_datetimes = pd.to_datetime(df_train['timestamp'], unit='s')
df_valid      = df_train[row_datetimes > valid_cutoff].reset_index(drop=True)
del row_datetimes

In [None]:
# Keep only validation rows that have a Target value, then renumber the index.
df_valid = df_valid.dropna(subset=['Target']).reset_index(drop=True)

In [None]:
#@title GroupTimeSeriesSplit { display-mode: "form" }
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

# https://github.com/getgaurav2/scikit-learn/blob/d4a3af5cc9da3a76f0266932644b884c99724c57/sklearn/model_selection/_split.py#L2243
class GroupTimeSeriesSplit(_BaseKFold):
    """Time Series cross-validator variant with non-overlapping groups.

    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals according to a
    third-party provided group.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.
    This cross-validation object is a variation of :class:`KFold`.
    In the kth split, it returns first k folds as train set and the
    (k+1)th fold as test set.
    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).
    Note that unlike standard cross-validation methods, successive
    training sets are supersets of those that come before them
    (unless ``max_train_size`` truncates them).

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_size : int, default=None
        Maximum number of samples in a single training set. When a training
        set is larger, only its last ``max_train_size`` indices are kept.

    Examples
    --------
    >>> import numpy as np
    >>> groups = np.array(['a'] * 6 + ['b'] * 5 + ['c'] * 4 + ['d'] * 3)
    >>> gtss = GroupTimeSeriesSplit(n_splits=3)
    >>> for train_idx, test_idx in gtss.split(groups, groups=groups):
    ...     print("TRAIN:", train_idx, "TEST:", test_idx)
    TRAIN: [0, 1, 2, 3, 4, 5] TEST: [6, 7, 8, 9, 10]
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] TEST: [11, 12, 13, 14]
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] TEST: [15, 16, 17]
    """
    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_size=None
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_size = max_train_size

    @staticmethod
    def _collect_indices(group_dict, group_keys):
        """Return the sorted, unique sample indices of the given groups."""
        chunks = [group_dict[key] for key in group_keys]
        if not chunks:
            return np.array([], dtype=int)
        # One concatenation per split instead of one per group.
        return np.unique(np.concatenate(chunks))

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. Groups are ordered by first appearance.

        Yields
        ------
        train : list of int
            The training set indices for that split.
        test : list of int
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If ``groups`` is None or there are fewer groups than folds.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        n_folds = n_splits + 1
        # Unique groups in order of first appearance (np.unique alone sorts by value).
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_groups = _num_samples(unique_groups)
        group_dict = {}
        for idx in np.arange(n_samples):
            group_dict.setdefault(groups[idx], []).append(idx)
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))
        group_test_size = n_groups // n_folds
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        for group_test_start in group_test_starts:
            train_array = self._collect_indices(
                group_dict, unique_groups[:group_test_start])
            train_end = train_array.size
            if self.max_train_size and self.max_train_size < train_end:
                train_array = train_array[train_end -
                                          self.max_train_size:train_end]
            test_array = self._collect_indices(
                group_dict,
                unique_groups[group_test_start:
                              group_test_start + group_test_size])
            yield [int(i) for i in train_array], [int(i) for i in test_array]
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

# modified code for group gaps; source
# https://github.com/getgaurav2/scikit-learn/blob/d4a3af5cc9da3a76f0266932644b884c99724c57/sklearn/model_selection/_split.py#L2243
class PurgedGroupTimeSeriesSplit(_BaseKFold):
    """Time Series cross-validator variant with non-overlapping groups.

    Allows for a gap in groups to avoid potentially leaking info from
    train into test if the model has windowed or lag features.
    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals according to a
    third-party provided group.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.
    This cross-validation object is a variation of :class:`KFold`.
    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_group_size : int, default=Inf
        Maximum number of groups in a single training set; the groups closest
        to the test fold are kept.
    max_test_group_size : int, default=Inf
        Maximum number of groups in a single test set.
    group_gap : int, default=None
        Number of groups left out between the end of the training groups and
        the first test group. The same number is also used to drop that many
        leading *samples* from each test fold. ``None`` means no gap.
    verbose : bool, default=False
        Stored on the instance; currently has no effect.
    """

    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_group_size=np.inf,
                 max_test_group_size=np.inf,
                 group_gap=None,
                 verbose=False
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_group_size = max_train_group_size
        self.group_gap = group_gap
        self.max_test_group_size = max_test_group_size
        self.verbose = verbose

    @staticmethod
    def _collect_indices(group_dict, group_keys):
        """Return the sorted, unique sample indices of the given groups."""
        chunks = [group_dict[key] for key in group_keys]
        if not chunks:
            return np.array([], dtype=int)
        # One concatenation per split instead of one per group.
        return np.unique(np.concatenate(chunks))

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. Groups are ordered by first appearance.

        Yields
        ------
        train : list of int
            The training set indices for that split. Empty when the gap
            leaves no group before the test fold.
        test : list of int
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If ``groups`` is None or there are fewer groups than folds.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        # The documented default is None; treat it as "no gap" instead of
        # failing on the integer arithmetic below.
        group_gap = 0 if self.group_gap is None else self.group_gap
        max_test_group_size = self.max_test_group_size
        max_train_group_size = self.max_train_group_size
        n_folds = n_splits + 1
        # Unique groups in order of first appearance (np.unique alone sorts by value).
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_groups = _num_samples(unique_groups)
        group_dict = {}
        for idx in np.arange(n_samples):
            group_dict.setdefault(groups[idx], []).append(idx)
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))

        group_test_size = min(n_groups // n_folds, max_test_group_size)
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        for group_test_start in group_test_starts:
            # Clamp the end of the training window at 0: a negative slice end
            # would wrap around and pull test/future groups into the train set.
            train_stop = max(0, group_test_start - group_gap)
            train_start = max(0, train_stop - max_train_group_size)
            train_array = self._collect_indices(
                group_dict, unique_groups[train_start:train_stop])

            test_array = self._collect_indices(
                group_dict,
                unique_groups[group_test_start:
                              group_test_start + group_test_size])
            # Drop the first `group_gap` samples of the test fold (sample
            # count, not group count).
            test_array = test_array[group_gap:]

            yield [int(i) for i in train_array], [int(i) for i in test_array]

In [None]:
# Simple units: mass, angular frequency and reduced Planck constant are all 1.
m=1.0
w=1.0
hbar=1.0

def hermite(x, n):
    """Evaluate the physicists' Hermite polynomial H_n at sqrt(m*w/hbar) * x.

    Parameters
    ----------
    x : scalar or array-like
        Position(s) before rescaling.
    n : int
        Non-negative polynomial degree.
    """
    xi = np.sqrt(m*w/hbar)*x
    # Coefficient vector selecting only the degree-n basis polynomial.
    herm_coeffs = np.zeros(n+1)
    herm_coeffs[n] = 1
    return Herm.hermval(xi, herm_coeffs)

def stationary_state(x,n):
    """Evaluate the n-th stationary state of the quantum harmonic oscillator.

    psi_n(x) = (2**n * n!)**-0.5 * (m*w/(pi*hbar))**0.25
               * exp(-xi**2 / 2) * H_n(xi),   xi = sqrt(m*w/hbar) * x

    Parameters
    ----------
    x : scalar or array-like
        Position(s) at which the wave function is evaluated.
    n : int
        Non-negative quantum number.

    Returns
    -------
    Same container type as `x`, holding psi_n(x).

    Raises
    ------
    ValueError
        If `n` is negative.
    """
    xi = np.sqrt(m*w/hbar)*x
    # Normalisation computed in log space: 2**n * n! exceeds the float range
    # for n > 150, which previously zeroed the prefactor or raised
    # OverflowError. lgamma(n + 1) == log(n!).
    log_norm = 0.5 * (n * math.log(2.0) + math.lgamma(n + 1))
    prefactor = math.exp(-log_norm) * (m*w/(np.pi*hbar))**(0.25)
    psi = prefactor * np.exp(- xi**2 / 2) * hermite(x,n)
    return psi

In [None]:
def get_features(df, submissions=False):
    """Build the engineered feature set from raw candle data.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Candle data holding 'Count', 'Open', 'High', 'Low', 'Close', 'Volume'
        and 'VWAP', either as columns of a frame or as labels of a single row.
    submissions : bool, default=False
        Historically selected the single-row (Series) drop path. Kept for
        backward compatibility; the container type of `df` now decides, so a
        mismatched flag no longer raises.

    Returns
    -------
    Same container type as `df`, holding only the engineered features, with
    inf, -inf and NaN replaced by 0.
    """
    raw_cols = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
    dff = df[raw_cols].copy()

    body = dff['Close'] - dff['Open']   # signed candle body

    dff['upper_Shadow'] = dff['High'] - np.maximum(dff['Close'], dff['Open'])
    dff['lower_Shadow'] = np.minimum(dff['Close'], dff['Open']) - dff['Low']
    dff['hlco_ration']  = (dff['High'] - dff['Low']) / body
    dff['high_div_low'] = dff['High'] / dff['Low']
    dff['gtrade']       = body / dff['Count']
    dff['shadow1']      = body / dff['Volume']
    dff['shadow3']      = dff['upper_Shadow'] / dff['Volume']
    dff['shadow5']      = dff['lower_Shadow'] / dff['Volume']
    dff['mean2']        = (dff['shadow1'] + dff['Volume']) / 2
    dff['spread']       = dff['High'] - dff['Low']
    dff['log_exp_co']   = np.logaddexp(dff['Close'], dff['Open'])
    dff['volume_count'] = dff['Volume'] / (dff['Count'] + 1)

    # Auto-encoder (AE) input indicator based on a quantum harmonic oscillator (QHO).
    # The quantum number 115 approximates 360 / pi = 114.59 (6 h * 60 min / pi,
    # 6 h being a quarter of a day); the original note suggests 110 <= n <= 120
    # for training.
    # If the dataset makes this value unsuitable, the Hermite degree can be
    # adjusted, e.g. by adding a column computed with hermite(values, n); in
    # simple units the original note recommends 60 <= n <= 210.
    # Other degrees or scaling factors may give better results.
    dff['harmonic_oscillator_115v'] = stationary_state(dff['volume_count'], 115)

    # Drop the raw inputs: column-wise for a frame, label-wise for a single row.
    if isinstance(dff, pd.DataFrame):
        dff = dff.drop(columns=raw_cols)
    else:
        dff = dff.drop(raw_cols)
    # Divisions by zero above produce inf/NaN; neutralise them.
    dff = dff.replace([np.inf, -np.inf, np.nan], 0)
    return dff

In [None]:
def plot_cv_indices(cv, X, y, group, ax, n_splits, lw=10):
    """Draw the train/test assignment of every CV split plus target and group rows.

    Parameters
    ----------
    cv : cross-validator exposing ``split(X=, y=, groups=)``
    X, y, group : array-likes of equal length
        Samples, targets and group labels passed to ``cv.split``.
    ax : matplotlib Axes to draw on.
    n_splits : int
        Number of splits, used for the y tick labels and limits.
    lw : int, default=10
        Line width of the markers.

    Returns
    -------
    The same `ax`, for chaining.
    """
    cmap_cv = plt.cm.coolwarm
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap keeps the
    # (name, lut) signature.
    jet     = plt.get_cmap('jet', 256)
    seq     = np.linspace(0, 1, 256)
    np.random.shuffle(seq)   # in place; gives neighbouring groups distinct colours
    cmap_data = ListedColormap(jet(seq))

    splits = list(cv.split(X=X, y=y, groups=group))
    for ii, (tr, tt) in enumerate(splits):
        # NaN = unused sample, 0 = train, 1 = test.
        indices = np.array([np.nan] * len(X))
        indices[tt] = 1
        indices[tr] = 0
        ax.scatter(range(len(indices)), [ii + .5] * len(indices), c=indices, marker='_', lw=lw, cmap=cmap_cv, vmin=-.2, vmax=1.2)

    # Target and group rows go right below the last split row. Using the
    # number of splits avoids relying on the loop variable after the loop.
    n_rows = len(splits)
    ax.scatter(range(len(X)), [n_rows + .5] * len(X), c=y, marker='_', lw=lw, cmap=plt.cm.Set3)
    ax.scatter(range(len(X)), [n_rows + 1.5] * len(X), c=group, marker='_', lw=lw, cmap=cmap_data)
    yticklabels = list(range(n_splits)) + ['target', 'day']
    ax.set(yticks=np.arange(n_splits+2) + .5, yticklabels=yticklabels, xlabel='Sample index', ylabel="CV iteration", ylim=[n_splits+2.2, -.2], xlim=[0, len(y)])
    ax.set_title('{}'.format(type(cv).__name__), fontsize=15)
    return ax

In [None]:
from tensorflow.keras import backend as K
class Mish(tf.keras.layers.Layer):
    """Mish activation as a Keras layer: ``x * tanh(softplus(x))``."""

    def __init__(self, **kwargs):
        super(Mish, self).__init__(**kwargs)
        self.supports_masking = True

    def call(self, inputs):
        """Apply the activation element-wise."""
        return inputs * K.tanh(K.softplus(inputs))

    def get_config(self):
        """Return the layer config.

        The layer takes no constructor arguments of its own, so the base
        config is complete. (The previous version merged in an undefined
        `config` name and raised NameError.)
        """
        return super(Mish, self).get_config()

    def compute_output_shape(self, input_shape):
        """The activation is element-wise, so the shape is unchanged."""
        return input_shape

def mish(x):
    """Mish activation function: ``x * tanh(softplus(x))``.

    Computed directly on the tensor rather than by instantiating a new Lambda
    layer on every call.
    """
    return x * K.tanh(K.softplus(x))

# Register the plain callable so that the string 'mish' resolves in
# Activation('mish') and Dense(..., activation='mish').
tf.keras.utils.get_custom_objects().update({'mish': mish})

In [None]:
def build_model(fold, dim = 128, weight = 1.0):
    """Build and compile the three-headed auto-encoder + MLP network.

    Parameters
    ----------
    fold : int
        Index into UNITS_NETWORK selecting the encoder width.
    dim : int, default=128
        Number of input features.
    weight : float, default=1.0
        Asset weight handed to mse_w for the 'out' loss.

    Returns
    -------
    Compiled Keras model with outputs [decoder, ae_out, out].
    """
    layers        = tf.keras.layers
    encoder_units = UNITS_NETWORK[fold]
    drop_rate     = 0.25
    learning_rate = 9e-4

    def dense_block(tensor, units):
        # Dense -> BatchNorm -> mish -> Dropout, created in exactly this order.
        tensor = layers.Dense(units)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        tensor = layers.Activation('mish')(tensor)
        return layers.Dropout(drop_rate)(tensor)

    inp = layers.Input(shape = (dim, ))
    x0  = layers.BatchNormalization()(inp)

    # Encoder: noisy input -> dense representation.
    encoder = layers.GaussianNoise(drop_rate)(x0)
    encoder = layers.Dense(encoder_units)(encoder)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('mish')(encoder)

    # Decoder: maps the encoding back to `dim` values.
    decoder = layers.Dropout(drop_rate)(encoder)
    decoder = layers.Dense(dim, name = 'decoder')(decoder)

    # Auxiliary head fed by the decoder output.
    x_ae   = dense_block(decoder, encoder_units)
    out_ae = layers.Dense(1, activation = 'mish', name = 'ae_out')(x_ae)

    # Main head: normalised input concatenated with the encoding, then the MLP.
    x = layers.Concatenate()([x0, encoder])
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(drop_rate)(x)
    for units in UNITS_NETWORK_MLP:
        x = dense_block(x, units)
    out = layers.Dense(1, activation = 'mish', name = 'out')(x)

    losses = {
        'decoder': tf.keras.losses.CosineSimilarity(),
        'ae_out':  tf.keras.losses.MeanSquaredError(),
        'out':     mse_w(weight),
    }
    metrics = {
        'decoder': tf.keras.metrics.CosineSimilarity(name='cosine'),
        'ae_out':  tf.keras.metrics.RootMeanSquaredError(name='rmse'),
        'out':     tf.keras.metrics.RootMeanSquaredError(name='rmse'),
    }
    optimizer = tf.keras.optimizers.RMSprop(learning_rate = learning_rate, momentum=0.09)

    model = tf.keras.models.Model(inputs = inp, outputs = [decoder, out_ae, out])
    model.compile(optimizer = optimizer, loss = losses, metrics = metrics)
    return model

In [None]:
def mse(x, y):
    """Per-sample mean squared error between `x` and `y` (Keras functional loss)."""
    per_sample_error = tf.keras.losses.mean_squared_error(x, y)
    return per_sample_error

def weight_mse(x, y, w):
    """Sum of the per-sample MSE between `x` and `y`, scaled by sqrt(w)."""
    scale = math.sqrt(w)
    return tf.reduce_sum(scale * mse(x, y))

def mse_w(w):
    """Return a two-argument Keras loss that applies weight_mse with weight `w`."""
    def w_mse(x, y):
        return weight_mse(x, y, w)
    return w_mse

In [None]:
def get_lr_callback(batch_size = 8):
    """Create a LearningRateScheduler with linear warm-up and exponential decay.

    The peak rate scales with `batch_size` and the global REPLICAS count.
    Epochs below the warm-up length ramp linearly from the initial rate to
    the peak; afterwards the rate decays geometrically towards a floor.
    """
    warmup_epochs = 5
    hold_epochs   = 0
    lr_initial    = 5e-6
    lr_peak       = 1.25e-6 * REPLICAS * batch_size
    lr_floor      = 1e-6
    decay_rate    = 0.8

    def schedule(epoch):
        # Linear ramp-up.
        if epoch < warmup_epochs:
            return (lr_peak - lr_initial) / warmup_epochs * epoch + lr_initial
        # Optional plateau at the peak (empty while hold_epochs == 0).
        if epoch < warmup_epochs + hold_epochs:
            return lr_peak
        # Exponential decay towards the floor.
        return (lr_peak - lr_floor) * decay_rate**(epoch - warmup_epochs - hold_epochs) + lr_floor

    return tf.keras.callbacks.LearningRateScheduler(schedule, verbose=False)

In [None]:
def get_all_in_model_for_asset(asset_id, df_train, df_asset_details, fine_tuning=False, tuning_id=tuning_id):
    """Train (or fine-tune) one model per CV fold for one asset, warm-starting each fold.

    When training, every fold after the first starts from the checkpoint
    written by the previous fold. When fine tuning, each fold loads its
    pre-trained weights from `url_load_weight` and is re-fitted only if
    `asset_id` is listed in `tuning_id`.

    Parameters
    ----------
    asset_id : int
        Asset to select from `df_train`.
    df_train : pandas.DataFrame
        Training rows with 'Asset_ID', 'timestamp', 'Target' and the raw
        candle columns used by get_features.
    df_asset_details : pandas.DataFrame
        Frame with 'Asset_ID' and 'Weight' columns.
    fine_tuning : bool, default=False
        Load pre-trained weights instead of training from scratch.
    tuning_id : sequence of int
        Assets that are actually re-fitted when `fine_tuning` is set.

    Returns
    -------
    list
        Exactly one model per fold, in fold order. (The last fold used to be
        appended twice on the training path.)
    """
    # Asset_ID -> Weight lookup; the weight scales the 'out' loss.
    asset_weight    = dict(zip(df_asset_details['Asset_ID'].tolist(),
                               df_asset_details['Weight'].tolist()))

    df              = df_train[df_train['Asset_ID'] == asset_id]
    df_proc         = get_features(df)
    df_proc['date'] = pd.to_datetime(df['timestamp'], unit='s')
    df_proc['y']    = df['Target']
    # get_features already zero-fills its own columns, so this drops the rows
    # whose Target is missing.
    df_proc         = df_proc.dropna(how="any")
    X               = df_proc.drop("y", axis=1)
    y               = df_proc["y"]
    # One group per calendar day, numbered in order of first appearance.
    groups          = pd.factorize(X['date'].dt.day.astype(str) + '_' + X['date'].dt.month.astype(str) + '_' + X['date'].dt.year.astype(str))[0]
    X               = X.drop(columns = 'date')
    del df, df_proc
    gc.collect()
    gkf     = PurgedGroupTimeSeriesSplit(n_splits             = FOLDS,
                                         group_gap            = GROUP_GAP,
                                         max_train_group_size = MAX_TRAIN_GROUP_SIZE,
                                         max_test_group_size  = MAX_TEST_GROUP_SIZE).split(X, y, groups)

    models = []
    for fold, (train_idx, val_idx) in enumerate(gkf):
        # GET TRAINING, VALIDATION SET
        x_train, x_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        print('>>> FOLD:',fold); print('>>> Training: UNITS_NETWORK=%s | BATCH_SIZES=%s' % (UNITS_NETWORK[fold], BATCH_SIZES[fold]*REPLICAS))

        # BUILD MODEL
        K.clear_session()
        with strategy.scope(): model = build_model(fold, dim = x_train.shape[1], weight = asset_weight[asset_id])

        # SAVE BEST MODEL EACH FOLD
        model_save = tf.keras.callbacks.ModelCheckpoint(url_save_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold),
                                                         monitor           = 'val_out_rmse',
                                                         verbose           = 0,
                                                         save_best_only    = True,
                                                         save_weights_only = True,
                                                         mode              = 'min',
                                                         save_freq         = 'epoch')
        early_stop = tf.keras.callbacks.EarlyStopping(monitor              = 'val_out_rmse',
                                                      patience             = 1,
                                                      mode                 = 'min',
                                                      restore_best_weights = True)
        # NOTE(review): BATCH_SIZES only drives the LR schedule here; it is not
        # passed to fit as batch_size - confirm that is intended.
        fit_kwargs = dict(epochs          = EPOCHS[fold],
                          callbacks       = [model_save, get_lr_callback(BATCH_SIZES[fold]), early_stop],
                          validation_data = (x_val, y_val),
                          verbose         = VERBOSE)

        # TRAIN
        if not fine_tuning:
            if fold > 0:
                # Warm start from the checkpoint the previous fold just wrote.
                model.load_weights(url_save_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold-1))
            model.fit(x_train, y_train, **fit_kwargs)
        else:
            model.load_weights(url_load_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold))
            model.trainable = False
            if asset_id in tuning_id:
                model.trainable = True
                model.fit(x_train, y_train, **fit_kwargs)
        print('>>> Loading: FOLD=%s' %(fold))
        models.append(model)
    return models

In [None]:
def get_Xy_and_model_for_asset(asset_id, df_train, df_asset_details, fine_tuning=False, tuning_id=tuning_id):
    """Train (or fine-tune) an independent model per CV fold for one asset.

    Every fold builds a fresh model. When `fine_tuning` is set, the fold's
    weights are loaded from `url_load_weight` and the model is re-fitted once
    for every entry of `tuning_id` equal to `asset_id`.

    Returns
    -------
    list
        One model per fold, in fold order.
    """
    # Asset_ID -> Weight lookup; the weight scales the 'out' loss.
    weight_by_asset = dict(zip(df_asset_details['Asset_ID'].tolist(),
                               df_asset_details['Weight'].tolist()))

    asset_rows       = df_train[df_train['Asset_ID'] == asset_id]
    frame            = get_features(asset_rows)
    frame['date']    = pd.to_datetime(asset_rows['timestamp'], unit='s')
    frame['y']       = asset_rows['Target']
    frame            = frame.dropna(how="any")
    X                = frame.drop("y", axis=1)
    y                = frame["y"]
    # One group per calendar day, numbered in order of first appearance.
    stamps           = X['date'].dt
    day_keys         = stamps.day.astype(str) + '_' + stamps.month.astype(str) + '_' + stamps.year.astype(str)
    groups           = pd.factorize(day_keys)[0]
    X                = X.drop(columns = 'date')
    del asset_rows
    del frame

    gc.collect()
    splitter = PurgedGroupTimeSeriesSplit(n_splits             = FOLDS,
                                          group_gap            = GROUP_GAP,
                                          max_train_group_size = MAX_TRAIN_GROUP_SIZE,
                                          max_test_group_size  = MAX_TEST_GROUP_SIZE)

    fold_models = []
    for fold, (train_idx, val_idx) in enumerate(splitter.split(X, y, groups)):
        # Training / validation slices for this fold.
        x_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        x_val, y_val     = X.iloc[val_idx], y.iloc[val_idx]

        print('>>> FOLD:',fold)
        print('>>> Training: UNITS_NETWORK=%s | BATCH_SIZES=%s' % (UNITS_NETWORK[fold], BATCH_SIZES[fold]*REPLICAS))

        # Fresh model for every fold.
        K.clear_session()
        with strategy.scope():
            model = build_model(fold, dim = x_train.shape[1], weight = weight_by_asset[asset_id])

        # Keep the best weights of the fold on disk and stop as soon as the
        # validation RMSE of the main head stops improving.
        model_save = tf.keras.callbacks.ModelCheckpoint(
            url_save_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold),
            monitor = 'val_out_rmse', verbose = 0,
            save_best_only = True, save_weights_only = True,
            mode = 'min', save_freq = 'epoch')
        early_stop = tf.keras.callbacks.EarlyStopping(
            monitor='val_out_rmse', patience=1, mode='min', restore_best_weights=True)

        def fit_once():
            # A new LR scheduler is created for every fit call.
            return model.fit(x_train, y_train,
                             epochs          = EPOCHS[fold],
                             callbacks       = [model_save, get_lr_callback(BATCH_SIZES[fold]), early_stop],
                             validation_data = (x_val, y_val),
                             verbose         = VERBOSE)

        if fine_tuning:
            model.load_weights(url_load_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold))
            model.trainable = False
            for candidate in tuning_id:
                if asset_id == candidate:
                    model.trainable = True
                    history = fit_once()
        else:
            history = fit_once()

        print('>>> Loading: FOLD=%s' %(fold))
        fold_models.append(model)
    return fold_models

In [None]:
def get_load_model_for_asset(asset_id, df_train):
    """Rebuild the per-fold models of one asset and load their saved weights.

    The feature frame and the purged CV split are recomputed only to recover
    the model input width and the number of folds; no training happens here.

    Parameters
    ----------
    asset_id : value matched against ``df_train['Asset_ID']``; also used to
        format the weight file name.
    df_train : DataFrame with at least the 'Asset_ID', 'timestamp' and
        'Target' columns.

    Returns
    -------
    list
        One model per fold yielded by the splitter, each loaded from
        ``url_load_weight + '/id_<asset_id>fold-<fold>.hdf5'``.
    """
    df              = df_train[df_train['Asset_ID'] == asset_id]
    df_proc         = get_features(df)
    df_proc['date'] = pd.to_datetime(df['timestamp'], unit='s')
    df_proc['y']    = df['Target']
    df_proc         = df_proc.dropna(how="any")
    X               = df_proc.drop("y", axis=1)
    y               = df_proc["y"]
    # One integer group code per calendar day ("day_month_year" key).
    groups          = pd.factorize(X['date'].dt.day.astype(str) + '_' + X['date'].dt.month.astype(str) + '_' + X['date'].dt.year.astype(str))[0]
    X               = X.drop(columns = 'date')
    gc.collect()
    gkf             = PurgedGroupTimeSeriesSplit(n_splits             = FOLDS,
                                                 group_gap            = GROUP_GAP,
                                                 max_train_group_size = MAX_TRAIN_GROUP_SIZE,
                                                 max_test_group_size  = MAX_TEST_GROUP_SIZE).split(X, y, groups)

    # Row slicing never changes the column count, so the input width can be
    # read once from X instead of copying a train/val slice for every fold.
    n_features = X.shape[1]

    models = []
    # The split indices themselves are not needed; iterating the splitter only
    # keeps the fold numbering identical to the one used when weights were saved.
    for fold, _ in enumerate(gkf):
        K.clear_session()
        with strategy.scope():
            model = build_model(fold, dim = n_features)
        model.load_weights(url_load_weight + '/id_%ifold-%i.hdf5' %(asset_id, fold))
        print('>>> Loading: FOLD=%s' %(fold))
        models.append(model)
    return models

# PROCESSING

In [None]:
if visualization:
    # Draw the purged group time-series CV layout for a single asset.
    asset_id = 1 # 1 == btc
    df = df_train[df_train['Asset_ID'] == asset_id]

    # Feature frame plus a calendar timestamp (used for grouping) and the target.
    df_proc = get_features(df)
    df_proc['date'] = pd.to_datetime(df['timestamp'], unit='s')
    df_proc['y'] = df['Target']
    df_proc = df_proc.dropna(how="any")

    y = df_proc["y"]
    X = df_proc.drop(columns="y")

    # One integer group code per calendar day ("day_month_year" key).
    groups = pd.factorize(
        X['date'].dt.day.astype(str)
        + '_' + X['date'].dt.month.astype(str)
        + '_' + X['date'].dt.year.astype(str)
    )[0]
    X = X.drop('date', axis=1)

    cv = PurgedGroupTimeSeriesSplit(
        n_splits=FOLDS,
        group_gap=GROUP_GAP,
        max_train_group_size=MAX_TRAIN_GROUP_SIZE,
        max_test_group_size=MAX_TEST_GROUP_SIZE,
    )
    fig, ax = plt.subplots(figsize=(12, 6))
    plot_cv_indices(cv, X, y, groups, ax, FOLDS, lw=20)

In [None]:
%%time
# Build the {asset_id: [model per fold]} dictionary, either by training /
# fine-tuning (train_models) or by loading previously saved weights.
models = {}
for asset_id, asset_name in zip(df_asset_details['Asset_ID'], df_asset_details['Asset_Name']):
    if train_models:
        # Training from scratch and fine-tuning share one entry point; the
        # `fine_tuning` flag passed through selects the behaviour, so only the
        # banner text differs here.
        # (An alternative trainer, get_all_in_model_for_asset, was left
        # commented out with the same arguments.)
        action = "Fine Tuning" if fine_tuning else "Training"
        print('.'*128); print(f"{action} Model for {asset_name:<16} (ID={asset_id:<2})")
        curl_models = get_Xy_and_model_for_asset(asset_id, df_train, df_asset_details, fine_tuning=fine_tuning, tuning_id=tuning_id)
    else:
        print('.'*42); print(f"Loading of Model {asset_name:<16} (ID={asset_id:<2})")
        curl_models = get_load_model_for_asset(asset_id, df_train)
    models[asset_id] = curl_models

# OUTPUT

In [None]:
# Quick per-asset sanity check: correlation between predictions and Target on
# the first `sample` validation rows of every asset listed in id_test.
id_test = [0,1,2,3,4,5,6,7,8,9,10,11,12,13]
AD      = df_asset_details.set_index("Asset_ID")
if test_baseline_model:
    sample = 15  # number of leading validation rows scored per asset
    for asset_id in id_test:
        curl_models = models[asset_id]
        # Filter on the asset id itself rather than on the loop position, so
        # id_test may hold any subset or ordering of ids.
        record      = df_valid[df_valid.Asset_ID == asset_id].head(sample)
        y_true      = record['Target']
        x_test      = get_features(record)
        gc.collect()

        # Predict row by row with the model stored at index 2 of the fold list.
        # NOTE(review): `[0][-1][-1]` keeps the last element of the first
        # returned array, while the full-validation cell reads index `[2]` of
        # the predict() result -- confirm which output is the target prediction.
        x_pred = [curl_models[2].predict(x_test.iloc[i].to_frame().T)[0][-1][-1]
                  for i in tqdm(range(x_test.shape[0]))]
        print(f"Test score for LR baseline of {AD.Asset_Name[asset_id]:<21}:{np.corrcoef(x_pred, y_true)[0,1]:.5f}")

In [None]:
# Overall validation score: predict every asset's validation rows with the
# fold-averaged models and correlate against df_valid.Target.
if test_baseline_model:
    per_asset_frames = []
    # Iterate over the ids actually present instead of range(n_unique), which
    # silently assumes the ids are exactly 0..n-1.
    for asset_id in tqdm(sorted(int(a) for a in df_valid.Asset_ID.unique())):
        record      = df_valid[df_valid.Asset_ID == asset_id]
        record      = record.drop(['Target','Asset_ID'],axis=1)
        x_test      = get_features(pd.DataFrame(record))
        curl_models = models[asset_id]
        # NOTE(review): if predict() returns a list of per-output arrays,
        # `[2][0]` keeps only the first row of the third output, so the mean
        # over models is one value broadcast to every row of x_test --
        # confirm this is intended rather than a per-row average of `[2]`.
        x_test['y_pred'] = np.mean(np.concatenate([model.predict(x_test)[2][0] for model in curl_models], axis = 0), axis = 0)
        per_asset_frames.append(x_test)

    # Concatenate once (latest asset first, matching the previous stacking
    # order) instead of growing a DataFrame inside the loop.
    x_pred = pd.concat(per_asset_frames[::-1]).sort_index()
    # NOTE(review): corrcoef pairs values by position; this assumes
    # get_features keeps every row of df_valid and that df_valid's index is
    # already sorted -- TODO confirm.
    print('Test score for LR baseline: ', f"{np.corrcoef(x_pred.y_pred,df_valid.Target)[0,1]:.5f}")

This may not be the best method. However, it can still serve as a learning example that is as easy to use as the previous version.
There should be a lazier (simpler) method; an example follows.

# LAZY METHOD EXAMPLE

In [None]:
def get_model(hp, dim_f, dim_id=1, weight = 1.0, n_asset=14):
    '''
    Build and compile a tunable two-input network: an asset-id embedding branch
    and a bottleneck-encoder branch over the features, merged into an MLP head.

    Adapted from:
    (https://www.kaggle.com/vmuzhichenko/g-research-parallel-lstm-training, https://www.kaggle.com/ysairaajpatro/g-research-lstm-temporal-attention-notebook) +
    [https://www.kaggle.com/aimind/bottleneck-encoder-mlp-keras-tuner-8601c5/notebook] +
    [https://www.kaggle.com/lonnieqin/ubiquant-market-prediction-with-dnn]
    The linked notebooks show how a model of this kind is tuned and applied.

    Parameters
    ----------
    hp      : object exposing `Int(name, min, max)` and `Float(name, min, max)`
              (presumably a Keras Tuner `HyperParameters` instance -- confirm).
    dim_f   : number of feature columns fed to the encoder branch.
    dim_id  : width of the asset-id input.
    weight  : forwarded to `mse_w` to build the loss of the 'output' head.
    n_asset : embedding output size; the dense layer that follows the
              embedding has `n_asset * 3` units.

    Returns
    -------
    Compiled `tf.keras.Model` taking `[asset_id_inputs, features_inputs]` and
    producing `[decoder, ae_out, output]`.

    Notes
    -----
    Relies on `investment_id_lookup_layer`, `investment_id_size` and `mse_w`
    being defined at module level before this function is called.
    '''
    # Local alias so every layer is resolved through the already-imported `tf`.
    layers = tf.keras.layers

    features_inputs = tf.keras.Input((dim_f, ))
    asset_id_inputs = tf.keras.Input((dim_id, ))

    # Asset-id branch: lookup -> embedding -> flatten -> dense.
    asset_id_x = investment_id_lookup_layer(asset_id_inputs)
    asset_id_x = layers.Embedding(investment_id_size, n_asset, input_length=1)(asset_id_x)
    asset_id_x = layers.Reshape((-1, ))(asset_id_x)
    asset_id_x = layers.Dense(n_asset*3, activation='mish')(asset_id_x)

    # Feature branch: normalise, add noise, then three tunable dense layers.
    x0  = layers.BatchNormalization()(features_inputs)
    encoder = layers.GaussianNoise(hp.Float('noise',0.0,0.5))(x0)
    encoder = layers.Dense(hp.Int('num_layers_en_0',2**6,2**12))(encoder)
    encoder = layers.Dense(hp.Int('num_layers_en_1',2**6,2**12))(encoder)
    encoder = layers.Dense(hp.Int('num_layers_en_2',2**6,2**12))(encoder)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('mish')(encoder)

    # NOTE(review): the decoder width is a tuned value, so the target supplied
    # for the 'decoder' loss must have that same width -- confirm at fit time.
    decoder = layers.Dropout(hp.Float('dropout_de',0.0,0.5))(encoder)
    decoder = layers.Dense(hp.Int('num_layers_de',2**5, 2**7), name = 'decoder')(decoder)

    # Auxiliary head on top of the decoder.
    x_ae = layers.Dense(hp.Int('num_layers_ae',2**5, 2**7))(decoder)
    x_ae = layers.BatchNormalization()(x_ae)
    x_ae = layers.Activation('mish')(x_ae)
    x_ae = layers.Dropout(hp.Float('dropout_ae',0.0,0.5))(x_ae)

    out_ae = layers.Dense(1, activation='mish', name = 'ae_out')(x_ae)

    # Main head input: normalised raw features concatenated with the encoding.
    feature_x = layers.Concatenate()([x0, encoder])
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.Dropout(hp.Float('dropout_fx',0.0,0.5))(feature_x)

    x = layers.Concatenate(axis=1)([asset_id_x, feature_x])
    x = layers.Dense(hp.Int('num_layers_mlp_0',2**5,2**9), activation="swish")(x)
    x = layers.Dropout(hp.Float('dropout_mlp_0',0.0,0.5))(x)
    x = layers.Dense(hp.Int('num_layers_mlp_1',2**5,2**9), activation="swish")(x)
    x = layers.Dropout(hp.Float('dropout_mlp_1',0.0,0.5))(x)
    # Hyperparameter name corrected from 'num_layers_,mlp_2' (stray comma).
    x = layers.Dense(hp.Int('num_layers_mlp_2',2**5,2**9), activation="swish")(x)
    x = layers.Dropout(hp.Float('dropout_mlp_2',0.0,0.5))(x)

    output = layers.Dense(1, activation='swish', name = 'output')(x)
    # The id input tensor is `asset_id_inputs`; the undefined name
    # `investment_id_inputs` previously used here raised NameError.
    model = tf.keras.Model(inputs=[asset_id_inputs, features_inputs], outputs=[decoder, out_ae, output])
    model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.01),
                  loss = {'decoder': tf.keras.losses.MeanAbsoluteError(),
                          'ae_out' : tf.keras.losses.MeanAbsoluteError(),
                          'output' : mse_w(weight),
                         },
                  metrics = {'decoder': tf.keras.metrics.MeanAbsoluteError(name = 'mae'),
                             'ae_out' : tf.keras.metrics.MeanAbsoluteError(name = 'mae'),
                             'output' : tf.keras.metrics.RootMeanSquaredError(name='rmse'),
                            },
                 )
    return model

# REFERENCE SOURCE CODE
1. [1st Place of Jane Street ➜ Adapted to Crypto ](https://www.kaggle.com/yamqwe/1st-place-of-jane-street-adapted-to-crypto/notebook#Submit-To-Kaggle-%F0%9F%87%B0)< [Jane Street: Supervised Autoencoder MLP](https://www.kaggle.com/gogo827jz/jane-street-supervised-autoencoder-mlp) < [Bottleneck encoder + MLP + Keras Tuner 8601c5](https://www.kaggle.com/aimind/bottleneck-encoder-mlp-keras-tuner-8601c5/notebook) : [Yam Peleg](https://www.kaggle.com/yamqwe), [Yirun Zhang](https://www.kaggle.com/gogo827jz), [骥](https://www.kaggle.com/aimind), Others

# PREVIOUS LEVEL
* [Crypto Forecasting(0/1) : LGBM-QHO : Natapong Nitarach](https://www.kaggle.com/code/natnitarach/crypto-forecasting-0-1-lgbm-qho)