In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

# tf.data's AUTOTUNE constant; used in this notebook as the `num_parallel_calls`
# and `prefetch` argument of the input pipelines.
AUTO = tf.data.experimental.AUTOTUNE

In [2]:
# Function to get hardware strategy
def get_hardware_strategy():
    """Detect a TPU and return the matching ``tf.distribute`` strategy.

    When a TPU cluster can be resolved, this connects to it, initialises the
    TPU system, builds a ``TPUStrategy`` and, as a side effect, switches the
    global Keras mixed-precision policy to ``mixed_bfloat16``. Otherwise it
    falls back to TensorFlow's default strategy and leaves the policy alone.

    Returns:
        tuple: ``(tpu, strategy)`` where ``tpu`` is the ``TPUClusterResolver``
        (``None`` when no TPU was found) and ``strategy`` is the distribution
        strategy under which models should be built.
    """
    try:
        # TPU detection. No parameters necessary if TPU_NAME environment variable is
        # set: this is always the case on Kaggle.
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        # The resolver could not find a TPU; use the default strategy below.
        tpu = None

    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # `tf.distribute.TPUStrategy` is the stable name; the
        # `tf.distribute.experimental.TPUStrategy` alias is deprecated.
        strategy = tf.distribute.TPUStrategy(tpu)
        policy = tf.keras.mixed_precision.Policy('mixed_bfloat16')
        tf.keras.mixed_precision.set_global_policy(policy)
    else:
        # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
        strategy = tf.distribute.get_strategy()

    print("REPLICAS: ", strategy.num_replicas_in_sync)
    return tpu, strategy

# Resolve the accelerator first: the global batch size depends on the replica count.
tpu, strategy = get_hardware_strategy()

# --- Data ---
# Length of the "features" vector stored in every TFRecord example.
FEATURES = 300
# Passed to tf.data as `num_parallel_calls` / `prefetch` buffer size.
AUTO = tf.data.experimental.AUTOTUNE

# --- Training ---
# Number of passes over the training set per fold.
EPOCHS = 3
# Adam learning rate.
LR = 1e-3
# Verbosity level handed to `model.fit`.
VERBOSE = 1
# Global batch size: 2048 examples per replica.
BATCH_SIZE = strategy.num_replicas_in_sync * 2048

REPLICAS:  1


## Training

In [3]:
# Parsing schema for one serialized example: a scalar float32 target and a
# float32 feature vector of length FEATURES.
feature_dict = dict(
    target=tf.io.FixedLenFeature([], tf.float32),
    features=tf.io.FixedLenFeature([FEATURES], tf.float32),
)

def read_tfrecord(example):
    """Decode one serialized example into an ``(X, y)`` pair.

    Args:
        example: a single serialized record, parsed against ``feature_dict``.

    Returns:
        tuple: the parsed ``"features"`` entry followed by the parsed
        ``"target"`` entry.
    """
    parsed = tf.io.parse_single_example(example, feature_dict)
    return parsed["features"], parsed["target"]

In [4]:
# Debug snippet (left disabled): parse one raw example to check it against feature_dict
# ds = tf.data.TFRecordDataset('data/tfrecords/train/fold0/0.tfrec')
# iterator = iter(ds)
# raw_example = next(iterator)
# example = tf.io.parse_single_example(raw_example, feature_dict)

In [5]:
def build_model(shape):
    """Build and compile the fully connected regression network.

    The network stacks three ``Dropout(0.35) -> Dense(relu)`` blocks of 768,
    384 and 192 units, followed by a single linear output unit. It is created
    inside ``strategy.scope()`` and compiled with Adam (learning rate ``LR``)
    and a mean-squared-error loss.

    Args:
        shape: input shape without the batch dimension. Either an int (the
            number of features) or a tuple/list of dimensions.

    Returns:
        The compiled ``tf.keras`` model.
    """
    # `(shape)` is not a tuple; make the int -> 1-tuple conversion explicit
    # instead of relying on Keras to coerce a bare int.
    input_shape = (shape,) if isinstance(shape, int) else shape

    def fc_block(x, units):
        # Dropout is applied to the block's input, before the Dense layer.
        x = tf.keras.layers.Dropout(0.35)(x)
        x = tf.keras.layers.Dense(units, activation='relu')(x)
        return x

    with strategy.scope():
        inp = tf.keras.layers.Input(input_shape)
        x = fc_block(inp, units=768)
        x = fc_block(x, units=384)
        x = fc_block(x, units=192)
        # Pin the regression head to float32: under a mixed-precision global
        # policy (get_hardware_strategy sets 'mixed_bfloat16' on TPU) the
        # predictions and the loss would otherwise be computed in low precision.
        # With the default float32 policy this is a no-op.
        output = tf.keras.layers.Dense(1, activation='linear', dtype='float32')(x)
        model = tf.keras.models.Model(inputs=[inp], outputs=[output])
        opt = tf.keras.optimizers.Adam(learning_rate=LR)
        model.compile(
            optimizer=opt,
            loss=[tf.keras.losses.MeanSquaredError()],
        )
        return model

In [6]:
# Calculate pearson correlation coefficient
def pearson_coef(data):
    """Return the Pearson correlation between ``target`` and ``prediction``.

    Only the two columns of interest are correlated. Building the full
    ``data.corr()`` matrix would compute every pairwise correlation across
    all columns just to read one cell, and it fails on non-numeric columns
    in recent pandas versions.

    Args:
        data: a DataFrame containing ``'target'`` and ``'prediction'`` columns.

    Returns:
        The correlation coefficient as a float (NaN when it is undefined).
    """
    return data['target'].corr(data['prediction'])

# Calculate mean pearson correlation coefficient
def comp_metric(valid_df):
    """Average the per-``time_id`` Pearson coefficient.

    Args:
        valid_df: a DataFrame with a ``'time_id'`` column plus whatever
            columns ``pearson_coef`` reads from each group.

    Returns:
        The mean of ``pearson_coef`` evaluated on every ``time_id`` group.
    """
    per_time_id = valid_df.groupby(['time_id']).apply(pearson_coef)
    return np.mean(per_time_id)

In [7]:
# Number of cross-validation folds; the training loop iterates idx over
# range(FOLDS) and reads the matching `fold{idx}` directories.
FOLDS = 5

In [8]:
def _make_fold_dataset(pattern, shuffle_buffer=None, cycle_length=5):
    """Build a batched, prefetched dataset of ``(X, y)`` pairs from TFRecord shards.

    Args:
        pattern: glob pattern matching the ``.tfrec`` shards to read.
        shuffle_buffer: when given, size of the record-level shuffle buffer
            (reshuffled every epoch). ``None`` skips record shuffling.
        cycle_length: number of shards interleaved concurrently.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` examples
        decoded with ``read_tfrecord``.

    Raises:
        FileNotFoundError: if no file matches ``pattern``.
    """
    filenames = tf.io.gfile.glob(pattern)
    if not filenames:
        # Fail early with a clear message instead of an obscure tf.data error.
        raise FileNotFoundError(f"No TFRecord files match {pattern!r}")

    # Shard order is reshuffled on every pass over the dataset.
    files_ds = tf.data.Dataset.from_tensor_slices(filenames)
    files_ds = files_ds.shuffle(len(filenames), reshuffle_each_iteration=True)
    dataset = files_ds.interleave(lambda x: tf.data.TFRecordDataset(x),
                                  cycle_length=cycle_length,
                                  num_parallel_calls=AUTO)
    dataset = dataset.map(read_tfrecord, num_parallel_calls=AUTO)
    if shuffle_buffer:
        dataset = dataset.shuffle(shuffle_buffer, reshuffle_each_iteration=True)
    return dataset.batch(BATCH_SIZE).prefetch(AUTO)


# One reloaded checkpoint per fold, in fold order.
models = []
for idx in range(FOLDS):
    # Input pipelines: only the training records are shuffled at record level.
    train_dataset = _make_fold_dataset(f"data/tfrecords/train/fold{idx}/" + "*.tfrec",
                                       shuffle_buffer=100000)
    valid_dataset = _make_fold_dataset(f"data/tfrecords/validation/fold{idx}/" + "*.tfrec")

    # build a model
    model = build_model(FEATURES)
    # model.summary()
    model_path = f"data/keras_models/model_{idx}"
    # save_best_only=True keeps only the best checkpoint seen during training.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(model_path, save_best_only=True)
    history = model.fit(train_dataset,
                        epochs=EPOCHS,
                        verbose=VERBOSE,
                        validation_data=valid_dataset,
                        callbacks=[checkpoint],
    )
    # Score the checkpoint written by the callback, not the last-epoch weights.
    models.append(tf.keras.models.load_model(model_path))

    # validation
    # NOTE(review): read_pickle unpickles the file; only load pickles produced
    # by this project.
    valid_df = pd.read_pickle(f"data/tfrecords/validation/fold{idx}/validation.pkl")
    features = [col for col in valid_df.columns if col not in ['row_id', 'time_id', 'investment_id', 'target']]
    x_val = valid_df[features]
    model = models[idx]
    val_pred = model.predict(x_val, batch_size=BATCH_SIZE).astype(np.float32).reshape(-1)
    valid_df['prediction'] = val_pred

    pearson_coefficient = comp_metric(valid_df)
    print(f"The {idx} fold best model pearson coefficient is {pearson_coefficient}.")

2022-02-16 16:35:43.081144: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-16 16:35:43.113905: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-16 16:35:43.114084: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-16 16:35:43.114942: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Epoch 1/3
   1226/Unknown - 76s 59ms/step - loss: 0.8549

2022-02-16 16:37:15.227704: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: data/keras_models/model_0/assets
Epoch 2/3
Epoch 3/3


2022-02-16 16:40:20.934726: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 753939600 exceeds 10% of free system memory.
2022-02-16 16:40:21.356063: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 753939600 exceeds 10% of free system memory.


The 0 fold best model pearson coefficient is 0.12966906085025978.
Epoch 1/3
   1225/Unknown - 76s 60ms/step - loss: 0.8573INFO:tensorflow:Assets written to: data/keras_models/model_1/assets
Epoch 2/3
Epoch 3/3


2022-02-16 16:45:24.892359: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 753942000 exceeds 10% of free system memory.
2022-02-16 16:45:25.236109: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 753942000 exceeds 10% of free system memory.


The 1 fold best model pearson coefficient is 0.13296177969217302.
Epoch 1/3
   1226/Unknown - 75s 59ms/step - loss: 0.8561INFO:tensorflow:Assets written to: data/keras_models/model_2/assets
Epoch 2/3
Epoch 3/3


2022-02-16 16:50:28.174779: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 753936000 exceeds 10% of free system memory.


The 2 fold best model pearson coefficient is 0.1301504026689765.
Epoch 1/3
   1225/Unknown - 76s 60ms/step - loss: 0.8628INFO:tensorflow:Assets written to: data/keras_models/model_3/assets
Epoch 2/3
Epoch 3/3
The 3 fold best model pearson coefficient is 0.12566473912485027.
Epoch 1/3
   1226/Unknown - 79s 62ms/step - loss: 0.8427INFO:tensorflow:Assets written to: data/keras_models/model_4/assets
Epoch 2/3
Epoch 3/3
The 4 fold best model pearson coefficient is 0.14051524029890652.
