# In [None]:
import os
import pandas as pd
import numpy as np
import gc
import matplotlib.pyplot as plt
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
from tensorflow.keras import layers
from tensorflow import keras
from scipy import stats

def add_stock_sum_feature(train):
    """Attach the per-``time_id`` row count as the feature column ``f_300``.

    For every ``time_id`` the number of non-null ``f_0`` entries is counted
    and that count is broadcast onto each row sharing the ``time_id``.

    The count is first written onto ``train`` itself under the temporary name
    ``trading_stock_sum`` (so the caller's frame gains that column); the
    value returned is the frame produced by renaming that column to
    ``f_300``.
    """
    rows_per_time_id = train.groupby(['time_id'])['f_0'].count()
    lookup = rows_per_time_id.to_dict()
    train["trading_stock_sum"] = train["time_id"].map(lookup)
    renamed = train.rename(columns={'trading_stock_sum': 'f_300'})
    return renamed

# Load the pickled training frame and append the per-time_id row-count
# feature (column f_300) to it.
train = add_stock_sum_feature(
    pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
)

# Every column whose name contains 'f_' is counted as a feature; the feature
# names are then regenerated as f_0 .. f_{n_features - 1}.
n_features = len([column for column in train.columns if 'f_' in column])
features = ['f_' + str(index) for index in range(n_features)]

# Pull the identifier columns out of the frame so the statistics below are
# computed over the remaining columns only.
investment_id = train.pop("investment_id")
time_id = train.pop("time_id")

# Column statistics are computed in float64 and stored back as float16.
train_mean = train.astype('float64').mean().astype('float16')
train_std = train.astype('float64').std().astype('float16')


from keras import backend as K

from tensorflow.keras.callbacks import Callback
class WarmupExponentialDecay(Callback):
    """Per-batch learning-rate schedule: linear warm-up, then exponential decay.

    While fewer than ``steps_per_epoch * warmup_epochs`` batches have been
    seen, the optimizer's learning rate is raised linearly towards
    ``lr_base``. After that it is set to ``lr_base * (1 - decay) ** k``,
    where ``k`` is the number of batches seen since warm-up ended.

    Args:
        lr_base: Learning rate reached at the end of warm-up.
        lr_min: Minimum starting learning rate. Stored only; the schedule
            does not use it (not implemented).
        decay: Exponential decay rate applied per batch after warm-up.
        warmup_epochs: Number of epochs over which the rate ramps up.
    """

    def __init__(self, lr_base=1e-4, lr_min=0.0, decay=0, warmup_epochs=3):
        # Initialise the base Callback before adding our own state.
        super().__init__()
        self.num_passed_batchs = 0  # counter of batches processed so far
        self.warmup_epochs = warmup_epochs
        self.lr = lr_base  # learning_rate_base
        self.lr_min = lr_min  # minimum starting learning rate (not implemented)
        self.decay = decay  # exponential decay rate
        self.steps_per_epoch = 0  # filled in lazily on the first batch

    def on_batch_begin(self, batch, logs=None):
        """Set the optimizer learning rate for the batch about to run."""
        # ``params`` holds values the model passes to the callback.
        if self.steps_per_epoch == 0:
            # Resolve this only once so it is not changed later, e.g. while
            # the validation set is being run.
            if self.params['steps'] is None:
                self.steps_per_epoch = np.ceil(1. * self.params['samples'] / self.params['batch_size'])
            else:
                self.steps_per_epoch = self.params['steps']
        if self.num_passed_batchs < self.steps_per_epoch * self.warmup_epochs:
            # Warm-up phase: linear ramp from ~0 up to the base rate.
            K.set_value(self.model.optimizer.lr,
                        self.lr * (self.num_passed_batchs + 1) / self.steps_per_epoch / self.warmup_epochs)
        else:
            # Decay phase: exponent counts batches since warm-up ended.
            K.set_value(self.model.optimizer.lr,
                        self.lr * ((1 - self.decay) ** (self.num_passed_batchs - self.steps_per_epoch * self.warmup_epochs)))
        self.num_passed_batchs += 1

    def on_epoch_begin(self, epoch, logs=None):
        """Print the optimizer's current learning rate at the start of an epoch."""
        print("learning_rate:", K.get_value(self.model.optimizer.lr))

def pearson_r(y_true, y_pred):
    """Pearson correlation between ``y_true`` and ``y_pred`` as a Keras metric.

    Both tensors are centred on their mean along axis 0. The sum of the
    products of the centred values is divided by the square root of the
    product of the two sums of squares. No guard is applied for a zero
    denominator.
    """
    true_centered = y_true - K.mean(y_true, axis=0)
    pred_centered = y_pred - K.mean(y_pred, axis=0)

    cross_sum = K.sum(true_centered * pred_centered)
    true_sq_sum = K.sum(true_centered * true_centered)
    pred_sq_sum = K.sum(pred_centered * pred_centered)

    denominator = K.sqrt(true_sq_sum * pred_sq_sum)
    return K.mean(cross_sum / denominator)

def cccloss(t, o):
    """Concordance-correlation-coefficient loss, ``1 - CCC``.

    The coefficient is computed along axis 0 as::

        CCC = 2 * cov(o, t) / (var(o) + var(t) + (mean(o) - mean(t)) ** 2)

    Args:
        t: Target tensor.
        o: Output (prediction) tensor.

    Returns:
        ``1 - CCC``, reduced along axis 0.
    """
    o_m = K.mean(o, axis=0)
    t_m = K.mean(t, axis=0)
    # The CCC denominator uses variances, not standard deviations.
    o_var = K.var(o, axis=0)
    t_var = K.var(t, axis=0)
    covariance = K.mean((o - o_m) * (t - t_m), axis=0)
    # The mean-shift term is squared; a square root here would produce NaN
    # whenever the output mean is below the target mean.
    ccc = 2 * covariance / (o_var + t_var + K.square(o_m - t_m))
    return 1 - ccc

def mix_loss(y_true, y_pred):
    """Combined training loss: ``2 * MSE + (1 - pearson_r) + cccloss``.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor.

    Returns:
        The weighted sum of the three loss terms.
    """
    mse_loss = tf.keras.losses.MeanSquaredError()(y_true, y_pred)
    corr_loss = 1 - pearson_r(y_true, y_pred)
    # cccloss already returns ``1 - ccc``; subtracting it from 1 again would
    # add the coefficient itself to the loss and reward discordance.
    ccc_loss = cccloss(y_true, y_pred)
    return 2 * mse_loss + corr_loss + ccc_loss

def preprocess_test(investment_id, feature):
    """Arrange one test sample as ``(inputs, label)`` with a constant ``0`` label."""
    model_inputs = (investment_id, feature)
    placeholder_label = 0
    return model_inputs, placeholder_label
def make_test_dataset(feature, investment_id, batch_size=1024):
    """Build the batched ``tf.data`` pipeline fed to the models at test time.

    ``investment_id`` and ``feature`` are sliced together, each element is
    passed through ``preprocess_test``, and the result is batched, cached
    and prefetched.
    """
    paired = tf.data.Dataset.from_tensor_slices((investment_id, feature))
    labelled = paired.map(preprocess_test)
    batched = labelled.batch(batch_size)
    return batched.cache().prefetch(tf.data.experimental.AUTOTUNE)

def inference(models, ds):
    """Average the predictions of ``models`` on ``ds`` and de-normalise them.

    Args:
        models: Iterable of models exposing ``predict``.
        ds: Dataset passed unchanged to every model's ``predict``.

    Returns:
        The element-wise mean of the flattened per-model predictions,
        multiplied by the last entry of ``train_std`` and shifted by the
        last entry of ``train_mean``.
    """
    preds = [model.predict(ds).ravel() for model in models]
    ans = np.mean(preds, axis=0)
    # ``.iloc`` makes the positional lookup explicit: integer keys on a
    # label-indexed Series are deprecated as positional access in pandas.
    # NOTE(review): the last entry of the statistics corresponds to the last
    # column of ``train``, which appears to be the appended ``f_300`` column
    # rather than the target -- confirm this is the intended scale/offset.
    ans = ans * train_std.iloc[-1] + train_mean.iloc[-1]
    return ans


def load_models(model_path, epoch_version):
    """Load five saved models, one per ``model_<i>`` sub-directory.

    Model ``i`` is read from ``{model_path}/model_{i}/{epoch_version[i]}``
    with ``pearson_r`` and ``mix_loss`` registered as custom objects.
    """
    def _load_fold(fold):
        checkpoint = f"{model_path}/model_{fold}/{epoch_version[fold]}"
        return keras.models.load_model(
            checkpoint,
            custom_objects={'pearson_r': pearson_r, 'mix_loss': mix_loss},
        )

    return [_load_fold(fold) for fold in range(5)]



# Directory that holds the model_<i> sub-directories.
model_path = "../input/early-epoch/output/dnn-clip-5-feature-multi-test/100_1300__1"
# Use checkpoint "10" for each of the five models.
epoch_version = [10] * 5
models = load_models(model_path, epoch_version)

import ubiquant

env = ubiquant.make_env()
iter_test = env.iter_test()
for test_df, sample_prediction_df in iter_test:
    # When time_id is missing, take it from row_id: the integer before the
    # first underscore.
    if 'time_id' not in test_df.columns:
        test_df['time_id'] = test_df['row_id'].apply(
            lambda row_id: int(row_id.split('_')[0])
        )

    # Add the f_300 row-count feature, as was done for the training frame.
    test_df = add_stock_sum_feature(test_df)

    # Standardise with the training statistics and clip to [-5, 5].
    centered = test_df[features] - train_mean[features]
    input_feature = (centered / train_std[features]).clip(-5, 5)

    ds = make_test_dataset(input_feature, test_df["investment_id"])
    sample_prediction_df['target'] = inference(models, ds)
    env.predict(sample_prediction_df)

