In [None]:
import pickle

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import torch
from tensorflow import keras


# Submission

In [None]:
def correlationMetric(x, y, axis=-2):
    """Metric returning the Pearson correlation coefficient of two tensors over some axis, default -2.

    Args:
        x: Tensor-like value; converted with ``tf.convert_to_tensor``.
        y: Tensor-like value; cast to the dtype of ``x``.
        axis: Axis along which the correlation is computed (default -2).

    Returns:
        Tensor of correlation coefficients with ``axis`` reduced away.
        The result is NaN wherever either input has zero variance along
        ``axis``, because the final division is then 0 / 0.
    """
    x = tf.convert_to_tensor(x)
    y = tf.cast(y, x.dtype)
    # keepdims=True keeps the reduced axis as size 1 so the means broadcast
    # back against the inputs regardless of which axis is reduced.
    xmean = tf.reduce_mean(x, axis=axis, keepdims=True)
    ymean = tf.reduce_mean(y, axis=axis, keepdims=True)
    xdev = x - xmean
    ydev = y - ymean
    # Un-normalised (co)variances: the 1/n factors cancel in the ratio below.
    xvar = tf.reduce_sum(tf.square(xdev), axis=axis)
    yvar = tf.reduce_sum(tf.square(ydev), axis=axis)
    cov = tf.reduce_sum(xdev * ydev, axis=axis)
    corr = cov / tf.sqrt(xvar * yvar)
    return corr

In [None]:
# Load the saved Keras model; the custom metric must be supplied by name so
# that deserialisation can resolve it.
nn_version = 3
nn_model = keras.models.load_model(f'../input/nn-version{nn_version}/nn_model{nn_version}', 
                                   custom_objects={'correlationMetric':correlationMetric})

# Load the pickled (model, importance) pair for LGBM.
# NOTE(review): unpickling executes code embedded in the file - only load
# artifacts from a trusted source.
lgbm_version = 3
# The context manager guarantees the file handle is closed after unpickling.
with open(f'../input/lgbm-version{lgbm_version}/lgbm_results{lgbm_version}.pkl', 'rb') as lgbm_file:
    lgbm_model, importance = pickle.load(lgbm_file)

In [None]:
def predict_lgbm(df, columns, mod):
    '''
    Select `columns` from `df` and return the result of `mod.predict` on that selection.
    '''
    feature_frame = df[columns]
    predictions = mod.predict(feature_frame)
    return predictions

In [None]:
def predict_nn(df, columns, mod):
    '''
    Feed the `columns` subset of `df` to `mod.predict` and hand back its output unchanged.
    '''
    model_input = df[columns]
    return mod.predict(model_input)

In [None]:
import ubiquant

env = ubiquant.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission

for (test_df, sample_prediction_df) in iter_test:

    # Model inputs are the columns whose names start with 'f_'.
    features = [col for col in test_df if col.startswith('f_')]

    test_df['target_lgbm'] = predict_lgbm(test_df, features, lgbm_model)
    test_df['target_nn']   = predict_nn(  test_df, features, nn_model)

    # Rescale each model's predictions to unit standard deviation within this
    # batch so both contribute on a comparable scale to the average below.
    for x in ['target_lgbm', 'target_nn']:
        scale = test_df[x].std()
        # std() is NaN for a single-row batch and 0 for constant predictions;
        # dividing would fill the column with NaN/inf, so leave it unscaled.
        if np.isfinite(scale) and scale > 0:
            test_df[x] = test_df[x] / scale

    test_df['target_ensemble'] = test_df[['target_lgbm', 'target_nn']].mean(axis=1)

    # Choose version to submit
    test_df['target'] = test_df['target_ensemble']

    env.predict(test_df[['row_id','target']])