## Imports, data load, metric function definition

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree

In [2]:
input_path = "./dataset/source/"

X_train_filename = "X_train_surge_new.npz"
Y_train_filename = "Y_train_surge.csv"
X_test_filename = "X_test_surge_new.npz"

X_train_file = "{}{}".format(input_path, X_train_filename)
Y_train_file = "{}{}".format(input_path, Y_train_filename)
X_test_file = "{}{}".format(input_path, X_test_filename)

X_train = np.load(X_train_file)
Y_train = pd.read_csv(Y_train_file)
X_test = np.load(X_test_file)

In [3]:
surge_train = np.array(Y_train)[:,1:]

In [4]:
def surge_prediction_metric(dataframe_y_true, dataframe_y_pred):
    weights = np.linspace(1, 0.1, 10)[np.newaxis]
    surge1_columns = [
        'surge1_t0', 'surge1_t1', 'surge1_t2', 'surge1_t3', 'surge1_t4',
        'surge1_t5', 'surge1_t6', 'surge1_t7', 'surge1_t8', 'surge1_t9' ]
    surge2_columns = [
        'surge2_t0', 'surge2_t1', 'surge2_t2', 'surge2_t3', 'surge2_t4',
        'surge2_t5', 'surge2_t6', 'surge2_t7', 'surge2_t8', 'surge2_t9' ]
    surge1_score = (weights * (dataframe_y_true[surge1_columns].values - dataframe_y_pred[surge1_columns].values)**2).mean()
    surge2_score = (weights * (dataframe_y_true[surge2_columns].values - dataframe_y_pred[surge2_columns].values)**2).mean()

    return surge1_score + surge2_score

## Benchmark
Train using kNN of pressure fields at two instants in time, with 40 neighbours

In [5]:
nfields = 2; time_step_slp = 8
slp_train = []
slp_all = X_train['slp']
for i in range(5599): # 5559
    slp_train.append(np.ndarray.flatten(slp_all[i,-1]))
    for j in range(1,nfields):
        slp_train[-1] = np.concatenate( ( slp_train[-1], np.ndarray.flatten(slp_all[i,-1-j*time_step_slp]) ) )
slp_train = np.array(slp_train)

In [6]:
slp_test = []
slp_all_test = X_test['slp']
for i in range(509):
    slp_test.append(np.ndarray.flatten(slp_all_test[i,-1]))
    for j in range(1,nfields):
        slp_test[-1] = np.concatenate( ( slp_test[-1], np.ndarray.flatten(slp_all_test[i,-1-j*time_step_slp]) ) )
slp_test = np.array(slp_test)

In [7]:
tree = BallTree(slp_train)

In [11]:
tree

<sklearn.neighbors._ball_tree.BallTree at 0x1bb55e2fe50>

In [21]:
surge_train

array([[ 0.58693592,  1.06958024,  0.76792754, ..., -0.42270688,
        -0.45623606, -0.82505705],
       [ 0.76792754, -0.1001619 ,  0.07077463, ..., -0.82505705,
        -0.99270295, -0.99270295],
       [ 0.07077463, -0.24428486, -0.35489084, ..., -0.99270295,
        -0.32211934, -0.88373311],
       ...,
       [ 0.48303332,  0.53330877,  1.44832196, ...,  1.22022298,
         1.44654495,  2.67036005],
       [ 1.44832196,  1.82036029,  1.66283054, ...,  2.67036005,
         2.62006628,  2.67874235],
       [ 1.66283054,  1.52876268,  1.28408882, ...,  2.67874235,
         1.22022298,  0.72566756]])

In [20]:
surge_test_benchmark = []; k = 40
for i in range(509):
    dist, ind = tree.query([slp_test[i]], k=k)
    #print(dist) # taille 40
    print(ind)#[0]) # taille 40
    #print(surge_train[ind[0]])
    surge_test_benchmark.append(np.mean(surge_train[ind[0]], axis=0))
    print(surge_test_benchmark)
    break
surge_test_benchmark = np.array(surge_test_benchmark)

[[3724 4466 1560 1561 4110 4108 4109 1990 1086 5125 3850 3720 1101 2626
  3277 1892 5497 4467   33 4996  683  651 4929 5124 5405 5211 1981 1858
  3326  500 5498 4111 1945 3176 3939  265 1102 4997 2333 1960]]
[array([-0.82345804, -0.80326407, -0.72030957, -0.65612458, -0.57786247,
       -0.40106047, -0.31927907, -0.24554174, -0.25015032, -0.31944665,
       -0.35606763, -0.3522956 , -0.35774409, -0.30912678, -0.21734065,
       -0.18150633, -0.05032341,  0.07163898,  0.06011333,  0.15902441])]


In [9]:
y_columns = [f'surge1_t{i}' for i in range(10)] + [f'surge2_t{i}' for i in range(10)]
y_test_benchmark = pd.DataFrame(data=surge_test_benchmark, columns=y_columns, index=X_test['id_sequence'])

output_path = "./dataset/output/benchmark_output/"
y_test_benchmark.to_csv(output_path + 'Y_test_benchmark.csv', index_label='id_sequence', sep=',')