In [None]:
import gc

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Read only the first 100000 rows of the training CSV.
train = pd.read_csv(
    "../input/ubiquant-market-prediction/train.csv",
    nrows=100000,
)

# Divide investment_id by the largest id present in the loaded rows.
max_investment_id = train["investment_id"].max()
print(max_investment_id)
train["investment_id"] = train["investment_id"].div(max_investment_id)

print(train.shape)
train.head()

In [None]:
# Preview the first five rows of the loaded frame.
train.head(n=5)

In [None]:
# Features: everything except the label and the row_id / time_id columns.
X_train = train.drop(columns=["target", "row_id", "time_id"])
# Labels as a plain NumPy array so they can be indexed by position later.
y_train = train["target"].to_numpy()
# time_id per row; used as the grouping key for the GroupKFold split.
group_df = train["time_id"]

In [None]:
# Drop the name `train` (features, labels and groups were extracted above)
# and run a garbage-collection pass. `gc` is imported in the top import cell.
# The trailing semicolon hides the integer returned by gc.collect().
del train
gc.collect();

In [None]:
# Cross-validation / training settings.
N_SPLITS = 5
EPOCHS = 10
BATCH_SIZE = 64
# NOTE: EarlyStopping only halts after `patience` consecutive epochs without
# improvement. With PATIENCE >= EPOCHS it can never stop training early, so
# raise EPOCHS (or lower PATIENCE) if early stopping is actually wanted.
PATIENCE = 10
MIN_DELTA = 0.001


def build_model():
    """Build and compile a fresh Dense -> Conv1D regression network.

    Returns:
        A compiled ``keras.Sequential`` model with MSE loss, the Adam
        optimizer and RMSE as an additional metric. The model has no explicit
        input layer, so its input width is fixed on the first call to ``fit``.
    """
    model = keras.Sequential([
        layers.Dense(4096, activation='relu'),
        # The 4096 dense units are reshaped to 256 steps x 16 channels so a
        # 1-D convolution can be applied to them.
        layers.Reshape((256, 16)),
        layers.Conv1D(filters=16, kernel_size=5, strides=1, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='linear'),
    ])
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=[keras.metrics.RootMeanSquaredError()],
    )
    return model


cv = GroupKFold(n_splits=N_SPLITS)
# Out-of-fold predictions, filled one validation fold at a time.
oof_train = np.zeros((len(X_train),))

for fold_id, (train_index, valid_index) in enumerate(cv.split(X_train, groups=group_df)):
    # Reset Keras global state so models from earlier folds do not pile up.
    keras.backend.clear_session()

    # GroupKFold yields positional indices, so select rows by position
    # (.iloc); .loc would only be correct for a default RangeIndex.
    X_tr = X_train.iloc[train_index]
    X_val = X_train.iloc[valid_index]
    y_tr = y_train[train_index]
    y_val = y_train[valid_index]

    model = build_model()

    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=PATIENCE,
        min_delta=MIN_DELTA,
        restore_best_weights=True,
    )

    model.fit(
        X_tr, y_tr,
        validation_data=(X_val, y_val),
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=[early_stopping],
    )

    # predict() returns shape (n_samples, 1); flatten to 1-D for assignment.
    oof_train[valid_index] = model.predict(X_val).ravel()
    model.save(f'keras_{fold_id}.h5')


In [None]:
# Pearson correlation between the targets and the out-of-fold predictions.
oof_frame = pd.DataFrame({"y_train": y_train, "oof_train": oof_train})
oof_frame.corr().loc["oof_train", "y_train"]