In [None]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler

from tensorflow.keras.models import Model
from tensorflow.keras import layers, Input
from tensorflow.keras import losses, optimizers

In [None]:
# Read the training CSV into memory (no row or column subsetting is applied).
train_csv_path = "../input/jane-street-market-prediction/train.csv"
train_df = pd.read_csv(train_csv_path)

In [None]:
# --- Target construction -----------------------------------------------------
# Every column whose name contains 'resp' is collapsed into one score: the
# first principal component of those columns. The result overwrites 'resp'.
resp = [c for c in train_df.columns if 'resp' in c]
pca = PCA(n_components=1)
resp_pc = pca.fit_transform(train_df[resp]).ravel()

# The sign of a principal component is mathematically arbitrary. Orient it so
# that it increases with the columns it summarises (positive net loading);
# otherwise the `> 0` test used for `action` below could label the opposite
# direction. Only the projected scores are flipped; `pca` itself is unchanged.
if pca.components_[0].sum() < 0:
    resp_pc = -resp_pc
train_df['resp'] = resp_pc

# --- Feature imputation ------------------------------------------------------
# Missing values in each feature column are replaced by that column's mean
# (pandas' mean skips NaN). Done column by column to avoid materialising a
# second copy of the whole feature matrix.
features = [c for c in train_df.columns if 'feature' in c]
for col in features:
    train_df[col] = train_df[col].fillna(train_df[col].mean())

# Per-feature means after imputation, printed for inspection.
average = train_df[features].mean()
print(average)

# --- Label -------------------------------------------------------------------
# action = 1 when weight * resp is strictly positive, else 0.
train_df['action'] = ((train_df['weight'].values * train_df['resp'].values) > 0).astype('int')

X_train = train_df.loc[:, features]
# Select the label by name rather than by "last column" position, so that the
# selection does not silently break if another column is ever appended.
y_train = train_df['action']

# --- Scaling -----------------------------------------------------------------
# `sc` is kept around: the inference cell reuses it to transform test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

# Free the raw frame; only X_train / y_train / sc are needed from here on.
del train_df

In [None]:
# np.asarray returns the existing buffer when the input is already an ndarray,
# whereas np.array would duplicate the whole training matrix in memory right
# before the original reference is deleted at the end of this cell.
X_train_np = np.asarray(X_train)
y_train_np = np.asarray(y_train)

# Take the input width from the data instead of hard-coding it, so the network
# keeps matching the feature matrix if the feature list changes.
n_features = X_train_np.shape[1]

# Feed-forward binary classifier: a stack of Dense(relu) + BatchNormalization
# blocks of shrinking width, a final Dense(4, relu), and a single sigmoid unit.
input_tensor = Input(shape=(n_features,))
x = input_tensor
for units in (32, 16, 16, 16, 8):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.BatchNormalization()(x)
x = layers.Dense(4, activation='relu')(x)
output_tensor = layers.Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, output_tensor)

model.compile(optimizer=optimizers.RMSprop(), loss=losses.binary_crossentropy, metrics=['accuracy'])

# Drop the original names; the data stays alive through X_train_np / y_train_np.
del X_train, y_train

In [None]:
# Fit the compiled model on the full training arrays; the returned History
# object is kept in `history`.
history = model.fit(
    x=X_train_np,
    y=y_train_np,
    epochs=25,
    batch_size=256,
)

In [None]:
# Submission loop: score every test row served by the competition API.
import janestreet
from tqdm import tqdm

env = janestreet.make_env()
iter_test = env.iter_test()

# Fill values for missing test features (all feature columns except the first).
# `sc` was fitted on the mean-imputed training features, so `sc.mean_` equals
# the NaN-aware per-feature training mean used for imputation at training time.
# This replaces re-reading train.csv and calling np.mean on the raw columns:
# np.mean returns NaN for any column containing a missing value, and the old
# `isnan(x) * f_mean` term then turned those columns into NaN for every row.
# Consequently `train_df` is no longer reloaded (or defined) by this cell.
f_mean = sc.mean_[1:]

# Predicted probability above which the trade is taken.
ACTION_THRESHOLD = 0.5

for (test_df, pred_df) in tqdm(iter_test):
    if test_df['weight'].item() > 0:
        # Explicit float copy so the in-place fill below never writes into a
        # read-only or shared buffer owned by `test_df`.
        x_tt = test_df.loc[:, features].to_numpy(dtype=float, copy=True)
        # The first feature column is left untouched, as in the original code.
        missing = np.isnan(x_tt[:, 1:])
        if missing.any():
            x_tt[:, 1:] = np.where(missing, f_mean, x_tt[:, 1:])
        # Rebuild a named frame so the scaler sees the same columns it was fitted on.
        x_tt = pd.DataFrame(x_tt, columns=features)
        x_tt = sc.transform(x_tt)
        pred = model.predict(x_tt, verbose=0)
        # The model outputs a sigmoid probability; casting it straight to int
        # truncates everything below 1.0 to 0, so threshold it first.
        pred_df.action = (pred.ravel() > ACTION_THRESHOLD).astype(int)
    else:
        # Rows with non-positive weight are never traded.
        pred_df.action = 0
    env.predict(pred_df)