In [None]:
import numpy as np
from tensorflow import keras as ks
import pandas as pd

---

In [None]:
from scipy.sparse import load_npz

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# notebook can read and write files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Training feature matrix, read from a SciPy sparse .npz file on the mounted Drive.
# NOTE(review): the path is relative, so it only resolves when the working
# directory is the parent of the mount point (/content) — confirm.
X_train = load_npz('drive/MyDrive/Colab Notebooks/X_train.npz')
# Bare expression: display the matrix summary (shape, dtype, stored elements).
X_train

<63768x63822 sparse matrix of type '<class 'numpy.float64'>'
	with 1992232 stored elements in Compressed Sparse Row format>

In [None]:
# Held-out feature matrix (SciPy sparse .npz); it is later passed to `fit` as validation data.
X_test = load_npz('drive/MyDrive/Colab Notebooks/X_test.npz')
# Bare expression: display the matrix summary to check the column count.
X_test

<21257x63822 sparse matrix of type '<class 'numpy.float64'>'
	with 651895 stored elements in Compressed Sparse Row format>

In [None]:
# Training targets stored as a NumPy .npy array.
y_train = np.load('drive/MyDrive/Colab Notebooks/y_train.npy')
# Show the shape to verify one target per row of X_train.
y_train.shape

(63768,)

In [None]:
# Held-out targets stored as a NumPy .npy array.
y_test = np.load('drive/MyDrive/Colab Notebooks/y_test.npy')
# Show the shape to verify one target per row of X_test.
y_test.shape

(21257,)

---

## Keras

In [None]:
from keras import backend as K
def r_square(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)`` over every element of the
    inputs, where ``SS_res`` is the sum of squared residuals and ``SS_tot``
    is the sum of squared deviations of ``y_true`` from its own mean.

    Args:
        y_true: Ground-truth target values (tensor or array-like).
        y_pred: Predicted values with the same number of elements as ``y_true``.

    Returns:
        A scalar tensor holding the R^2 score of the values passed in.

    Note:
        When used through ``metrics=[r_square]`` the function is evaluated on
        each batch separately, so the figure Keras reports is presumably an
        average of per-batch scores rather than a whole-dataset R^2.
    """
    # Flatten both operands so a (batch,) target and a (batch, 1) prediction
    # are compared element-wise instead of broadcasting to (batch, batch).
    y_pred = K.flatten(y_pred)
    # Align dtypes as well (e.g. float64 targets vs. float32 predictions).
    y_true = K.cast(K.flatten(y_true), y_pred.dtype)

    ss_res = K.sum(K.square(y_true - y_pred))
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    # epsilon keeps the division finite when all targets in the batch are equal.
    return 1 - ss_res / (ss_tot + K.epsilon())

In [None]:
from keras.layers import Dense, Input
from keras.models import Model

# Feed-forward regressor over the sparse feature matrix: three ReLU hidden
# layers (512 -> 192 -> 64) followed by a single linear output unit.
# The input width is read from the training matrix so the network always
# matches the data instead of relying on a hard-coded column count.
n_features = X_train.shape[1]

# `shape` is a 1-tuple (note the trailing comma); `sparse=True` lets the
# model consume sparse inputs without densifying them up front.
model_in = Input(shape=(n_features,), sparse=True)
out = Dense(512, activation='relu')(model_in)
out = Dense(192, activation='relu')(out)
out = Dense(64, activation='relu')(out)
out = Dense(1)(out)
model = Model(model_in, out)

# MSE loss optimised with Adam; `learning_rate` is the supported keyword
# (the `lr` alias is deprecated). R^2 is tracked as an extra metric.
model.compile(loss='mean_squared_error', optimizer=ks.optimizers.Adam(learning_rate=3e-3), metrics=[r_square])
model.summary()

Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 63822)]           0         
_________________________________________________________________
dense_48 (Dense)             (None, 512)               32677376  
_________________________________________________________________
dense_49 (Dense)             (None, 192)               98496     
_________________________________________________________________
dense_50 (Dense)             (None, 64)                12352     
_________________________________________________________________
dense_51 (Dense)             (None, 1)                 65        
Total params: 32,788,289
Trainable params: 32,788,289
Non-trainable params: 0
_________________________________________________________________


In [None]:
from keras.callbacks import EarlyStopping

In [None]:
# Early-stopping callback: watches `val_r_square` (higher is better, mode='max'),
# stops after 2 epochs without improvement and restores the best weights seen.
# The monitored name presumably comes from the `r_square` metric evaluated on
# the validation data passed to `fit`.
es = EarlyStopping(monitor='val_r_square', mode='max', patience=2, restore_best_weights=True)

In [None]:
# First training pass: mini-batches of 128, at most 1,000 epochs, cut short by `es`.
# NOTE(review): validation_data is the test split and `es` picks/restores weights
# based on it, so any score later reported on X_test/y_test would be
# optimistically biased — consider a separate validation split.
model.fit(x=X_train, y=y_train, batch_size=128, validation_data=(X_test, y_test), callbacks=[es], epochs=1_000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000


<tensorflow.python.keras.callbacks.History at 0x7faa51bf0e48>

In [None]:
# Second training pass: `fit` is called again on the same `model` object
# (it is not rebuilt in between), now with batch_size=256 and the same callback.
# NOTE(review): the test split is used as validation data for early stopping here.
model.fit(x=X_train, y=y_train, batch_size=256, validation_data=(X_test, y_test), callbacks=[es], epochs=1_000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000


<tensorflow.python.keras.callbacks.History at 0x7faa51b8e3c8>

In [None]:
# Third training pass: `fit` is called once more on the same `model` object,
# now with batch_size=512 and the same early-stopping callback.
# NOTE(review): the test split is used as validation data for early stopping here.
model.fit(x=X_train, y=y_train, batch_size=512, validation_data=(X_test, y_test), callbacks=[es], epochs=1_000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000


<tensorflow.python.keras.callbacks.History at 0x7faa51b05470>

In [None]:
# Persist the trained model to Drive (the .h5 extension selects the HDF5 format).
# NOTE(review): the model was compiled with the custom `r_square` metric, so
# reloading presumably needs `custom_objects={'r_square': r_square}` or
# `compile=False` — confirm.
model.save('drive/MyDrive/Colab Notebooks/my_model.h5')

---

In [None]:
# Feature matrix to generate predictions for (SciPy sparse .npz on the mounted Drive).
X_eval = load_npz('drive/MyDrive/Colab Notebooks/X_eval.npz')
# Bare expression: display the matrix summary to check the column count matches training.
X_eval

<30186x63822 sparse matrix of type '<class 'numpy.float64'>'
	with 932703 stored elements in Compressed Sparse Row format>

In [None]:
# Run the trained model on the evaluation features; with the single-unit output
# layer the result presumably has shape (n_rows, 1).
y_eval = model.predict(X_eval)

In [None]:
# Wrap the predictions in a one-column frame, then write it out with the
# row index exported under the header 'Id'.
submission = pd.DataFrame(y_eval, columns=['Predicted'])
submission.to_csv('output.csv', index_label='Id')