In [None]:
import datetime
import math
import random
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.colors import ListedColormap
import seaborn as sns
from cycler import cycler
from IPython.display import display
from tqdm import tqdm

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Dense, Input, InputLayer, Add, Concatenate, Dropout, BatchNormalization
from tensorflow.keras.utils import plot_model

# Use gold (#ffd700) as the first colour of the plotting colour cycle and keep
# the remaining colours of the current cycle unchanged.
plt.rcParams['axes.prop_cycle'] = cycler(
    color=['#ffd700', *plt.rcParams['axes.prop_cycle'].by_key()['color'][1:]]
)

In [None]:
# Plot training history
def plot_history(history, *, n_epochs=None, plot_lr=False, title=None, bottom=None, top=None):
    """Plot (the last n_epochs epochs of) the training history.

    Plots the training loss and, when present, the validation loss and the
    learning rate.

    Parameters
    ----------
    history : mapping of str -> sequence
        Per-epoch values keyed by metric name (e.g. ``History.history``).
        Must contain ``'loss'``; ``'val_loss'`` and a learning-rate entry
        (``'lr'`` or ``'learning_rate'``) are optional.
    n_epochs : int, optional
        Only the last ``n_epochs`` epochs are drawn; ``None`` draws all.
    plot_lr : bool
        If True and a learning-rate entry exists, draw it on a second y axis.
    title : str, optional
        Figure title.
    bottom, top : float, optional
        Limits for the loss axis.
    """
    plt.figure(figsize=(15, 6))
    n_total = len(history['loss'])
    from_epoch = 0 if n_epochs is None else max(n_total - n_epochs, 0)
    epochs = np.arange(from_epoch, n_total)

    # Plot training and validation losses
    plt.plot(epochs, history['loss'][from_epoch:], label='Training loss')
    # Explicit membership test instead of a blanket `except KeyError`, so that
    # an unrelated KeyError raised while plotting is not silently swallowed.
    if 'val_loss' in history:
        val_loss = np.array(history['val_loss'])
        plt.plot(epochs, val_loss[from_epoch:], label='Validation loss')
        best_epoch = np.argmin(val_loss)
        best_val_loss = val_loss[best_epoch]
        if best_epoch >= from_epoch:
            plt.scatter([best_epoch], [best_val_loss], c='r', label=f'Best val_loss = {best_val_loss:.5f}')
        if best_epoch > 0:
            # Lowest val_loss among the epochs *before* the best one (this is
            # what the legend calls "second best").
            almost_epoch = np.argmin(val_loss[:best_epoch])
            almost_val_loss = val_loss[almost_epoch]
            if almost_epoch >= from_epoch:
                plt.scatter([almost_epoch], [almost_val_loss], c='orange', label='Second best val_loss')
    if bottom is not None: plt.ylim(bottom=bottom)
    if top is not None: plt.ylim(top=top)
    plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='lower left')
    if title is not None: plt.title(title)

    # Plot learning rate.
    # tf.keras 2.x callbacks log it as 'lr', Keras 3 as 'learning_rate'.
    lr_key = next((k for k in ('lr', 'learning_rate') if k in history), None)
    if plot_lr and lr_key is not None:
        ax2 = plt.gca().twinx()
        ax2.plot(np.arange(from_epoch, len(history[lr_key])), np.array(history[lr_key][from_epoch:]), color='g', label='Learning rate')
        ax2.set_ylabel('Learning rate')
        ax2.legend(loc='upper right')

    plt.show()

In [None]:
# Directory of the competition files (absolute Kaggle input mount).
input_path = Path('/kaggle/input/tabular-playground-series-jun-2022/')

# Original table, indexed by row_id; its NaN pattern drives the cells below.
data = pd.read_csv(input_path / 'data.csv', index_col='row_id')
# Submission template, indexed by 'row-col' strings ("<row_id>-<column name>").
submission = pd.read_csv(input_path / 'sample_submission.csv', index_col='row-col')
# Table loaded from a separate input dataset and updated in place further down.
# NOTE(review): presumably the output of an earlier imputation round (the file
# name suggests rows with up to two NaNs are already filled) - confirm.
data_premuted = pd.read_csv('../input/lastlastlast/premutedRound2TwoNansWhat.csv', index_col='row_id')

In [None]:
# NaN mask of the F_4 block (every column from position 65 onwards), computed
# once instead of four times per column combination.
f4_na_mask = data.iloc[:, 65:].isna()

# Number of missing F_4 values per row.
f4NaNs = f4_na_mask.sum(axis=1)

# Only rows with exactly four missing F_4 values are of interest here.
f4_rows_4NaNs = f4_na_mask[f4NaNs == 4]

# F_4 columns ordered by their numeric suffix ('F_4_<n>' -> n), so that every
# combination below is generated exactly once, in ascending suffix order.
f4_cols = sorted(f4_na_mask.columns, key=lambda c: int(c[4:]))

# Maps the concatenated names of four F_4 columns (e.g. 'F_4_0F_4_1F_4_3F_4_5')
# to the sorted row labels in which exactly these four columns are missing.
f4_col_4NaNs = {}
n_rows_4NaNs = 0  # running total; deliberately not called `sum` (builtin)
for cols in combinations(f4_cols, 4):
    key = ''.join(str(c) for c in cols)
    all_four_missing = f4_rows_4NaNs[list(cols)].all(axis=1).to_numpy()
    f4_col_4NaNs[key] = sorted(f4_rows_4NaNs.index[all_four_missing].tolist())
    n_rows_4NaNs += len(f4_col_4NaNs[key])
print(n_rows_4NaNs)
print(len(f4_col_4NaNs))

In [None]:
index_list = data.columns.to_list()

# One fixed combination of four F_4 columns is used to design and
# cross-validate the network in the following cells.
index1, index2, index3, index4 = 'F_4_0', 'F_4_1', 'F_4_3', 'F_4_5'
key = str(index1)+str(index2)+str(index3)+str(index4)

# Rows where exactly these four columns are missing (to be predicted) and
# rows without any missing F_4 value (used for training).
missing = f4_col_4NaNs[key]
no_missing = f4NaNs[f4NaNs == 0].index

# `missing` / `no_missing` hold row labels (row_id), so select with .loc;
# positional .iloc would only be right if labels happen to equal positions.
train = data_premuted.iloc[:,65:].loc[no_missing]
test = data_premuted.iloc[:,65:].loc[missing]

# Features are the remaining F_4 columns, targets are the four chosen ones.
X = train.drop([index1, index2, index3, index4],axis=1)
y = train[[index1, index2, index3, index4]]
X_test = test.drop([index1, index2, index3, index4],axis=1)
print(index1, 'and', index2, 'and', index3, 'and', index4)
print(X.shape, X_test.shape)
print(y.shape)

In [None]:
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects

class Mish(Activation):
    """``Activation`` layer subclass that carries the name 'Mish'.

    The wrapped callable is passed straight through to ``Activation``; the
    only addition is the ``__name__`` attribute set on the instance.
    """

    def __init__(self, activation, **kwargs):
        super().__init__(activation, **kwargs)
        self.__name__ = 'Mish'


def mish(inputs):
    """Mish activation: ``x * tanh(softplus(x))``."""
    gate = tf.math.tanh(tf.math.softplus(inputs))
    return inputs * gate

# Register a Mish layer wrapping `mish` under the key 'Mish' in Keras' custom
# objects, so that my_model can pass the string 'Mish' as `activation=`.
get_custom_objects().update({'Mish': Mish(mish)})

In [None]:
def my_model():
    """Build the feed-forward regression network for four target columns.

    Architecture: two 32-unit layers whose outputs are concatenated and
    batch-normalised, followed by four 16-unit layers, one 8-unit layer and a
    linear 4-unit output. All hidden layers use the 'Mish' activation and an
    L2 kernel regulariser of 35e-6.

    The input width is taken from the notebook-level feature frame ``X``.

    Returns a (not yet compiled) instance of tensorflow.keras.models.Model.
    """
    def hidden(units):
        # Fresh layer (and fresh regulariser instance) on every call.
        return Dense(units,
                     kernel_regularizer=tf.keras.regularizers.l2(35e-6),
                     activation='Mish')

    inputs = Input(shape=(len(X.columns),))

    first = hidden(32)(inputs)
    second = hidden(32)(first)

    # Skip connection: the normalised block sees both 32-unit outputs.
    merged = Concatenate()([first, second])
    net = BatchNormalization()(merged)

    for width in (16, 16, 16, 16, 8):
        net = hidden(width)(net)

    # One linear output per missing column (4 NaNs per row).
    outputs = Dense(4, activation='linear')(net)
    return Model(inputs, outputs)

# Draw the architecture of a freshly built model (with tensor shapes, without
# layer names). my_model() reads the global X, so X must already be defined.
plot_model(my_model(), show_layer_names=False, show_shapes=True)

In [None]:
#### %%time
# Cross-validation of the regressor

# Settings read by fit_model() as notebook globals.
EPOCHS = 200  # max epochs when training with ReduceLROnPlateau + EarlyStopping
EPOCHS_COSINEDECAY = 150  # epochs when training with the cosine schedule
CYCLES = 1  # number of cosine cycles the cosine-schedule epochs are split into
VERBOSE = 0 # set to 0 for less output, or to 2 for more output
DIAGRAMS = True  # plot the learning curve (only done for fold 0 of run 0)
USE_PLATEAU = True  # with validation data: plateau/early stopping; else cosine decay
BATCH_SIZE = 2048  # batch size passed to model.fit
ONLY_FIRST_FOLD = True  # stop the cross-validation loop after the first fold

# see https://keras.io/getting_started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
np.random.seed(1)
random.seed(1)
tf.random.set_seed(1)

def _rmse(y_true, y_pred):
    """Root mean squared error: computed per output column, then averaged.

    Gives the same number as ``mean_squared_error(..., squared=False)`` with
    its default uniform averaging, without depending on the ``squared``
    keyword, which newer scikit-learn releases deprecate.
    """
    per_column_mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    return np.sqrt(per_column_mse).mean()


def fit_model(X_tr, y_tr, X_va=None, y_va=None, run=0, fold=None):
    """Scale the data, fit a model, plot the training history and optionally validate the model.

    Parameters
    ----------
    X_tr, y_tr
        Training features and targets; both are standardised with their own
        ``StandardScaler`` fitted on the training data.
    X_va, y_va : optional
        Validation features and targets. If given (and USE_PLATEAU is set),
        training uses ReduceLROnPlateau + EarlyStopping for up to EPOCHS
        epochs; otherwise a cosine learning-rate schedule is run for
        EPOCHS_COSINEDECAY epochs.
    run : int
        Run identifier, used in the log line and to decide whether to plot.
    fold : int, optional
        Fold identifier for the log line. If omitted, the notebook-level
        variable ``fold`` is used when it exists, else 0.

    Returns
    -------
    (model, scaler, y_scaler)
        The trained ``tensorflow.keras.models.Model``, the fitted feature
        scaler and the fitted target scaler.

    As a side effect, appends to history_list; with validation data it also
    sets the global y_va_pred (scaled predictions) and appends to score_list
    and score_list_unscaled.
    """
    global y_va_pred
    if fold is None:
        # Older callers rely on the cross-validation loop variable `fold`
        # being visible as a global; fall back to 0 when it is not defined.
        fold = globals().get('fold', 0)
    start_time = datetime.datetime.now()

    scaler = StandardScaler()
    y_scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    y_tr_scaled = y_scaler.fit_transform(y_tr)

    if X_va is not None:
        X_va = scaler.transform(X_va)
        y_va_scaled = y_scaler.transform(y_va)
        validation_data = (X_va, y_va_scaled)
    else:
        validation_data = None

    # Define the learning rate schedule and EarlyStopping
    lr_start = 0.01
    if USE_PLATEAU and X_va is not None: # use early stopping
        epochs = EPOCHS
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.7,
                               patience=4, verbose=VERBOSE)
        es = EarlyStopping(monitor="val_loss",
                           patience=12,
                           verbose=1,
                           mode="min",
                           restore_best_weights=True)
        callbacks = [lr, es, tf.keras.callbacks.TerminateOnNaN()]

    else: # use cosine learning rate decay rather than early stopping
        epochs = EPOCHS_COSINEDECAY
        lr_end = 0.0002
        def cosine_decay(epoch):
            # w decays from 1 to 0 in every cycle
            # epoch == 0                  -> w = 1 (first epoch of cycle)
            # epoch == epochs_per_cycle-1 -> w = 0 (last epoch of cycle)
            epochs_per_cycle = epochs // CYCLES
            epoch_in_cycle = epoch % epochs_per_cycle
            if epochs_per_cycle > 1:
                w = (1 + math.cos(epoch_in_cycle / (epochs_per_cycle-1) * math.pi)) / 2
            else:
                w = 1
            return w * lr_start + (1 - w) * lr_end

        lr = LearningRateScheduler(cosine_decay, verbose=0)
        callbacks = [lr, tf.keras.callbacks.TerminateOnNaN()]

    # Construct and compile the model
    model = my_model()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_start),
                  metrics=tf.keras.metrics.RootMeanSquaredError(),
                  loss=tf.keras.losses.MeanSquaredError())

    # Train the model
    history = model.fit(X_tr, y_tr_scaled,
                        validation_data=validation_data,
                        epochs=epochs,
                        verbose=VERBOSE,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        callbacks=callbacks)

    history_list.append(history.history)
    # Drop the local references to the Keras callback/history objects.
    callbacks, es, lr, history = None, None, None, None

    if X_va is None:
        print(f"Training rmse: {history_list[-1]['root_mean_squared_error'][-1]:.4f}")
    else:
        lastloss = f"Training rmse: {history_list[-1]['root_mean_squared_error'][-1]:.4f} | Val rmse: {history_list[-1]['val_root_mean_squared_error'][-1]:.4f}"

        # Inference for validation (whole validation set in a single batch)
        y_va_pred = model.predict(X_va, batch_size=len(X_va), verbose=0)

        # Evaluation: Execution time, loss and RMSE
        score = _rmse(y_va_scaled, y_va_pred)

        # RMSE in the original target units
        y_va_pred_unscaled = y_scaler.inverse_transform(y_va_pred)
        score_unscaled = _rmse(y_va, y_va_pred_unscaled)
        print(f"Fold {run}.{fold} | {str(datetime.datetime.now() - start_time)[-12:-7]}"
              f" | {lastloss} | RMSE_scaled: {score:.5f}"
              f" | RMSE_unscaled: {score_unscaled:.5f}")
        score_list.append(score)
        score_list_unscaled.append(score_unscaled)

        if DIAGRAMS and fold == 0 and run == 0:
            # Plot training history
            plot_history(history_list[-1],
                         title=f"Learning curve (validation RMSE_scaled = {score:.5f})",
                         plot_lr=True)

    return model, scaler, y_scaler


# Cross-validate the network on the feature/target frames X and y prepared
# above. fit_model() appends its results to the three lists created here.
print(f"{len(X.columns)} features")
history_list = []
score_list = []
score_list_unscaled = []
# 5 folds; KFold is used with its defaults.
kf = KFold(n_splits=5)
# NOTE: fit_model() reads the loop variable `fold` from the notebook globals
# for its log line and plotting decision, so the name must stay `fold`.
for fold, (idx_tr, idx_va) in enumerate(kf.split(X)):
    X_tr = X.iloc[idx_tr]
    X_va = X.iloc[idx_va]
    y_tr = y.iloc[idx_tr]
    y_va = y.iloc[idx_va]
    
    fit_model(X_tr, y_tr, X_va, y_va)
    if ONLY_FIRST_FOLD: break # we only need the first fold

# Mean over the folds that were actually run.
print(f"RMSE_scaled:   {np.mean(score_list):.5f}")
print(f"RMSE_unscaled: {np.mean(score_list_unscaled):.5f}")

In [None]:
# For every combination of four F_4 columns that is missing together in at
# least one row: train a network on the rows without missing F_4 values and
# write its predictions for the affected rows into data_premuted.
# NOTE: this cell updates data_premuted in place.
index_list = data.columns.to_list()

# F_4 columns ordered by numeric suffix ('F_4_<n>' -> n); combinations() then
# yields each ascending quadruple exactly once.
f4_columns = sorted(index_list[65:], key=lambda c: int(c[4:]))

# Training rows are the same for every combination: no missing F_4 value.
no_missing = f4NaNs[f4NaNs == 0].index

for index1, index2, index3, index4 in combinations(f4_columns, 4):
    key = str(index1)+str(index2)+str(index3)+str(index4)
    missing = f4_col_4NaNs[key]
    if len(missing) == 0:
        continue
    print()
    print(index1, 'and', index2, 'and', index3, 'and', index4)

    targets = [index1, index2, index3, index4]
    # `missing` / `no_missing` are row labels (row_id), hence .loc, not .iloc.
    train = data_premuted.iloc[:,65:].loc[no_missing]
    test = data_premuted.iloc[:,65:].loc[missing]
    # my_model() reads the global X to size its input layer.
    X = train.drop(targets,axis=1)
    y = train[targets]
    X_test = test.drop(targets,axis=1)
    print(X.shape, X_test.shape)
    print(y.shape)

    X_tr = X
    y_tr = y
    # Single seed; the predictions of the last seed are the ones written back.
    for seed in range(2, 3):
        np.random.seed(seed)
        random.seed(seed)
        tf.random.set_seed(seed)
        model, scaler, y_scaler = fit_model(X_tr, y_tr, run=seed)
        preds = y_scaler.inverse_transform(model.predict(scaler.transform(X_test),
                                                         batch_size=len(X_test), verbose=0))
        print(f"{seed:2}", preds.shape)

    # Columns of `preds` are in the order of `targets` (the columns of y).
    data_premuted.loc[missing, targets] = preds

In [None]:
# Fill the submission from data_premuted. Every submission label has the form
# '<row_id>-<column name>'. The lookup is done once per column on arrays
# instead of one .loc assignment per submission row.
label_parts = submission.index.str.split('-')
row_labels = label_parts.str[0].astype(int)
col_labels = label_parts.str[1]

# Positions of the requested rows in data_premuted (-1 marks an unknown row).
row_pos = data_premuted.index.get_indexer(row_labels)
if (row_pos < 0).any():
    raise KeyError('submission refers to row ids that are missing from data_premuted')

values = np.empty(len(submission), dtype=float)
for col in col_labels.unique():
    in_col = col_labels == col
    # data_premuted[col] raises KeyError for an unknown column name.
    values[in_col] = data_premuted[col].to_numpy()[row_pos[in_col]]
submission['value'] = values

submission

In [None]:
# Save the updated table (index included) to the working directory.
data_premuted.to_csv('premutedAfterFourNans_final3.csv')

In [None]:
# Write the submission file; the 'row-col' index is written as first column.
submission.to_csv('submission.csv')