In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition data and the sample submission (in that order).
dataset, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-jun-2022/data.csv',
        '/kaggle/input/tabular-playground-series-jun-2022/sample_submission.csv',
    )
)

In [None]:
# Preview the first five rows of the raw data.
dataset.head(n=5)

# Import Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau,LearningRateScheduler,EarlyStopping
from tensorflow.keras.layers import Dense,Input,InputLayer,Add,BatchNormalization,Dropout

from sklearn.preprocessing import StandardScaler,RobustScaler,MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
import random
import math

# Define Model Parameters

In [None]:
BATCH_SIZE = 2048  # samples per batch for model.fit / model.predict
EPOCHS     = 30    # maximum number of training epochs per fold
VERBOSE    = 0     # Keras verbosity level passed to callbacks, fit and predict
NUM_FOLDS  = 3     # number of KFold splits used for each imputed column

In [None]:
# Column names of the raw data, as a plain Python list.
features = list(dataset.columns)

# Simple Network Architecture

In [None]:
def nn_model(n_inputs=None):
    """Build the (uncompiled) feed-forward regression network.

    Architecture: three Dense layers (128 -> 64 -> 32 units), each with L2
    kernel regularisation and followed by BatchNormalization, then a single
    linear output unit.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. When omitted it defaults to
        ``len(features) - 2``, i.e. all columns of the module-level
        ``features`` list minus the two columns that the training loop drops
        (the column being imputed and ``'row_id'``).

    Returns
    -------
    tensorflow.keras.Model
        The uncompiled model; the caller is responsible for ``compile``.
    """
    l2_strength = 50e-6
    activation_function = "relu"

    if n_inputs is None:
        n_inputs = len(features) - 2

    # `shape` must be a tuple: `(n)` is just an int, `(n,)` is a 1-tuple.
    inputs = Input(shape=(n_inputs,))

    x = inputs
    for units in (128, 64, 32):
        x = Dense(units,
                  kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
                  activation=activation_function)(x)
        x = BatchNormalization()(x)

    x = Dense(1, activation='linear')(x)

    return Model(inputs, x)
    
    

In [None]:
def fit_model(X_train, y_train, X_val, y_val, X_test, run = 0):
    '''
    Train one network on a single fold and record its results.

    The features are standardised with a StandardScaler that is fitted on
    X_train only and then applied to X_val and X_test. A fresh model from
    nn_model() is compiled with Adam (initial learning rate 0.01) and MSE
    loss, and trained with ReduceLROnPlateau and EarlyStopping (best weights
    restored), both monitoring 'val_loss'.

    Side effects: appends to three module-level lists, which must exist
    before this function is called:
      * history_list - the Keras training history dict of this fit
      * score_list   - the mean absolute error on the validation data
      * predictions  - the model predictions for X_test

    Parameters
    ----------
    X_train, y_train : training features and target
    X_val, y_val     : validation features and target
    X_test           : feature rows to predict after training
    run              : unused; kept so existing callers keep working

    Returns
    -------
    The fitted Keras model.
    '''
    lr_start = 0.01

    # Fit the scaler on the training fold only so no validation/test
    # statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    validation_data = (X_val, y_val)

    epochs = EPOCHS
    lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.7, patience = 4, verbose = VERBOSE)
    es = EarlyStopping(monitor = 'val_loss',patience = 12, verbose = 1, mode = 'min', restore_best_weights = True)
    callbacks = [lr, es]

    model = nn_model()
    optimizer_func = tf.keras.optimizers.Adam(learning_rate = lr_start)
    loss_func = tf.keras.losses.MeanSquaredError()

    model.compile(optimizer = optimizer_func, loss = loss_func)

    history = model.fit(X_train,
                        y_train,
                        validation_data = validation_data,
                        epochs          = epochs,
                        verbose         = VERBOSE,
                        batch_size      = BATCH_SIZE,
                        shuffle         = True,
                        callbacks       = callbacks
                       )

    history_list.append(history.history)

    # Validation score is the mean absolute error (not the MSE training loss).
    y_val_pred = model.predict(X_val, batch_size = BATCH_SIZE, verbose = VERBOSE)
    score = mean_absolute_error(y_val, y_val_pred)
    score_list.append(score)

    # Predict the held-out rows with the same batch size / verbosity as the
    # validation prediction so VERBOSE is honoured everywhere.
    tst_data_scaled = scaler.transform(X_test)
    tst_pred = model.predict(tst_data_scaled, batch_size = BATCH_SIZE, verbose = VERBOSE)
    predictions.append(tst_pred)

    return model

In [None]:
from tqdm import tqdm
import datetime
# missing_values = list(np.where(data[features[1]].isnull()))
# missing_values

In [None]:
# Impute every column that contains missing values: for each such column,
# NUM_FOLDS networks are trained on the rows where the column is observed and
# their predictions for the rows where it is missing are averaged.
features = dataset.columns.to_list()

# Start from a copy so the raw `dataset` is never modified; every model is
# therefore trained on the original (un-imputed) inputs regardless of the
# pandas version / copy-on-write mode.
data_completed = dataset.copy()

for feat in tqdm(features):
    # fit_model() appends to these module-level lists, so reset them per feature.
    history_list = []
    score_list   = []
    predictions  = []

    missing_mask = dataset[feat].isnull()
    if not missing_mask.any():
        # Nothing to impute; the column is already present in data_completed.
        continue

    trn_data = dataset.loc[~missing_mask]
    tst_data = dataset.loc[missing_mask]

    # These do not depend on the fold, so build them once per feature.
    X_observed = trn_data.drop([feat, 'row_id'], axis = 1)
    y_observed = trn_data[feat]
    X_missing  = tst_data.drop([feat, 'row_id'], axis = 1)

    # Define kfolds for training purposes...
    kf = KFold(n_splits = NUM_FOLDS)

    for fold, (trn_idx, val_idx) in enumerate(kf.split(trn_data)):
        X_train, X_val = X_observed.iloc[trn_idx], X_observed.iloc[val_idx]
        y_train, y_val = y_observed.iloc[trn_idx], y_observed.iloc[val_idx]

        # Fill gaps in the other columns with the TRAINING-fold means so that
        # train, validation and test rows are preprocessed identically.
        fill_values = X_train.mean()
        X_train = X_train.fillna(fill_values)
        X_val   = X_val.fillna(fill_values)
        X_test  = X_missing.fillna(fill_values)

        fit_model(X_train, y_train, X_val, y_val, X_test)

    # Average the per-fold predictions and write them into the missing cells.
    mean_values = np.array(predictions).mean(axis = 0)
    data_completed.loc[missing_mask, feat] = mean_values.ravel()

# For submission

In [None]:
# Display the completed frame assembled by the imputation loop.
data_completed

In [None]:
# Re-read the sample submission, this time indexed by its 'row-col' column.
submission = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jun-2022/sample_submission.csv',
    index_col='row-col',
)

In [None]:
# Fill the submission from the completed data in one vectorised lookup
# instead of one `.loc` assignment per row.
# Each submission index entry has the form "<row label>-<column name>".
id_parts = submission.index.to_series().str.split('-', expand=True)
row_labels = id_parts[0].astype(int)
col_labels = id_parts[1]

# Restrict to the referenced columns; this raises KeyError if any is missing.
wanted_cols = pd.Index(col_labels.unique())
lookup = data_completed.loc[:, wanted_cols]

# Translate labels to positions; get_indexer marks unknown labels with -1.
row_pos = lookup.index.get_indexer(row_labels)
if (row_pos < 0).any():
    raise KeyError('submission references row labels that are not in data_completed')
col_pos = wanted_cols.get_indexer(col_labels)

submission['value'] = lookup.to_numpy()[row_pos, col_pos]

submission.to_csv("submission.csv")