In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from tensorflow import keras
import os
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import StandardScaler,RobustScaler
# Module-level scaler shared with fit_model(), which re-fits it on the training
# data of every call and then applies it to the validation and test data.
sc = StandardScaler()
# NOTE(review): `rb` is not referenced in any of the cells visible here.
rb = RobustScaler()

# **Loading The Main Data**

In [None]:
# Load the table whose missing values are imputed below (path is relative to the working directory).
data = pd.read_csv('../input/tabular-playground-series-jun-2022/data.csv')

# **Data Analysis**

**Finding the Percentage of Missing Values per Column**

In [None]:
def missing_percent(df):
    """Summarise how much of each column of ``df`` is missing.

    Prints the number of columns in ``df`` and how many of them appear in the
    summary, then returns that summary: a DataFrame indexed by column name
    with the columns 'Missing Values' (count of nulls) and
    'Percent of Total Values' (nulls as a percentage of the row count,
    rounded to two decimals). Columns whose percentage equals zero are left
    out, and the rest are ordered from most to least missing.
    """
    null_counts = df.isnull().sum()
    null_share = 100 * null_counts / len(df)

    summary = pd.DataFrame({
        'Missing Values': null_counts,
        'Percent of Total Values': null_share,
    })

    # Keep only the affected columns, worst first.
    affected = summary['Percent of Total Values'] != 0
    summary = summary[affected]
    summary = summary.sort_values('Percent of Total Values', ascending=False)
    summary = summary.round(2)

    print(f"Your selected dataframe has {df.shape[1]} columns.\n"
          f"There are {summary.shape[0]} columns that have missing values.")

    return summary
    
# Report missingness for the loaded table; the returned summary is the cell's output.
missing_percent(data)

**HeatMap to find Correlation**

In [None]:
# Annotated correlation heatmap of every column pair, drawn on an explicitly
# created large axes; the colour scale is clipped to [-0.6, 0.6].
corr_matrix = data.corr()
fig, ax = plt.subplots(figsize=(25, 20))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='0.1f',
    cmap="RdYlGn",
    vmin=-0.6,
    vmax=0.6,
    cbar=False,
    ax=ax,
);

# **Model Architecture**

In [None]:
def build_model(n_features=79):
    """Build and compile the dense regression network used for imputation.

    Architecture: Dense 256 -> Dense 128 -> Dropout 0.5 -> Dense 64 ->
    Dropout 0.25 -> Dense 64 -> Dense 32 -> Dense 1 (linear output), with
    ReLU on every hidden layer.

    Parameters
    ----------
    n_features : int, default 79
        Number of input columns the first layer expects. The default keeps
        the input width that was previously hard-coded, so existing
        ``build_model()`` calls behave exactly as before.

    Returns
    -------
    tf.keras.Sequential
        A new, untrained model compiled with Adam (learning rate 0.001) and
        mean-squared-error loss.
    """
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation="relu", input_shape=(n_features,)))

    model.add(layers.Dense(128, activation="relu"))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(64, activation="relu"))
    model.add(layers.Dropout(0.25))

    model.add(layers.Dense(64, activation="relu"))

    model.add(layers.Dense(32, activation="relu"))

    # Single linear unit: the network regresses one continuous value.
    model.add(layers.Dense(1, activation="linear"))

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

    return model

**Features List and Final Prediction Dataframe**

In [None]:
# Names of all columns of `data`, in their original order; the imputation loop iterates over this list.
features = data.columns.to_list()
# Output frame; it is populated by the imputation loop further down.
final = pd.DataFrame()

# **Model Training**

In [None]:
def fit_model(X_train, y_train, X_val, y_val, X_test, feat):
    """Train a fresh network on one fold and record its predictions for ``X_test``.

    The module-level scaler ``sc`` is fitted on ``X_train`` and then applied
    to ``X_val`` and ``X_test``. A new model from ``build_model()`` is trained
    for 5 epochs (batch size 1024) with ``(X_val, y_val)`` as validation data.

    Side effects: appends the Keras history dict to the module-level
    ``history_list`` and the prediction array for ``X_test`` to the
    module-level ``predictions``; both lists must exist before the call.

    ``feat`` is only used in the progress message. Returns the trained model.
    """
    train_scaled = sc.fit_transform(X_train)

    model = build_model()

    val_scaled = sc.transform(X_val)

    fit_result = model.fit(
        train_scaled,
        y_train,
        validation_data=(val_scaled, y_val),
        epochs=5,
        verbose=1,
        batch_size=1024,
        shuffle=True,
    )
    history_list.append(fit_result.history)

    test_scaled = sc.transform(X_test)
    print('\nPredicting For: ',feat)
    predictions.append(model.predict(test_scaled, verbose=1))

    return model

# **K-Fold Split for Model Training**

In [None]:
# Impute every column that contains missing values with a dedicated network:
# rows where the column is present are split into 2 folds for training and
# validation, each fold's model predicts the rows where the column is missing,
# and the fold predictions are averaged.
#
# Columns are collected in a list and concatenated once at the end, so the
# cell does not copy a growing frame on every iteration and re-running it
# rebuilds `final` from scratch instead of appending duplicate columns.
imputed_columns = []

for feat in features:
    # fit_model() appends to these module-level lists; reset them per feature.
    history_list = []
    predictions  = []

    is_missing = data[feat].isnull()

    if not is_missing.any():
        # Nothing to impute: carry the column over unchanged.
        imputed_columns.append(data[feat])
        continue

    print('\n\nTraining Model For: ',feat)

    # Positional indices of the rows to predict and of the rows to learn from.
    missing_values = np.where(is_missing)[0]
    not_missing_values = np.where(~is_missing)[0]

    trn_data = data.iloc[not_missing_values]
    tst_data = data.iloc[missing_values]

    # The rows to predict are the same for every fold, so drop the target and
    # the id column only once.
    X_test_raw = tst_data.drop([feat, 'row_id'], axis = 1)

    kf = KFold(n_splits = 2)

    for fold, (trn_idx, val_idx) in enumerate(kf.split(trn_data)):
        print(f' Training fold: {fold+1}...')
        fold_train = trn_data.iloc[trn_idx]
        fold_val = trn_data.iloc[val_idx]

        X_train = fold_train.drop([feat, 'row_id'], axis = 1)
        X_val = fold_val.drop([feat, 'row_id'], axis = 1)
        y_train, y_val = fold_train[feat], fold_val[feat]

        # Fill gaps in the predictors with the TRAINING fold's column means
        # everywhere. Using each split's own means would preprocess the
        # splits inconsistently and would leave NaNs behind whenever a column
        # is entirely missing in the validation or test rows.
        train_means = X_train.mean()
        X_train = X_train.fillna(train_means)
        X_val = X_val.fillna(train_means)
        X_test = X_test_raw.fillna(train_means)

        fit_model(X_train, y_train, X_val, y_val, X_test, feat)

    # Average the per-fold predictions for the missing rows.
    mean_values = np.array(predictions).mean(axis = 0)

    # Work on an explicit copy: assigning into `data[feat]` directly is
    # chained assignment, and whether it also rewrites `data` depends on the
    # pandas version. With the copy, `data` keeps its original missing values
    # for every later feature and for re-runs of this cell.
    imputed_data = data[feat].copy()
    imputed_data.iloc[missing_values] = mean_values.ravel()
    imputed_columns.append(imputed_data)

final = pd.concat(imputed_columns, axis = 1)

# **Saving the Final Dataframe**

In [None]:
# Persist the imputed table; with to_csv's defaults the index is written as the first column.
final.to_csv('Final.csv')

# **Reading The Submission File**

In [None]:
# Load the submission template indexed by its 'row-col' key, so entries can be addressed by that label.
sample_sub = pd.read_csv('../input/tabular-playground-series-jun-2022/sample_submission.csv', index_col='row-col')

**Replace all Missing Values**

In [None]:
# Every index label has the form '<row>-<column>'. Split the whole index in
# one vectorised pass instead of splitting each label inside a Python loop.
key_parts = sample_sub.index.to_series().str.split('-')
row_labels = key_parts.str[0].astype(int)
col_labels = key_parts.str[1]

# Fill the template one column at a time: a single label-based lookup into
# `final` per column replaces a scalar .loc read and write per entry.
# Grouping by a plain array avoids any index alignment between the two Series.
for col, rows_for_col in row_labels.groupby(col_labels.to_numpy()):
    sample_sub.loc[rows_for_col.index, 'value'] = (
        final.loc[rows_for_col.to_numpy(), col].to_numpy()
    )

sample_sub.to_csv("submission.csv")

**Final Submission File**

In [None]:
# Show the filled-in submission as the cell's output.
sample_sub