In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the competition table and the sample submission, each keyed by its id column
df = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-jun-2022/data.csv",
    index_col="row_id",
)
sub = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-jun-2022/sample_submission.csv",
    index_col="row-col",
)
df.head(n=6)

In [None]:
# Replace each NaN with the mean of its column (mean imputation, not zero-fill)
df = df.fillna(df.mean())
df.head(6)

In [None]:
# Turn the imputed frame into a float32 tensor and show how many rows it has
data = df.to_numpy()
dataT = torch.tensor(data).to(torch.float32)
dataT.size(0)

In [None]:
# Create class for the AutoEncoder model
def createAE(n_features=80, n_hidden=70, n_latent=60, lr=0.001):
    """Build an untrained fully connected autoencoder with its loss and optimizer.

    The network maps ``n_features -> n_hidden -> n_latent -> n_hidden ->
    n_features``. With the default arguments this is the 80-70-60-70-80
    architecture trained with Adam at a learning rate of 0.001.

    Parameters
    ----------
    n_features : int, default 80
        Width of the input and of the reconstructed output.
    n_hidden : int, default 70
        Width of the first encoder layer and of the first decoder layer.
    n_latent : int, default 60
        Width of the bottleneck (latent code).
    lr : float, default 0.001
        Learning rate passed to Adam.

    Returns
    -------
    net : nn.Module
        Freshly initialised autoencoder.
    lossfun : nn.MSELoss
        Reconstruction loss.
    optimizer : torch.optim.Adam
        Optimizer bound to ``net.parameters()``.
    """

    class aenet(nn.Module):
        """Four-layer MLP autoencoder with ReLU hidden units and a tanh output."""

        def __init__(self):
            super().__init__()

            # Attribute names and creation order are kept as they were so that
            # state_dict keys and seeded initialisation stay unchanged.

            # Encoder: input -> hidden
            self.input = nn.Linear(n_features, n_hidden)

            # Encoder: hidden -> latent code (the bottleneck)
            self.enc = nn.Linear(n_hidden, n_latent)

            # Decoder: latent code -> hidden
            self.lat = nn.Linear(n_latent, n_hidden)

            # Decoder: hidden -> reconstruction
            self.dec = nn.Linear(n_hidden, n_features)

        def forward(self, x):
            """Return the reconstruction of ``x`` (last dimension ``n_features``)."""
            x = F.relu(self.input(x))
            x = F.relu(self.enc(x))
            x = F.relu(self.lat(x))
            # NOTE(review): tanh limits every output to (-1, 1); targets outside
            # that range cannot be reconstructed. Confirm the inputs are scaled
            # accordingly before relying on the reconstructed values.
            y = torch.tanh(self.dec(x))
            return y

    # Create instance
    net = aenet()

    # Loss function
    lossfun = nn.MSELoss()

    # Optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    return net, lossfun, optimizer

In [None]:
# Function to train the model
def function2trainTheModel(numepochs=1000, batchsize=None):
    """Train a fresh autoencoder to reconstruct the module-level ``dataT``.

    Parameters
    ----------
    numepochs : int, default 1000
        Number of passes over ``dataT``.
    batchsize : int or None, default None
        Rows per optimisation step. ``None`` uses all of ``dataT`` as a single
        batch, i.e. one optimizer step per epoch.

    Returns
    -------
    losses : list of float
        One value per epoch: the mean of that epoch's batch losses (with a
        single batch this is exactly that batch's loss).
    net : nn.Module
        The trained model.

    Raises
    ------
    ValueError
        If ``dataT`` has no rows or ``batchsize`` is smaller than 1.
    """

    # Create a new model
    net, lossfun, optimizer = createAE()

    numSamples = dataT.shape[0]
    if numSamples == 0:
        raise ValueError("dataT is empty; nothing to train on")
    if batchsize is None:
        batchsize = numSamples
    if batchsize < 1:
        raise ValueError("batchsize must be a positive integer")

    # Initiate losses
    losses = []

    # Loop over epochs
    for epochi in range(numepochs):

        # Losses during batches
        batchloss = []

        # Rows are visited in their stored order (no shuffling); the final
        # batch may be shorter so that no row is left out.
        for start in range(0, numSamples, batchsize):
            batch = dataT[start:start + batchsize]

            # Forward pass and loss (the input is also the target)
            yHat = net(batch)
            loss = lossfun(yHat, batch)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Losses in this batch
            batchloss.append(loss.item())

        # Losses in this epoch
        losses.append(sum(batchloss) / len(batchloss))
        print(f"Epochs [{epochi + 1}/{numepochs}]", end='\r')

    return losses, net

In [None]:
# Train the AutoEncoder; keeps the per-epoch loss history and the trained network
losses, net = function2trainTheModel()

In [None]:
# Plot the per-epoch training loss, with the final value in the title
fig, ax = plt.subplots(figsize=(10, 6), dpi=150)

ax.plot(losses, "o-")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title(f"Final Loss: {losses[-1]:.4f}")
plt.show()

In [None]:
# Forward pass of all data.
# This is inference only, so gradient tracking is switched off: no autograd
# graph is kept for the whole dataset and the result carries no grad_fn.
with torch.no_grad():
    dfT = net(dataT)
dfT

In [None]:
# Convert to Pandas DataFrame.
# Reuse df's index (row_id) as well as its columns so that label lookups by
# row id on df_ae address the same rows as in df, whatever values row_id takes.
df_ae = pd.DataFrame(data=dfT.detach().numpy(), index=df.index, columns=df.columns)
df_ae.head()

In [None]:
# Show the first rows of the mean-imputed input for comparison with df_ae
df.head()

In [None]:
# Fill every submission entry with the autoencoder's value for that cell.
# Each submission key has the form "<row>-<column>". All keys are resolved to
# positions in one pass and read with a single array lookup, instead of one
# scalar .loc read plus one scalar .loc write per key.
key_parts = [key.split('-') for key in sub.index]
row_pos = df_ae.index.get_indexer([int(parts[0]) for parts in key_parts])
col_pos = df_ae.columns.get_indexer([parts[1] for parts in key_parts])

# get_indexer marks unknown labels with -1, which would silently select the
# last row/column; fail loudly, as a label lookup would.
if (row_pos < 0).any() or (col_pos < 0).any():
    raise KeyError("submission index references a row or column that is not in df_ae")

# Stored as float64, the dtype the column ends up with under element-wise assignment.
sub['value'] = df_ae.to_numpy()[row_pos, col_pos].astype('float64')

In [None]:
# Reset index as per submission requirement.
# Guarded so that re-running this cell does not reset the index a second time
# and add a stray "index" column to the submission.
if "row-col" not in sub.columns:
    sub = sub.reset_index()
sub.head()

In [None]:
# Save the file: write the submission table to submission.csv without the DataFrame index
sub.to_csv('submission.csv', index=False)