In [None]:
# Transformer Imputer
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Silence all warnings raised after this point.
warnings.filterwarnings('ignore')

# ---- Load the raw series ----
file_path = 'D:/LNN Based Imputation/datasets/Missing_Data/501_Seoul_30c_MAR.csv'
data = pd.read_csv(file_path)

# Parse 'datetime' using an explicit day/month/year hour:minute layout.
data['datetime'] = pd.to_datetime(data['datetime'], format='%d/%m/%Y %H:%M')

# Force 'value' to numeric; entries that cannot be parsed become NaN.
data['value'] = pd.to_numeric(data['value'], errors='coerce')

# ---- Integer-encode the categorical columns ----
# One fitted LabelEncoder is kept per column, keyed by the column name.
label_encoders = {
    name: LabelEncoder()
    for name in ['weekday', 'hour', 'is_weekend', 'is_working_hour', 'season']
}
for column, le in label_encoders.items():
    data[column] = le.fit_transform(data[column])

# ---- Standardise the target ----
# The scaler is fitted on the whole 'value' column (before any train/test
# split) and is reused later to map model outputs back to original units.
scaler = StandardScaler()
value_frame = data[['value']]
data['value_normalized'] = scaler.fit(value_frame).transform(value_frame)

# ---- Separate rows to impute from rows usable for training ----
value_is_missing = data['value'].isna()
missing_data = data.loc[value_is_missing]
# dropna() has no subset, so a NaN in *any* column removes the row.
data_train = data.dropna()

# ---- Features / labels ----
non_feature_columns = ['value', 'value_normalized', 'datetime', 'meter_no', 'ytrue']
features = data_train.drop(columns=non_feature_columns)
labels = data_train['value_normalized']

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)


def _to_float_tensor(frame):
    """Copy the values of a pandas object into a float32 tensor."""
    return torch.tensor(frame.values, dtype=torch.float32)


# Labels get a trailing axis so they line up with the model's (N, 1) output.
X_train, X_test = _to_float_tensor(X_train), _to_float_tensor(X_test)
y_train = _to_float_tensor(y_train).unsqueeze(1)
y_test = _to_float_tensor(y_test).unsqueeze(1)

# Define the Transformer Model
class TransformerModel(nn.Module):
    """Transformer-encoder regressor that maps a feature row to one scalar.

    The pipeline is: linear projection to ``hidden_dim`` -> dropout ->
    stack of ``num_layers`` transformer encoder layers -> linear head with a
    single output.

    Args:
        input_size: Number of input features per row.
        num_heads: Attention heads per encoder layer (must divide
            ``hidden_dim``).
        num_layers: Number of stacked encoder layers.
        hidden_dim: Model width (``d_model``) of the encoder.
        d_ffn: Width of the feed-forward sub-layer in each encoder layer.
        d_k: Accepted for signature compatibility; not used.
        d_v: Accepted for signature compatibility; not used.
        dropout: Dropout probability used after the input projection and
            inside each encoder layer.
        attn_dropout: Accepted for signature compatibility; not used.
    """

    def __init__(self, input_size, num_heads, num_layers, hidden_dim, d_ffn, d_k, d_v, dropout, attn_dropout):
        super().__init__()
        # Projects raw features up to the encoder's model width.
        self.input_linear = nn.Linear(input_size, hidden_dim)
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=d_ffn,
            dropout=dropout,
            activation='relu'
        )
        # Stack of `num_layers` copies of the encoder layer above.
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        # Regression head: hidden_dim features -> one output value.
        self.output_linear = nn.Linear(hidden_dim, 1)
        # Dropout applied to the projected input (active in train mode only).
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run the model.

        Args:
            x: Either a 2-D tensor ``(batch, input_size)`` of independent
                rows, or a 3-D tensor ``(seq, batch, input_size)`` that is
                handed to the encoder unchanged.

        Returns:
            A tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        x = self.input_linear(x)
        x = self.dropout(x)
        if x.dim() == 2:
            # A 2-D tensor would be read by nn.TransformerEncoder as ONE
            # unbatched sequence, letting the rows of a mini-batch attend to
            # each other so that a row's prediction depends on its batch
            # neighbours. Insert a length-1 sequence axis (the encoder uses
            # the default (seq, batch, feature) layout) so every row is
            # processed on its own, then drop the axis again.
            x = self.transformer_encoder(x.unsqueeze(0)).squeeze(0)
        else:
            x = self.transformer_encoder(x)
        return self.output_linear(x)

# Training Loop for Transformer Model with Early Stopping
def train(model, train_loader, val_loader, optimizer, epochs, patience):
    """Train ``model`` with MSE loss and early stopping on validation loss.

    Each epoch runs one pass over ``train_loader`` followed by one pass over
    ``val_loader``. Reported losses are the mean of the per-batch mean losses.
    Training stops once the validation loss has failed to improve for
    ``patience`` consecutive epochs.

    On return the model carries the weights from the epoch with the lowest
    validation loss, not the weights of the final epoch.

    Args:
        model: The ``nn.Module`` to optimise (updated in place).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Maximum number of epochs to run.
        patience: Number of consecutive non-improving epochs tolerated
            before stopping.

    Returns:
        A ``(train_losses, val_losses)`` tuple of per-epoch average losses.
    """
    criterion = nn.MSELoss()
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_state = None
    patience_counter = 0

    # Also covers epochs == 0, where the loop body never runs.
    model.train()

    for epoch in range(epochs):
        # Training phase
        model.train()
        total_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                total_val_loss += criterion(model(inputs), labels).item()
        avg_val_loss = total_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss}, Val Loss: {avg_val_loss}')

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() hands back the live tensors, so clone them;
            # otherwise later optimizer steps would overwrite the snapshot.
            best_state = {
                key: tensor.detach().clone()
                for key, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    # Roll back to the best epoch; without this the caller would receive the
    # weights from `patience` epochs after the validation loss bottomed out.
    if best_state is not None:
        model.load_state_dict(best_state)

    return train_losses, val_losses

# Prepare the dataset for training and validation.
# NOTE: the validation set is the held-out test split (X_test / y_test), so
# the same rows drive early stopping and the RMSE reported further below.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
val_dataset = torch.utils.data.TensorDataset(X_test, y_test)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

# Best parameters based on previous grid search
best_params = {
    'num_heads': 8,
    'num_layers': 4,
    'hidden_dim': 64
}

# Build the model with the best parameters.
# d_k, d_v and attn_dropout are required by the constructor signature but
# TransformerModel does not use them.
best_model = TransformerModel(
    input_size=X_train.shape[1],
    num_heads=best_params['num_heads'],
    num_layers=best_params['num_layers'],
    hidden_dim=best_params['hidden_dim'],
    d_ffn=512,
    d_k=8,
    d_v=8,
    dropout=0.2,
    attn_dropout=0
)
optimizer = optim.Adam(best_model.parameters(), lr=1e-5)

# Train for at most 250 epochs, stopping after 20 epochs without improvement
# in validation loss. The loaders built above are used as-is.
train_losses, val_losses = train(best_model, train_loader, val_loader, optimizer, epochs=250, patience=20)

# Loss curves: one point per epoch for each of the two series.
sns.set()
plt.figure(figsize=(6, 4))
for curve_label, curve in (("Training loss", train_losses), ("Validation loss", val_losses)):
    plt.plot(curve, label=curve_label)
plt.title("Training and Validation Loss")
plt.xlabel("Training steps")
plt.legend(loc="upper right")
plt.show()

# ---- Impute the rows whose 'value' is missing ----
# Feed the model the same feature columns that were used for training.
non_feature_columns = ['value', 'value_normalized', 'datetime', 'meter_no', 'ytrue']
missing_features = torch.tensor(
    missing_data.drop(columns=non_feature_columns).values,
    dtype=torch.float32,
)

# Predict in eval mode without tracking gradients, then map the normalised
# predictions back to the original units of 'value'.
best_model.eval()
with torch.no_grad():
    imputed_values = scaler.inverse_transform(best_model(missing_features).numpy())

# Write the predictions into 'value' at the originally-missing row labels;
# the (N, 1) array is flattened to 1-D to match the column selection.
data.loc[missing_data.index, 'value'] = imputed_values.ravel()

# Persist the completed dataset.
# NOTE(review): the output name is tagged 'Suwon_40' while the input file is
# '501_Seoul_30c_MAR.csv' - confirm this naming is intended.
output_path = 'D:/LNN Based Imputation/datasets/Imputed_Data/501_Transformer_Suwon_40_MAR.csv'
data.to_csv(output_path, index=False)

def _show_overlay(reference, reference_label, estimate, estimate_label, title):
    """Draw `reference` (solid) and `estimate` (dashed) on one 6x4 figure."""
    plt.figure(figsize=(6, 4))
    plt.plot(reference, label=reference_label)
    plt.plot(estimate, label=estimate_label, linestyle="dashed")
    plt.title(title)
    plt.legend(loc="upper right")
    plt.show()


# ---- Evaluate on the held-out test split ----
best_model.eval()
with torch.no_grad():
    test_predictions = best_model(X_test).numpy()

# Bring targets and predictions back to the original scale of 'value'.
y_test_inverse = scaler.inverse_transform(y_test.cpu().numpy())
test_predictions_inverse = scaler.inverse_transform(test_predictions)
predicted_column = test_predictions_inverse[:, 0]

# Targets vs. predictions, in test-split row order.
_show_overlay(
    y_test_inverse, "Target output",
    predicted_column, "Transformer output",
    "After Training",
)

# Root-mean-squared error in original units.
rmse = np.sqrt(mean_squared_error(y_test_inverse, predicted_column))
print(f'RMSE: {rmse}')

# Correlation between 'ytrue' and the 'value' column after imputation
# (computed over every row of `data`, not only the imputed ones).
if 'ytrue' in data.columns:
    correlation = data['ytrue'].corr(data['value'])
    print(f"Correlation: {correlation}")

# Full series: 'ytrue' against 'value' with the imputed entries filled in.
sns.set()
_show_overlay(
    data['ytrue'], "True value",
    data['value'], "Transformer prediction",
    "Transformer Training Data",
)
