In [1]:
# Import necessary libraries
import pandas as pd
import os
import sys
import numpy as np
import time
import os
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
import hiplot as hip
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import mlflow

import pyarrow.feather as feather

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


raw_path = '../data/raw/'
processed_path = '../data/processed/'
sys.path.append(raw_path)
sys.path.append(processed_path)

## Read the files

In [2]:
# Measure the time taken to load the data
start_time = time.time()
print("Loading data from Feather file...")
try:
    # Load Feather file using pyarrow
    df_metered_monthly = feather.read_feather(processed_path + "df_metered_monthly_500_balanced.feather")
    df_unmetered_monthly = feather.read_feather(processed_path + "df_unmetered_monthly_500_balanced.feather")
    print("Feather file loaded successfully")
except Exception as e:
    print(f"Error loading Feather file: {e}")
end_time = time.time()

load_time = end_time - start_time
print(f"Time taken to load data: {load_time:.2f} seconds")

Loading data from Feather file...
Feather file loaded successfully
Time taken to load data: 11.79 seconds


In [3]:
df_unmetered_monthly['consumption'].max()

161.5

In [4]:
df_metered_monthly['consumption'].max()

184.0

In [5]:
df_unmetered_monthly['RND_ID'].unique()

array(['9655', '12332', '11500', '4247', '5261', '10341', '4873', '2607',
       '769', '5562', '5445', '2212', '2081', '1241', '3515', '10938',
       '7313', '5383', '2748', '12066', '5918', '10042', '5486', '9599',
       '706', '1738', '7405', '3771', '966', '4964', '1531', '2068',
       '6388', '2562', '9596', '10569', '2060', '12885', '11510', '1358',
       '7259', '3551', '8383', '6315', '6357', '727', '7397', '5098',
       '6975', '4731'], dtype=object)

## Initialise, upload and Continue training the model (Later after initial training was stopped)

In [None]:
# Energy Dataset Class Definition
# Define the dataset class for handling energy consumption data. This will be used to load and preprocess the data.
class EnergyDataset(Dataset):
    def __init__(self, data, max_consumption):
        self.data = data
        self.max_consumption = max_consumption

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor([self.data.iloc[idx]['consumption_normalized']], dtype=torch.float32)
        time_int = int(self.data.iloc[idx]['time_int'])
        month_numeric = int(self.data.iloc[idx]['month'])
        day_of_week = self.data.iloc[idx]['day_of_week']
        day_of_year = self.data.iloc[idx]['day_of_year']
        is_weekend = int(self.data.iloc[idx]['is_weekend'])
        high_tariff = self.data.iloc[idx]['high_tariff']
        consumption_monthly_avg_normalized = self.data.iloc[idx]['consumption_monthly_avg_normalized']
        consumption_monthly_max_normalized = self.data.iloc[idx]['consumption_monthly_max_normalized']
        
        # Extract one-hot encoded values
        total_bin_values = self.data.iloc[idx][self.data.columns.str.startswith('total_bin_')].to_numpy(dtype=np.float32)
        baseload_values = self.data.iloc[idx][self.data.columns.str.startswith('baseload_')].to_numpy(dtype=np.float32)
        
        # Combine all features
        c = torch.tensor([time_int, month_numeric, day_of_week, day_of_year, is_weekend, high_tariff, 
                          consumption_monthly_avg_normalized, consumption_monthly_max_normalized], dtype=torch.float32)
        c = torch.cat((c, torch.tensor(total_bin_values, dtype=torch.float32), torch.tensor(baseload_values, dtype=torch.float32)))
        
        return {'x': x, 'c': c}

# Load and preprocess the data
# Here we define the maximum consumption for normalization purposes.
max_consumption = df_metered_monthly['consumption'].max()

# Create datasets
unmetered_dataset = EnergyDataset(df_unmetered_monthly, max_consumption)
metered_dataset = EnergyDataset(df_metered_monthly, max_consumption)

# Create data loaders for batch processing
train_loader = DataLoader(metered_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(unmetered_dataset, batch_size=64, shuffle=False)

# Define the CVAE model
class CVAE(nn.Module):
    def __init__(self, input_dim, condition_dim, latent_dim):
        super(CVAE, self).__init__()
        # Define layers with layer normalization and dropout for better training stability
        self.fc1 = nn.Linear(input_dim + condition_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc21 = nn.Linear(128, latent_dim)
        self.fc22 = nn.Linear(128, latent_dim)
        self.fc3 = nn.Linear(latent_dim + condition_dim, 128)
        self.fc4 = nn.Linear(128, 256)
        self.fc5 = nn.Linear(256, input_dim)
        self.layer_norm1 = nn.LayerNorm(256)
        self.layer_norm2 = nn.LayerNorm(128)
        self.layer_norm3 = nn.LayerNorm(latent_dim)
        self.layer_norm4 = nn.LayerNorm(128)
        self.layer_norm5 = nn.LayerNorm(256)
        self.dropout = nn.Dropout(0.3)
        self.init_weights()

    def init_weights(self):
        # Initialize weights for better convergence
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

    def encode(self, x, c):
        # Encode the input and conditions to latent space
        h1 = torch.relu(self.layer_norm1(self.fc1(torch.cat([x, c], dim=1))))
        h1 = self.dropout(h1)
        h2 = torch.relu(self.layer_norm2(self.fc2(h1)))
        h2 = self.dropout(h2)
        mu = self.fc21(h2)
        logvar = self.fc22(h2)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        # Reparameterization trick to sample z
        logvar_clamped = torch.clamp(logvar, min=-10, max=10)
        std = torch.exp(0.5 * logvar_clamped)
        eps = torch.randn_like(std) * 1e-6
        z = mu + eps * std
        return z

    def decode(self, z, c):
        # Decode z and conditions to reconstruct the input
        h3 = torch.relu(self.layer_norm4(self.fc3(torch.cat([z, c], dim=1))))
        h3 = self.dropout(h3)
        h4 = torch.relu(self.layer_norm5(self.fc4(h3)))
        h4 = self.dropout(h4)
        recon = torch.sigmoid(self.fc5(h4))
        return recon

    def forward(self, x, c):
        # Forward pass through the network
        mu, logvar = self.encode(x, c)
        z = self.reparameterize(mu, logvar)
        recon = self.decode(z, c)
        return recon, mu, logvar
        
# MMD Calculation Function
# Define the function to compute Maximum Mean Discrepancy (MMD).   
def compute_mmd(x, y, sigma_squares=[1, 2, 4, 8, 16]):
    mmd = 0
    for sigma_square in sigma_squares:
        gamma = 1 / (2 * sigma_square)
        K_XX = torch.exp(-gamma * torch.cdist(x, x, p=2))
        K_YY = torch.exp(-gamma * torch.cdist(y, y, p=2))
        K_XY = torch.exp(-gamma * torch.cdist(x, y, p=2))
        mmd += (K_XX.mean() + K_YY.mean() - 2 * K_XY.mean())
    return mmd
    
# Loss Function Definition
# Define the loss function combining MSE, MMD, and KLD.
def loss_function(recon_x, x, mu, logvar):
    MSE = nn.functional.mse_loss(recon_x, x, reduction='sum')
    z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)  # Reparameterization trick
    prior_z = torch.randn_like(z)  # Sample from standard normal distribution

    MMD = compute_mmd(z, prior_z)
    MMD = torch.clamp(MMD, min=0)  # Ensure MMD is non-negative

    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return MSE + MMD + KLD

# Initialize model, optimizer, and set learning rate
# Here we define the input dimensions, condition dimensions, latent space dimensions, and learning rate.
input_dim = 1
condition_dim = 8 + len(df_metered_monthly.columns[df_metered_monthly.columns.str.startswith('total_bin_')]) + len(df_metered_monthly.columns[df_metered_monthly.columns.str.startswith('baseload_')])
latent_dim = 50
lr = 1e-4

# Determine the device to be used for training (GPU if available, otherwise CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the CVAE model and load pre-trained weights.
model = CVAE(input_dim, condition_dim, latent_dim).to(device)
model.load_state_dict(torch.load('cvae_model_epoch_3_no_attention_.pth'))

optimizer = optim.Adam(model.parameters(), lr=lr)
scaler = GradScaler()  # Initialize GradScaler for mixed precision training

num_epochs = 20  # Adjusted number of epochs

# Start a parent run
with mlflow.start_run(run_name=f"cvae_model_layers{model.fc1.out_features}_latent_dims_{latent_dim}_conds_{condition_dim}_lr_{lr}_num_epochs_{num_epochs}_pe_attention_lstm"):

    # Log parameters in the parent run
    mlflow.log_param("latent_dim", latent_dim)
    mlflow.log_param("condition_dim", condition_dim)

    for epoch in range(num_epochs):
        with mlflow.start_run(nested=True):
            model.train()
            train_loss = 0
            epoch_pbar = tqdm(total=len(train_loader), desc=f"Epoch {epoch + 1}/{num_epochs}")

            for batch in train_loader:
                x, c = batch['x'].to(device, non_blocking=True), batch['c'].to(device, non_blocking=True)

                if torch.isnan(x).any() or torch.isnan(c).any():
                    print(f"NaN input encountered at epoch {epoch}")
                    continue

                optimizer.zero_grad()
                with autocast():  # Mixed precision training
                    recon_batch, mu, logvar = model(x, c)
                    loss = loss_function(recon_batch, x, mu, logvar)

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                train_loss += loss.item()
                epoch_pbar.update(1)

            epoch_pbar.close()
            avg_train_loss = train_loss / len(train_loader.dataset)
            print(f'Epoch {epoch + 1}, Loss: {avg_train_loss}')
            mlflow.log_metric("train_loss", avg_train_loss, step=epoch)

            # Save model checkpoint
            torch.save(model.state_dict(), f'cvae_model_epoch_{epoch + 1}_no_attention_.pth')

            # Log the model checkpoint with MLflow
            mlflow.pytorch.log_model(model, f"cvae_model_epoch_{epoch + 1}_no_attention")

Epoch 1/20: 100%|████████████████████████████| 123188/123188 [10:59:44<00:00,  3.11it/s]


Epoch 1, Loss: 0.005716422992211756


Epoch 2/20: 100%|█████████████████████████████| 123188/123188 [6:04:02<00:00,  5.64it/s]


Epoch 2, Loss: 0.0056994685093890675


Epoch 3/20: 100%|█████████████████████████████| 123188/123188 [6:42:16<00:00,  5.10it/s]


Epoch 3, Loss: 0.00568424289383066


Epoch 4/20:   0%|          | 10/123188 [00:02<6:51:29,  4.99it/s]

## Initialise and train the model (Initial)

In [None]:
# Energy Dataset Class Definition
# Define the dataset class for handling energy consumption data. This will be used to load and preprocess the data.
class EnergyDataset(Dataset):
    def __init__(self, data, max_consumption):
        self.data = data
        self.max_consumption = max_consumption

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = torch.tensor([self.data.iloc[idx]['consumption_normalized']], dtype=torch.float32)
        time_int = int(self.data.iloc[idx]['time_int'])
        month_numeric = int(self.data.iloc[idx]['month'])
        day_of_week = self.data.iloc[idx]['day_of_week']
        day_of_year = self.data.iloc[idx]['day_of_year']
        is_weekend = int(self.data.iloc[idx]['is_weekend'])
        high_tariff = self.data.iloc[idx]['high_tariff']
        consumption_monthly_avg_normalized = self.data.iloc[idx]['consumption_monthly_avg_normalized']
        consumption_monthly_max_normalized = self.data.iloc[idx]['consumption_monthly_max_normalized']
        
        # Extract one-hot encoded values
        total_bin_values = self.data.iloc[idx][self.data.columns.str.startswith('total_bin_')].to_numpy(dtype=np.float32)
        baseload_values = self.data.iloc[idx][self.data.columns.str.startswith('baseload_')].to_numpy(dtype=np.float32)
        
        # Combine all features
        c = torch.tensor([time_int, month_numeric, day_of_week, day_of_year, is_weekend, high_tariff, 
                          consumption_monthly_avg_normalized, consumption_monthly_max_normalized], dtype=torch.float32)
        c = torch.cat((c, torch.tensor(total_bin_values, dtype=torch.float32), torch.tensor(baseload_values, dtype=torch.float32)))
        
        return {'x': x, 'c': c}

# Load and preprocess the data
# Here we define the maximum consumption for normalization purposes.
max_consumption = df_metered_monthly['consumption'].max()

# Create datasets
unmetered_dataset = EnergyDataset(df_unmetered_monthly, max_consumption)
metered_dataset = EnergyDataset(df_metered_monthly, max_consumption)

# Create data loaders for batch processing
train_loader = DataLoader(metered_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(unmetered_dataset, batch_size=64, shuffle=False)

# Define the CVAE model
class CVAE(nn.Module):
    def __init__(self, input_dim, condition_dim, latent_dim):
        super(CVAE, self).__init__()
        # Define layers with layer normalization and dropout for better training stability
        self.fc1 = nn.Linear(input_dim + condition_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc21 = nn.Linear(128, latent_dim)
        self.fc22 = nn.Linear(128, latent_dim)
        self.fc3 = nn.Linear(latent_dim + condition_dim, 128)
        self.fc4 = nn.Linear(128, 256)
        self.fc5 = nn.Linear(256, input_dim)
        self.layer_norm1 = nn.LayerNorm(256)
        self.layer_norm2 = nn.LayerNorm(128)
        self.layer_norm3 = nn.LayerNorm(latent_dim)
        self.layer_norm4 = nn.LayerNorm(128)
        self.layer_norm5 = nn.LayerNorm(256)
        self.dropout = nn.Dropout(0.3)
        self.init_weights()

    def init_weights(self):
        # Initialize weights for better convergence
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

    def encode(self, x, c):
        # Encode the input and conditions to latent space
        h1 = torch.relu(self.layer_norm1(self.fc1(torch.cat([x, c], dim=1))))
        h1 = self.dropout(h1)
        h2 = torch.relu(self.layer_norm2(self.fc2(h1)))
        h2 = self.dropout(h2)
        mu = self.fc21(h2)
        logvar = self.fc22(h2)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        # Reparameterization trick to sample z
        logvar_clamped = torch.clamp(logvar, min=-10, max=10)
        std = torch.exp(0.5 * logvar_clamped)
        eps = torch.randn_like(std) * 1e-6
        z = mu + eps * std
        return z

    def decode(self, z, c):
        # Decode z and conditions to reconstruct the input
        h3 = torch.relu(self.layer_norm4(self.fc3(torch.cat([z, c], dim=1))))
        h3 = self.dropout(h3)
        h4 = torch.relu(self.layer_norm5(self.fc4(h3)))
        h4 = self.dropout(h4)
        recon = torch.sigmoid(self.fc5(h4))
        return recon

    def forward(self, x, c):
        # Forward pass through the network
        mu, logvar = self.encode(x, c)
        z = self.reparameterize(mu, logvar)
        recon = self.decode(z, c)
        return recon, mu, logvar
        
# MMD Calculation Function
# Define the function to compute Maximum Mean Discrepancy (MMD).   
def compute_mmd(x, y, sigma_squares=[1, 2, 4, 8, 16]):
    mmd = 0
    for sigma_square in sigma_squares:
        gamma = 1 / (2 * sigma_square)
        K_XX = torch.exp(-gamma * torch.cdist(x, x, p=2))
        K_YY = torch.exp(-gamma * torch.cdist(y, y, p=2))
        K_XY = torch.exp(-gamma * torch.cdist(x, y, p=2))
        mmd += (K_XX.mean() + K_YY.mean() - 2 * K_XY.mean())
    return mmd
    
# Loss Function Definition
# Define the loss function combining MSE, MMD, and KLD.
def loss_function(recon_x, x, mu, logvar):
    MSE = nn.functional.mse_loss(recon_x, x, reduction='sum')
    z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)  # Reparameterization trick
    prior_z = torch.randn_like(z)  # Sample from standard normal distribution

    MMD = compute_mmd(z, prior_z)
    MMD = torch.clamp(MMD, min=0)  # Ensure MMD is non-negative

    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return MSE + MMD + KLD

# Initialize model, optimizer, and set learning rate
# Here we define the input dimensions, condition dimensions, latent space dimensions, and learning rate.
input_dim = 1
condition_dim = 8 + len(df_metered_monthly.columns[df_metered_monthly.columns.str.startswith('total_bin_')]) + len(df_metered_monthly.columns[df_metered_monthly.columns.str.startswith('baseload_')])
latent_dim = 50
lr = 1e-4

# Determine the device to be used for training (GPU if available, otherwise CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the CVAE model and load pre-trained weights.
model = CVAE(input_dim, condition_dim, latent_dim).to(device)

optimizer = optim.Adam(model.parameters(), lr=lr)
scaler = GradScaler()  # Initialize GradScaler for mixed precision training

num_epochs = 20  # Adjusted number of epochs

# Start a parent run
with mlflow.start_run(run_name=f"cvae_model_layers{model.fc1.out_features}_latent_dims_{latent_dim}_conds_{condition_dim}_lr_{lr}_num_epochs_{num_epochs}_pe_attention_lstm"):

    # Log parameters in the parent run
    mlflow.log_param("latent_dim", latent_dim)
    mlflow.log_param("condition_dim", condition_dim)

    for epoch in range(num_epochs):
        with mlflow.start_run(nested=True):
            model.train()
            train_loss = 0
            epoch_pbar = tqdm(total=len(train_loader), desc=f"Epoch {epoch + 1}/{num_epochs}")

            for batch in train_loader:
                x, c = batch['x'].to(device, non_blocking=True), batch['c'].to(device, non_blocking=True)

                if torch.isnan(x).any() or torch.isnan(c).any():
                    print(f"NaN input encountered at epoch {epoch}")
                    continue

                optimizer.zero_grad()
                with autocast():  # Mixed precision training
                    recon_batch, mu, logvar = model(x, c)
                    loss = loss_function(recon_batch, x, mu, logvar)

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                train_loss += loss.item()
                epoch_pbar.update(1)

            epoch_pbar.close()
            avg_train_loss = train_loss / len(train_loader.dataset)
            print(f'Epoch {epoch + 1}, Loss: {avg_train_loss}')
            mlflow.log_metric("train_loss", avg_train_loss, step=epoch)

            # Save model checkpoint
            torch.save(model.state_dict(), f'cvae_model_epoch_{epoch + 1}_no_attention_.pth')

            # Log the model checkpoint with MLflow
            mlflow.pytorch.log_model(model, f"cvae_model_epoch_{epoch + 1}_no_attention")

Epoch 1/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:52:09<00:00,  5.83it/s]


Epoch 1, Loss: 0.005996097603505681


Epoch 2/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:52:09<00:00,  5.83it/s]


Epoch 2, Loss: 0.006218139759772838


Epoch 3/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:52:11<00:00,  5.83it/s]


Epoch 3, Loss: 0.006073795790443379


Epoch 4/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:51:15<00:00,  5.85it/s]


Epoch 4, Loss: 0.005972387740824169


Epoch 5/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:50:38<00:00,  5.86it/s]


Epoch 5, Loss: 0.005896326259353148


Epoch 6/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:50:52<00:00,  5.85it/s]


Epoch 6, Loss: 0.005844758968443086


Epoch 7/20: 100%|████████████████████████████████████████████████████| 123188/123188 [5:52:59<00:00,  5.82it/s]


Epoch 7, Loss: 0.005815199876363843


Epoch 8/20: 100%|████████████████████████████████████████████████████| 123188/123188 [6:06:00<00:00,  5.61it/s]


Epoch 8, Loss: 0.005770012054800277


Epoch 9/20: 100%|████████████████████████████████████████████████████| 123188/123188 [6:05:04<00:00,  5.62it/s]


Epoch 9, Loss: 0.005759814589106223


Epoch 10/20: 100%|███████████████████████████████████████████████████| 123188/123188 [6:04:56<00:00,  5.63it/s]


Epoch 10, Loss: 0.0057251369684257975


Epoch 11/20:  58%|████████████████████████████▊                     | 70891/123188 [4:05:26<2:34:45,  5.63it/s]

## Save the model

In [None]:
# Save the model after training
model_path = f"cvae_model_layers{model.fc1.out_features}-{model.fc2.out_features}_latent_{latent_dim}_num_epochs_{num_epochs}_conds_{condition_dim}_no_attention.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

In [None]:
# Save the model
mlflow.pytorch.log_model(model, f"cvae_model_layers{model.fc1.out_features}-{model.fc2.out_features}_latent_{latent_dim}_num_epochs_{num_epochs}_conds_{condition_dim}_no_attention")