In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


In [12]:
torch.cuda.get_device_name(torch.cuda.current_device())

'NVIDIA GeForce RTX 3060'

In [13]:
df = pd.read_csv('./Form-714-csv-files-June-2021/Part 3 Schedule 2 - Planning Area Hourly Demand.csv')
respondent_id = pd.read_csv('./Form-714-csv-files-June-2021/Respondent IDs.csv')
good_ids = respondent_id['respondent_id'].unique()[3:]
df = df[df['respondent_id'].isin(good_ids)]
hour_cols = [f'hour{i:02d}' for i in range(1, 25)]
df = df.loc[~(df[hour_cols] == 0).any(axis=1)]


In [15]:
def prepare_vae_data(df):
    # Keep hourly columns as features
    hour_cols = [f'hour{i:02d}' for i in range(1, 25)]
    
    # Convert date and extract features
    df['plan_date'] = pd.to_datetime(df['plan_date'])
    df['year'] = df['plan_date'].dt.year
    df['month'] = df['plan_date'].dt.month
    df['day_of_week'] = df['plan_date'].dt.dayofweek
    
    # Cyclic encoding for temporal features
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    
    # Normalize year
    df['year'] = (df['year'] - df['year'].mean()) / df['year'].std()
    
    # Encode respondents
    df['respondent_id'] = df['respondent_id'].astype('category')
    df['respondent_idx'] = df['respondent_id'].cat.codes
    
    # Normalize load values per respondent
    for col in hour_cols:
        df[col] = df.groupby('respondent_id')[col].transform(
            lambda x: (x - x.mean()) / x.std()
        )
    
    return df


In [16]:
# VAE Dataset
class VAEDataset(Dataset):
    def __init__(self, df):
        self.loads = torch.FloatTensor(df[[f'hour{i:02d}' for i in range(1, 25)]].values)
        self.respondents = torch.LongTensor(df['respondent_idx'].values)
        self.temporal = torch.FloatTensor(df[['year', 'month_sin', 'month_cos', 'day_sin', 'day_cos']].values)
        
    def __len__(self):
        return len(self.loads)
    
    def __getitem__(self, idx):
        return self.loads[idx], self.respondents[idx], self.temporal[idx]


In [17]:

# VAE Model
class VAE(nn.Module):
    def __init__(self, num_respondents, temporal_dim=5, load_dim=24, 
                 embed_dim=10, hidden_dim=256, latent_dim=32):
        super().__init__()
        
        self.embed = nn.Embedding(num_respondents, embed_dim)
        self.temporal_dim = temporal_dim
        self.load_dim = load_dim
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(load_dim + embed_dim + temporal_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim//2),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(hidden_dim//2, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim//2, latent_dim)
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim + embed_dim + temporal_dim, hidden_dim//2),
            nn.ReLU(),
            nn.Linear(hidden_dim//2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, load_dim)
        )
    
    def encode(self, x, r, t):
        embedded = self.embed(r)
        combined = torch.cat([x, embedded, t], dim=1)
        h = self.encoder(combined)
        return self.fc_mu(h), self.fc_logvar(h)
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std
    
    def decode(self, z, r, t):
        embedded = self.embed(r)
        combined = torch.cat([z, embedded, t], dim=1)
        return self.decoder(combined)
    
    def forward(self, x, r, t):
        mu, logvar = self.encode(x, r, t)
        z = self.reparameterize(mu, logvar)
        return self.decode(z, r, t), mu, logvar


In [18]:

# Training Function
def train_vae(df, num_epochs=50, batch_size=256):
    df = prepare_vae_data(df)
    dataset = VAEDataset(df)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    num_respondents = len(df['respondent_id'].cat.categories)
    model = VAE(num_respondents)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for loads, respondents, temporal in loader:
            optimizer.zero_grad()
            recon, mu, logvar = model(loads, respondents, temporal)
            
            # Reconstruction loss + KL divergence
            recon_loss = nn.functional.mse_loss(recon, loads, reduction='sum')
            kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            
            loss = recon_loss + kl_div
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        print(f'Epoch {epoch+1}, Loss: {train_loss/len(dataset):.4f}')
    
    return model


In [19]:
vae_model = train_vae(df)

  df[col] = df.groupby('respondent_id')[col].transform(


Epoch 1, Loss: 4.7170
Epoch 2, Loss: 3.6255
Epoch 3, Loss: 3.3675
Epoch 4, Loss: 3.2561
Epoch 5, Loss: 3.1849
Epoch 6, Loss: 3.1356
Epoch 7, Loss: 3.0983
Epoch 8, Loss: 3.0736
Epoch 9, Loss: 3.0457
Epoch 10, Loss: 3.0234
Epoch 11, Loss: 3.0053
Epoch 12, Loss: 2.9923
Epoch 13, Loss: 2.9792
Epoch 14, Loss: 2.9624
Epoch 15, Loss: 2.9532
Epoch 16, Loss: 2.9421
Epoch 17, Loss: 2.9334
Epoch 18, Loss: 2.9248
Epoch 19, Loss: 2.9152
Epoch 20, Loss: 2.9031
Epoch 21, Loss: 2.8998
Epoch 22, Loss: 2.8911
Epoch 23, Loss: 2.8857
Epoch 24, Loss: 2.8797
Epoch 25, Loss: 2.8743
Epoch 26, Loss: 2.8685
Epoch 27, Loss: 2.8596
Epoch 28, Loss: 2.8607
Epoch 29, Loss: 2.8516
Epoch 30, Loss: 2.8493
Epoch 31, Loss: 2.8450
Epoch 32, Loss: 2.8404
Epoch 33, Loss: 2.8380
Epoch 34, Loss: 2.8347
Epoch 35, Loss: 2.8297
Epoch 36, Loss: 2.8265
Epoch 37, Loss: 2.8231
Epoch 38, Loss: 2.8222
Epoch 39, Loss: 2.8168
Epoch 40, Loss: 2.8104
Epoch 41, Loss: 2.8113
Epoch 42, Loss: 2.8093
Epoch 43, Loss: 2.8089
Epoch 44, Loss: 2.80

In [20]:
torch.save(vae_model.state_dict(), 'load_vae_model.pth')