In [5]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F  # Add this import for loss functions
import time

from VRAE import TimeGAN

In [6]:
import numpy as np

def random_generator(batch_size, z_dim, T_mb, max_seq_len):
    Z_mb = []
    for i in range(batch_size):
        # Inizializza un array di zeri con dimensione [max_seq_len, z_dim]
        temp = np.zeros([max_seq_len, z_dim])
        # Genera rumore per la parte attiva della sequenza
        temp_Z = np.random.uniform(0., 1, [T_mb[i].item() if isinstance(T_mb[i], torch.Tensor) else T_mb[i], z_dim])
        # Inserisci il rumore nel posto giusto
        temp[:T_mb[i], :] = temp_Z
        Z_mb.append(temp)
    # Converte la lista in un array NumPy e poi in un tensore PyTorch
    Z_mb = torch.tensor(np.array(Z_mb), dtype=torch.float32)
    return Z_mb

In [7]:
def weights_init(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.GRU) or isinstance(m, nn.LSTM) or isinstance(m, nn.RNN):
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)
    elif isinstance(m, nn.Conv1d):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [9]:
random_seed = 42
gen_lr = 0.001
dis_lr = 0.001
embedder_lr = 0.001
supervise_lr = 0.001

EPOCHS = 100
BATCH_SIZE = 64
LATENT_DIM = 100
gamma = 0.1
seq_len = 50
input_dim = 5

# Set random seed for reproducibility
torch.manual_seed(random_seed)

<torch._C.Generator at 0x122e8f0d0>

In [10]:
# Initialize the model
model = TimeGAN(
    input_dim=input_dim,       # input dimension
    hidden_dim=100,            # embedding dimension
    num_layers=1,              # number of layers
    cell_type='GRU'
).to(device)

In [11]:
# Define optimizers (one per parameter group)
optimizer_E = optim.Adam(
    list(model.embedder.parameters()) + list(model.recovery.parameters()),
    lr=embedder_lr
)
optimizer_G = optim.Adam(
    list(model.generator.parameters()) + list(model.supervisor.parameters()),
    lr=gen_lr
)
optimizer_D = optim.Adam(model.discriminator.parameters(), lr=dis_lr)


In [12]:
# Define loss functions
mse_loss = nn.MSELoss()
bce_loss = nn.BCELoss()

In [13]:
import torch
# Load the trajectory dataset
trajectory_dataset = torch.load("./VRAE/trajectory_dataset.pt")

# Print basic information about the dataset
print(f"Type of loaded file: {type(trajectory_dataset)}")



# If it's a dictionary, print the keys
if isinstance(trajectory_dataset, dict):
    print(f"Keys: {trajectory_dataset.keys()}")
    # Print shapes of items
    for key, value in trajectory_dataset.items():
        if isinstance(value, torch.Tensor):
            print(f"{key} shape: {value.shape}")
        elif hasattr(value, '__len__'):
            print(f"{key} length: {len(value)}")
        else:
            print(f"{key} type: {type(value)}")
# If it's a tensor, print its shape
elif isinstance(trajectory_dataset, torch.Tensor):
    print(f"Tensor shape: {trajectory_dataset.shape}")
# If it's another container type, print its length
elif hasattr(trajectory_dataset, '__len__'):
    print(f"Dataset length: {len(trajectory_dataset)}")

max_seq_len = trajectory_dataset['max_length']

dataset = TensorDataset(trajectory_dataset['data'], trajectory_dataset['lengths'])

train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

for i, (real_samples, seq_lens) in enumerate(train_loader):
    print(f"Batch {i}:")
    print(f"Real samples shape: {real_samples.shape}")
    print(f"Sequence lengths: {seq_lens}") 

Type of loaded file: <class 'dict'>
Keys: dict_keys(['data', 'lengths', 'max_length'])
data shape: torch.Size([2175, 3110, 5])
lengths shape: torch.Size([2175])
max_length type: <class 'int'>
Batch 0:
Real samples shape: torch.Size([64, 3110, 5])
Sequence lengths: tensor([1134,  866, 1815, 1170, 1805, 1058, 1739, 1953, 2112, 2425, 1748, 1353,
        2041, 1874, 2002, 1838, 1235, 2123, 2133, 1572, 1844, 2322, 1340, 2021,
        1899, 1073, 1251, 1478, 2185, 2245, 1146, 2080, 1492,  737, 2140, 1444,
        1212,  756, 1967, 1109, 1848, 1946, 1139, 1100, 1969, 1939, 1074, 1415,
        1446, 1995, 1694, 1869, 1403, 2521, 1255, 1410, 1107, 1752, 2091, 1470,
        1156, 1860, 2535, 1129])
Batch 1:
Real samples shape: torch.Size([64, 3110, 5])
Sequence lengths: tensor([1892, 1665,  788, 1879, 1812, 2438, 1778, 1196, 1955, 2114, 1829, 1048,
        1759, 1755, 2458, 1740, 1493, 1829, 1845, 1915, 1965, 1478, 1839, 2130,
        1894, 1191,  750, 2074, 1809, 1086, 1996, 2223, 1773, 2712, 1

Generator module

In [14]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class Generator(nn.Module):
    def __init__(self, hidden_dim, num_layers, max_seq = 50, cell_type = 'GRU'):
        super(Generator,self).__init__()
        self.max_seq = max_seq
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        else:
            print('Invalid cell type, using LSTM')
            self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers, batch_first=True)
        
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.Sigmoid())
        
    def forward(self, x, T):
        
        packed = pack_padded_sequence(x, T, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(packed)
        output, _ = pad_packed_sequence(output, batch_first=True,total_length=self.max_seq)
        E = self.fc(output)

        return E # return the generated data embeddings
    

hidden_dim_generator = 100
num_layers_generator = 1
cell_type_generator = 'GRU'
generator = Generator(hidden_dim = hidden_dim_generator,
                      num_layers = num_layers_generator,
                      max_seq = max_seq_len,
                      cell_type = cell_type_generator)
generator.apply(weights_init)
generator.to(device)

Generator(
  (rnn): GRU(100, 100, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Sigmoid()
  )
)

Discriminator

In [15]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Discriminator(nn.Module):
    def __init__(self, num_hidden, num_layers, max_seq, cell_type = 'GRU'):
        super(Discriminator,self).__init__()
        self.max_seq = max_seq
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(num_hidden, num_hidden, num_layers, batch_first=True, bidirectional=True)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(num_hidden, num_hidden, num_layers, batch_first=True, bidirectional=True)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(num_hidden, num_hidden, num_layers, batch_first=True, bidirectional=True)
        else:
            print('Invalid cell type, using default GRU')
            self.rnn = nn.GRU(num_hidden, num_hidden, num_layers, batch_first=True, bidirectional=True)
        
        # Output size is doubled due to bidirectional RNN
        
        self.fc = nn.Sequential(
            nn.Linear(num_hidden*2, 1),
            nn.Sigmoid())
    
    def forward(self, x, T):
        
        packed = pack_padded_sequence(x, T, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(packed)
        output, _ = pad_packed_sequence(output, batch_first=True, total_length=self.max_seq)
        Y_hat = self.fc(output)

        return Y_hat
    
num_layers_discriminator = 1
hidden_dim_discriminator = 100
cell_type_discriminator = 'GRU'

discriminator = Discriminator(num_hidden = hidden_dim_discriminator,
                              num_layers = num_layers_discriminator,
                                max_seq = max_seq_len,
                              cell_type = cell_type_discriminator)
discriminator.apply(weights_init)
discriminator.to(device)

Discriminator(
  (rnn): GRU(100, 100, batch_first=True, bidirectional=True)
  (fc): Sequential(
    (0): Linear(in_features=200, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

Supervisor

In [16]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Supervisor(nn.Module):

    """ The supervisor network for the timeGAN model, it takes as input the embeddings 
    generated by the generator and returns a new set of embeddings that better follows
    the temporal dynamics. """

    def __init__(self, num_hidden, num_layers,max_seq,cell_type = 'GRU'):
        super(Supervisor,self).__init__()
        self.max_seq = max_seq
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(num_hidden, num_hidden, num_layers, batch_first=True)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(num_hidden, num_hidden, num_layers, batch_first=True)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(num_hidden, num_hidden, num_layers, batch_first=True)
        else:
            print('Invalid cell type, using LSTM')
            self.rnn = nn.GRU(num_hidden, num_hidden, num_layers, batch_first=True)

        self.fc = nn.Sequential(
            nn.Linear(num_hidden, num_hidden),
            nn.Sigmoid())
        
    def forward(self, x, T):
            """
            Parametri:
              x: (batch, seq_len, num_hidden)
              T : (Sequence Lengths)
            Restituisce:
              output: (batch, seq_len, num_hidden)
              hidden: (num_layers, batch, num_hidden)
            """
            packed = pack_padded_sequence(x, T, batch_first=True, enforce_sorted=False)
            output, hidden = self.rnn(packed)
            output, _ = pad_packed_sequence(output, batch_first=True, total_length=self.max_seq)
            S = self.fc(output)
            
            return S # retrn the refined embeddings
        

num_layers_supervisor = 1
hidden_dim_supervisor = 100
cell_type_supervisor = 'GRU'

supervisor = Supervisor(num_hidden = hidden_dim_supervisor,
                        num_layers = num_layers_supervisor,
                        max_seq = max_seq_len,
                        cell_type = cell_type_supervisor)
supervisor.apply(weights_init)
supervisor.to(device)

Supervisor(
  (rnn): GRU(100, 100, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Sigmoid()
  )
)

Embedder

In [17]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class Embedder(nn.Module):
    def __init__(self, inpt_dim, hidden_dim, num_layers,max_seq, cell_type = 'GRU'):
        super(Embedder,self).__init__()
        self.max_seq = max_seq
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(inpt_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(inpt_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(inpt_dim, hidden_dim, num_layers, batch_first=True)
        else:
            print('Invalid cell type, using LSTM')
            self.rnn = nn.GRU(inpt_dim, hidden_dim, num_layers, batch_first=True)
        
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.Sigmoid())
    def forward(self, x, T):
        """
        Parametri:
          x: (batch, seq_len, inpt_dim)
          T : (Sequence Lengths)
        Restituisce:
          output: (batch, seq_len, hidden_dim)
          hidden: (num_layers, batch, hidden_dim)
        """
        packed = pack_padded_sequence(x, T, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(packed)
        output, _ = pad_packed_sequence(output, batch_first=True, total_length=self.max_seq)
        H = self.fc(output)
        
        return H # retrn the embeddings

num_layers_embedder = 1
hidden_dim_embedder = 100
cell_type_embedder = 'GRU'

embedder = Embedder(inpt_dim = input_dim,
                    hidden_dim = hidden_dim_embedder,
                    num_layers = num_layers_embedder,
                    max_seq = max_seq_len,
                    cell_type = cell_type_embedder)
embedder.apply(weights_init)
embedder.to(device)

Embedder(
  (rnn): GRU(5, 100, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Sigmoid()
  )
)

Recovery

In [18]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Recovery(nn.Module):
    def __init__(self, hidden_dim, feature_dim, num_layers,max_seq, cell_type = 'GRU'):
        super(Recovery,self).__init__()
        self.max_seq = max_seq
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers, batch_first=True)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        else:
            print('Invalid cell type, using LSTM')
            self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers, batch_first=True)
        
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, feature_dim),
            nn.Sigmoid())
        
    def forward(self, x, T):
        
        packed = pack_padded_sequence(x, T, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(packed)
        output, _ = pad_packed_sequence(output, batch_first=True,total_length=self.max_seq)
        X_tilde = self.fc(output)

        return X_tilde # retrn the original data reconstruction
    
hidden_dim_recovery = 100
num_layers_recovery = 1
cell_type_recovery = 'GRU'

recovery = Recovery(hidden_dim = hidden_dim_recovery,
                    feature_dim = input_dim,
                    num_layers = num_layers_recovery,
                    max_seq = max_seq_len,
                    cell_type = cell_type_recovery)
recovery.apply(weights_init)
recovery.to(device)

Recovery(
  (rnn): GRU(100, 100, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=100, out_features=5, bias=True)
    (1): Sigmoid()
  )
)

Adesso la fase di training

In [None]:
emb_costs = []
emb_rec_costs = []
PRE_TRAIN_EPOCHS = 100

for epoch in range(PRE_TRAIN_EPOCHS):
    # pre-training of the embedder and recovery
    for i, (real_samples, seq_lens) in enumerate(train_loader): 
        
        embedder.train()
        recovery.train()

        real_samples = real_samples.to(device) # Send data to GPU
        seq_lens = seq_lens.to(device) # Send sequence lengths to GPU

        real_labels = torch.ones(real_samples.size(0), 1).to(device)
        fake_labels = torch.zeros(real_samples.size(0), 1).to(device)

        # Train the embedder
        optimizer_E.zero_grad() # Reset gradients
        
        # create latent representation
        H = embedder(real_samples, seq_lens) 
        # refine latent representation
        X_tilde = recovery(H,seq_lens)
        
        # Calculate embedder loss
        E_loss_T0 = mse_loss(X_tilde, real_samples) # reconstruction loss
        E_loss0 = 10*torch.sqrt(E_loss_T0)
        E_loss0.backward()

        # Update embedder and recovery parameters
        optimizer_E.step()
        
        emb_costs.append(E_loss0.item())

        if not i % 100: 
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Reconstruction Loss: %.4f' 
                    %(epoch+1, EPOCHS, i, 
                        len(train_loader), E_loss0))

    # supervised training of the embedder

Embedding training using the supervisor to have insight in the temporal dynamics

In [None]:

for epoch in range(PRE_TRAIN_EPOCHS):
    for i, (real_samples, seq_lens) in enumerate(train_loader): 
        
        real_samples = real_samples.to(device)
        seq_lens = seq_lens.to(device)

        embedder.train()
        recovery.train()

        # create latent representation
        H = embedder(real_samples,seq_lens)
        # refine latent representation
        H_hat_Supervise = supervisor(H,seq_lens).detach()
        # reconstruct data from latent representation
        X_tilde = recovery(H,seq_lens) 
        
        # Calculate embedder loss
        # supervised loss
        G_loss_S = mse_loss(H[:, 1:, :], H_hat_Supervise[:, :-1, :])
        # reconstruction loss
        E_loss_T0 = mse_loss(X_tilde, real_samples)

        E_loss0 = 10*torch.sqrt(E_loss_T0)
        E_loss = E_loss0 + 0.1*G_loss_S # total embedder loss

        # Backpropagate and update weights
        E_loss.backward()

        optimizer_E.step()
        emb_rec_costs.append(E_loss.item())

        if not i % 100: 
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Reconstruction Loss: %.4f' 
                    %(epoch+1, EPOCHS, i, 
                        len(train_loader), E_loss))


Epoch: 001/100 | Batch 000/002 | Reconstruction Loss: 8.7994
Epoch: 002/100 | Batch 000/002 | Reconstruction Loss: 8.9522
Epoch: 003/100 | Batch 000/002 | Reconstruction Loss: 8.8981
Epoch: 004/100 | Batch 000/002 | Reconstruction Loss: 8.8690
Epoch: 005/100 | Batch 000/002 | Reconstruction Loss: 8.8483
Epoch: 006/100 | Batch 000/002 | Reconstruction Loss: 8.8708
Epoch: 007/100 | Batch 000/002 | Reconstruction Loss: 8.8509
Epoch: 008/100 | Batch 000/002 | Reconstruction Loss: 8.8471
Epoch: 009/100 | Batch 000/002 | Reconstruction Loss: 8.9242
Epoch: 010/100 | Batch 000/002 | Reconstruction Loss: 8.9138
Epoch: 011/100 | Batch 000/002 | Reconstruction Loss: 8.9217
Epoch: 012/100 | Batch 000/002 | Reconstruction Loss: 8.9187
Epoch: 013/100 | Batch 000/002 | Reconstruction Loss: 8.8943
Epoch: 014/100 | Batch 000/002 | Reconstruction Loss: 8.9210
Epoch: 015/100 | Batch 000/002 | Reconstruction Loss: 8.8206
Epoch: 016/100 | Batch 000/002 | Reconstruction Loss: 8.9198
Epoch: 017/100 | Batch 0

KeyboardInterrupt: 

Joint Training of the supervisor

In [None]:
supervisor_costs = []

for epoch in range(EPOCHS):
    # Step 2: Generator, supervised only
    for i, (real_samples, seq_lens) in enumerate(train_loader):  # loop over batches

        real_samples = real_samples.to(device)
        seq_lens = seq_lens.to(device)

        generator.train()
        supervisor.train()

        optimizer_G.zero_grad()

        # generate real data embeddings
        H = embedder(real_samples,seq_lens).detach()
        H_hat_Supervise = supervisor(H, seq_lens)

        G_loss_S = mse_loss(H[:, 1:, :], H_hat_Supervise[:, :-1, :]) # supervised loss
        G_loss_S.backward()
        
        optimizer_G.step()

        supervisor_costs.append(G_loss_S.item())

        if not i % 100: 
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Supervisor Loss: %.4f' 
                    %(epoch+1, EPOCHS, i, 
                        len(train_loader), G_loss_S))



Epoch: 001/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 002/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 003/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 004/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 005/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 006/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 007/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 008/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 009/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 010/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 011/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 012/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 013/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 014/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 015/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 016/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 017/100 | Batch 000/002 | Supervisor Loss: 0.0072
Epoch: 018/100 | Batch 000/002 

Training Congiunto dell'architettura

In [None]:
generator_losses = []
discriminator_losses = []
emb_costs = []
supervisor_costs = []
for epoch in range(EPOCHS):
    for i,(real_samples, seq_lens) in enumerate(train_loader):
        
        # discriminator network
        discriminator.train()
        # generator network
        generator.train()
        supervisor.train()
        # embedding network 
        embedder.train()
        recovery.train()

        
        real_samples = real_samples.to(device)
        seq_lens = seq_lens.to(device)
        # train the generator twice as much as the discriminator
        for kk in range(2): 
            # Step 3.1: Generator training
            optimizer_G.zero_grad()

            
            # sample random noise
            Z_mb = random_generator(len(real_samples), hidden_dim_generator, seq_lens, seq_len)
            H = embedder(real_samples,seq_lens).detach() 
            # generate fake data embeddings
            E_hat = generator(Z_mb,seq_lens)
            # refine fake data embeddings
            H_hat = supervisor(E_hat,seq_lens)
            # reconstruct fake data
            X_hat = recovery(H_hat,seq_lens).detach()
            # discriminator outputs
            Y_fake = discriminator(H_hat, seq_lens).detach() # 
            Y_real = discriminator(H, seq_lens).detach()     
            Y_fake_e = discriminator(E_hat, seq_lens).detach()
            
            # unsupervised loss
            G_loss_U = bce_loss(torch.ones_like(Y_fake), Y_fake) # after supervisor
            G_loss_U_e = bce_loss(torch.ones_like(Y_fake_e), Y_fake_e) # before supervisor
            # supervised loss
            # H_hat = supervisor(generator(Z_mb,seq_lens),seq_lens)
            G_loss_S = mse_loss(H[:, 1:, :], H_hat[:, :-1, :]) # supervised loss
            
            G_loss_V1 = torch.mean(torch.abs(torch.sqrt(torch.var(real_samples, dim=0) + 1e-6) - torch.sqrt(torch.var(X_hat, dim=0) + 1e-6)))
            G_loss_V2 = torch.mean(torch.abs(torch.mean(real_samples, dim=0) - torch.mean(X_hat, dim=0)))
            G_loss_V = G_loss_V1 + G_loss_V2
            
            # Total generator loss
            G_loss = G_loss_U + G_loss_U_e + 100*torch.sqrt(G_loss_S) + 100*G_loss_V 
            G_loss.backward()
            optimizer_G.step()
            generator_losses.append(G_loss.item())
            supervisor_costs.append(G_loss_S.item())


            # step 3.2: embedder and recovery training
            optimizer_E.zero_grad()
            
            X_tilde = recovery(H, seq_lens) 
            # reconstrucion loss
            E_loss_T0 = mse_loss(X_tilde, real_samples)
            E_loss_T0.backward()
            optimizer_E.step()
            emb_costs.append(E_loss_T0.item())
        
        # Step 4: Discriminator training
        optimizer_D.zero_grad()  # Zero discriminator gradients
        
        # Get discriminator outputs for real data

        H = embedder(real_samples, seq_lens).detach()  # Embed real data

        E_hat = generator(Z_mb, seq_lens).detach()  # Generate fake data
        H_hat = supervisor(E_hat, seq_lens).detach()  # Supervise fake data
        X_hat = recovery(H_hat, seq_lens).detach()  # Recover fake data

        Y_fake = discriminator(H_hat, seq_lens)  # Prediction on supervised fake data
        Y_fake_e = discriminator(E_hat, seq_lens)  # Prediction on directly generated data
        Y_real = discriminator(H, seq_lens)  # Discriminator prediction on real data
        
        # Calculate discriminator loss - wants to predict real=1, fake=0
        D_loss_real = F.binary_cross_entropy(Y_real, torch.ones_like(Y_real))
    
        # For generated data, target is 0
        D_loss_fake = F.binary_cross_entropy(Y_fake, torch.zeros_like(Y_fake))
        D_loss_fake_e = F.binary_cross_entropy(Y_fake_e, torch.zeros_like(Y_fake_e))
        
        # Total loss: sum of components, with gamma balancing the loss_fake_e term
        D_loss = D_loss_real + D_loss_fake + gamma * D_loss_fake_e
    

        # Backpropagate and update discriminator weights
        D_loss.backward()
        
        optimizer_D.step()
        discriminator_losses.append(D_loss.item())

        if not i % 100:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Discriminator Loss: %.4f' 
                    %(epoch+1, EPOCHS, i, 
                        len(train_loader), D_loss))
            print ('Generator Loss: %.4f | Embedder Loss: %.4f | Supervisor Loss: %.4f'
                    %(G_loss, E_loss_T0, G_loss_S))

Epoch: 001/100 | Batch 000/002 | Discriminator Loss: 1.4667
Generator Loss: 191.1326 | Embedder Loss: 0.7717 | Supervisor Loss: 0.0074
Epoch: 002/100 | Batch 000/002 | Discriminator Loss: 1.4669
Generator Loss: 192.8333 | Embedder Loss: 0.7763 | Supervisor Loss: 0.0075
Epoch: 003/100 | Batch 000/002 | Discriminator Loss: 1.4671
Generator Loss: 191.8645 | Embedder Loss: 0.7674 | Supervisor Loss: 0.0074
Epoch: 004/100 | Batch 000/002 | Discriminator Loss: 1.4671
Generator Loss: 192.2735 | Embedder Loss: 0.7801 | Supervisor Loss: 0.0075
Epoch: 005/100 | Batch 000/002 | Discriminator Loss: 1.4670
Generator Loss: 192.1727 | Embedder Loss: 0.7781 | Supervisor Loss: 0.0074
Epoch: 006/100 | Batch 000/002 | Discriminator Loss: 1.4670
Generator Loss: 193.0482 | Embedder Loss: 0.7872 | Supervisor Loss: 0.0075
Epoch: 007/100 | Batch 000/002 | Discriminator Loss: 1.4671
Generator Loss: 191.8546 | Embedder Loss: 0.7764 | Supervisor Loss: 0.0074
Epoch: 008/100 | Batch 000/002 | Discriminator Loss: 1.

In [None]:
import os
import numpy as np
import json
from datetime import datetime

# Crea una cartella con timestamp per organizzare meglio i salvataggi
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = f"./saved_models/timegan_{timestamp}"
os.makedirs(save_dir, exist_ok=True)

# Salva gli iperparametri per poter riprodurre il modello
hyperparams = {
    "hidden_dim": 100,  # Sostituisci con i tuoi valori effettivi
    "num_layers": 1,
    "cell_type": "GRU",
    "input_dim": 5,
    "latent_dim": 100,
    "gamma": 0.1,
    "batch_size": 64,
    "pre_train_epochs": 100,
    "train_epochs": 50  # Numero di epoche effettivamente completate
}

# Salva gli iperparametri
with open(f"{save_dir}/hyperparameters.json", "w") as f:
    json.dump(hyperparams, f, indent=4)

# Salva il modello completo
torch.save(model.state_dict(), f"{save_dir}/timegan_full_model.pt")

# Salva i componenti individuali
torch.save(generator.state_dict(), f"{save_dir}/generator.pt")
torch.save(discriminator.state_dict(), f"{save_dir}/discriminator.pt")
torch.save(supervisor.state_dict(), f"{save_dir}/supervisor.pt")
torch.save(embedder.state_dict(), f"{save_dir}/embedder.pt")
torch.save(recovery.state_dict(), f"{save_dir}/recovery.pt")

# Salva anche gli stati degli ottimizzatori (utile per riprendere l'addestramento)
optimizer_states = {
    "optimizer_G": optimizer_G.state_dict(),
    "optimizer_D": optimizer_D.state_dict(),
    "optimizer_E": optimizer_E.state_dict(),
}
torch.save(optimizer_states, f"{save_dir}/optimizer_states.pt")

# Salva la storia dell'addestramento
training_history = {
    "generator_losses": generator_losses,
    "discriminator_losses": discriminator_losses,
    "embedding_losses": emb_costs,
    "supervisor_losses": supervisor_costs
}
torch.save(training_history, f"{save_dir}/training_history.pt")
# Salva anche come numpy per facilità di analisi
np.save(f"{save_dir}/generator_losses.npy", np.array(generator_losses))
np.save(f"{save_dir}/discriminator_losses.npy", np.array(discriminator_losses))
np.save(f"{save_dir}/embedding_losses.npy", np.array(emb_costs))
np.save(f"{save_dir}/supervisor_losses.npy", np.array(supervisor_costs))

# Salva un file README con informazioni sul modello
with open(f"{save_dir}/README.md", "w") as f:
    f.write(f"# TimeGAN Model saved on {timestamp}\n\n")
    f.write("## Training Information\n")
    f.write(f"- Pre-training epochs: {hyperparams['pre_train_epochs']}\n")
    f.write(f"- Training epochs: {hyperparams['train_epochs']}\n")
    f.write(f"- Final generator loss: {generator_losses[-1] if generator_losses else 'N/A'}\n")
    f.write(f"- Final discriminator loss: {discriminator_losses[-1] if discriminator_losses else 'N/A'}\n")

print(f"Model and training history saved to {save_dir}")
print(f"Use 'model = TimeGAN(**hyperparams); model.load_state_dict(torch.load(\"{save_dir}/timegan_full_model.pt\"))' to load")

# Save individual components for flexibility
torch.save(generator.state_dict(), f"{save_dir}/generator.pt")
torch.save(discriminator.state_dict(), f"{save_dir}/discriminator.pt")
torch.save(supervisor.state_dict(), f"{save_dir}/supervisor.pt")
torch.save(embedder.state_dict(), f"{save_dir}/embedder.pt")
torch.save(recovery.state_dict(), f"{save_dir}/recovery.pt")

# Save training history
np.save(f"{save_dir}/generator_losses.npy", np.array(generator_losses))
np.save(f"{save_dir}/discriminator_losses.npy", np.array(discriminator_losses))
np.save(f"{save_dir}/embedding_losses.npy", np.array(emb_costs))
np.save(f"{save_dir}/supervisor_losses.npy", np.array(supervisor_costs))

print(f"Model and training history saved to {save_dir}")