In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset

In [2]:
# Environment Setup
# ----------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)

# ----------------------
# Preprocessing
# ----------------------
# Generate random data for faster performance
num_samples, input_dim = 1000, 100
real_data = torch.randn(num_samples, input_dim)

dataset = TensorDataset(real_data)

# ----------------------
# Train-Test Split
# ----------------------
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [3]:
# GAN Base Model
# ----------------------
class Generator(nn.Module):
    def __init__(self, noise_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# Initialize models
noise_dim = 50
gen = Generator(noise_dim, input_dim).to(device)
disc = Discriminator(input_dim).to(device)

# Optimizers and Loss
criterion = nn.BCELoss()
gen_optimizer = optim.Adam(gen.parameters(), lr=0.0002)
disc_optimizer = optim.Adam(disc.parameters(), lr=0.0002)

In [4]:
# Train Base Model (GAN)
# ----------------------
def train_gan(epochs=10):
    for epoch in range(epochs):
        for real_batch, in train_loader:
            real_batch = real_batch.to(device)
            batch_size = real_batch.size(0)

            # Train Discriminator
            disc_optimizer.zero_grad()
            real_labels = torch.ones(batch_size, 1, device=device)
            fake_labels = torch.zeros(batch_size, 1, device=device)

            outputs = disc(real_batch)
            real_loss = criterion(outputs, real_labels)

            noise = torch.randn(batch_size, noise_dim, device=device)
            fake_data = gen(noise)
            outputs = disc(fake_data.detach())
            fake_loss = criterion(outputs, fake_labels)

            disc_loss = real_loss + fake_loss
            disc_loss.backward()
            disc_optimizer.step()

            # Train Generator
            gen_optimizer.zero_grad()
            outputs = disc(fake_data)
            gen_loss = criterion(outputs, real_labels)
            gen_loss.backward()
            gen_optimizer.step()

        print(f"Epoch [{epoch+1}/{epochs}] | D Loss: {disc_loss.item():.4f} | G Loss: {gen_loss.item():.4f}")

train_gan()

Epoch [1/10] | D Loss: 1.5105 | G Loss: 0.7729
Epoch [2/10] | D Loss: 1.4678 | G Loss: 0.7374
Epoch [3/10] | D Loss: 1.4779 | G Loss: 0.7183
Epoch [4/10] | D Loss: 1.4301 | G Loss: 0.7314
Epoch [5/10] | D Loss: 1.3795 | G Loss: 0.7321
Epoch [6/10] | D Loss: 1.3103 | G Loss: 0.7670
Epoch [7/10] | D Loss: 1.3025 | G Loss: 0.7813
Epoch [8/10] | D Loss: 1.2785 | G Loss: 0.7960
Epoch [9/10] | D Loss: 1.2695 | G Loss: 0.7772
Epoch [10/10] | D Loss: 1.2443 | G Loss: 0.7580


In [5]:
# Planning (Simulated Experience Using GAN-Generated Samples)
# ----------------------
def simulate_experience(samples=5):
    noise = torch.randn(samples, noise_dim, device=device)
    generated_samples = gen(noise)
    print("Simulated Samples:", generated_samples.cpu().detach().numpy())

simulate_experience()

# ----------------------
# Fine-Tune Model
# ----------------------
def fine_tune_model(epochs=5, new_lr=0.0001):
    for param_group in gen_optimizer.param_groups:
        param_group['lr'] = new_lr
    
    train_gan(epochs=epochs)

fine_tune_model()

# ----------------------
# Evaluate
# ----------------------
def evaluate_model():
    disc.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for real_batch, in test_loader:
            real_batch = real_batch.to(device)
            outputs = disc(real_batch)
            predictions = (outputs > 0.5).float()
            correct += (predictions == 1).sum().item()
            total += real_batch.size(0)
    
    print(f"Discriminator Accuracy on Real Data: {correct / total * 100:.2f}%")

evaluate_model()

# ----------------------
# Deploy Policy (Placeholder)
# ----------------------
def deploy_policy():
    torch.save(gen.state_dict(), "generator_model.pth")
    print("Generator model saved for deployment.")

deploy_policy()

Simulated Samples: [[-0.4624232   0.01267088 -0.19819123 -0.01729728  0.14963311 -0.06387186
  -0.00896272 -0.03760966 -0.04962032 -0.18005846  0.2499284  -0.02998625
  -0.06323873 -0.24105936 -0.15879817 -0.30380717 -0.01485939  0.15918578
  -0.2617735  -0.02947399 -0.18233891  0.14906351 -0.20814711 -0.01035118
   0.27262878  0.3734521  -0.06116987 -0.16782628  0.1630097  -0.10889757
  -0.3791046  -0.2545691  -0.11808609  0.16982019 -0.0085499   0.07322483
  -0.33104935  0.04626653  0.30111957  0.2651047   0.46522132  0.23152328
  -0.16454524  0.04745779 -0.05885737 -0.16711989  0.39737317 -0.13763535
  -0.21468101 -0.17249255 -0.15834104 -0.06591313 -0.30264142 -0.16576162
   0.194803    0.08971837 -0.04338685  0.2861731  -0.34678552  0.2688875
   0.14566807 -0.05023527  0.32818884  0.26884374 -0.30043936 -0.02828462
  -0.22224754  0.08930578 -0.28315994 -0.02926033  0.03947598  0.01411606
  -0.05035447  0.23700282  0.55431384 -0.4291202  -0.48415208 -0.06111975
   0.10061734  0.102