In [None]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from torch.utils.tensorboard import SummaryWriter
import numpy as np

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def pcg3dFloat(v):
    """Hash integer triples (PCG3D-style mixing) and scale the result to float32.

    Args:
        v: Integer array whose last axis holds the three lanes to mix; the
            notebook passes a uint32 array, so all arithmetic wraps modulo
            2**32. (Other dtypes are not handled specially.)

    Returns:
        float32 array of the hashed values multiplied by 2**-32. For an
        ndarray input the caller's array is not modified, because the first
        step allocates a new array.
    """
    def _cross_mix(lanes):
        # Each lane absorbs the product of the other two. The order matters:
        # later lanes read the already-updated earlier lanes.
        lanes[..., 0] += lanes[..., 1] * lanes[..., 2]
        lanes[..., 1] += lanes[..., 2] * lanes[..., 0]
        lanes[..., 2] += lanes[..., 0] * lanes[..., 1]

    # Linear congruential step; produces a fresh array to work on in place.
    state = v * np.uint32(1664525) + np.uint32(1013904223)

    _cross_mix(state)
    # Fold the high 16 bits into the low 16 bits between the two mixing rounds.
    state ^= state >> np.uint32(16)
    _cross_mix(state)

    to_unit_interval = 1.0 / 4294967296.0
    return (state * to_unit_interval).astype(np.float32)

def normalise_linear(arr):
    """Rescale ``arr`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        arr: Numeric array (anything ``np.min`` / ``np.max`` accept).

    Returns:
        ``(arr - min) / (max - min)`` computed element-wise. When every
        element is identical (max == min) the result is all zeros rather
        than the NaNs a 0/0 division would produce.

    Raises:
        ValueError: Propagated from ``np.min`` when ``arr`` is empty.
    """
    lowest = np.min(arr)
    span = np.max(arr) - lowest
    if span == 0:
        # Constant input: the numerator is zero everywhere, so dividing by 1
        # gives zeros and goes through the same true-division as the normal path.
        span = 1
    return (arr - lowest) / span

In [None]:
class MLP(nn.Module):
    """Fully connected network: LeakyReLU hidden layers and a sigmoid output.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
        hidden_sizes: Widths of the hidden linear layers, in order. The
            default reproduces the original fixed architecture
            (256-512-1024-512-256-128).
        negative_slope: Slope of every LeakyReLU for negative inputs.

    The layers are stored in ``self.model`` (an ``nn.Sequential``) in the
    order Linear, LeakyReLU, ..., Linear, Sigmoid, so with the default
    arguments the parameter names match the original hard-coded layout.
    """

    def __init__(self, input_size, output_size,
                 hidden_sizes=(256, 512, 1024, 512, 256, 128),
                 negative_slope=0.01):
        super().__init__()
        layers = []
        in_features = input_size
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.LeakyReLU(negative_slope))
            in_features = width
        # Output head: the sigmoid squashes every output into (0, 1).
        layers.append(nn.Linear(in_features, output_size))
        layers.append(nn.Sigmoid())
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.model(x)

In [None]:
# Generate raw data
# Seed NumPy and PyTorch so a fresh "Restart & Run All" draws the same samples,
# the same initial weights and the same shuffling order.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

n = 1_000_000
inputs = np.random.randint(0, 4294967296, size=(n,3), dtype=np.uint32)
# Network inputs: the raw integers rescaled to [0, 1] using the sample's own min/max, as float32
inputs_normalised = torch.from_numpy(normalise_linear(inputs).astype(np.float32)).to(device)
# Regression targets: the hash of every input triple
outputs = torch.from_numpy(pcg3dFloat(inputs)).to(device)

# Batch data for training
dataset = TensorDataset(inputs_normalised, outputs)
sampler = RandomSampler(dataset)
# My CPU has 10 cores. Checked by sysctl -n hw.ncpu.
# The dataset tensors already live on `device`. PyTorch advises against handing
# CUDA tensors to DataLoader worker processes, so workers are only used when
# the data is on the CPU.
num_workers = 0 if device.type == 'cuda' else 5
dataLoader = DataLoader(dataset, batch_size=32768, sampler=sampler, num_workers=num_workers)
# Define model, loss function and optimizer
model = MLP(3, 3).to(device)
mse = nn.MSELoss()
mae = nn.L1Loss()
LOSS = ['MAE', 'MSE'][0]
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.1 after every 1000 scheduler steps
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

In [None]:
# TensorBoard writer (created with its default arguments) and the loss selected by LOSS
writer = SummaryWriter()
if LOSS == 'MAE':
    loss_fn = mae
else:
    # Any value other than 'MAE' falls back to mean squared error
    loss_fn = mse

In [None]:
# Training loop
epochs = 4000
global_step = 0  # Integer batch counter, usable as the step for batch-level logging
num_batches = len(dataLoader)  # Constant across epochs, so computed once

try:
    for epoch in range(epochs):
        epoch_loss = 0.0

        for batch_inputs, batch_outputs in dataLoader:
            # Named `predictions` so the global `outputs` target tensor is not overwritten
            predictions = model(batch_inputs)
            loss = loss_fn(predictions, batch_outputs)

            # Accumulate loss for averaging later
            epoch_loss += loss.item()

            # TensorBoard logging for each batch
            # writer.add_scalar(f'{LOSS} Loss/train', loss.item(), global_step)
            global_step += 1

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Logging the average loss per epoch
        avg_epoch_loss = epoch_loss / num_batches
        writer.add_scalar(f'{LOSS} Loss/epoch', avg_epoch_loss, epoch)

        # Learning rate scheduling (stepped once per epoch)
        scheduler.step()
        # scheduler.step(avg_epoch_loss)

        print(f'Epoch {epoch+1}/{epochs}, Avg Epoch Loss: {avg_epoch_loss:,}')
finally:
    # Flush and close the writer even when training is interrupted (e.g. the
    # kernel is stopped mid-run), so the scalars logged so far are written out.
    writer.flush()
    writer.close()

In [None]:
# Display the reference hash values for the generated inputs (the same
# computation that produced the training targets); presumably shown so they
# can be compared by eye with the model's predictions.
pcg3dFloat(inputs)

In [None]:
# Predict on the full input set. Gradients are not needed here, so autograd is
# disabled: otherwise the forward pass would keep the intermediate activations
# of every layer for all samples alive for a backward pass that never happens.
with torch.no_grad():
    predicted_hashes = model(inputs_normalised).cpu().numpy()
# Bare last expression so the notebook displays the array
predicted_hashes