In [1]:
import argparse
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

from model_gae import GAE

def set_seed(seed):
    random.seed(seed)                         # Python random
    np.random.seed(seed)                      # NumPy
    torch.manual_seed(seed)                   # PyTorch CPU
    torch.cuda.manual_seed(seed)              # PyTorch GPU
    torch.cuda.manual_seed_all(seed)          # if multi-GPU

In [5]:
seed = 0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

set_seed(seed)

# --- IMPORT DATA ---

ukb_sasp = pd.read_csv("ukb/ukb_sasp_2.csv")

ukb_sample = ukb_sasp.iloc[:,7:46]
ukb_target = ukb_sasp.iloc[:,[6, 45]] # let target be age

# # Standardize telomere length data
# ukb_target = ukb_target.assign(
#     tl_std  = (ukb_target.iloc[:,0] - ukb_target.iloc[:,0].mean()) / ukb_target.iloc[:,0].std()
# )
# ukb_target = ukb_target.iloc[:,[1,2]]

# split sample data by labels
ukb_sample_train = ukb_sample[ukb_sample['label'] == 0].drop(columns=['label'])
ukb_sample_val = ukb_sample[ukb_sample['label'] == 1].drop(columns=['label'])
ukb_sample_test = ukb_sample[ukb_sample['label'] == 2].drop(columns=['label'])

# split target data by labels
ukb_target_train = ukb_target[ukb_target['label'] == 0].drop(columns=['label'])
ukb_target_val = ukb_target[ukb_target['label'] == 1].drop(columns=['label'])
ukb_target_test = ukb_target[ukb_target['label'] == 2].drop(columns=['label'])

# Convert to NumPy arrays
ukb_sample_train = ukb_sample_train.values.astype(np.float32)
ukb_sample_val = ukb_sample_val.values.astype(np.float32)
ukb_sample_test = ukb_sample_test.values.astype(np.float32)

# Convert to NumPy arrays
ukb_target_train = ukb_target_train.values.astype(np.float32)
ukb_target_val = ukb_target_val.values.astype(np.float32)
ukb_target_test = ukb_target_test.values.astype(np.float32)

ukb_train = TensorDataset(torch.tensor(ukb_sample_train).to(device),
                        torch.tensor(ukb_target_train).to(device))

ukb_val = TensorDataset(torch.tensor(ukb_sample_val).to(device),
                        torch.tensor(ukb_target_val).to(device))

ukb_test = TensorDataset(torch.tensor(ukb_sample_test).to(device),
                        torch.tensor(ukb_target_test).to(device))

# Load the data to dataloader
ukb_train_loader = DataLoader(ukb_train, batch_size=128, shuffle=True)
ukb_val_loader = DataLoader(ukb_val, batch_size=128, shuffle=False)
ukb_test_loader = DataLoader(ukb_test, batch_size=128, shuffle=False)


In [9]:

# --- NETWORK INSTANTIATE ---
alpha = 0.05

autoencoder = GAE(input_dim=38, latent_dim=6, code_dim=1).to(device)

optimizer = optim.Adam(
    autoencoder.parameters(),
    lr=1e-3,
    weight_decay=1e-5
)

def criterion(recon_x, x, latent, tl, alpha=0.5):
    MSE = nn.functional.mse_loss(recon_x, x, reduction='sum')
    GUD = nn.functional.mse_loss(latent, tl, reduction='sum')
    return alpha * MSE + (1-alpha) * GUD, alpha * MSE, (1-alpha) * GUD

# --- TRAIN LOOP ---

train_losses = []
test_losses = []

num_epochs = 100

for _ in range(num_epochs):

    autoencoder.train()
    running_loss = 0.0

    for samples, targets in ukb_train_loader:
        latents, recon_x  = autoencoder(samples)

        loss, loss_1, loss_2 = criterion(recon_x, samples, latents, targets, alpha=alpha)
        loss.backward()
        running_loss += loss.item()
        print(f"Recon Loss: {loss_1.item():.4f}")
        print(f"Guide Loss: {loss_2.item():.4f}")

        optimizer.step()
        optimizer.zero_grad()

    avg_train_loss = running_loss / len(ukb_train_loader)
    train_losses.append(avg_train_loss)

    autoencoder.eval()
    running_loss = 0.0

    with torch.no_grad():
        for samples, targets in ukb_val_loader:
            latents, recon_x = autoencoder(samples)

            loss, _, _ = criterion(recon_x, samples, latents, targets, alpha=alpha)
            running_loss += loss.item()

    avg_test_loss = running_loss / len(ukb_val_loader)
    test_losses.append(avg_test_loss)


Recon Loss: 134.9212
Guide Loss: 391646.0625
Recon Loss: 118.8239
Guide Loss: 396303.4375
Recon Loss: 118.5375
Guide Loss: 379292.0938
Recon Loss: 122.0661
Guide Loss: 393423.3438
Recon Loss: 117.9648
Guide Loss: 402476.9062
Recon Loss: 120.5901
Guide Loss: 392775.9688
Recon Loss: 127.9771
Guide Loss: 379690.1875
Recon Loss: 133.3992
Guide Loss: 396622.0938
Recon Loss: 123.8439
Guide Loss: 390672.3750
Recon Loss: 119.4005
Guide Loss: 383522.5312
Recon Loss: 121.3555
Guide Loss: 405478.8125
Recon Loss: 115.8566
Guide Loss: 381227.7500
Recon Loss: 139.3762
Guide Loss: 413048.4062
Recon Loss: 137.7206
Guide Loss: 405632.9062
Recon Loss: 129.1805
Guide Loss: 413222.9375
Recon Loss: 137.2054
Guide Loss: 395465.2188
Recon Loss: 114.4143
Guide Loss: 392765.9688
Recon Loss: 121.1051
Guide Loss: 401521.4062
Recon Loss: 128.8728
Guide Loss: 428616.3750
Recon Loss: 119.9692
Guide Loss: 407590.5000
Recon Loss: 112.2338
Guide Loss: 397888.3438
Recon Loss: 117.9483
Guide Loss: 409118.8750
Recon Loss

KeyboardInterrupt: 