In [1]:
from deepx.tasks.gan import (
    GANModelConfig,
    GANTaskConfig,
    GANDMConfig,
    GANTrainer,
)
from deepx.tasks.core import DataModuleConfig, TrainingConfig

In [2]:
# Architecture settings. The model summary in the training log reports this
# configuration as a DCGAN made of a Generator and a Discriminator.
model_cfg = GANModelConfig(
    model="dcgan",
    latent_dim=100,
    # Generator / discriminator base widths are kept equal here.
    base_dim_d=128,
    base_dim_g=128,
    negative_slope=0.1,  # presumably the LeakyReLU slope -- confirm in deepx
    dropout=0.4,
)

In [3]:
# Optimisation settings. Per the optimizer dump in the training log, the
# generator and the discriminator each get an Adam optimizer with
# lr=0.0001 and betas=(0.5, 0.999), and the loss module is BCELoss.
task_cfg = GANTaskConfig(
    optimizer="adam",
    lr=1e-4,
    beta1=0.5,
    beta2=0.999,
    scheduler=None,
    loss_fn="bce",
    # presumably the target value used for "real" labels -- confirm in deepx
    one_side_label_smoothing=0.9,
)

In [4]:
# Data pipeline settings for the "lfw" datamodule.
# NOTE(review): data_dir is an absolute path and the logged config shows
# download: False, so the dataset presumably must already exist at that
# location -- confirm before running on another machine.
dm_cfg = GANDMConfig(
    dm="lfw",
    data_dir="/workspace/experiments/data",
    train_ratio=0.8,
    batch_size=16,
    num_workers=4,
)

In [5]:
# Run-level settings, grouped by concern.
train_cfg = TrainingConfig(
    # --- run length / resume ---
    # NOTE(review): patience (5) is larger than epochs (2), so early stopping
    # presumably cannot trigger in this run -- confirm this is intended.
    epochs=2,
    patience=5,
    ckpt_path=None,
    # --- monitored metric ---
    monitor_metric="val_loss_g",
    monitor_mode="min",
    # --- hardware (the log shows a single CUDA device being used) ---
    accelerator="auto",
    devices=1,
    benchmark=True,
    # --- diagnostics ---
    debug=False,
    max_depth=1,  # presumably the model-summary depth -- confirm in deepx
    # --- experiment tracking (checkpoints are logged under log_dir) ---
    logging=True,
    logger="mlflow",
    root_dir="/workspace",
    log_dir="/workspace/experiments/mlruns",
)

In [6]:
# Bundle the four configs into the trainer. Keywords are listed in the same
# order the configs are echoed in the training log.
trainer = GANTrainer(
    dm_cfg=dm_cfg,
    model_cfg=model_cfg,
    task_cfg=task_cfg,
    train_cfg=train_cfg,
)

In [7]:
# Launch the run. Per the captured log, this fits until max_epochs is reached,
# then restores the saved checkpoint and starts a test pass.
trainer.train()

Experiment ID: 267823075483077457
Run ID: 6323e70b25664aae9d5d9919f39c3c29


Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Datamodule Config:
	dm: lfw
	data_dir: /workspace/experiments/data
	batch_size: 16
	num_workers: 4
	train_ratio: 0.8
	download: False
Model Config:
	model: dcgan
	latent_dim: 100
	base_dim_g: 128
	base_dim_d: 128
	dropout: 0.4
	negative_slope: 0.1
	tgt_shape: (3, 128, 128)
Task Config:
	lr: 0.0001
	loss_fn: bce
	optimizer: adam
	scheduler: None
	beta1: 0.5
	beta2: 0.999
	ignore_index: -100
	one_side_label_smoothing: 0.9
Training Config:
	ckpt_path: None
	epochs: 2
	patience: 5
	max_depth: 1
	benchmark: True
	debug: False
	monitor_metric: val_loss_g
	monitor_mode: min
	logging: True
	logger: mlflow
	accelerator: auto
	devices: 1
	root_dir: /workspace
	log_dir: /workspace/experiments/mlruns


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                     | Params
-----------------------------------------------------------
0 | model         | DCGAN                    | 92.5 M
1 | loss_fn       | BCELoss                  | 0     
2 | generator     | Generator                | 47.9 M
3 | discriminator | Discriminator            | 44.6 M
4 | test_metric   | FrechetInceptionDistance | 23.9 M
-----------------------------------------------------------
92.5 M    Trainable params
23.9 M    Non-trainable params
116 M     Total params
465.271   Total estimated model params size (MB)


Generator optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.0001
    maximize: False
    weight_decay: 0
)
Discriminator optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.0001
    maximize: False
    weight_decay: 0
)


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.
Restoring states from the checkpoint path at ///workspace/experiments/mlruns/267823075483077457/6323e70b25664aae9d5d9919f39c3c29/checkpoints/epoch=1-step=8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ///workspace/experiments/mlruns/267823075483077457/6323e70b25664aae9d5d9919f39c3c29/checkpoints/epoch=1-step=8.ckpt


Testing: 0it [00:00, ?it/s]