In [1]:
# @title Import Drive
from google.colab import drive

# Directory inside the Colab VM under which Google Drive is mounted; the data
# archives and the results folder used by later cells live below it.
DRIVE_MOUNT_POINT = '/content/drive'

# force_remount=True re-mounts even if a mount from an earlier run is still present.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [2]:
# @title Clone Repository
import os
github_personal_access_token = "" # @param {type:"string"}
os.environ["GITHUB_AUTH"] = github_personal_access_token
!git clone https://$GITHUB_AUTH@github.com/namanxkumar/fea-diffusion.git &> /dev/null

In [3]:
# @title Install Dependencies
!pip install accelerate einops ema_pytorch wandb &> /dev/null

In [4]:
# @title Git Pull Latest
%cd /content/fea-diffusion
!git pull
%cd ../

/content/fea-diffusion
Already up to date.
/content


In [5]:
# @title WandB Login for tracking
# Weights & Biases API key, entered through the Colab form field.
# NOTE(review): a value typed into the form is stored in the notebook source;
# clear the field before sharing or committing the notebook.
wandb_login = "" # @param {type:"string"}
# Expose the key to the shell so the `!` command below can read $WANDB_LOGIN.
# Relies on `os` having been imported by the "Clone Repository" cell.
os.environ["WANDB_LOGIN"] = wandb_login
# --relogin forces a fresh login even if credentials are already stored.
# $WANDB_LOGIN is intentionally left unquoted: with an empty key no argument is
# passed at all (presumably letting the CLI prompt for the key -- confirm).
!wandb login --relogin $WANDB_LOGIN

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [6]:
# @title Unzip Data
%mkdir /content/data
!unzip drive/MyDrive/BTP/feadata2500.zip -d /content/data &> /dev/null
!unzip drive/MyDrive/BTP/sample_1.zip -d /content/data &> /dev/null

In [7]:
%cd /content/fea-diffusion

from model.diffusion import Trainer
from model.unet import UNet

# Hyperparameters of the UNet, collected in one place.
unet_config = dict(
    input_dim=64,
    num_channels=2,  # geometry/displacement (2)
    num_condition_channels=4,  # constraints (1) + force (2) + geometry (1)
)
model = UNet(**unet_config)

# Training configuration. The relative paths are written against the repository
# directory entered by the `%cd` at the top of this cell -- presumably the
# Trainer resolves them from the current working directory (confirm).
trainer_config = dict(
    # Data
    dataset_folder="../data/feadata2500",
    use_dataset_augmentation=False,
    dataset_image_size=256,
    # Sampling
    sample_dataset_folder="../data/sample_1",
    num_sample_conditions_per_plate=1,
    # Optimisation
    train_batch_size=6,
    num_gradient_accumulation_steps=4,
    train_learning_rate=3e-4,
    num_train_steps=10000,
    use_batch_split_over_devices=True,
    # Checkpointing / output
    num_steps_per_milestone=500,
    ema_steps_per_milestone=10,
    results_folder="../drive/MyDrive/BTP/results",
)
trainer = Trainer(model=model, **trainer_config)

/content/fea-diffusion


  from tqdm.autonotebook import tqdm


In [8]:
import wandb

# Start a run; it is logged under the "fea-diffusion" wandb project.
wandb.init(project="fea-diffusion")

# "step" is registered first and then used as the step metric for both losses.
wandb.define_metric("step")
for metric_name in ("train_loss", "sample_loss"):
    wandb.define_metric(metric_name, step_metric="step")

def inject_function(step, loss, sample_loss, sampled_images):
    """Log one training step to wandb.

    'step' and 'train_loss' are always logged. 'sample_loss' and 'samples'
    (each image wrapped in ``wandb.Image``) are added only when BOTH
    ``sample_loss`` and ``sampled_images`` are not None; if either one is None,
    neither is logged.

    Args:
        step: Value logged under 'step'.
        loss: Value logged under 'train_loss'.
        sample_loss: Value logged under 'sample_loss', or None.
        sampled_images: Iterable of images passed to ``wandb.Image``, or None.
    """
    payload = {'step': step, 'train_loss': loss}

    # Without a complete (loss, images) pair, log only the training loss.
    if sample_loss is None or sampled_images is None:
        wandb.log(payload)
        return

    payload['sample_loss'] = sample_loss
    payload['samples'] = list(map(wandb.Image, sampled_images))
    wandb.log(payload)

[34m[1mwandb[0m: Currently logged in as: [33mnamanxkumar[0m ([33mfea-diffusion[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Checkpoint to resume from. NOTE(review): presumably a milestone index -- with
# num_steps_per_milestone=500 that would correspond to step 4500; confirm.
RESUME_CHECKPOINT = 9
trainer.load_checkpoint(RESUME_CHECKPOINT)

In [None]:
# Command-line alternative to the in-notebook Trainer, kept for reference only.
# NOTE(review): its flags (--num_steps 4167, --num_steps_per_milestone 1000,
# --learning_rate 1e-4) differ from the Trainer configured in this notebook
# (10000 steps, 500 steps per milestone, 3e-4) -- reconcile before reusing it.
# !python fea-diffusion/train.py --data_dir data/feadata2500 --sample_data_dir data/sample_1 --results_dir drive/MyDrive/BTP/results --batch_size 6 --num_gradient_accumulation_steps 4 --num_steps 4167 --num_steps_per_milestone 1000 --ema_steps_per_milestone 10 --learning_rate 1e-4
# Start training, passing inject_function as the wandb logging hook (presumably
# invoked by the trainer with step, loss, sample_loss and sampled_images).
trainer.train(wandb_inject_function=inject_function)

Epoch Size: 4166.75 effective batches
Number of Effective Epochs: 2.399952000959981


 45%|####5     | 4500/10000 [00:00<?, ?it/s]