In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Make the user's Google Drive visible inside the Colab VM.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os

# Location of the project folder, relative to the root of "My Drive".
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = "Raffaello_Sanzio/styleid"
GOOGLE_DRIVE_PATH = os.path.join(
    '/content/drive',
    'My Drive',
    GOOGLE_DRIVE_PATH_AFTER_MYDRIVE,
)

# Listing the folder doubles as a check that the mount succeeded and the path exists.
project_entries = os.listdir(GOOGLE_DRIVE_PATH)
print(project_entries)

['ckpt', 'data', 'data_model', 'config', 'ldm', 'models', 'output', 'precomputed_feats', 'IMG_3724.JPG', 'IMG_3724_styled_joana-abreu-aFkzShngdaw-unsplash.png', 'lora', 'train', 'train.ipynb', 'styleid.ipynb']


In [3]:
import sys

# Expose the project folder so its packages can be imported from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer appends
# a duplicate entry to sys.path.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

In [10]:
!pip install pytorch-lightning==1.4.2
!pip install omegaconf==2.1.1
!pip install torchmetrics==0.6.0
!pip install git+https://github.com/openai/CLIP.git
!pip install kornia==0.6
# !pip install lightning==1.4.2

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-gqzhriv7
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-gqzhriv7
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [4]:
import os
import yaml
import torch
import random
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader
from diffusers import StableDiffusionPipeline
from peft import LoraConfig, get_peft_model
from accelerate.utils import write_basic_config
from accelerate import Accelerator
from tqdm import tqdm
import pytorch_lightning as pl

from data_model.sanzio_dataset import SanzioDataset
from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.modules.diffusionmodules.util import extract_into_tensor
from train.modelwrapper import SanzioModule

In [5]:
def _read_yaml(path):
    """Parse the YAML file at ``path`` with ``yaml.safe_load`` and return the result."""
    with open(path, "r") as stream:
        return yaml.safe_load(stream)


# LoRA / fine-tuning settings.
config_path = os.path.join(GOOGLE_DRIVE_PATH, "config/sanzio.yaml")
config = _read_yaml(config_path)
lora_config = config["lora_params"]
diffuser_model_config = config["diffuser_model_params"]
lora_train_config = config["train_params"]

# Settings from style.yaml. Note that `config` is rebound here, so from this point on
# it holds the contents of style.yaml, not sanzio.yaml.
model_config_path = os.path.join(GOOGLE_DRIVE_PATH, "config/style.yaml")
config = _read_yaml(model_config_path)
ldm_train_config = config["train_params"]

# The model definition lives in a third YAML file referenced by style.yaml.
model_config = _read_yaml(
    os.path.join(GOOGLE_DRIVE_PATH, ldm_train_config["model_config"])
)

In [6]:
def load_model_from_config(model_config, ckpt_path, verbose=False):
    """Instantiate the model described by ``model_config`` and load checkpoint weights.

    Args:
        model_config: Mapping whose ``"model"`` entry is passed to
            ``instantiate_from_config``.
        ckpt_path: Path to a checkpoint file readable by ``torch.load`` that contains
            a ``"state_dict"`` entry (and optionally ``"global_step"``).
        verbose: When true, print the missing / unexpected keys reported by
            ``load_state_dict``.

    Returns:
        The instantiated model with the checkpoint weights loaded (non-strict), moved
        to CUDA when a GPU is available and left on the CPU otherwise.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry or ``model_config``
            has no ``"model"`` entry.
    """
    print(f"Loading model from {ckpt_path}")
    # NOTE(review): torch.load unpickles the file, so only load checkpoints from a
    # trusted source. Weights are first materialised on the CPU.
    pl_sd = torch.load(ckpt_path, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(model_config["model"])

    # strict=False: tolerate key mismatches and report them only on request.
    missing_keys, unexpected_keys = model.load_state_dict(sd, strict=False)
    if verbose and len(missing_keys) > 0:
        print("missing keys:")
        print(missing_keys)
    if verbose and len(unexpected_keys) > 0:
        print("unexpected keys:")
        print(unexpected_keys)

    # Previously an unconditional model.cuda(), which raises on a CPU-only runtime.
    # Fall back to the CPU the same way the training cell picks its device.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(target_device)
    return model

In [7]:
# Resolve the checkpoint location from the style config and build the model from it.
ckpt_path = os.path.join(GOOGLE_DRIVE_PATH, ldm_train_config["ckpt"])
pipeline = load_model_from_config(
    model_config=model_config,
    ckpt_path=ckpt_path,
)

Loading model from /content/drive/My Drive/Raffaello_Sanzio/styleid/models/ldm/stable-diffusion-v1/model.ckpt


  pl_sd = torch.load(ckpt_path, map_location="cpu")


Global Step: 470000
LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 859.52 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels


Downloading: "https://github.com/DagnyT/hardnet/raw/master/pretrained/train_liberty_with_aug/checkpoint_liberty_with_aug.pth" to /root/.cache/torch/hub/checkpoints/checkpoint_liberty_with_aug.pth
100%|██████████| 5.10M/5.10M [00:00<00:00, 334MB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

In [8]:
# `lora_config` starts out as the raw "lora_params" dict from the YAML file and is
# rebound below to a LoraConfig object. Keep the raw dict under its own name so that
# this cell can be re-run: on a second run `lora_config` is no longer a dict and the
# previously stored `lora_params` is reused instead of indexing the LoraConfig object.
if isinstance(lora_config, dict):
    lora_params = lora_config

lora_config = LoraConfig(
        r=lora_params["lora_r"],
        lora_alpha=lora_params["lora_alpha"],
        init_lora_weights='gaussian',
        # Names of the submodules that receive LoRA adapters.
        target_modules=['to_k', 'to_q', 'to_v', 'to_out.0'],
        lora_dropout=lora_params["lora_dropout"],
        bias=lora_params["bias"]
    )

In [9]:
# Wrap the diffusion UNet of the loaded model with LoRA adapters and report how many
# parameters end up trainable.
base_unet = pipeline.model.diffusion_model
unet_model = get_peft_model(base_unet, lora_config)

unet_model.print_trainable_parameters()

trainable params: 1,594,368 || all params: 861,115,332 || trainable%: 0.1852


In [10]:
# Sanity check: shape of the conditioning produced for a batch of two empty prompts.
empty_prompt_shape = pipeline.get_learned_conditioning(["", ""]).shape
print(empty_prompt_shape)

torch.Size([2, 77, 768])


In [11]:
# Create a DDIM sampler around the loaded model and set up its schedule using the
# step count and eta taken from the style config.
sampler = DDIMSampler(pipeline)
ddim_steps = ldm_train_config["save_feat_steps"]
schedule_kwargs = {
    "ddim_num_steps": ddim_steps,
    "ddim_eta": ldm_train_config["ddim_eta"],
    "verbose": False,
}
sampler.make_schedule(**schedule_kwargs)


In [12]:

# Training cell: seed, data loading, Lightning module, trainer, and adapter export.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The seed was previously read from the config but never applied. Seed the Python,
# NumPy and torch RNGs so shuffling and the randomness used during training are
# repeatable. The LoRA weights were already initialised by get_peft_model in an
# earlier cell, so this seed only covers what runs from here on.
seed = lora_train_config["seed"]
pl.seed_everything(seed)

num_epochs = lora_train_config["epochs"]
batch_size = lora_train_config["batch_size"]
lr = lora_train_config["lr"]

dataset_path = os.path.join(GOOGLE_DRIVE_PATH, "data/images")
dataset = SanzioDataset(dataset_path)

# drop_last=True discards a final batch smaller than batch_size.
dataloader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        drop_last=True,
                        num_workers=0)

pipeline = pipeline.to(device)

# NOTE(review): train() switches every submodule of `pipeline` to training mode, not
# only the LoRA-wrapped UNet - confirm this is intended for the remaining submodules.
pipeline.train()

model = SanzioModule(
    pipeline=pipeline,
    unet_model=unet_model,
    sampler=sampler,
    ddim_steps=ddim_steps,
    lr=lr
)


model.to(device)

# Output folder for the trained adapter. makedirs(exist_ok=True) replaces the separate
# exists()/mkdir() pair: it also creates missing parent folders and does not fail if
# the folder appears between the check and the creation.
lora_path = os.path.join(GOOGLE_DRIVE_PATH, "lora")
os.makedirs(lora_path, exist_ok=True)

trainer = pl.Trainer(
    max_epochs=num_epochs,
    precision=32,
    accumulate_grad_batches=lora_train_config["gradient_accumulation_steps"],
    accelerator="auto",
    devices=1
)

trainer.fit(model, train_dataloaders=dataloader)

# Persist the PEFT-wrapped UNet under the checkpoint name given in the training config.
unet_model.save_pretrained(os.path.join(lora_path, lora_train_config['lora_ckpt_name']))


INFO:pytorch_lightning.utilities.distributed:GPU available: True, used: True
INFO:pytorch_lightning.utilities.distributed:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.distributed:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.core.lightning:
  | Name       | Type            | Params
-----------------------------------------------
0 | pipeline   | LatentDiffusion | 1.1 B 
1 | unet_model | PeftModel       | 861 M 
-----------------------------------------------
1.6 M     Trainable params
1.1 B     Non-trainable params
1.1 B     Total params
4,271.319 Total estimated model params size (MB)
  rank_zero_warn(


Training: -1it [00:00, ?it/s]

In [None]:

# Put the wrapped UNet in training mode and list every parameter that will receive
# gradients.
unet_model.train()
trainable_names = [
    param_name
    for param_name, tensor in unet_model.named_parameters()
    if tensor.requires_grad
]
for param_name in trainable_names:
    print(f"Trainable param: {param_name}")

In [None]:
# True when at least one parameter of the wrapped UNet requires gradients.
has_trainable = any(tensor.requires_grad for tensor in unet_model.parameters())
print(has_trainable)


In [None]:
# NOTE(review): Everything in this cell is disabled (commented-out) code: a manual
# training loop built on `accelerate`, followed by a few scratch snippets. Nothing here
# executes. It refers to `pipeline.unet` / `pipeline.vae`, whereas the live cells reach
# the UNet via `pipeline.model.diffusion_model`, so it presumably predates the current
# setup and would need adapting before being re-enabled. Consider deleting it once it
# is no longer needed for reference.

# accelerator = Accelerator(
#     mixed_precision=lora_train_config["mixed_precision"],
#     gradient_accumulation_steps=lora_train_config["gradient_accumulation_steps"],
# )
# text emb shape torch.Size([2, 77, 768])
# device = accelerator.device

# criterion = nn.MSELoss()
# optimizer = torch.optim.AdamW(unet_model.parameters(), lr=lora_train_config["lr"])

# pipeline, optimizer, dataloader = accelerator.prepare(
#     pipeline, optimizer, dataloader
# )

# print(len(dataloader))
# for epoch in range(num_epochs):
#     losses = []
#     for ims in tqdm(dataloader):
#         ims = ims.to(device)
#         optimizer.zero_grad()

#         latents = pipeline.get_first_stage_encoding(pipeline.encode_first_stage(ims))
#         latents = latents.detach().requires_grad_(True)

#         text = ["Raffaello Sanzio Painting" for _ in ims]
#         input_text_emb = pipeline.get_learned_conditioning(text)

#         noise = torch.randn_like(latents, device=device, requires_grad=True)
#         t = torch.randint(0, ddim_steps, (latents.shape[0],),
#                           device=device, dtype=torch.long)

#         print("Before encode:")
#         print(f"latents grad: {latents.requires_grad}")
#         print(f"noise grad: {noise.requires_grad}")

#         noisy_latents = sampler.stochastic_encode(latents, t, noise=noise)

#         print("After encode:")
#         print(f"noisy_latents grad: {noisy_latents.requires_grad}")

#         noise_pred = pipeline.apply_model(noisy_latents, t, input_text_emb)
#         print("After model:")
#         print(f"noise_pred grad: {noise_pred.requires_grad}")

#         # Print grad_fn to see computational graph
#         print("Grad functions:")
#         print(f"noisy_latents grad_fn: {noisy_latents.grad_fn}")
#         print(f"noise_pred grad_fn: {noise_pred.grad_fn}")

#         loss = criterion(noise_pred, noise)
#         print(f"Loss requires grad: {loss.requires_grad}")

#         losses.append(loss.item())

#         # Try standard backward first
#         loss.backward()

#         optimizer.step()

#     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {sum(losses)/len(losses)}")

#         # If that fails, then try accelerator.backward
#         # accelerator.backward(loss)
# pipeline.unet.save_pretrained(os.path.join(lora_path, lora_train_config['lora_ckpt_name']))

# pipeline.vae.config.scaling_factor
# !accelerate config
# lora_config = LoraConfig(
#         r=8,
#         lora_alpha=16,
#         init_lora_weights='gaussian',
#         target_modules=['to_k', 'to_q', 'to_v', 'to_out.0', 'add_k_proj', 'add_v_proj'],
#         lora_dropout=0.0,
#         bias='none'
#     )