In [None]:
# Mount the user's Google Drive inside the Colab runtime at /content/gdrive.
# The following cells read the project files from this mount.
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
%cd ./gdrive/MyDrive/LELSD/notebooks/

/content/gdrive/MyDrive/LELSD/notebooks


In [None]:
# Optional environment setup, intentionally left disabled.
# Uncomment to install Ninja and the pinned torch / torchvision versions into the
# python3.7 environment if the runtime does not already provide them.
# !pip install Ninja
# !python3.7 -m pip install torch==1.9.0
# !python3.7 -m pip install torchvision==0.10.0

In [None]:
# Notebook-wide setup: autoreload, warning suppression, import path and imports.
# autoreload mode 2 re-imports edited modules before each cell execution, so changes
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import warnings
# Silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")
import sys

import torch
# Put the parent directory on the import path so `models`, `utils` and `lelsd` below
# can be imported (presumably the repository root -- confirm against the repo layout).
sys.path.append("../")
import models
# NOTE(review): `torch` is already imported above; this repeated import is a harmless no-op.
import torch
from utils.stylegan2_utils import StyleGAN2SampleGenerator
from utils.stylegan3_utils import StyleGAN3SampleGenerator
from utils.segmentation_utils import FaceSegmentation
from lelsd import LELSD

# Training StyleGAN2 with Supervised Segmentation

### StyleGAN2 FFHQ

In [None]:
# Sweep LELSD training for the StyleGAN2 FFHQ generator, supervised by face
# segmentation masks. One LELSD model is fitted and saved for every combination of
# latent space, loss function, mask aggregation, number of latent directions and
# semantic face part.
device = torch.device('cuda')

exp_dir = "../out"
G2 = models.get_model("stylegan2", "../pretrained/stylegan2/ffhq.pkl")
stylegan2_sample_generator = StyleGAN2SampleGenerator(G=G2, device=device)

face_bisenet = models.get_model("face_bisenet", "../pretrained/face_bisenet/model.pth")
face_segmentation = FaceSegmentation(face_bisenet=face_bisenet, device=device)

# Run name -> label names handed to LELSD as `semantic_parts`.
# This used to be two parallel lists combined with zip(): 10 names against 11 label
# groups. A stray ["hair"] group shifted everything after it, so "cloth" was trained
# on ["hair"], "eyeglass" on ["cloth", "neck", "necklace"], and ["eyeglass"] was
# silently dropped by zip(). An explicit mapping cannot drift out of alignment.
# NOTE(review): "hair" keeps the pairing it effectively had (["hair", "hat"]); if the
# bare ["hair"] group was the intended one, change it here.
semantic_part_groups = {
    "mouth": ["mouth", "u_lip", "l_lip"],
    "skin": ["skin"],
    "eyes": ["l_eye", "r_eye"],
    "nose": ["nose"],
    "ears": ["l_ear", "r_ear", "earrings"],
    "background": ["background"],
    "eyebrows": ["l_brow", "r_brow"],
    "hair": ["hair", "hat"],
    "cloth": ["cloth", "neck", "necklace"],
    "eyeglass": ["eyeglass"],
}

# Hyper-parameters shared by every run of the sweep (loop-invariant, so set once).
lr = 0.001
min_alpha_value = -1.0
max_alpha_value = 1.0
min_abs_alpha_value = 0.0
gamma_correlation = 5.0
onehot_temperature = 0.001
batch_size = 4
localization_layer_weights = None

for latent_space in ["S6"]:
    for loss_function in ["L2"]:
        for mask_aggregation in ['average', 'union', 'intersection']:
            for num_latent_dirs in [1, 2]:
                for part_name, face_parts in semantic_part_groups.items():
                    # Fresh list per run so no two LELSD instances share the same object.
                    localization_layers = list(range(1, 18))
                    # One output directory per configuration.
                    log_dir = f'{exp_dir}/lelsd_stylegan2_ffhq/{latent_space}_{loss_function}_{mask_aggregation}/{num_latent_dirs}D/face_bisenet/{part_name}'
                    lelsd = LELSD(device=device,
                                  localization_layers=localization_layers,
                                  semantic_parts=list(face_parts),
                                  loss_function=loss_function,
                                  localization_layer_weights=localization_layer_weights,
                                  mode='foreground',
                                  mask_aggregation=mask_aggregation,
                                  n_layers=18,
                                  latent_dim=512,
                                  num_latent_dirs=num_latent_dirs,
                                  learning_rate=lr,
                                  batch_size=batch_size,
                                  gamma_correlation=gamma_correlation,
                                  unit_norm=False,
                                  latent_space=latent_space,
                                  onehot_temperature=onehot_temperature,
                                  min_alpha_value=min_alpha_value,
                                  max_alpha_value=max_alpha_value,
                                  min_abs_alpha_value=min_abs_alpha_value,
                                  log_dir=log_dir,
                                  )

                    # The number of training batches grows with the number of directions.
                    lelsd.fit(stylegan2_sample_generator, face_segmentation, num_batches=200 * num_latent_dirs,
                              num_lr_halvings=3,
                              pgbar=True, summary=True)
                    lelsd.save()

# Training StyleGAN3 with Supervised Segmentation

### StyleGAN3-R FFHQ

In [None]:
# Sweep LELSD training for the StyleGAN3-R FFHQ-U generator, supervised by face
# segmentation masks. One LELSD model is fitted and saved for every combination of
# latent space, loss function, mask aggregation, number of latent directions and
# semantic face part.
device = torch.device('cuda')

exp_dir = "../out"
# NOTE(review): the name `G2` is kept for compatibility with the rest of the notebook,
# but from here on it refers to a StyleGAN3 model, replacing the StyleGAN2 one.
G2 = models.get_model("stylegan3", "../pretrained/stylegan3/stylegan3-r-ffhqu-1024x1024.pkl")
stylegan3_sample_generator = StyleGAN3SampleGenerator(G=G2, device=device)

face_bisenet = models.get_model("face_bisenet", "../pretrained/face_bisenet/model.pth")
face_segmentation = FaceSegmentation(face_bisenet=face_bisenet, device=device)
SG = "sg3"  # value passed to LELSD(sg=...) for the StyleGAN3-R checkpoint

# Run name -> label names handed to LELSD as `semantic_parts`.
# This used to be two parallel lists combined with zip(): 10 names against 11 label
# groups. A stray ["hair"] group shifted everything after it, so "cloth" was trained
# on ["hair"], "eyeglass" on ["cloth", "neck", "necklace"], and ["eyeglass"] was
# silently dropped by zip(). An explicit mapping cannot drift out of alignment.
# NOTE(review): "hair" keeps the pairing it effectively had (["hair", "hat"]); if the
# bare ["hair"] group was the intended one, change it here.
semantic_part_groups = {
    "mouth": ["mouth", "u_lip", "l_lip"],
    "skin": ["skin"],
    "eyes": ["l_eye", "r_eye"],
    "nose": ["nose"],
    "ears": ["l_ear", "r_ear", "earrings"],
    "background": ["background"],
    "eyebrows": ["l_brow", "r_brow"],
    "hair": ["hair", "hat"],
    "cloth": ["cloth", "neck", "necklace"],
    "eyeglass": ["eyeglass"],
}

# Hyper-parameters shared by every run of the sweep (loop-invariant, so set once).
lr = 0.001
min_alpha_value = -1.0
max_alpha_value = 1.0
min_abs_alpha_value = 0.0
gamma_correlation = 5.0
onehot_temperature = 0.001
batch_size = 1
localization_layer_weights = None

for latent_space in ["S3", "S5"]:
    for loss_function in ["L2"]:
        for mask_aggregation in ['average', 'union', 'intersection']:
            for num_latent_dirs in [1, 2]:
                for part_name, face_parts in semantic_part_groups.items():
                    # Fresh list per run so no two LELSD instances share the same object.
                    localization_layers = list(range(1, 15))
                    # NOTE(review): the StyleGAN3-T cell of this notebook builds its
                    # log_dir from the same 'lelsd_stylegan3_ffhq' root, so results of
                    # the two checkpoints land in the same directories -- confirm that
                    # LELSD.save() keeps them apart, or give each variant its own root.
                    log_dir = f'{exp_dir}/lelsd_stylegan3_ffhq/{latent_space}_{loss_function}_{mask_aggregation}/{num_latent_dirs}D/face_bisenet/{part_name}'
                    lelsd = LELSD(device=device,
                                  localization_layers=localization_layers,
                                  semantic_parts=list(face_parts),
                                  loss_function=loss_function,
                                  localization_layer_weights=localization_layer_weights,
                                  mode='foreground',
                                  mask_aggregation=mask_aggregation,
                                  n_layers=15,
                                  latent_dim=1024,
                                  num_latent_dirs=num_latent_dirs,
                                  learning_rate=lr,
                                  batch_size=batch_size,
                                  gamma_correlation=gamma_correlation,
                                  unit_norm=False,
                                  latent_space=latent_space,
                                  onehot_temperature=onehot_temperature,
                                  min_alpha_value=min_alpha_value,
                                  max_alpha_value=max_alpha_value,
                                  min_abs_alpha_value=min_abs_alpha_value,
                                  log_dir=log_dir, sg=SG
                                  )

                    # The number of training batches grows with the number of directions.
                    lelsd.fit(stylegan3_sample_generator, face_segmentation, num_batches=200 * num_latent_dirs,
                              num_lr_halvings=3,
                              pgbar=True, summary=True)
                    lelsd.save()

  0%|          | 0/200 [00:00<?, ?it/s]

Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "filtered_lrelu_plugin"... Done.


100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:57<00:00,  1.13it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:57<00:00,  1.13it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 200/200 [02:58<00:00,  1.12it/s]
100%|██████████| 400/400 [05:56<00:00,  1.12it/s]
100%|██████████| 400/400 [05:57<00:00,  1.12it/s]
100%|██████████| 400/400 [05:55<00:00,  1.13it/s]
100%|██████████| 400/400 [05:56<00:00,  1.12it/s]
100%|██████████| 400/400 [05:55<00:00,  1.12it/s]
100%|██████████| 400/400 [05:57<00:00,  1.12it/s]
100%|██████████| 400/400 [05:56<00:00,  1.12it/s]
100%|██████████| 400/400 [05:57<00:00,  1.12it/s]
100%|██████████| 400/400 [05:57<00:00,  1.12it/s]
100%|██████████| 400/400 [05:56<00:00,  1.12it/s]


### StyleGAN3-T FFHQ

In [None]:
# Sweep LELSD training for the StyleGAN3-T FFHQ generator, supervised by face
# segmentation masks. One LELSD model is fitted and saved for every combination of
# latent space, loss function, mask aggregation, number of latent directions and
# semantic face part.
device = torch.device('cuda')

exp_dir = "../out"
# NOTE(review): the name `G2` is kept for compatibility with the rest of the notebook,
# but it refers to a StyleGAN3 model here.
G2 = models.get_model("stylegan3", "../pretrained/stylegan3/stylegan3-t-ffhq-1024x1024.pkl")
stylegan3_sample_generator = StyleGAN3SampleGenerator(G=G2, device=device)

face_bisenet = models.get_model("face_bisenet", "../pretrained/face_bisenet/model.pth")
face_segmentation = FaceSegmentation(face_bisenet=face_bisenet, device=device)
SG = "sg3-t"  # value passed to LELSD(sg=...) for the StyleGAN3-T checkpoint

# Run name -> label names handed to LELSD as `semantic_parts`.
# This used to be two parallel lists combined with zip(): 10 names against 11 label
# groups. A stray ["hair"] group shifted everything after it, so "cloth" was trained
# on ["hair"], "eyeglass" on ["cloth", "neck", "necklace"], and ["eyeglass"] was
# silently dropped by zip(). An explicit mapping cannot drift out of alignment.
# NOTE(review): "hair" keeps the pairing it effectively had (["hair", "hat"]); if the
# bare ["hair"] group was the intended one, change it here.
semantic_part_groups = {
    "mouth": ["mouth", "u_lip", "l_lip"],
    "skin": ["skin"],
    "eyes": ["l_eye", "r_eye"],
    "nose": ["nose"],
    "ears": ["l_ear", "r_ear", "earrings"],
    "background": ["background"],
    "eyebrows": ["l_brow", "r_brow"],
    "hair": ["hair", "hat"],
    "cloth": ["cloth", "neck", "necklace"],
    "eyeglass": ["eyeglass"],
}

# Hyper-parameters shared by every run of the sweep (loop-invariant, so set once).
lr = 0.001
min_alpha_value = -1.0
max_alpha_value = 1.0
min_abs_alpha_value = 0.0
gamma_correlation = 5.0
onehot_temperature = 0.001
batch_size = 1
localization_layer_weights = None

for latent_space in ["S3", "S5"]:
    for loss_function in ["L2"]:
        for mask_aggregation in ['average', 'union', 'intersection']:
            for num_latent_dirs in [1, 2]:
                for part_name, face_parts in semantic_part_groups.items():
                    # Fresh list per run so no two LELSD instances share the same object.
                    localization_layers = list(range(1, 15))
                    # NOTE(review): the StyleGAN3-R cell of this notebook builds its
                    # log_dir from the same 'lelsd_stylegan3_ffhq' root, so results of
                    # the two checkpoints land in the same directories -- confirm that
                    # LELSD.save() keeps them apart, or give each variant its own root.
                    log_dir = f'{exp_dir}/lelsd_stylegan3_ffhq/{latent_space}_{loss_function}_{mask_aggregation}/{num_latent_dirs}D/face_bisenet/{part_name}'
                    lelsd = LELSD(device=device,
                                  localization_layers=localization_layers,
                                  semantic_parts=list(face_parts),
                                  loss_function=loss_function,
                                  localization_layer_weights=localization_layer_weights,
                                  mode='foreground',
                                  mask_aggregation=mask_aggregation,
                                  n_layers=15,
                                  latent_dim=1024,
                                  num_latent_dirs=num_latent_dirs,
                                  learning_rate=lr,
                                  batch_size=batch_size,
                                  gamma_correlation=gamma_correlation,
                                  unit_norm=False,
                                  latent_space=latent_space,
                                  onehot_temperature=onehot_temperature,
                                  min_alpha_value=min_alpha_value,
                                  max_alpha_value=max_alpha_value,
                                  min_abs_alpha_value=min_abs_alpha_value,
                                  log_dir=log_dir, sg=SG
                                  )

                    # The number of training batches grows with the number of directions.
                    lelsd.fit(stylegan3_sample_generator, face_segmentation, num_batches=200 * num_latent_dirs,
                              num_lr_halvings=3,
                              pgbar=True, summary=True)
                    lelsd.save()
