論文<br>
https://arxiv.org/abs/2210.02347<br>
<br>
GitHub<br>
https://github.com/justinpinkney/clip2latent<br>
<br>
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/clip2latent_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 環境セットアップ

## GPU確認

In [None]:
# Print the NVIDIA driver/GPU status for the current runtime.
!nvidia-smi

## GitHubからコード取得

In [None]:
# Work from /content so the clone lands at /content/clip2latent.
%cd /content

# Fetch the upstream clip2latent sources.
!git clone https://github.com/justinpinkney/clip2latent.git

# Pin the working tree to a fixed commit (dated Oct 7, 2022) so the notebook
# always runs against the same version of the code.
%cd /content/clip2latent
!git checkout fc18ab19ec03f8a4fd4d6ac0e04f2b34db696946

## ライブラリのインストール

In [None]:
# The requirements file path below is relative to the repository root.
%cd /content/clip2latent

# Install the dependencies listed in requirements-colab.txt.
!pip install -r requirements-colab.txt

## ライブラリのインポート

In [None]:
import torch
from clip2latent import models
from PIL import Image

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("using device is", device)

# 学習済みモデルのセットアップ

In [None]:
# Registry of selectable pretrained models.
# Each entry maps a model name to the URLs of its checkpoint and its config.
model_choices = {
    name: {"checkpoint": checkpoint_url, "config": config_url}
    for name, checkpoint_url, config_url in (
        (
            "faces",
            "https://huggingface.co/lambdalabs/clip2latent/resolve/main/ffhq-sg2-510.ckpt",
            "https://huggingface.co/lambdalabs/clip2latent/resolve/main/ffhq-sg2-510.yaml",
        ),
        (
            "landscape",
            "https://huggingface.co/lambdalabs/clip2latent/resolve/main/lhq-sg3-410.ckpt",
            "https://huggingface.co/lambdalabs/clip2latent/resolve/main/lhq-sg3-410.yaml",
        ),
    )
}

In [None]:
# Colab form dropdown; the selected value is used as the key into model_choices.
chosen_model = "faces" #@param ['faces', 'landscape']

In [None]:
# Look up the config and checkpoint URLs registered for the selected model,
# then build the Clip2StyleGAN wrapper, passing it the selected device.
cfg_file, checkpoint = (
    model_choices[chosen_model][key] for key in ("config", "checkpoint")
)
model = models.Clip2StyleGAN(cfg_file, device, checkpoint)

# Text to Image

In [None]:
# Colab form text field: the text prompt handed to infer() further down.
prompt = "Photo of happy pierced blonde woman at shrine" #@param {type:"string"}

In [None]:
@torch.no_grad()
def infer(model, prompt, n_samples, scale, skips=250):
    """Run the model on a text prompt and return the results as PIL images.

    ``model`` is called with ``prompt``, forwarding ``n_samples`` as
    ``n_samples_per_txt``, ``scale`` as ``cond_scale`` and ``skips`` as
    ``skips``, always with ``clip_sort=True``. The whole call runs under
    ``torch.no_grad()``.

    Returns:
        A tuple ``(images, clip_score)``. ``images`` is a list with one PIL
        image per entry of the tensor batch returned by the model;
        ``clip_score`` is the model's second return value, passed through
        unchanged.
    """

    def _to_pil(tensor):
        # Clamp to [-1, 1] and map linearly onto [0, 255]; the uint8 cast
        # truncates the fractional part.
        clamped = tensor.clamp(-1, 1)
        pixels = (255*clamped/2 + 127.5).to(torch.uint8)
        # Move the first axis last -- presumably (C, H, W) -> (H, W, C);
        # confirm against the model's output layout.
        return Image.fromarray(pixels.permute(1, 2, 0).numpy())

    batch, clip_score = model(
        prompt,
        n_samples_per_txt=n_samples,
        cond_scale=scale,
        skips=skips,
        clip_sort=True,
    )
    # Bring the batch to host memory before converting to NumPy.
    batch = batch.cpu()
    return [_to_pil(sample) for sample in batch], clip_score

In [None]:
%%time

# Generate 2 samples for the prompt with scale=2; skips keeps infer()'s default.
outputs, clip_score = infer(model, prompt, n_samples=2, scale=2)

In [None]:
# Show every generated image at 256x256 together with its CLIP score.
for image, score in zip(outputs, clip_score):
    display(image.resize((256, 256)))
    print(f"CLIP similarity score = {score.item()}")