In [1]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

download_with_pydrive = True #@param {type:"boolean"}  

class Downloader(object):
    def __init__(self, use_pydrive):
        self.use_pydrive = use_pydrive

        if self.use_pydrive:
            self.authenticate()
        
    def authenticate(self):
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        self.drive = GoogleDrive(gauth)
    
    def download_file(self, file_id, file_dst):
        if self.use_pydrive:
            downloaded = self.drive.CreateFile({'id':file_id})
            downloaded.FetchMetadata(fetch_all=True)
            downloaded.GetContentFile(file_dst)
        else:
            !gdown --id $file_id -O $file_dst

downloader = Downloader(download_with_pydrive)

In [None]:
import os
import glob
import sys
import pickle
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
import cv2
from tqdm.auto import tqdm

#@title Setup (may take a few minutes)
#@markdown Installs CLIP and other dependencies
!gdown --id 1kwJndtv5tCd0LEzRTi4NHJ2sJbVZeDLG

!pip uninstall -y torchtext torchaudio
!pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
!pip install ftfy regex tqdm click requests pyspng ninja imageio-ffmpeg==0.4.3 ffmpeg-python
!pip install git+https://github.com/openai/CLIP.git

# # Clone StyleCLIP repo
# !git clone https://github.com/orpatashnik/StyleCLIP
# %cd StyleCLIP

!git clone https://github.com/thepowerfuldeez/stylemc
%cd stylemc

## Download pretrained StyleGAN on FFHQ 1024x1024
current_directory = os.getcwd()
save_path = os.path.join(os.path.dirname(current_directory), "pretrained_models")
os.makedirs(save_path, exist_ok=True)
downloader.download_file("125OG7SMkXI-Kf2aqiwLLHyCvSW-gZk3M", os.path.join(save_path, 'ffhq.pkl'))

# ## Download Dlib tool for alingment, used for preprocessing images before PTI
# downloader.download_file("1xPmn19T6Bdd-_RfCVlgNBbfYoh1muYxR", os.path.join(save_path, 'align.dat'))

# %cd encoder4editing
# !gdown --id 1XB47NiJEi4tlmATWmiCARGUQrDYYHGi8
# !gdown --id 1_9CUVCQ1ZGy0h2V_GgTILqi-Z8alzL2G
# %cd ..

!gdown --id 1xG_YHGcbzd5LWwqQCDDQDcqsSw7OvODY -O id_loss/model_ir_se50.pth
# !gdown --id 1odzfkLhB_uPErr5xRpFvHMZ87ORkBZSv
# !unzip out.zip

!mkdir deeplab_model/
!gdown --id 1oRGgrI4KNdefbWVpw0rRkEP1gbJIRokM -O deeplab_model/R-101-GN-WS.pth.tar
!gdown --id 1w2XjDywFr2NjuUWaLQDRktH7VwIfuNlY -O deeplab_model/deeplab_model.pth

!gdown --id 1Le5UdpMkKOTRr1sTp4lwkw8263sbgdSe

# !mkdir out
# %cd out
# !gdown --id 1dPitYY7OR2tCewKXrE6a9VyOH2Kb04_c
# !unzip directions.zip
# %cd ..

In [3]:
!python generate_w.py --trunc=0.7 --seeds="100000-100999" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python w_s_converter.py --outdir=out --projected-w=encoder4editing/projected_w.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="" --change_power=0 --outdir=out --projected-w=encoder4editing/projected_w.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Downloading https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl ... done
Setting up PyTorch plugin "bias_act_plugin"... Done.
Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Generating images from projected W "encoder4editing/projected_w.npz"
loaded 1000 ws
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.


In [None]:
# generate masks
!python run_deeplab.py --dataset_root='out'

In [6]:
import torch
import clip
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


text = clip.tokenize(["a photo of a male person", "a photo of a female person"]).to(device)
all_probs = []
for i in tqdm(range(0, 1000)):
  image = preprocess(Image.open(f"out/proj{i:02d}.png")).unsqueeze(0).to(device)

  with torch.no_grad():
      image_features = model.encode_image(image)

      logits_per_image, logits_per_text = model(image, text)
      probs = logits_per_image.softmax(dim=-1).cpu().numpy()
      all_probs.append(probs)
all_probs = np.concatenate(all_probs)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [7]:
import numpy as np
styles = np.load("out/input.npz")['s']
male_idx = (all_probs[:, 0] > 0.85).nonzero()[0]
female_idx = (all_probs[:, 1] > 0.85).nonzero()[0]

# style_diff = (styles[female_idx].mean(0) - styles[male_idx].mean(0))
# female_styles = styles[male_idx] + style_diff[None, ...] * 0.9
# np.savez("out/female_s.npz", s=female_styles)
# np.savez("male2female_out/direction_female.npz", s=style_diff * 0.9)
# !python generate_fromS.py --text_prompt="female" --change_power=1 --outdir=male2female_out --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

np.savez("out/female_s.npz", s=styles[female_idx], idx=female_idx)
np.savez("out/male_s.npz", s=styles[male_idx], idx=male_idx)

In [8]:
len(styles), len(male_idx), len(female_idx)

(1000, 413, 518)

In [9]:
!rm -rf runs

In [None]:
!python find_direction.py --s_input="out/male_s.npz" --text_prompt="a photo of a face of a feminine woman with no makeup" --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0/ --only_face_mask=0 --mask_min_value=0.1 --clip_type='double' --batch_size=4 --n_epochs=4 --identity_loss_coef=0.6 --learning_rate=2.5 --negative_text_prompt="a photo of a face of a man" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a feminine woman with no makeup" --change_power=2.0 --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0/ --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

In [None]:
!python find_direction.py --s_input="out/male_s.npz" --text_prompt="a photo of a face of a feminine woman with no makeup" --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_sgd/ --only_face_mask=0 --mask_min_value=0.1 --clip_type='double' --batch_size=4 --n_epochs=4 --identity_loss_coef=0.6 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a man" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a feminine woman with no makeup" --change_power=2.0 --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_sgd/ --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

In [None]:
!python find_direction.py --s_input="out/male_s.npz" --text_prompt="a photo of a face of a feminine woman with no makeup" --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks2/ --only_face_mask=0 --mask_min_value=0.1 --clip_type='double' --batch_size=4 --n_epochs=4 --identity_loss_coef=0.6 --landmarks_loss_coef=25.0 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a man" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a feminine woman with no makeup" --change_power=2.0 --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks2/ --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

In [None]:
!python find_direction.py --s_input="out/male_s.npz" --text_prompt="a photo of a face of a feminine woman with no makeup" --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg/ --only_face_mask=0 --mask_min_value=0.1 --clip_type='double' --batch_size=4 --n_epochs=4 --identity_loss_coef=0.6 --landmarks_loss_coef=25.0 --l2_reg_coef=0.1 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a man" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a feminine woman with no makeup" --change_power=2.0 --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg/ --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

In [135]:
!python find_direction.py --s_input="out/male_s.npz" --text_prompt="a photo of a face of a feminine woman with no makeup" --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg2/ --only_face_mask=0 --mask_min_value=0.1 --clip_type='double' --batch_size=4 --n_epochs=4 --identity_loss_coef=0.6 --landmarks_loss_coef=25.0 --l2_reg_coef=0.01 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a man" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a feminine woman with no makeup" --change_power=2.0 --outdir=runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg2/ --s_input=out/male_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Loading ResNet ArcFace
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.
Iteration 96, img size: 224 , gradient norm: 0.0358
Clip loss: 0.708, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.7080
Iteration 95, img size: 224 , gradient norm: 0.0862
Clip loss: 0.475, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.4755
Iteration 96, img size: 224 , gradient norm: 0.0350
Clip loss: 0.705, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.7056
Iteration 15, img size: 224 , gradient norm: 0.0484
Clip loss: 0.678, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6788
Iteration 61, img size: 224 

In [137]:
!python find_direction.py --s_input="out/female_s.npz" --text_prompt="a photo of a face of a man with no facial hair" --outdir=runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks/ --only_face_mask=0 --mask_min_value=0.5 --clip_type='double' --batch_size=4 --n_epochs=3 --identity_loss_coef=1.0 --landmarks_loss_coef=25.0 --l2_reg_coef=0.01 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a feminine woman" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a man with no facial hair" --change_power=2.0 --outdir=runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks/ --s_input=out/female_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
loaded 518 styles
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.
time passed: 51.1730751991272


In [138]:
!python find_direction.py --s_input="out/female_s.npz" --text_prompt="a photo of a face of a man with long hair and no facial hair" --outdir=runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks2/ --only_face_mask=0 --mask_min_value=0.5 --clip_type='double' --batch_size=4 --n_epochs=3 --identity_loss_coef=1.0 --landmarks_loss_coef=25.0 --l2_reg_coef=0.01 --learning_rate=1.5 --negative_text_prompt="a photo of a face of a feminine woman" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a man with long hair and no facial hair" --change_power=2.0 --outdir=runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks2/ --s_input=out/female_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Loading ResNet ArcFace
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.
Iteration 5, img size: 224 , gradient norm: 0.0335
Clip loss: 0.683, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.6826
Iteration 92, img size: 224 , gradient norm: 0.0635
Clip loss: 0.580, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.5803
Iteration 59, img size: 224 , gradient norm: 0.0996
Clip loss: 0.595, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.5951
Iteration 74, img size: 224 , gradient norm: 0.0574
Clip loss: 0.612, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6130
Iteration 75, img size: 224 ,

In [140]:
!python find_direction.py --s_input="out/female_s.npz" --text_prompt="a photo of a face of a man with long hair and no facial hair" --outdir=runs/female2male_id1.2_clip1.0_lr2.5_power2.0_landmarks2/ --only_face_mask=0 --mask_min_value=0.5 --clip_type='double' --batch_size=4 --n_epochs=3 --identity_loss_coef=1.2 --landmarks_loss_coef=25.0 --l2_reg_coef=0.01 --learning_rate=2.5 --negative_text_prompt="a photo of a face of a feminine woman" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a man with long hair and no facial hair" --change_power=2.0 --outdir=runs/female2male_id1.2_clip1.0_lr2.5_power2.0_landmarks2/ --s_input=out/female_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Loading ResNet ArcFace
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.
Iteration 95, img size: 224 , gradient norm: 0.0372
Clip loss: 0.674, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.6738
Iteration 104, img size: 224 , gradient norm: 0.0516
Clip loss: 0.674, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.6742
Iteration 104, img size: 224 , gradient norm: 0.0465
Clip loss: 0.667, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6677
Iteration 86, img size: 224 , gradient norm: 0.0363
Clip loss: 0.601, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6017
Iteration 45, img size: 22

In [141]:
!python find_direction.py --s_input="out/female_s.npz" --text_prompt="a photo of a face of a man with long hair and no facial hair" --outdir=runs/female2male_id1.2_clip0.7_lr2.0_power2.0_landmarks2_mask/ --only_face_mask=1 --mask_min_value=0.5 --clip_type='double' --batch_size=4 --n_epochs=3 --clip_loss_coef=0.7 --identity_loss_coef=1.2 --landmarks_loss_coef=25.0 --l2_reg_coef=0.01 --learning_rate=2.0 --negative_text_prompt="a photo of a face of a feminine woman" --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl
!python generate_fromS.py --text_prompt="a photo of a face of a man with long hair and no facial hair" --change_power=2.0 --outdir=runs/female2male_id1.2_clip0.7_lr2.0_power2.0_landmarks2_mask/ --s_input=out/female_s.npz --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl

Loading networks from "https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl"...
Loading ResNet ArcFace
Setting up PyTorch plugin "bias_act_plugin"... Done.
Setting up PyTorch plugin "upfirdn2d_plugin"... Done.
Iteration 40, img size: 224 , gradient norm: 0.0889
Clip loss: 0.521, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.000, Total loss: 0.5215
Iteration 26, img size: 224 , gradient norm: 0.1225
Clip loss: 0.490, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.4902
Iteration 60, img size: 224 , gradient norm: 0.0575
Clip loss: 0.611, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6118
Iteration 90, img size: 224 , gradient norm: 0.0417
Clip loss: 0.686, Identity loss: 0.000, Landmarks loss: 0.000, Manipulation direction loss: 0.001, Total loss: 0.6874
Iteration 117, img size: 224

In [151]:
!tar czf runs.tgz runs/

In [44]:
!ls out/parsings/*.png | wc -l

700


In [221]:
img = np.array(Image.open("out/proj40.png"))
segm_mask = np.array(Image.open("out/parsings/proj40.png"))

In [222]:
mask = ((segm_mask == 0) | (segm_mask == 13) | (segm_mask == 14) | (segm_mask == 8) | 
        (segm_mask == 9) | (segm_mask == 15) | (segm_mask == 16) | (segm_mask == 18))
segm_mask = segm_mask.astype('float')
segm_mask[mask] = 0.5
segm_mask[~mask] = 1

In [223]:
segm_mask = cv2.dilate(segm_mask, np.ones((20, 20)), iterations=3)

In [224]:
segm_mask.sum()

197358.5

In [None]:
import cv2
import matplotlib.pyplot as plt
plt.imshow(cv2.bitwise_and(img, img, mask=segm_mask))

In [145]:
import torch
import clip
from pathlib import Path
from PIL import Image

def conversion_rate(run_path, reverse=False):
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model, preprocess = clip.load("ViT-B/32", device=device)

  text = clip.tokenize(["a photo of a male person", "a photo of a female person"]).to(device)
  with torch.no_grad():
    text_features = model.encode_text(text)
  all_probs = []
  paths = list(Path(run_path).glob("*.jpeg"))
  for img_path in tqdm(paths):
    img = Image.open(img_path)
    image = preprocess(Image.fromarray(np.array(img)[:, 512:, :])).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        
        logits_per_image, logits_per_text = model(image, text)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
        all_probs.append(probs)
  all_probs = np.concatenate(all_probs)

  male_pred = (all_probs[:, 0] > 0.8).sum()
  female_pred = (all_probs[:, 1] > 0.8).sum()
  if reverse:
    return male_pred / (female_pred + male_pred)
  return female_pred / (female_pred + male_pred)

In [26]:
print('conversion_rate', conversion_rate(
    'runs/male2female_id0.75_clip1.0_lr2.5_power2.0', 
    'a photo of a face of a feminine woman with no makeup')
)

  0%|          | 0/279 [00:00<?, ?it/s]

conversion_rate 0.9427480916030534


In [149]:
for folder in Path("runs").iterdir():
  if len(list(folder.glob("*"))):
    print(str(folder))
    print('conversion_rate', conversion_rate(folder, reverse="female2male" in folder.name))
    print()

runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks2


  0%|          | 0/518 [00:00<?, ?it/s]

convebrsion_rate 0.7463917525773196
runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg


  0%|          | 0/413 [00:00<?, ?it/s]

convebrsion_rate 0.8792650918635171
runs/female2male_id1.2_clip0.7_lr2.0_power2.0_landmarks2_mask


  0%|          | 0/518 [00:00<?, ?it/s]

convebrsion_rate 0.7085106382978723
runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks2


  0%|          | 0/413 [00:00<?, ?it/s]

convebrsion_rate 0.9875621890547264
runs/male2female_id0.6_clip1.0_lr2.5_power2.0_landmarks_l2reg2


  0%|          | 0/413 [00:00<?, ?it/s]

convebrsion_rate 0.9875311720698254
runs/.ipynb_checkpoints
runs/male2female_id0.6_clip1.0_lr2.5_power2.0_sgd


  0%|          | 0/413 [00:00<?, ?it/s]

convebrsion_rate 0.9874686716791979
runs/female2male_id1.2_clip1.0_lr2.5_power2.0_landmarks2


  0%|          | 0/518 [00:00<?, ?it/s]

convebrsion_rate 0.8138075313807531
runs/male2female_id0.6_clip1.0_lr2.5_power2.0


  0%|          | 0/413 [00:00<?, ?it/s]

convebrsion_rate 0.9901719901719902
runs/female2male_id1.0_clip1.0_lr1.5_power2.0_landmarks


  0%|          | 0/518 [00:00<?, ?it/s]

convebrsion_rate 0.7119341563786008


In [150]:
for folder in Path("runs").iterdir():
  for path in folder.glob("*.jpeg"):
    if int(path.stem.split("_")[-1]) > 100:
      path.unlink()