<a href="https://colab.research.google.com/github/rsn870/rank_and_bias_gen/blob/main/Generate_multi_d_datasets_consistency.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pytorch-pretrained-biggan

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch-pretrained-biggan
  Downloading pytorch_pretrained_biggan-0.1.1-py3-none-any.whl (27 kB)
Collecting boto3 (from pytorch-pretrained-biggan)
  Downloading boto3-1.26.160-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.9/135.9 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
Collecting botocore<1.30.0,>=1.29.160 (from boto3->pytorch-pretrained-biggan)
  Downloading botocore-1.29.160-py3-none-any.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m107.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->pytorch-pretrained-biggan)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.7.0,>=0.6.0 (from boto3->pytorch-pretrained-biggan)
  Downloading s3transfer-0.6.1-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

We will be using Philip Pope's codebase from [this paper here](/https://openreview.net/pdf?id=XJk19XzGq2J). It allows us to generate datasets of varying dimensionality but of the same visual concept by masking the latents of a pretrained BigGAN

In [2]:
!git clone https://github.com/ppope/dimensions.git

Cloning into 'dimensions'...
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (34/34), done.[K
remote: Total 41 (delta 6), reused 35 (delta 4), pack-reused 0[K
Unpacking objects: 100% (41/41), 4.51 MiB | 9.17 MiB/s, done.


In [3]:
%cd dimensions

/content/dimensions


In [9]:
import nltk
nltk.download('wordnet')  #Setup wordnet for BigGAN

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
%mkdir /content/samples #Create a samples directory

The original gen_images.py code generates pt files. To generate image datasets instead use the following modified version of gen_images.py which has to be placed in the same path



```
"""
Modified from:
  * https://github.com/huggingface/pytorch-pretrained-BigGAN#usage
"""
import os
import json
import torch
import numpy as np
from datetime import datetime
import argparse
from pytorch_pretrained_biggan import (BigGAN, one_hot_from_names,
                                       truncated_noise_sample)
from utils import write_config

from torchvision.utils import save_image

parser = argparse.ArgumentParser()
parser.add_argument('--model_file', type=str, default='biggan-deep-128',
                   help="Pretrained GAN model: choice of 128,256,512")
parser.add_argument('--num_samples', type=int, default=1024,
                   help="Total number of samples to generate")
parser.add_argument('--latent_dim', type=int, default=128,
                   help="Size of latent dimension to use." \
                         "Decreased from original by fixing first k components to be zero")
parser.add_argument('--batch_size', type=int, default=128,
                   help="Batch size for generating images")
parser.add_argument('--class_name', type=str, default='soap bubble',
                   help="Wordnet name of Imagenet class to generate")
parser.add_argument('--class_id', type=int, default=0,
                   help="ID of class {0,...,999}")
parser.add_argument('--truncation', type=float, default=1,
                   help="Level of Truncation in sampling density" \
                        "Trades off between diversity (1) and sample quality (0)." \
                        "See the paper for details.")
parser.add_argument('--save_dir', type=str, default='samples/test',
                   help="Save directory for files")
parser.add_argument('--gpu_id', type=str, default="0",
                   help="GPU ID(s) to use")
parser.add_argument('--add', action='store_true', default=False,
                   help="Adds more data to existing directory" \
                        "Overides directory check")

NUM_IMAGENET_CLS = 1000


def main(args):
  #Create out directory is it doesn't exist
  if not os.path.exists(args.save_dir):
    os.mkdir(args.save_dir)
  else:
   if "test" not in args.save_dir and not args.add:
     raise Exception('Output Directory {} already exists!'.format(args.save_dir))
  print("Saving to {}".format(args.save_dir))
  write_config(args)
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

  # Load pre-trained model tokenizer (vocabulary)
  model = BigGAN.from_pretrained(args.model_file)
  if args.gpu_id:
    model.to('cuda')

  num_batches = args.num_samples // args.batch_size

  if args.add:
    existing_batches = [x for x in os.listdir(args.save_dir) if ".pt" in x]
    start_batch_num = max([ int(x.split("/")[-1].replace(".pt", "")) for x in existing_batches])
    num_batches += start_batch_num
  else:
    start_batch_num = 0

  start = datetime.now()
  for b in range(start_batch_num, num_batches):
    print('Batch: {}/{}'.format(b+1, num_batches))

    # Prepare inputs
    if args.class_id:
      class_vector = np.zeros((args.batch_size, NUM_IMAGENET_CLS), dtype=np.float32)
      class_vector[:, args.class_id] = 1
    elif args.class_name:
      class_vector = one_hot_from_names([args.class_name], batch_size=args.batch_size)
    else:
      raise Exception("Must specify either class name or ID!")
    noise_vector = truncated_noise_sample(truncation=args.truncation, batch_size=args.batch_size)
    latent_dim_orig = noise_vector.shape[1]
    if latent_dim_orig != args.latent_dim:
      #Reduce dimension of noise_vector by fixing components to be zero
      assert args.latent_dim < latent_dim_orig
      k = latent_dim_orig - args.latent_dim
      noise_vector[:,:k] = 0

    noise_vector = torch.from_numpy(noise_vector)
    class_vector = torch.from_numpy(class_vector)
    if args.gpu_id:
      noise_vector = noise_vector.to('cuda')
      class_vector = class_vector.to('cuda')

    # Generate imagex
    with torch.no_grad():
      output = model(noise_vector, class_vector, args.truncation)

    if args.gpu_id:
      output = output.to('cpu')

    ##Transform images from [-1,1] to [0,1]
    output = (output + 1)*(0.5)

    for i in range(output.shape[0]):
        
        out_fn = "{}.png".format(b*100+i)
        out_fp = os.path.join(args.save_dir, out_fn)
        save_image(output[i],out_fp)

    #save_image(output,out_fp)

  print('Num images generated: {}'.format(args.num_samples))
  print('Runtime: {}'.format(datetime.now() - start))


if __name__ == "__main__":
  args = parser.parse_args()
  main(args)

```



In [26]:
!python generate_data/gen_images.py --num_samples 10000 --class_name basenji --latent_dim 16 --batch_size 100 --save_dir /content/samples/basenji_16

Saving to /content/samples/basenji_16
Batch: 1/100
Batch: 2/100
Batch: 3/100
Batch: 4/100
Batch: 5/100
Batch: 6/100
Batch: 7/100
Batch: 8/100
Batch: 9/100
Batch: 10/100
Batch: 11/100
Batch: 12/100
Batch: 13/100
Batch: 14/100
Batch: 15/100
Batch: 16/100
Batch: 17/100
Batch: 18/100
Batch: 19/100
Batch: 20/100
Batch: 21/100
Batch: 22/100
Batch: 23/100
Batch: 24/100
Batch: 25/100
Batch: 26/100
Batch: 27/100
Batch: 28/100
Batch: 29/100
Batch: 30/100
Batch: 31/100
Batch: 32/100
Batch: 33/100
Batch: 34/100
Batch: 35/100
Batch: 36/100
Batch: 37/100
Batch: 38/100
Batch: 39/100
Batch: 40/100
Batch: 41/100
Batch: 42/100
Batch: 43/100
Batch: 44/100
Batch: 45/100
Batch: 46/100
Batch: 47/100
Batch: 48/100
Batch: 49/100
Batch: 50/100
Batch: 51/100
Batch: 52/100
Batch: 53/100
Batch: 54/100
Batch: 55/100
Batch: 56/100
Batch: 57/100
Batch: 58/100
Batch: 59/100
Batch: 60/100
Batch: 61/100
Batch: 62/100
Batch: 63/100
Batch: 64/100
Batch: 65/100
Batch: 66/100
Batch: 67/100
Batch: 68/100
Batch: 69/100
Batch

In [27]:
!python generate_data/gen_images.py --num_samples 10000 --class_name basenji --latent_dim 32 --batch_size 100 --save_dir /content/samples/basenji_32

Saving to /content/samples/basenji_32
Batch: 1/100
Batch: 2/100
Batch: 3/100
Batch: 4/100
Batch: 5/100
Batch: 6/100
Batch: 7/100
Batch: 8/100
Batch: 9/100
Batch: 10/100
Batch: 11/100
Batch: 12/100
Batch: 13/100
Batch: 14/100
Batch: 15/100
Batch: 16/100
Batch: 17/100
Batch: 18/100
Batch: 19/100
Batch: 20/100
Batch: 21/100
Batch: 22/100
Batch: 23/100
Batch: 24/100
Batch: 25/100
Batch: 26/100
Batch: 27/100
Batch: 28/100
Batch: 29/100
Batch: 30/100
Batch: 31/100
Batch: 32/100
Batch: 33/100
Batch: 34/100
Batch: 35/100
Batch: 36/100
Batch: 37/100
Batch: 38/100
Batch: 39/100
Batch: 40/100
Batch: 41/100
Batch: 42/100
Batch: 43/100
Batch: 44/100
Batch: 45/100
Batch: 46/100
Batch: 47/100
Batch: 48/100
Batch: 49/100
Batch: 50/100
Batch: 51/100
Batch: 52/100
Batch: 53/100
Batch: 54/100
Batch: 55/100
Batch: 56/100
Batch: 57/100
Batch: 58/100
Batch: 59/100
Batch: 60/100
Batch: 61/100
Batch: 62/100
Batch: 63/100
Batch: 64/100
Batch: 65/100
Batch: 66/100
Batch: 67/100
Batch: 68/100
Batch: 69/100
Batch

In [28]:
!python generate_data/gen_images.py --num_samples 10000 --class_name basenji --latent_dim 64 --batch_size 100 --save_dir /content/samples/basenji_64

Saving to /content/samples/basenji_64
Batch: 1/100
Batch: 2/100
Batch: 3/100
Batch: 4/100
Batch: 5/100
Batch: 6/100
Batch: 7/100
Batch: 8/100
Batch: 9/100
Batch: 10/100
Batch: 11/100
Batch: 12/100
Batch: 13/100
Batch: 14/100
Batch: 15/100
Batch: 16/100
Batch: 17/100
Batch: 18/100
Batch: 19/100
Batch: 20/100
Batch: 21/100
Batch: 22/100
Batch: 23/100
Batch: 24/100
Batch: 25/100
Batch: 26/100
Batch: 27/100
Batch: 28/100
Batch: 29/100
Batch: 30/100
Batch: 31/100
Batch: 32/100
Batch: 33/100
Batch: 34/100
Batch: 35/100
Batch: 36/100
Batch: 37/100
Batch: 38/100
Batch: 39/100
Batch: 40/100
Batch: 41/100
Batch: 42/100
Batch: 43/100
Batch: 44/100
Batch: 45/100
Batch: 46/100
Batch: 47/100
Batch: 48/100
Batch: 49/100
Batch: 50/100
Batch: 51/100
Batch: 52/100
Batch: 53/100
Batch: 54/100
Batch: 55/100
Batch: 56/100
Batch: 57/100
Batch: 58/100
Batch: 59/100
Batch: 60/100
Batch: 61/100
Batch: 62/100
Batch: 63/100
Batch: 64/100
Batch: 65/100
Batch: 66/100
Batch: 67/100
Batch: 68/100
Batch: 69/100
Batch

In [29]:
!python generate_data/gen_images.py --num_samples 10000 --class_name basenji --latent_dim 128 --batch_size 100 --save_dir /content/samples/basenji_128

Saving to /content/samples/basenji_128
Batch: 1/100
Batch: 2/100
Batch: 3/100
Batch: 4/100
Batch: 5/100
Batch: 6/100
Batch: 7/100
Batch: 8/100
Batch: 9/100
Batch: 10/100
Batch: 11/100
Batch: 12/100
Batch: 13/100
Batch: 14/100
Batch: 15/100
Batch: 16/100
Batch: 17/100
Batch: 18/100
Batch: 19/100
Batch: 20/100
Batch: 21/100
Batch: 22/100
Batch: 23/100
Batch: 24/100
Batch: 25/100
Batch: 26/100
Batch: 27/100
Batch: 28/100
Batch: 29/100
Batch: 30/100
Batch: 31/100
Batch: 32/100
Batch: 33/100
Batch: 34/100
Batch: 35/100
Batch: 36/100
Batch: 37/100
Batch: 38/100
Batch: 39/100
Batch: 40/100
Batch: 41/100
Batch: 42/100
Batch: 43/100
Batch: 44/100
Batch: 45/100
Batch: 46/100
Batch: 47/100
Batch: 48/100
Batch: 49/100
Batch: 50/100
Batch: 51/100
Batch: 52/100
Batch: 53/100
Batch: 54/100
Batch: 55/100
Batch: 56/100
Batch: 57/100
Batch: 58/100
Batch: 59/100
Batch: 60/100
Batch: 61/100
Batch: 62/100
Batch: 63/100
Batch: 64/100
Batch: 65/100
Batch: 66/100
Batch: 67/100
Batch: 68/100
Batch: 69/100
Batc

In [31]:
!zip -r /content/basenji_16.zip /content/basenji_multid_samples/basenji_16/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/basenji_multid_samples/basenji_16/8111.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/5975.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/915.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/1592.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/4861.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/1481.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/3606.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/7600.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/2250.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/5488.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/2247.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_16/3888.png (deflated 0%)
  adding: content/basenji_multid_sam

In [32]:
!zip -r /content/basenji_32.zip /content/basenji_multid_samples/basenji_32/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/basenji_multid_samples/basenji_32/8111.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/5975.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/915.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/1592.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/4861.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/1481.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/3606.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/7600.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/2250.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/5488.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/2247.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_32/3888.png (deflated 0%)
  adding: content/basenji_multid_sam

In [33]:
!zip -r /content/basenji_64.zip /content/basenji_multid_samples/basenji_64/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/basenji_multid_samples/basenji_64/8111.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_64/5975.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/915.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/1592.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/4861.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_64/1481.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/3606.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/7600.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_64/2250.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_64/5488.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_64/2247.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_64/3888.png (stored 0%)
  adding: content/basenji_multid_samples/basenji

In [34]:
!zip -r /content/basenji_128.zip /content/basenji_multid_samples/basenji_128/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/basenji_multid_samples/basenji_128/8111.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_128/5975.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_128/915.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_128/1592.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_128/4861.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_128/1481.png (stored 0%)
  adding: content/basenji_multid_samples/basenji_128/3606.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_128/7600.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_128/2250.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_128/5488.png (deflated 0%)
  adding: content/basenji_multid_samples/basenji_128/2247.png (deflated 1%)
  adding: content/basenji_multid_samples/basenji_128/3888.png (deflated 0%)
  adding: content/basenji_multid_s