## **Image Segmentation Model**

In [1]:
from PIL import Image
import matplotlib.pyplot as plt
import torch
import numpy as np
from torchvision import models
import torchvision.transforms as T

In [2]:
# Pretrained DeepLabV3 with a ResNet-101 backbone, switched to eval mode for inference.
# The first run downloads the weights into the local torch cache.
dlab = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to C:\Users\Harpreet/.cache\torch\checkpoints\resnet101-5d3b4d8f.pth


HBox(children=(FloatProgress(value=0.0, max=178728960.0), HTML(value='')))




Downloading: "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth" to C:\Users\Harpreet/.cache\torch\checkpoints\deeplabv3_resnet101_coco-586e9e4e.pth


HBox(children=(FloatProgress(value=0.0, max=244545539.0), HTML(value='')))




In [3]:
# Define the helper function
def decode_segmap(image, nc=21):
  """Turn a map of class indices into a black/white RGB mask.

  The palette has 21 entries, indexed by class id:
    0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle,
    6=bus, 7=car, 8=cat, 9=chair, 10=cow, 11=dining table, 12=dog,
    13=horse, 14=motorbike, 15=person, 16=potted plant, 17=sheep,
    18=sofa, 19=train, 20=tv/monitor.
  Only cat (8) and dog (12) are painted white; every other class is black.

  Args:
    image: array of integer class labels (2-D for an H x W x 3 result).
    nc: number of class ids, counted from 0, that get painted. Pixels whose
      label is outside range(nc) stay black.

  Returns:
    uint8 array with the three colour channels stacked along axis 2.
  """
  black, white = (0, 0, 0), (255, 255, 255)
  foreground_ids = (8, 12)  # cat, dog
  label_colors = np.array(
      [white if class_id in foreground_ids else black for class_id in range(21)])

  # One uint8 plane per colour channel (R, G, B), all starting out black.
  planes = [np.zeros_like(image).astype(np.uint8) for _ in range(3)]

  for class_id in range(nc):
    hits = image == class_id
    for plane, level in zip(planes, label_colors[class_id]):
      plane[hits] = level

  return np.stack(planes, axis=2)

In [4]:
def segment(net, path, show_orig=True, dev='cuda'):
  """Segment the image at `path` with `net` and return the decoded mask.

  The image is resized and centre-cropped to 256x256, normalised, and run
  through `net`; the per-pixel argmax over the class scores is decoded with
  `decode_segmap` and plotted.

  Args:
    net: segmentation model whose forward pass returns a dict with an
      'out' entry of per-class scores. It is moved to the chosen device.
    path: path of the image file to segment.
    show_orig: when True, plot the input image before segmenting it.
    dev: device to run on. If a CUDA device is requested but CUDA is not
      available, the CPU is used instead of raising.

  Returns:
    float32 tensor holding the decoded RGB mask (height x width x 3).
  """
  device = torch.device(dev)
  if device.type == 'cuda' and not torch.cuda.is_available():
    device = torch.device('cpu')

  # The context manager closes the file handle. Converting to RGB guarantees
  # the three channels that Normalize below expects (PNGs may be RGBA or
  # greyscale).
  with Image.open(path) as src:
    if show_orig: plt.imshow(src); plt.axis('off'); plt.show()
    img = src.convert('RGB')

  # Comment the Resize and CenterCrop for better inference results
  trf = T.Compose([T.Resize(256),
                  T.CenterCrop(256),
                  T.ToTensor(),
                  T.Normalize(mean = [0.485, 0.456, 0.406],
                               std = [0.229, 0.224, 0.225])])
  inp = trf(img).unsqueeze(0).to(device)

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    out = net.to(device)(inp)['out']

  # out[0] drops exactly the batch axis; argmax over dim 0 picks the class per pixel.
  om = torch.argmax(out[0], dim=0).cpu().numpy()
  rgb = decode_segmap(om)
  plt.imshow(rgb); plt.axis('off'); plt.show()
  return torch.from_numpy(rgb).float()

In [6]:
from src.biggan import BigGAN128
from src.biggan import BigGAN256 
from src.biggan import BigGAN512 

import torch 
import torchvision 

from scipy.stats import truncnorm

In [7]:
# Instantiate the 256-variant generator; its weights are loaded from a checkpoint in a later cell.
biggan = BigGAN256()

In [9]:
# Map the checkpoint onto the CPU so loading also works on machines without a GPU.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
biggan.load_state_dict(torch.load("biggan256-release.pt", map_location="cpu"))

<All keys matched successfully>

In [None]:
# Segment both images with the same model; each call returns the decoded mask as a float tensor.
pred, target = (
    segment(dlab, image_path)
    for image_path in (
        '/content/BigGAN-Generator-Pretrained-Pytorch/image.png',
        '/content/BigGAN-Generator-Pretrained-Pytorch/dog_image2.png',
    )
)

# Mean squared error between the two masks.
loss = torch.nn.MSELoss()
output = loss(input=pred, target=target)
print(output)

In [12]:
# Truncation level used to scale the latent, clamped to stay strictly inside (0.02, 1.0).
truncation = torch.tensor(0.23).clamp(min=0.02 + 1e-4, max=1.0 - 1e-4).float()

In [13]:
# Latent vector: standard normal truncated to [-2, 2], scaled by the truncation level.
# A fixed random_state makes the sample (and so the generated image) reproducible
# across runs; change the seed to draw a different latent.
z = truncation * torch.as_tensor(
    truncnorm.rvs(-2.0, 2.0, size=(1, 140), random_state=0)
).float()

In [43]:
# Class label passed to the generator, as a length-1 int64 tensor.
# NOTE(review): 260 is presumably an ImageNet class index; confirm which class it selects.
c = torch.tensor([260], dtype=torch.long)


In [62]:
# Put the generator in eval mode and run it without tracking gradients.
# The truncation tensor is passed as a plain Python float.
biggan.eval()
with torch.no_grad():
    output = biggan(z, c, truncation.item())

In [63]:
# Sanity check: show the shape of the generated batch as the cell output.
output.shape

torch.Size([1, 3, 256, 256])

In [64]:
# Affine rescale x -> (x + 1) / 2, which maps [-1, 1] onto [0, 1].
# (Assumes the generator output lies in [-1, 1]; TODO confirm.)
# .detach() replaces the legacy .data attribute, which bypasses autograd's safety checks.
img = 0.5 * (output.detach() + 1)

In [65]:
# Show the shape once all size-1 axes (here the batch axis) are squeezed away.
img.squeeze().shape

torch.Size([3, 256, 256])

In [67]:
# Convert the squeezed image tensor to a PIL image, using the same `T` alias as the segmentation helper.
pil = T.ToPILImage()(img.squeeze())

In [68]:
# Open the generated image in the system's external image viewer.
# NOTE(review): on a remote or hosted kernel this may display nothing; ending the cell with a bare `pil` would render it inline.
pil.show()

In [None]:
# Write the generated image to disk; the format is taken from the file extension.
# NOTE(review): the segmentation cell reads 'image.png' while this writes 'images.gif'; confirm the intended filename and format.
pil.save("images.gif")