In [None]:
import torch
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import os
import imageio
from torch import nn

# --- Model / data dimensions -------------------------------------------------
# Side length (pixels) that images are resized to before training; also the
# side length of the attribute plane the Discriminator builds.
image_size=128
# Only used for the shape of the `fill` tensor in the visible part of this notebook.
label_dim = 1
# Width of one per-object slice that Encoder.forward feeds through its MLP.
One_Hot_dim=75
# Width of the Encoder output and of the Generator's input vector.
G_input_dim = 128
# Number of channels produced by the Generator's last layer.
G_output_dim = 3
# Number of image channels fed to the Discriminator (it adds one more channel
# for the attribute plane).
D_input_dim = 3
# D_output_dim and num_filters are not referenced in the visible part of this notebook.
D_output_dim = 1
num_filters = [1024, 512, 256, 128, 64, 32]

# --- Optimisation settings ---------------------------------------------------
# Initial learning rate shared by all RMSprop optimizers.
learning_rate = 0.0001
# Not referenced in the visible part of this notebook (RMSprop takes no betas).
betas = (0.5, 0.999)
# Number of samples per training / validation batch.
batch_size = 20
# Number of passes over the training data in the GAN training loop.
num_epochs = 500

# For logger
def to_np(x):
    """Return the values of tensor ``x`` as a NumPy array.

    The tensor's underlying data is taken (dropping autograd history), copied
    to host memory if necessary, and then exposed as an ``ndarray``.
    """
    host_tensor = x.data.cpu()
    return host_tensor.numpy()


def to_var(x):
    """Wrap ``x`` in a ``Variable``, moving it to the GPU first when CUDA is available."""
    placed = x.cuda() if torch.cuda.is_available() else x
    return Variable(placed)


# De-normalization
def denorm(x):
    """Map values from the [-1, 1] range to [0, 1], clamping anything outside."""
    shifted = x + 1
    halved = shifted / 2
    return halved.clamp(0, 1)


class Encoder(nn.Module):
    """Embed a flat scene vector by summing a shared MLP over its object slots.

    The input is read as ``num_objects`` consecutive slices of ``One_Hot_dim``
    columns. Every slice goes through the same two-layer MLP (``self.l1``) and
    the per-slice outputs are added together, giving one ``G_input_dim``-wide
    vector per row of the input.
    """

    # Number of consecutive One_Hot_dim-wide slices read from each input row.
    num_objects = 5

    def __init__(self):
        super(Encoder, self).__init__()
        self.l1=nn.Sequential(nn.Linear(One_Hot_dim,G_input_dim),nn.ReLU(),nn.Linear(G_input_dim,G_input_dim))

    def forward(self,x):
        """Return the summed slot embeddings, shape ``(x.size(0), G_input_dim)``.

        Args:
            x: 2-D float tensor with at least ``num_objects * One_Hot_dim`` columns.
        """
        # Allocate the accumulator from `x` itself so it follows the input's
        # batch size, dtype and device instead of the global batch_size and an
        # unconditional .cuda() (which broke CPU runs and partial batches).
        reses = x.new_zeros(x.size(0), G_input_dim)
        for i in range(self.num_objects):
            reses = reses + self.l1(x[:,i*One_Hot_dim:(i+1)*One_Hot_dim])
        return reses
            


class Generator(nn.Module):
    """Transposed-convolution decoder from an embedding vector to an image.

    ``self.l`` is a linear layer applied to the embedding; ``self.main`` then
    upsamples the result (viewed as a 1x1 feature map) through six
    ``ConvTranspose2d`` layers, ending in ``Tanh``.
    """

    def __init__(self):
        super().__init__()
        self.l = nn.Linear(G_input_dim, G_input_dim)

        # (in_channels, out_channels, stride, padding) for every stage that is
        # followed by BatchNorm + ReLU. All stages use a 4x4 kernel and no bias.
        hidden_stages = (
            (G_input_dim, 512, 1, 0),
            (512, 256, 2, 1),
            (256, 128, 2, 1),
            (128, 64, 2, 1),
            (64, 32, 2, 1),
        )
        layers = []
        for in_ch, out_ch, stride, pad in hidden_stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, 4, stride, pad, bias=False))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(True))
        # Output stage: project to the image channels and squash with Tanh.
        layers.append(nn.ConvTranspose2d(32, G_output_dim, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())
        self.main = nn.Sequential(*layers)

    def forward(self, x, attr):
        """Decode embedding ``x`` into an image batch; ``attr`` is accepted but not used."""
        projected = self.l(x)
        feature_map = projected.view(-1, G_input_dim, 1, 1)
        return self.main(feature_map)

class Discriminator(nn.Module):
    """Convolutional classifier over an image plus a projected attribute plane.

    ``self.feature_input`` maps the attribute vector to ``image_size**2``
    values, which are reshaped into one extra image channel. ``self.main`` is a
    stack of strided convolutions ending in a ``Sigmoid``.
    """

    def __init__(self):
        super().__init__()
        self.feature_input = nn.Linear(G_input_dim, image_size * image_size)

        # First stage has no BatchNorm; the +1 input channel is the attribute plane.
        layers = [
            nn.Conv2d(D_input_dim + 1, 32, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # Four stride-2 stages, each Conv -> BatchNorm -> LeakyReLU.
        for in_ch, out_ch in ((32, 64), (64, 128), (128, 256), (256, 512)):
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        # Final 4x4 convolution to a single channel, squashed by Sigmoid.
        layers.extend([
            nn.Conv2d(512, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ])
        self.main = nn.Sequential(*layers)

    def forward(self, x, attr):
        """Score image batch ``x`` given attribute embedding ``attr``; returns shape ``(-1, 1)``."""
        attr_plane = self.feature_input(attr).view(-1, 1, image_size, image_size)
        stacked = torch.cat([x, attr_plane], 1)
        scores = self.main(stacked)
        return scores.view(-1, 1)





In [None]:
# Mount Google Drive at /content/drive; the dataset and model checkpoints used
# below are read from / written to paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pickle
import random
import cv2
from google.colab.patches import cv2_imshow

# Load the pickled dataset; it is indexed below with the keys 'train' and 'val'.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only load this file if it comes from a trusted source.
# Alternative local copy used previously: 'data (1).pkl'
# The context manager closes the file handle as soon as the data has been read
# (the bare open() used before left it open for the lifetime of the kernel).
with open('drive/MyDrive/iclevrdata.pkl','rb') as f:
  alldata = pickle.load(f)

# Weight of the adversarial (BCE) term in the generator loss: G_loss = lam*GAN + MSE.
lam=0.0015



def one_hot_list(ii,l):
  """Return a list of ``l`` zeros with position ``ii`` set to 1.

  ``ii`` follows normal list indexing, so an index outside the list raises
  ``IndexError``.
  """
  encoded = [0] * l
  encoded[ii] = 1
  return encoded

# Lookup tables from an attribute's string value to its one-hot index.
# materialmap is not referenced in the visible part of this notebook.
materialmap={'rubber':0,'metal':1}
# 8 colors -> indices 0..7 (datatovec encodes color as an 8-way one-hot).
colormap={'red':0,'green':1,'blue':2,'cyan':3,'brown':4,'gray':5,'purple':6,'yellow':7}
# 3 shapes -> indices 0..2 (datatovec encodes shape as a 3-way one-hot).
shapemap={'sphere':0, 'cube':1, 'cylinder':2}

def datatovec(d, max_objects=5):
  """Encode a scene's object list as one flat one-hot vector of fixed length.

  Each object contributes 75 entries, in this order: a 32-way one-hot of
  ``pixel_coords[0]//15``, a 32-way one-hot of ``pixel_coords[1]//10``, a 3-way
  one-hot of its shape and an 8-way one-hot of its color. A scene with fewer
  than ``max_objects`` objects is zero-padded to ``75 * max_objects`` entries.
  A scene with more objects is not truncated.

  Args:
    d: sequence of dicts with the keys 'pixel_coords', 'shape' and 'color'.
    max_objects: number of object slots in the output vector (default 5).

  Returns:
    A flat list of 0/1 ints.

  Raises:
    KeyError: if a shape or color is missing from ``shapemap`` / ``colormap``.
    IndexError: if a binned coordinate falls outside its 32 bins.
  """
  coord_bins, shape_bins, color_bins = 32, 3, 8
  per_object = 2*coord_bins + shape_bins + color_bins
  complete=[]
  for obj in d:
    complete += (
      one_hot_list(obj['pixel_coords'][0]//15, coord_bins)
      + one_hot_list(obj['pixel_coords'][1]//10, coord_bins)
      + one_hot_list(shapemap[obj['shape']], shape_bins)
      + one_hot_list(colormap[obj['color']], color_bins)
    )
  # Pad with one full all-zero slot per missing object. The padding used to be
  # 13 entries per missing object, which does not match the 75-entry slot width
  # and produced vectors of the wrong length for scenes with fewer objects.
  complete += [0]*(per_object*(max_objects-len(d)))
  return complete

def takeavg(li):
  """Return the arithmetic mean of the items in ``li``.

  Items are accumulated with ``+=`` starting from 0 and the total is divided by
  the float length, so an empty ``li`` raises ``ZeroDivisionError``.
  """
  total = 0
  for item in li:
    total += item
  count = float(len(li))
  return total / count

# Disabled helper, kept as a bare string literal so it is never executed.
# It would build a per-sample mask that is 1 inside each object's bounding box.
# NOTE(review): the divisors 5.0 and 7.5 presumably rescale source-image pixel
# coordinates to the mask grid -- confirm they still match image_size before
# re-enabling this.
"""
def boxonly(ds):
  mask=torch.zeros(batch_size,G_output_dim,image_size,image_size).cuda()
  for j in range(len(ds)):
    for i in ds[j]:
      #print(i)
      mask[j,:,int(i['ymin']/5.0):int(i['ymax']/5.0),int(i['xmin']/7.5):int(i['xmax']/7.5)]=1
  return mask
"""
    


import copy

# Build the networks: G (generator), D (discriminator) and two separate
# encoders -- E feeds the generator, DE feeds the discriminator -- and move
# them to the GPU. This cell requires CUDA.
G = Generator()
D = Discriminator()
E = Encoder()
DE= Encoder()
G.cuda()
D.cuda()
E.cuda()
DE.cuda()
# One RMSprop optimizer per network; only the discriminator uses weight decay.
G_optimizer = torch.optim.RMSprop(G.parameters(), lr=learning_rate)
D_optimizer = torch.optim.RMSprop(D.parameters(), lr=learning_rate,weight_decay=0.02)
E_optimizer = torch.optim.RMSprop(E.parameters(), lr=learning_rate)
DE_optimizer = torch.optim.RMSprop(DE.parameters(), lr=learning_rate)

# Replace the freshly built G and E with whole-module pickles stored on Drive.
# The G/E instances and their optimizers created above are discarded, so the
# optimizers are rebuilt below to track the loaded parameters.
# NOTE(review): torch.load unpickles arbitrary objects here -- only load
# checkpoints from a trusted source.
G=torch.load('/content/drive/MyDrive/G_128_20.pt')
E=torch.load('/content/drive/MyDrive/E_128_20.pt')
G.cuda()
E.cuda()
G_optimizer = torch.optim.RMSprop(G.parameters(), lr=learning_rate)
E_optimizer = torch.optim.RMSprop(E.parameters(), lr=learning_rate)

# Binary cross-entropy for the adversarial losses (D ends in a Sigmoid).
criterion = torch.nn.BCELoss()
# `fill` is not referenced in the visible part of this notebook.
fill = torch.ones([batch_size, label_dim, image_size, image_size])
# Each sample becomes a tuple: (flat one-hot scene vector, image resized to
# image_size x image_size, original object list).
train_data=[(datatovec(x[0]),cv2.resize(x[1],(image_size,image_size)),x[0]) for x in alldata['train']]
valid_data=[(datatovec(x[0]),cv2.resize(x[1],(image_size,image_size)),x[0]) for x in alldata['val']]
# Scratch image buffer (copy of the first training image) that toimg() writes into.
baseimg=copy.deepcopy(train_data[0][1])

def toimg(li):
  """Write a ``[channel][row][col]`` image into the shared ``baseimg`` buffer.

  Values are multiplied by 255, clamped to ``[0, 255]`` and stored at
  ``baseimg[row, col, channel]`` for the first three channels and the first
  ``image_size`` rows and columns.

  Args:
    li: nested list (or array-like) indexed ``[channel][row][col]`` holding at
      least 3 channels of at least ``image_size`` x ``image_size`` values.

  Returns:
    The module-level ``baseimg`` array itself, not a copy. Every call
    overwrites and returns the same buffer, so copy it if it must be kept.
  """
  # One vectorised write replaces 3 * image_size**2 Python-level assignments.
  pixels = np.asarray(li, dtype=np.float64)[:3, :image_size, :image_size] * 255
  # Clamp before storing: the generator ends in Tanh, so negative values are
  # possible, and out-of-range values would wrap around when cast into an
  # unsigned 8-bit buffer (assumed dtype of baseimg -- depends on the dataset).
  np.clip(pixels, 0, 255, out=pixels)
  baseimg[:image_size, :image_size, :3] = pixels.transpose(1, 2, 0)
  return baseimg

# Disabled MSE-only pre-training of G and E, kept as a bare string literal so
# it is never executed. When enabled it writes the G_128_20.pt / E_128_20.pt
# checkpoints, i.e. the same paths this cell loads G and E from.
"""
for epoch in range(20):
  for i in range(len(train_data)//batch_size):
    start=i*batch_size
    end=(i+1)*batch_size
    trains=train_data[start:end]
    train_imgs=[x[1] for x in trains]
    train_vecs=[x[0] for x in trains]
    x_real=(torch.FloatTensor(train_imgs)/256).transpose(2,3).transpose(1,2).cuda()
    gen_image=G(E(torch.FloatTensor(train_vecs).view(batch_size,-1).cuda()),start).squeeze()
    G_loss2=torch.nn.MSELoss()(gen_image,x_real)
    G.zero_grad()
    E.zero_grad()
    G_loss2.backward()
    G_optimizer.step()
    E_optimizer.step()
    print(G_loss2)

torch.save(G,'/content/drive/MyDrive/G_128_20.pt')
torch.save(E,'/content/drive/MyDrive/E_128_20.pt')
"""

def _batch_to_tensors(samples):
  """Turn a list of ``(vector, image, objects)`` samples into two GPU tensors.

  Returns:
    ``(vecs, imgs)``: ``vecs`` is the float feature matrix viewed as
    ``(batch_size, -1)``; ``imgs`` is the image batch divided by 256 with its
    last axis moved to position 1 (the layout the conv layers consume).
  """
  vecs = torch.FloatTensor([s[0] for s in samples]).view(batch_size, -1).cuda()
  # Stack into a single ndarray first: building a tensor straight from a list
  # of ndarrays is very slow.
  imgs = torch.from_numpy(np.stack([s[1] for s in samples])).float()
  imgs = (imgs / 256).permute(0, 3, 1, 2).cuda()
  return vecs, imgs


def _show_pair(generated, real):
  """Display one generated image followed by the matching real image."""
  cv2_imshow(toimg(generated.tolist()))
  cv2_imshow(toimg(real.tolist()))


# Loop-invariant tensors, created once instead of once per batch: the BCE
# targets for "real" / "fake" and the second argument of G (Generator.forward
# accepts it but does not use it).
y_real_ = torch.ones(batch_size).cuda()
y_fake_ = torch.zeros(batch_size).cuda()
c = torch.ones(batch_size).view(-1,1,1,1).cuda()
mse_loss = torch.nn.MSELoss()

for epoch in range(num_epochs):
  random.shuffle(train_data)
  if epoch%10==9:
    # Halve the G and D learning rates every 10th epoch. The E and DE
    # optimizers keep their initial rate.
    G_optimizer.param_groups[0]['lr'] /= 2
    D_optimizer.param_groups[0]['lr'] /= 2

  # Per-epoch logs. They hold plain Python floats (via .item()); appending the
  # loss tensors themselves would keep every iteration's autograd graph alive
  # for the whole epoch.
  GMSELoss=[]
  GGANLoss=[]
  DLoss=[]

  # Discriminator updates per generator update: 40 in the first epoch, 2 afterwards.
  drange = 40 if epoch == 0 else 2

  for i in range(len(train_data)//batch_size):
    vecs, x_real = _batch_to_tensors(train_data[i*batch_size:(i+1)*batch_size])

    # ---- Discriminator (and its encoder DE) updates ----
    for _ in range(drange):
      # The fake batch is only an input here, so no graph through E and G is
      # needed; their gradients were zeroed before use anyway.
      with torch.no_grad():
        fake_image = G(E(vecs), c).squeeze()
      dgen_vecs = DE(vecs)
      D_real_loss = criterion(D(x_real, dgen_vecs).squeeze(), y_real_)
      D_fake_loss = criterion(D(fake_image, dgen_vecs).squeeze(), y_fake_)
      D_BCE_loss = D_real_loss + D_fake_loss
      DLoss.append(D_BCE_loss.item())

      D.zero_grad()
      DE.zero_grad()
      D_BCE_loss.backward()
      D_optimizer.step()
      DE_optimizer.step()

    # ---- Generator (and its encoder E) update ----
    gen_image = G(E(vecs), c).squeeze()
    D_fake = D(gen_image, DE(vecs)).squeeze()
    G_loss1 = criterion(D_fake, y_real_)      # adversarial term
    G_loss2 = mse_loss(gen_image, x_real)     # pixel-wise reconstruction term
    GGANLoss.append(G_loss1.item())
    GMSELoss.append(G_loss2.item())
    G_loss = lam*G_loss1 + G_loss2

    G.zero_grad()
    E.zero_grad()
    G_loss.backward()
    E_optimizer.step()
    G_optimizer.step()

    if i==0:
      # Visual check on the first batch of the epoch.
      _show_pair(gen_image[0], x_real[0])

  print("Epoch "+str(epoch)+" GMSE: "+str(takeavg(GMSELoss))+" GGAN: "+str(takeavg(GGANLoss))+"\n")

  # ---- Validation ----
  GMSELoss=[]
  DLoss=[]
  GGANLoss=[]
  # NOTE(review): no .eval() call is made, so the networks stay in training
  # mode here: BatchNorm layers use batch statistics and keep updating their
  # running statistics on validation data. Left unchanged so reported numbers
  # stay comparable; consider switching to eval mode for validation.
  with torch.no_grad():
    for i in range(len(valid_data)//batch_size):
      vecs, x_real = _batch_to_tensors(valid_data[i*batch_size:(i+1)*batch_size])

      gen_vecs = E(vecs)
      dgen_vecs = DE(vecs)
      D_real_loss = criterion(D(x_real, dgen_vecs).squeeze(), y_real_)

      gen_image = G(gen_vecs, c).squeeze()
      D_fake = D(gen_image, dgen_vecs).squeeze()
      D_fake_loss = criterion(D_fake, y_fake_)

      DLoss.append((D_real_loss + D_fake_loss).item())
      GGANLoss.append(criterion(D_fake, y_real_).item())
      GMSELoss.append(mse_loss(gen_image, x_real).item())

      if i==0:
        _show_pair(gen_image[0], x_real[0])
  print("Epoch "+str(epoch)+" valid loss: D_loss: "+str(takeavg(DLoss))+" GMSE: "+str(takeavg(GMSELoss))+" GGAN: "+str(takeavg(GGANLoss))+"\n")

  # Checkpoint G and E (whole-module pickles) every 25 epochs.
  if epoch%25 == 24:
    torch.save(G,'/content/drive/MyDrive/G_gan_'+str(epoch)+'.pt')
    torch.save(E,'/content/drive/MyDrive/E_gan_'+str(epoch)+'.pt')


    
  


    






  



FileNotFoundError: ignored

In [None]:
# Save the current generator and its encoder to Drive as whole-module pickles.
# Loading them back with torch.load requires the Generator / Encoder class
# definitions to be importable under the same names.
torch.save(G,'/content/drive/MyDrive/G_gan.pt')
torch.save(E,'/content/drive/MyDrive/E_gan.pt')

In [None]:
# Show the GPU attached to this runtime and its current memory usage.
!nvidia-smi

Fri Dec  4 21:04:38 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P0    30W /  70W |   2039MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces