In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [13]:
#The Encoder network for single view 3D reconstruction is a ResNet18 pretrained
#on the ImageNet dataset with the last fully-connected layer adjusted to project
#the features to a 256 dimensional embedding, "c"
from torchvision.models.resnet import resnet18 as _resnet18
import numpy
from PIL import Image
import io

In [3]:
class ImageEncoder(nn.Module):
    def __init__(self):
        super(ImageEncoder,self).__init__()
        self.encoderModel = _resnet18(pretrained=True)
        self.fc1 = nn.Linear(1000, 256)
        self.betafc = nn.Linear(256,256)
        self.gammafc = nn.Linear(256,256)
        
    def forward(self,x):
        x = self.encoderModel(x)
        #project to 256 dimensional embedding 
        x = self.fc1(x)
        # Obtain Beta and gamma inputs into conditional batch normalization
        # QUESTION Are these split or one after the other?
        beta = self.betafc(x)
        gamma = self.gammafc(x) #? gammaLayer(beta)
        return beta,gamma

In [4]:
class Block(nn.Module):
    def __init__(self):
        super(Block,self).__init__()
        self.fc1 = nn.Linear(256,256)
        self.fc2 = nn.Linear(256,256)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(256)

        
    def forward(self,y):
        x = y['ex']
        gamma = y['gamma']
        beta = y['beta']
        #First apply Conditional Batch Normalization
        out = gamma*self.bn1(x) + beta
        #Then ReLU activation function
        out = F.relu(out)
        #fully connected layer
        out = self.fc1(out)
        #Second CBN layer
        out = gamma*self.bn2(out) + beta
        #RELU activation
        out = F.relu(out)
        #2nd fully connected
        out = self.fc2(out)
        #Add to the input of the ResNet Block 
        out = x + out
        
        return {'ex':out, 'beta':beta, 'gamma':gamma}

In [5]:
class OccupancyModel(nn.Module):
    def __init__(self):
        super(OccupancyModel,self).__init__()
        self.blocks = self.makeBlocks()
        self.encoder = ImageEncoder()
        self.cbn = nn.BatchNorm1d(256)
        self.fc1 = nn.Linear(3,256)
        self.fc2 = nn.Linear(256,1)
        
    def makeBlocks(self):
        blocks = []
        for _ in range(5):
            blocks.append(Block())
        return nn.Sequential(*blocks)
   
  
    def forward(self,x,img):
        gamma,beta = self.encoder(img)
        x = self.fc1(x)
        #5 pre-activation ResNet-blocks
        x = self.blocks({'gamma':gamma, 'beta':beta, 'ex':x })
        x = x['ex']
        x = gamma*self.cbn(x) + beta
        x = F.relu(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x

In [139]:
model = OccupancyModel()
#Input to the occupancy network architecture is the 
#output of the encoder network and a batch of 3D coordinates. 
coords = torch.rand(64,3)
image = torch.rand(64,3,7,7)
model.eval()

p = model(coords,image)


In [156]:
#load some data:
#The .npz contains "points, occupancies, loc, scale" 
with numpy.load("/home/andrea/Documents/GradSchool/OccupancyNetworks/occupancy_networks/data/ShapeNet/02691156/fd528602cbde6f11bbf3143b1cb6076a/points.npz") as data:
    pts = torch.tensor(data["points"], dtype=torch.float)
    occupancies = torch.tensor(numpy.unpackbits(data["occupancies"])[:pts.size()[0]], dtype=torch.float)

image = numpy.array(Image.open("/home/andrea/Documents/GradSchool/OccupancyNetworks/occupancy_networks/data/ShapeNet/02691156/fd528602cbde6f11bbf3143b1cb6076a/img_choy2016/015.jpg"))
#At least for this image directory, the jpgs come in as 137,137,3
image = torch.tensor(image,dtype=torch.float).permute(2,0,1)
image = image.view(1,3,137,137)

train_loader = torch.utils.data.DataLoader(list(zip(pts,occupancies)), batch_size=64)


In [22]:
def train(epoch, model, trainloader, optimizer):
    modelCriterion = nn.BCELoss()
    model.train()
    for batch_idx, data in enumerate(train_loader):
        (images, pts, occupancies) = data
        print("in train")
        print(images.size())
        print(pts.size())
        print(occupancies.size())
        optimizer.zero_grad()
        output = model(pts, images[0][4].reshape(1,3,137,137)) #a probability for each point 
        loss = modelCriterion(output, occupancies)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader),
                loss.item()))

In [162]:
optimizer = optim.Adam(model.parameters(), lr = 0.0001)
train(1,model,train_loader,optimizer)



  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


In [23]:
#choose a category and load all of the available data:
#"name:table, id: 04379243"
import io
topdir = "/home/andrea/Documents/GradSchool/OccupancyNetworks/occupancy_networks"
imageFiles = ["000.jpg","001.jpg", "002.jpg","003.jpg", "004.jpg", "005.jpg", "006.jpg", "007.jpg", "008.jpg",
             "009.jpg", "010.jpg", "011.jpg", "012.jpg", "013.jpg", "014.jpg", "015.jpg", "016.jpg", "017.jpg",
             "018.jpg", "019.jpg", "020.jpg", "023.jpg"]
class DataSetClass(torch.utils.data.Dataset):
    def __init__(self):
        self.trainingDirs = []
        
        #catalogue all of the directories with the chosen category
        tablesDirectory=f"{topdir}/data/ShapeNet/02828884"
        with io.open(f"{tablesDirectory}/test.lst") as testlist:
            for testdir in testlist.readlines():
                self.trainingDirs.append(f"{tablesDirectory}/{testdir.strip()}")
    def __len__(self):
        return len(self.trainingDirs)
    
    def __getitem__(self,idx):
        d = self.trainingDirs[idx]
        with numpy.load(f"{d}/points.npz") as data:
            pts = torch.tensor(data["points"], dtype=torch.float)
            occupancies = torch.tensor(numpy.unpackbits(data["occupancies"])[:pts.size()[0]], dtype=torch.float)
        images = torch.zeros(23,3,137,137)
        for idx,imagefile in enumerate(imageFiles):
            with Image.open(f"{d}/img_choy2016/{imagefile}") as image:
                image = numpy.array(image)
                image = torch.tensor(image,dtype=torch.float)
                #if the image is grey scale, stack 3 to conform dimensions
                if len(image.size()) < 3:
                    image = torch.stack([image, image, image])
                image = image.permute(2,0,1)
                image = image.reshape(1,3,137,137)
                images[idx] = image
        print(images.size())
        print(pts.size())
        print(occupancies.size())
        return images, pts, occupancies
            

In [24]:
model = OccupancyModel()
data = DataSetClass()
train_loader = torch.utils.data.DataLoader(data, batch_size=1)
optimizer = optim.Adam(model.parameters(), lr = 0.0001)

train(1,model,train_loader,optimizer)

torch.Size([23, 3, 137, 137])
torch.Size([100000, 3])
torch.Size([100000])
in train
torch.Size([1, 23, 3, 137, 137])
torch.Size([1, 100000, 3])
torch.Size([1, 100000])


RuntimeError: running_mean should contain 100000 elements not 256