In [68]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random as rand
import copy

In [69]:
class Disentangler(nn.Module): 
    def __init__(self,encoder,decoder, transnet):
        super(Disentangler,self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.transnet = transnet #estimates trans parameters, contains exponential weights, creates matrices
        
    def forward(self,x, x0=None):
        if x0 == None:
            y = self.encoder(x)
            s = torch.zeros(x.size(0), self.encoder.latent_dim)
        else:
            y, s = self.transnet(x,x0)
        z = self.decoder(y)
        return z,y,s

class Encoder(nn.Module):
    def __init__(self, og_dim, latent_dim): #if images are nXn, og_dim = n^2.
        assert latent_dim <= og_dim, 'latent space must have lower dimension'
        super(Encoder,self).__init__()
        self.og_dim = og_dim
        self.latent_dim = latent_dim
        self.fc1 = nn.Linear(og_dim, max(latent_dim, og_dim//16))
        self.fc2 = nn.Linear(max(latent_dim, og_dim//16), latent_dim)
    
    def forward(self,x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        return x

class Decoder(nn.Module):  
    def __init__(self, og_dim, latent_dim):
        assert latent_dim <= og_dim, 'latent space must have lower dimension'
        super(Decoder,self).__init__()
        self.og_dim = og_dim
        self.latent_dim = latent_dim
        self.fc1 = nn.Linear(latent_dim, max(latent_dim, og_dim//16))
        self.fc2 = nn.Linear(max(latent_dim, og_dim//16), og_dim)
    
    def forward(self,x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        return x
    
class Transnet(nn.Module):
    def __init__(self, og_dim, latent_dim, trans_dim, k_sparse):
        super(Transnet,self).__init__()
        assert latent_dim <= og_dim, 'latent space must have lower dimension'
        assert trans_dim <= latent_dim, 'translation dimension must be subspace'
        self.og_dim = og_dim
        self.latent_dim = latent_dim
        self.trans_dim = trans_dim
        ttl_dim = og_dim + latent_dim
        self.ttl_dim = ttl_dim
        self.k_sparse = k_sparse
        self.fc1 = nn.Linear(ttl_dim, max(latent_dim, ttl_dim//16))
        self.fc2 = nn.Linear(max(latent_dim, ttl_dim//16), max(latent_dim, ttl_dim//32))
        self.fc3 = nn.Linear(max(latent_dim, ttl_dim//32), trans_dim)
        
    def forward(self,x,x0):
        x1 = torch.cat((x,x0),dim = 1) #create (B, N+M) tensor
        x1 = self.fc1(x1)
        x1 = F.relu(x1)
        x1 = self.fc2(x1)
        x1 = F.relu(x1)
        x1 = self.fc3(x1)
        x0[:,:self.trans_dim] += x1
        return x0, x1
        
class Autoencoder(nn.Module): 
    def __init__(self,encoder,decoder):
        super(Autoencoder,self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def forward(self,x, x0=None):    
        x=self.encoder(x)
        x = self.decoder(x)
        return x
   

In [70]:
def make_model(og_dim, latent_dim, trans_dim, k_sparse=1):
    enc = Encoder(og_dim, latent_dim)
    dec = Decoder(og_dim, latent_dim)
    trans = Transnet(og_dim, latent_dim, trans_dim, k_sparse)
    model = Disentangler(enc,dec,trans)
    return model

def make_autoenc(og_dim,latent_dim):
    enc = Encoder(og_dim, latent_dim)
    dec = Decoder(og_dim, latent_dim)
    model = Autoencoder(enc,dec)
    return model


In [71]:
            torch.autograd.set_detect_anomaly(True)


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f9ab97fb370>

In [111]:
def train(print_interval, model, device, train_loader, optimizer, epoch, movie_len, transform_set, beta = .7): #transforms is 
    model.train()
    for epoch in range(epoch):
        train_encoder = True
        for batch_idx, (data, target) in enumerate(train_loader):
#             if batch_idx % 4 == 0:
#                 if train_encoder == True:
#                     for param in model.parameters():
#                         param.requires_grad = True
#                     for param in model.encoder.parameters():
#                         param.requires_grad = False
#                     train_encoder == False
#                 else:
#                     for param in model.parameters():
#                         param.requires_grad = False
#                     for param in model.encoder.parameters():
#                         param.requires_grad = True
#                     train_encoder == True
            loss = 0
            optimizer.zero_grad()
            for i in range(movie_len):
                if i == 0:
                    transform = rand.choice(transform_set)
                    prev_frame = curr_frame = data
                    curr_frame = curr_frame.flatten(1).to(device)
                    output, latent_rep, trans_par = model(curr_frame)
                    latent_rep = latent_rep.detach().clone()
                    latent_rep = latent_rep.to(device)
                else:
                    curr_frame = transform(prev_frame)
                    prev_frame = curr_frame
                    curr_frame = curr_frame.flatten(1).to(device)
                    output, out_rep, trans_par = model(curr_frame, latent_rep)
                loss += (beta**i)*(F.mse_loss(output, curr_frame) + 5e-2*(1/50)*torch.norm(trans_par,1))
            loss.backward()
            optimizer.step()

            if batch_idx % print_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

In [103]:
def train_enc(print_interval, model, device, train_loader, optimizer, epoch, movie_len, transform_set): #transforms is 
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        for i in range(movie_len):
            optimizer.zero_grad()
            if i == 0:
                prev_frame =  curr_frame = data
                transform = rand.choice(transform_set)
                curr_frame = curr_frame.flatten(1).to(device)
            else:
                curr_frame = transform(prev_frame)
                prev_frame = curr_frame
                curr_frame = curr_frame.flatten(1).to(device)
            output = model(curr_frame)
            loss = F.mse_loss(output, curr_frame)# + torch.norm(trans_par,1)
            loss.backward()
            optimizer.step()

        if batch_idx % print_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [104]:
hor_trans = transforms.Compose(
    [transforms.RandomAffine(0, translate = (.1,0)),
     transforms.Normalize(.3,.3)])

ver_trans = transforms.Compose(
    [transforms.RandomAffine(0,translate = (0,.1)),
     transforms.Normalize(.3,.3)])


transform_set = [hor_trans,ver_trans]

In [105]:
loader_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(.3,.3)]
)
batch_size = 50
#(Down)Load MNIST
data_set = datasets.MNIST(root='./data', train=True, download=False, transform=loader_transform)

#Create data loader
data_loader = torch.utils.data.DataLoader(data_set, batch_size = batch_size, shuffle = True)


In [106]:
model_dis = make_model(28**2, latent_dim= 16, trans_dim = 2)

In [107]:
# model_dis = make_autoenc(28**2, latent_dim= 16)

In [108]:
device = torch.device(1)
torch.cuda.set_device(1)
model_dis = model_dis.to(device)
optimizer = torch.optim.Adam(model_dis.parameters(), lr=0.001) #e-1


In [112]:
train(200, model_dis, device, data_loader, optimizer, epoch = 10, movie_len = 3, transform_set = transform_set)

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [50, 16]], which is output 0 of torch::autograd::CopySlices, is at version 4; expected version 2 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [490]:
def testf(x,y,z):
    return x,y,z

In [492]:
a, _ , _ = testf(1,2,3)

In [57]:
A = B = torch.rand(3,4)

In [59]:
B = B.flatten()

In [509]:
loss = 0

In [510]:
loss += torch.rand(3,4)

In [85]:
rand.choice(list_tensor)

tensor([[2.7183, 1.0000, 1.0000, 1.0000],
        [1.0000, 2.7183, 1.0000, 1.0000],
        [1.0000, 1.0000, 2.7183, 1.0000],
        [1.0000, 1.0000, 1.0000, 2.7183]])

In [88]:
C = A

In [90]:
C=B

In [245]:
B= torch.rand(3,4)

In [249]:
B[:,:2]

tensor([[0.3960, 0.7971],
        [0.6771, 0.2819],
        [0.4241, 0.7296]])

In [247]:
B

tensor([[0.3960, 0.7971, 0.3008, 0.6639],
        [0.6771, 0.2819, 0.3756, 0.3071],
        [0.4241, 0.7296, 0.6412, 0.2367]])

In [250]:
B

tensor([[0.3960, 0.7971, 0.3008, 0.6639],
        [0.6771, 0.2819, 0.3756, 0.3071],
        [0.4241, 0.7296, 0.6412, 0.2367]])

In [96]:
A = torch.zeros(3,2)
B = torch.zeros(3,5)

In [99]:
C = torch.cat((A,B),dim=1)

In [105]:
C = A

In [106]:
C

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [107]:
A.add(1)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [114]:
A.flatten(1).shape

torch.Size([3, 2])

In [115]:
A.add_(1)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [119]:
C = A

In [120]:
A.to(device)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')

In [121]:
A

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [122]:
C

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])