In [1]:
import numpy as np
import pandas as pd
import math
import torch
import torch.nn as nn
from torch.nn import *
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as normal_datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
#Random dataset
dist1 = torch.randint(0,16,(100,)).float().requires_grad_(True).cuda()

#Do Stuff to Data to create graph
conv = torch.nn.Conv1d(1,1,1).cuda()(dist1[(None,)*2])

#Calculate histogram
if conv.min().item() < 0:
    conv = conv - conv.min()
bins = 10
conv_binned = torch.trunc(conv * bins/conv.max().item())
ones = torch.ones_like(conv_binned, requires_grad=True)
zeros = torch.zeros_like(conv_binned, requires_grad=True)
hist = torch.tensor([torch.where(conv_binned == bin, ones, zeros).sum() 
for bin in range(bins)], requires_grad=True)

print(hist.grad_fn)
print(conv_binned.grad_fn)

None
<TruncBackward object at 0x7f52f06e20b8>


In [None]:



def get_positive_expectation(p_samples, average=True):
    #Measure = JSD (Simplified from DIM Code for clarity)
    log_2 = math.log(2.)
    Ep = log_2 - F.softplus(-p_samples)  # Note JSD will be shifted
    if average:
        return Ep.mean()
    else:
        return Ep

def get_negative_expectation(q_samples, average=True):
    #Measure = JSD (Simplified from DIM Code for clarity)
    log_2 = math.log(2.)
    Eq = F.softplus(-q_samples) + q_samples - log_2  # Note JSD will be shifted
    if average:
        return Eq.mean()
    else:
        return Eq

def loss_calc(lmap, gmap):
    #The fenchel_dual_loss from the DIM code
    #Reshape tensors dims to (N, Channels, chunks)
    lmap = lmap.reshape(2,128,-1)
    gmap = gmap.squeeze()
    
    N, units, n_locals = lmap.size()
    n_multis = gmap.size(2)

    # First we make the input tensors the right shape.
    l = lmap.view(N, units, n_locals)
    l = lmap.permute(0, 2, 1)
    l = lmap.reshape(-1, units)

    m = gmap.view(N, units, n_multis)
    m = gmap.permute(0, 2, 1)
    m = gmap.reshape(-1, units)
    
    u = torch.mm(m, l.t())
    u = u.reshape(N, n_multis, N, n_locals).permute(0, 2, 3, 1)
    
    mask = torch.eye(N).to(l.device)
    n_mask = 1 - mask
    
    E_pos = get_positive_expectation(u, average=False).mean(2).mean(2)
    E_neg = get_negative_expectation(u, average=False).mean(2).mean(2)
    
    E_pos = (E_pos * mask).sum() / mask.sum()
    E_neg = (E_neg * n_mask).sum() / n_mask.sum()
    loss = E_neg - E_pos
    
    return loss

class Mixed_Dim(torch.nn.Module):
    def __init__(self):
        super(Mixed_Dim, self).__init__()
        #Local Feature Map: Local feature map of size [N, 128, 8, 8, 8]
        self.lmapnet = Sequential(Conv3d(1, 8, 2, 2), ReLU(), BatchNorm3d(8),
                                  Conv3d(8, 16, 2, 2), ReLU(), BatchNorm3d(16),
                                  Conv3d(16, 32, 2, 2), ReLU(), BatchNorm3d(32),
                                  Conv3d(32, 64, 2, 2), ReLU(), BatchNorm3d(64),
                                  Conv3d(64, 128, 2, 2), ReLU(), BatchNorm3d(128) 
                                 )
        
        #Global Feature Map: Global feature map of size [N, 128, 1, 1, 1]
        self.gmapnet = Sequential(Conv3d(128, 128, 2, 2), ReLU(), BatchNorm3d(128),
                                  Conv3d(128, 128, 2, 2), ReLU(), BatchNorm3d(128),
                                  Conv3d(128, 128, 2, 2), ReLU()
                                 )
        #Per paper, global map is activated:
        self.gfc1 = Sequential(Linear(1, 512), ReLU(), BatchNorm3d(128),
                               Linear(512, 512))
        self.gfc2 = Sequential(Linear(1, 512), ReLU(), BatchNorm3d(128))
        
        #Per paper, local map is activated:
        self.lfc1 = Sequential(Conv3d(128, 128, 1, 1), ReLU(), BatchNorm3d(128),
                               Conv3d(128, 128, 1, 1))
        self.lfc2 = Sequential(Conv3d(128, 128, 1, 1), ReLU(), BatchNorm3d(128))
        self.Laynorm = LayerNorm([128, 8, 8, 8])
        
    def forward(self, moving, atlas):
        #Local feature maps of moving and atlas
        lmap_moving = self.lmapnet(moving)
        lmap_atlas = self.lmapnet(atlas)
        
        #Global feature map of atlas
        gmap_atlas = self.gmapnet(lmap_atlas)
        
        #Encode Global feature map
        gout1, gout2 = self.gfc1(gmap_atlas), self.gfc2(gmap_atlas)
        gmap_atlas_enc = self.gfc1(gmap_atlas) + self.gfc2(gmap_atlas)
        
        #Encode Local feature maps
        lmap_atlas_enc = self.Laynorm(self.lfc1(lmap_atlas) + self.lfc2(lmap_atlas))
        lmap_moving_enc = self.Laynorm(self.lfc1(lmap_moving) + self.lfc2(lmap_moving))
        
        return lmap_atlas_enc, lmap_moving_enc, gmap_atlas_enc
        


In [None]:
A = torch.randint(0, 256, (2, 1, 256, 256, 256)).float().requires_grad_(True)
B = torch.randint(0, 256, (2, 1, 256, 256, 256)).float().requires_grad_(True)
model_dim = Mixed_Dim().cuda().train()
optim = torch.optim.Adam(model_dim.parameters(), lr=0.01)
A = A.cuda()
B = B.cuda()
for epoch in range(100):
    loc_atlas, loc_moving, glob_atlas = model_dim(B, A)
    loss1 = loss_calc(loc_moving, glob_atlas)
    loss2 = loss_calc(loc_atlas, glob_atlas)
    loss = (loss2 + loss1)
    model_dim.zero_grad()
    print(loss.item())
    loss.backward()
    optim.step()

In [4]:
num_epochs = 1
batch_size = 100
learning_rate = 0.001

# 将数据处理成Variable, 如果有GPU, 可以转成cuda形式
def get_variable(x):
    x = Variable(x)
    return x.cuda() if torch.cuda.is_available() else x

# 从torchvision.datasets中加载一些常用数据集
train_dataset = normal_datasets.MNIST(
                            root='./mnist/',                 # 数据集保存路径
                            train=True,                      # 是否作为训练集
                            transform=transforms.ToTensor(), # 数据如何处理, 可以自己自定义
                            download=True)                   # 路径下没有的话, 可以下载

# 见数据加载器和batch
test_dataset = normal_datasets.MNIST(root='./mnist/',
                           train=False,
                           transform=transforms.ToTensor())

In [295]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
testset = normal_datasets.CIFAR10(root='/home/dltdc/data/pytorch_datasets', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=4)



NameError: name 'torchvision' is not defined

In [294]:
images, labels = next(iter(test_loader))
images.shape

torch.Size([100, 1, 28, 28])

In [293]:
def hist_with_neg(inp, nb_bins=10):
    """
    https://discuss.pytorch.org/t/differentiable-torch-histc/25865
    """
    x = inp
    x = x.contiguous().cuda()
    with torch.no_grad():
        hist = []
    #     if x.min().item() < 0:
    #         x = x - x.min()
    #     conv_binned = torch.trunc(x * (nb_bins-1)/x.max().item())
    #     ones = torch.ones_like(conv_binned)
    #     zeros = torch.zeros_like(conv_binned)
    #     hist = torch.tensor([torch.where(conv_binned == bin_, 1, 0).sum() 
    #                     for bin_ in range(nb_bins)])

        for b in range(inp.shape[0]):
            hist_pre_ch = []
            for c in range(inp.shape[1]):
                #histc

                x = inp[b,c,:,:]
                if x.min().item() < 0:
                    x = x - x.min()
                conv_binned = torch.trunc(x * (nb_bins-1)/x.max().item()).cuda()
                ones = torch.ones_like(conv_binned, device=device)
                zeros = torch.zeros_like(conv_binned, device=device)
                hist_pre_ch.append(torch.tensor([torch.where(conv_binned == bin_, ones, zeros).sum() 
                                for bin_ in range(nb_bins)], device=device))

            hist.append(torch.stack(hist_pre_ch, 0).cuda())
        hist = torch.stack(hist, 0).cuda()
    return hist



class Hist(torch.autograd.Function):
    def __init__(self, nb_bins=10, in_channel=3):
        super(Hist, self).__init__()
        self.nb_bins = nb_bins
        self.in_channel = in_channel
    
#     @staticmethod
    def forward(self, input):
        return hist_with_neg(input)
#         ctx.save_for_backward(input, nb_bins)
        
    
#     @staticmethod
#     def backward(ctx, grad_output):
#         x, nb_bins = ctx.saved_tensors
#         x_grad = w_grad = None
#         nb_bin_grad = torch.zeros(nb_bins.shape, device=ctx._input_device, dtype=torch.int32)
        
#         if ctx.needs_input_grad[0]:
#           x_grad = torch.nn.grad.conv2d_input(x.shape, w, grad_output)
#         if ctx.needs_input_grad[1]:
#           nb_bins_grad = torch.nn.grad.conv2d_weight(x, w.shape, grad_output)
#         return x_grad, w_grad
        


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(16, 32, kernel_size=5, padding=2),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(2))
#         self.fc = nn.Linear(7 * 7 * 32, 10)
        self.hist = Hist()
        

    def forward(self, x):
#         out = self.conv1(x)
#         out = self.conv2(out)
#         out = out.view(out.size(0), -1)  # reshape
#         out = self.fc(out)
        out2 = self.hist(x)
        out = torch.histc(x, bins=10, min=x.min().item(), max=x.max().item())
#         out = self.fc(out)
#         print(out)
        return out,out2
    

    
def initialize_weights(model):
    if type(model) in [nn.Linear]:
        nn.init.xavier_normal(model.weight.data)
    elif type(model) in [nn.LSTM, nn.RNN, nn.GRU]:
        nn.init.xavier_normal(model.weight_hh_l0)
        nn.init.xavier_normal(model.weight_ih_l0)
    elif isinstance(model, nn.Conv2d):
        nn.init.xavier_normal(model.weight.data)
#         nn.init.xavier_normal(model.bias.data)


cnn = CNN()
cnn.apply(initialize_weights)
if torch.cuda.is_available():
    cnn = cnn.cuda()
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

input = torch.randn(1, 4, 64, 64).cuda()
target = torch.randn(1, 4).type(torch.LongTensor).cuda()
print(input)
optimizer.zero_grad()

output,out2 = cnn(input)
print(output)
# print(out2)
print(torch.histc(input, bins=10))
# print(torch.histc(out2, bins=10))

tensor([[[[ 0.2432,  2.0860, -0.1020,  ...,  0.6909,  0.8534,  0.1113],
          [ 0.0522,  0.0127,  0.0801,  ..., -0.5781,  0.1754, -0.5378],
          [ 1.2599, -0.6276,  0.8985,  ..., -0.4199, -1.9612,  0.9508],
          ...,
          [ 0.4318,  0.9677, -1.3690,  ..., -0.3889,  0.4352,  0.0706],
          [-0.4099,  1.3556, -0.6281,  ..., -2.4886,  0.1191,  1.4687],
          [ 1.2941,  0.3983,  0.9566,  ..., -1.3547, -1.4695,  1.0963]],

         [[ 2.1112,  1.7537,  0.5890,  ...,  1.8097,  0.1650,  0.0179],
          [-1.0349, -3.0297, -0.5800,  ...,  0.7339, -1.3606, -0.1293],
          [ 0.1269, -0.2297,  0.2872,  ..., -0.4281, -0.4633, -0.6673],
          ...,
          [-1.0434, -2.5238,  0.5666,  ..., -0.7338,  0.3869, -0.9548],
          [ 0.4015, -2.9308, -0.9158,  ...,  1.6569, -0.0793, -0.9980],
          [ 0.1843, -1.6088,  1.1173,  ...,  0.9143,  0.6597, -0.3660]],

         [[-0.7815,  1.0543, -0.5680,  ..., -1.0372,  1.2295, -0.8847],
          [-1.4189,  0.7522,  



In [122]:
filters = torch.ones(1,3,2,2).double()
inputs = torch.LongTensor(1,3,8,8).random_(0, 2).double()
print(filters)
print(inputs)
print(inputs[0,0,])
F.conv2d(inputs, filters, stride=1, padding=0).shape
windows = F.unfold(inputs, kernel_size=2, stride=2)
print(windows)
out = F.fold(windows, (5,5), kernel_size=2, stride=2)
print(out)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]], dtype=torch.float64)
tensor([[[[1., 1., 0., 0., 0., 1., 1., 1.],
          [0., 1., 1., 0., 0., 0., 1., 0.],
          [0., 0., 0., 0., 1., 0., 1., 1.],
          [1., 0., 1., 0., 0., 1., 1., 1.],
          [0., 1., 1., 1., 1., 0., 0., 0.],
          [1., 1., 1., 1., 0., 0., 0., 1.],
          [0., 1., 0., 0., 0., 1., 1., 1.],
          [1., 0., 1., 1., 0., 1., 0., 1.]],

         [[1., 1., 1., 1., 1., 0., 0., 0.],
          [0., 1., 0., 0., 1., 1., 1., 0.],
          [0., 0., 0., 0., 0., 0., 1., 1.],
          [1., 0., 1., 1., 1., 0., 0., 0.],
          [1., 1., 1., 1., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0., 0., 0., 1.],
          [1., 1., 0., 0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1., 1., 0., 0.]],

         [[0., 1., 0., 0., 0., 0., 1., 1.],
          [1., 0., 0., 0., 0., 0., 0., 1.],
          [0., 1., 1., 0., 0., 1., 1., 0.],
          [1

RuntimeError: Given output_size=(5, 5), kernel_size=(2, 2), dilation=(1, 1), padding=(0, 0), stride=(2, 2), expected size of input's dimension 2 to match the calculated number of sliding blocks 2 * 2 = 4, but got input.size(2)=16.

In [123]:

x = torch.randn(1, 1, 16, 16).cuda()
if x.min().item() < 0:
    x = x - x.min()
conv_binned = torch.trunc(x * 10/x.max().item())
print(conv_binned)
ones = torch.ones_like(conv_binned, requires_grad=True)
zeros = torch.zeros_like(conv_binned, requires_grad=True)
hist = torch.tensor([torch.where(conv_binned == bin_, ones, zeros).sum() 
                for bin_ in range(10)], requires_grad=True)
print(hist)

tensor([[[[ 3.,  6.,  4.,  8.,  7.,  3.,  5.,  3.,  8.,  4.,  7.,  0.,  6.,  3.,
            4.,  1.],
          [ 6.,  7.,  5.,  7.,  0.,  4.,  7.,  4.,  7.,  5.,  6.,  6.,  4.,  7.,
            2.,  3.],
          [ 6.,  9.,  3.,  6.,  6.,  2.,  7.,  5.,  8.,  5.,  5.,  5.,  2.,  5.,
            5.,  4.],
          [ 4.,  1.,  5.,  2.,  9.,  0.,  4.,  7.,  8.,  7.,  0.,  6.,  5.,  5.,
            5.,  3.],
          [ 5.,  5.,  7.,  6.,  7.,  6.,  6.,  6.,  4.,  5., 10.,  2.,  4.,  3.,
            4.,  5.],
          [ 6.,  9.,  3.,  7.,  3.,  5.,  2.,  4.,  4.,  5.,  0.,  3.,  6.,  5.,
            8.,  4.],
          [ 4.,  4.,  3.,  3.,  5.,  3.,  7.,  3.,  6.,  2.,  7.,  1.,  3.,  5.,
            5.,  4.],
          [ 4.,  5.,  6.,  5.,  1.,  3.,  3.,  5.,  4.,  1.,  8.,  1.,  6.,  4.,
            8.,  2.],
          [ 3.,  2.,  4.,  3.,  2.,  5.,  3.,  5.,  4.,  6.,  5.,  5.,  6.,  4.,
            8.,  0.],
          [ 6.,  7.,  6.,  6.,  6.,  4.,  3.,  5.,  5.,  5.,  4.,  3.,  6

In [124]:
out_channels = 6
in_channels = 1
kh, kw = 3, 3
weight = torch.randn(out_channels, in_channels, kh, kw, requires_grad=True)
with torch.no_grad():
    weight[:, :, 0, 0] = 0.
    weight[:, :, -1, 0] = 0.
    weight[:, :, 0, -1] = 0.
    weight[:, :, -1, -1] = 0.

x = torch.randn(1, in_channels, 5, 5)
output = F.conv2d(x, weight)
print(output.mean())
print(weight.grad)

tensor(0.2167, grad_fn=<MeanBackward0>)
None


In [285]:
def rmac_hist(x, L=16, eps=1e-6):
    ovr = 0.4 # desired overlap of neighboring regions
    steps = torch.Tensor([2, 3, 4, 5, 6, 7]).cuda() # possible regions for the long dimension

    W = x.size(3)
    H = x.size(2)

    w = min(W, H)
    w2 = math.floor(w/2.0 - 1)

    b = (max(H, W)-w)/(steps-1)
    (tmp, idx) = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension

    # region overplus per dimension
    Wd = 0;
    Hd = 0;
    if H < W:  
        Wd = idx.item() + 1
    elif H > W:
        Hd = idx.item() + 1

    v = [hist_with_neg(x)]
#     v = v / (torch.norm(v, p=2, dim=1, keepdim=True) + eps).expand_as(v)
    
    L_min = 16 #1
    for l in range(L_min, L+1):
        wl = math.floor(2*w/(l+1))
        wl2 = math.floor(wl/2 - 1)

        if l+Wd == 1:
            b = 0
        else:
            b = (W-wl)/(l+Wd-1)
        cenW = torch.floor(wl2 + torch.Tensor(range(l-1+Wd+1))*b) - wl2 # center coordinates
        if l+Hd == 1:
            b = 0
        else:
            b = (H-wl)/(l+Hd-1)
        cenH = torch.floor(wl2 + torch.Tensor(range(l-1+Hd+1))*b) - wl2 # center coordinates
        
        vt_array = []
        for i_ in cenH.tolist():
            for j_ in cenW.tolist():
                if wl == 0:
                    continue
                R = x[:,:,(int(i_)+torch.Tensor(range(wl)).long()).tolist(),:]
                R = R[:,:,:,(int(j_)+torch.Tensor(range(wl)).long()).tolist()]
                vt = hist_with_neg(R)
                vt = vt / (torch.norm(vt, p=2, dim=-1, keepdim=True) + eps).expand_as(vt)

#                 v += vt
                vt_array+=[vt]

        print(vt_array[0].shape)
        vt_array = torch.stack(vt_array, -1)
        print(len(cenH.tolist()),len(cenW.tolist()))
        print(vt_array[0,...].shape)
        arr_along_batch = []
        for batch_id in range(x.shape[0]):
            arr_along_batch.append(F.fold(vt_array[batch_id,...], (len(cenH.tolist()),len(cenW.tolist())), (1,1)))
        arr_along_batch = torch.stack(arr_along_batch, 0).cuda()
        print(arr_along_batch.shape)
#         v += vt_array

    return arr_along_batch



In [289]:
# print(input)
hist_pooling_res = rmac_hist(input)
print(hist_pooling_res)

torch.Size([1, 4, 10])
16 16
torch.Size([4, 10, 256])
torch.Size([1, 4, 10, 16, 16])
tensor([[[[[0.2212, 0.1701, 0.2321,  ..., 0.1729, 0.0977, 0.1517],
           [0.1074, 0.1712, 0.3259,  ..., 0.2141, 0.0489, 0.1044],
           [0.2233, 0.2276, 0.0526,  ..., 0.0475, 0.1099, 0.1041],
           ...,
           [0.1068, 0.2240, 0.2166,  ..., 0.1579, 0.1644, 0.0493],
           [0.1735, 0.1712, 0.1138,  ..., 0.2776, 0.2192, 0.1123],
           [0.1588, 0.1071, 0.2276,  ..., 0.2632, 0.2179, 0.2173]],

          [[0.0553, 0.1701, 0.3482,  ..., 0.1153, 0.0489, 0.1011],
           [0.2147, 0.2854, 0.1629,  ..., 0.0535, 0.1466, 0.1044],
           [0.0558, 0.1707, 0.1579,  ..., 0.1900, 0.1649, 0.1041],
           ...,
           [0.1601, 0.0000, 0.0542,  ..., 0.2632, 0.1644, 0.0987],
           [0.2892, 0.0571, 0.1138,  ..., 0.3702, 0.2192, 0.3370],
           [0.1059, 0.2141, 0.2276,  ..., 0.2632, 0.1089, 0.3259]],

          [[0.3871, 0.5103, 0.3482,  ..., 0.3458, 0.3420, 0.4046],
        

In [291]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
m=nn.AdaptiveMaxPool3d((None,1,1))
conv_du = nn.Sequential(
                nn.Conv3d(4, 4, 1, bias=True),
#                 nn.ReLU(inplace=True),
#                 nn.Conv3d(2, 4, 1, bias=True),
                nn.Sigmoid()
        ).cuda()
redu_hist3d = conv_du(hist_pooling_res)
print(redu_hist3d.shape)
print(hist_pooling_res.shape)

attn_res = hist_pooling_res*m(redu_hist3d)

attn_res_reduced = attn_res.sum(1)
upsample = nn.ConvTranspose2d(10, 10, 3, stride=np.rint(input.shape[-1]//attn_res_reduced.shape[-1]).astype(int)).cuda()
print(input.size())
upsampled_hist = upsample(attn_res_reduced, output_size=input.size())
# upsampled_hist
print(hist_pooling_res)
torch.cat((input,nn.SELU(inplace=True)(upsampled_hist).cuda()), dim=1)

torch.Size([1, 4, 10, 16, 16])
torch.Size([1, 4, 10, 16, 16])
torch.Size([1, 4, 64, 64])
tensor([[[[[0.2212, 0.1701, 0.2321,  ..., 0.1729, 0.0977, 0.1517],
           [0.1074, 0.1712, 0.3259,  ..., 0.2141, 0.0489, 0.1044],
           [0.2233, 0.2276, 0.0526,  ..., 0.0475, 0.1099, 0.1041],
           ...,
           [0.1068, 0.2240, 0.2166,  ..., 0.1579, 0.1644, 0.0493],
           [0.1735, 0.1712, 0.1138,  ..., 0.2776, 0.2192, 0.1123],
           [0.1588, 0.1071, 0.2276,  ..., 0.2632, 0.2179, 0.2173]],

          [[0.0553, 0.1701, 0.3482,  ..., 0.1153, 0.0489, 0.1011],
           [0.2147, 0.2854, 0.1629,  ..., 0.0535, 0.1466, 0.1044],
           [0.0558, 0.1707, 0.1579,  ..., 0.1900, 0.1649, 0.1041],
           ...,
           [0.1601, 0.0000, 0.0542,  ..., 0.2632, 0.1644, 0.0987],
           [0.2892, 0.0571, 0.1138,  ..., 0.3702, 0.2192, 0.3370],
           [0.1059, 0.2141, 0.2276,  ..., 0.2632, 0.1089, 0.3259]],

          [[0.3871, 0.5103, 0.3482,  ..., 0.3458, 0.3420, 0.4046],
    

tensor([[[[ 7.1264e-01, -1.4829e+00,  7.5374e-01,  ..., -3.9685e-01,
           -1.4078e+00,  1.3761e+00],
          [ 7.5623e-01, -1.0336e+00, -9.4676e-01,  ..., -7.1415e-01,
           -9.4391e-01, -6.2532e-01],
          [-1.5443e+00, -1.8265e+00, -1.1229e-01,  ..., -1.0241e+00,
           -4.0456e-01,  4.8919e-01],
          ...,
          [ 9.7460e-01,  1.8843e+00, -1.0724e-01,  ...,  1.1198e+00,
            1.2194e-01,  1.2331e+00],
          [-1.9268e-01,  4.1561e-01,  4.9510e-01,  ..., -2.0681e+00,
           -1.9003e+00,  1.3527e+00],
          [ 7.2349e-01, -1.9242e-02,  1.3629e+00,  ..., -2.2780e-02,
           -1.3694e+00, -4.0969e-01]],

         [[-4.2746e-01,  8.1367e-01, -9.8953e-01,  ..., -1.5456e-03,
            8.3861e-01, -2.3021e+00],
          [ 1.1974e+00, -1.2911e-02,  1.3376e-01,  ..., -1.1046e+00,
            1.8243e+00,  1.1191e+00],
          [ 1.1517e+00,  1.6706e+00,  2.2423e-01,  ...,  8.6491e-01,
           -1.6997e-01,  1.2227e+00],
          ...,
     

In [219]:
print(output)
a = cnn.forward(input)
print(a.shape)
loss = loss_func(output.sum(), target)
loss.backward(retain_graph=True)
optimizer.step()

tensor([[[[-5.3928, -2.7985, -1.2736],
          [ 0.5560, -0.2581,  1.7089],
          [ 1.4241,  2.0407,  4.7615]],

         [[ 1.4115,  0.0413,  5.8817],
          [-1.0436, -0.8633,  4.0784],
          [-2.0931, -3.9664, -0.8548]],

         [[ 0.7466,  0.6219,  1.7725],
          [-0.6392, -0.4786,  0.6590],
          [-0.8573, -1.3902, -0.8062]],

         [[-1.6163,  3.4959,  1.1526],
          [-0.4965,  4.5342, -1.9171],
          [ 1.5929, -2.4109, -4.6280]],

         [[-3.0557, -3.7205,  1.6888],
          [-3.0168, -0.6681,  3.7868],
          [ 2.8815,  2.3262,  3.0939]],

         [[ 0.1994, -2.7422, -3.0247],
          [ 0.2476,  2.5019, -0.0529],
          [ 3.9685,  4.0618,  0.5308]]]], grad_fn=<MkldnnConvolutionBackward>)




AttributeError: 'tuple' object has no attribute 'shape'

In [None]:
data = 50 + 25 * torch.randn(1,1000)

hist = torch.histc(data, bins=10, min=0, max=100)

print(data)
print(hist)

class SoftHistogram(nn.Module):
    def __init__(self, bins, min, max, sigma):
        super(SoftHistogram, self).__init__()
        self.bins = bins
        self.min = min
        self.max = max
        self.sigma = sigma
        self.delta = float(max - min) / float(bins)
        self.centers = float(min) + self.delta * (torch.arange(bins).float() + 0.5)
        self.centers = nn.Parameter(self.centers, requires_grad=False)

    def forward(self, x):
        x = torch.unsqueeze(x, 1) - torch.unsqueeze(self.centers, 1)
        x = torch.exp(-0.5*(x/self.sigma)**2) / (self.sigma * np.sqrt(np.pi*2)) * self.delta
        x = x.sum(dim=-1)
        x = x/x.sum(dim=-1).unsqueeze(1) # normalization
        return x

softhist = SoftHistogram(bins=10, min=-2, max=2, sigma=3*25)

data.requires_grad = True
hist = softhist(data)
print(50 + 25 * hist[0])

hist.sum().backward()
print(data.grad.max())

In [None]:
cnn.apply(initialize_weights)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = get_variable(images)
        labels = get_variable(labels)
#         print(images.shape)
#         print(labels.shape)
        outputs = cnn(images)
        loss = loss_func(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.item()))

# 测试模型
cnn.eval()  # 改成测试形态, 应用场景如: dropout
correct = 0
total = 0
for images, labels in test_loader:
    images = get_variable(images)
    labels = get_variable(labels)

    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.data).sum()

print(' test acc %d %%' % (100 * correct / total))

# Save the Trained Model
torch.save(cnn.state_dict(), 'cnn.pkl')

In [66]:
loss


tensor(0.0420, device='cuda:0', grad_fn=<NllLossBackward>)

In [40]:
batchsize = 4
nb_in_channel = 3
n_classes = 1
net = Conv2d_Hist(in_channels=nb_in_channel, n_classes=n_classes)
print(net)


optimizer = optim.SGD(net.parameters(), lr=0.01)
input = torch.randn(batchsize, nb_in_channel, 32, 32)
net.zero_grad()

output = net(input)

target = torch.randn(batchsize, n_classes)  # a dummy target, for example
# target = target.view(1,-1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)


print(output.shape)
print(target.shape)

loss.backward(torch.randn(output.shape))
optimizer.step()

Conv2d_Hist(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=1, bias=True)
)
tensor(1.4422, grad_fn=<MseLossBackward>)
torch.Size([4, 1])
torch.Size([4, 1])


RuntimeError: Mismatch in shape: grad_output[0] has a shape of torch.Size([4, 1]) and output[0] has a shape of torch.Size([]).