In [None]:
import os
# Must run before any CUDA initialisation for the settings to take effect.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",  # see issue #152
    CUDA_VISIBLE_DEVICES="0",        # expose only device "0" to this process
)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

from torch.optim.optimizer import Optimizer, required
from torch import Tensor
from torch.nn import Parameter

from tqdm import tqdm

In [5]:
import torchvision
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, transforms, models

In [2]:
def l2normalize(v, eps=1e-12):
    """Return ``v`` divided by its norm.

    ``eps`` is added to the norm before dividing, so an all-zero ``v`` yields
    zeros instead of a division by zero.
    """
    magnitude = v.norm()
    return v / (magnitude + eps)


class SpectralNorm(nn.Module):
    """Wrap ``module`` and rescale one of its parameters on every forward pass.

    On construction the parameter called ``name`` is removed from the wrapped
    module and replaced by three parameters registered on that module:

    * ``<name>_bar`` -- the original weight data (the trainable tensor),
    * ``<name>_u`` / ``<name>_v`` -- unit vectors with ``requires_grad=False``
      whose lengths are the weight's first dimension and the product of the
      remaining dimensions respectively.

    Each ``forward`` refines ``u`` and ``v`` for ``power_iterations`` rounds,
    computes ``sigma = u . (W v)`` with ``W`` the weight viewed as a 2-D
    matrix, and sets ``module.<name> = <name>_bar / sigma`` before delegating
    to the wrapped module.
    """

    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        # Only create the helper parameters when the module does not carry them yet.
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Refresh ``u``/``v`` and write the rescaled weight onto the wrapped module."""
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        rows = w.data.shape[0]
        w_matrix = w.view(rows, -1)   # 2-D view of the weight, still tracked by autograd
        w_values = w_matrix.data      # same values, outside the autograd graph

        # The u/v updates go through .data, so they are not recorded by autograd.
        for _round in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w_values), u.data))
            u.data = l2normalize(torch.mv(w_values, v.data))

        # sigma is built from the tracked view, so gradients reach <name>_bar through it.
        sigma = u.dot(w_matrix.mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        """Return True when the wrapped module already has ``_u``, ``_v`` and ``_bar``."""
        try:
            for suffix in ("_u", "_v", "_bar"):
                getattr(self.module, self.name + suffix)
        except AttributeError:
            return False
        return True

    def _make_params(self):
        """Replace the wrapped module's weight by the ``_u`` / ``_v`` / ``_bar`` parameters."""
        w = getattr(self.module, self.name)

        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        # Random start vectors on the same dtype/device as the weight, normalised to unit length.
        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        # Drop the original parameter; forward() re-creates the attribute as a plain tensor.
        del self.module._parameters[self.name]

        for suffix, param in (("_u", u), ("_v", v), ("_bar", w_bar)):
            self.module.register_parameter(self.name + suffix, param)

    def forward(self, *args):
        """Update the rescaled weight, then run the wrapped module's ``forward``."""
        self._update_u_v()
        return self.module.forward(*args)

In [3]:
class Self_Attn(nn.Module):
    """Self-attention layer over the spatial positions of a feature map.

    Queries and keys are produced by 1x1 convolutions with ``in_dim // 8``
    output channels, values by a 1x1 convolution with ``in_dim`` channels.
    The attended features are scaled by the learnable scalar ``gamma``
    (initialised to zero) and added back to the input.
    """

    def __init__(self,in_dim,activation):
        super(Self_Attn,self).__init__()
        self.chanel_in = in_dim
        self.activation = activation  # stored only; forward() does not use it

        reduced_dim = in_dim // 8
        self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=reduced_dim, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=reduced_dim, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self,x):
        """
            inputs :
                x : input feature maps (B x C x W x H)
            returns :
                out : gamma * attended values + input features (B x C x W x H)
                attention : B x N x N, with N = W * H; each row sums to 1
        """
        batch, channels, width, height = x.size()
        n_positions = width * height

        queries = self.query_conv(x).view(batch, -1, n_positions).transpose(1, 2)  # B x N x C'
        keys = self.key_conv(x).view(batch, -1, n_positions)                       # B x C' x N
        attention = self.softmax(torch.bmm(queries, keys))                         # B x N x N
        values = self.value_conv(x).view(batch, -1, n_positions)                   # B x C x N

        # Weight the values by the transposed attention map and restore the spatial layout.
        attended = torch.bmm(values, attention.transpose(1, 2))
        attended = attended.view(batch, channels, width, height)

        return self.gamma * attended + x, attention


class SelfAttention(nn.Module):
    """Discriminator-style binary classifier with two self-attention layers.

    The network stacks spectrally-normalised stride-2 convolutions (each
    followed by ``LeakyReLU(0.1)``), applies ``Self_Attn`` after the third
    block and again after the optional fourth block, and finishes with a
    4x4 convolution, a linear layer and a sigmoid, so the output is one
    probability per sample.

    Args:
        batch_size: Unused; kept so existing callers keep working.
        image_size: Depth selector. The fourth downsampling block is only
            built when this equals 64; otherwise ``l4`` is an identity.
            It is NOT the spatial size of the input (see ``input_size``).
        conv_dim: Number of channels produced by the first convolution;
            every downsampling block doubles it.
        input_size: Height/width of the (square) input images. It is used
            to size the final linear layer. The default of 256 reproduces
            the previously hard-coded ``13 * 13`` features.

    Raises:
        ValueError: if ``input_size`` is too small for the final 4x4 convolution.
    """

    def __init__(self, batch_size=64, image_size=64, conv_dim=64, input_size=256):
        super(SelfAttention, self).__init__()
        self.imsize = image_size

        block1 = self._down_block(3, conv_dim)
        block2 = self._down_block(conv_dim, conv_dim * 2)
        block3 = self._down_block(conv_dim * 2, conv_dim * 4)
        attn1_dim = conv_dim * 4
        curr_dim = attn1_dim
        n_down = 3

        if self.imsize == 64:
            self.l4 = self._down_block(curr_dim, curr_dim * 2)
            curr_dim = curr_dim * 2
            n_down = 4
        else:
            # forward() always calls l4, so provide a pass-through instead of
            # leaving the attribute undefined.
            self.l4 = nn.Identity()
        self.l1 = block1
        self.l2 = block2
        self.l3 = block3

        # Spatial size after the stride-2 blocks (kernel 4, stride 2, padding 1)
        # and the final unpadded 4x4 convolution.
        feat = input_size
        for _ in range(n_down):
            feat = (feat + 2 * 1 - 4) // 2 + 1
        feat = feat - 4 + 1
        if feat < 1:
            raise ValueError(
                "input_size=%r is too small for this architecture" % (input_size,))

        self.last = nn.Sequential(
            nn.Conv2d(curr_dim, 1, 4),
            nn.Flatten(),
            nn.Linear(feat * feat, 1),
            nn.Sigmoid(),
        )

        # Attention widths follow the actual channel counts instead of the
        # literals 256/512, which were only right for conv_dim == 64.
        self.attn1 = Self_Attn(attn1_dim, 'relu')
        self.attn2 = Self_Attn(curr_dim, 'relu')

    @staticmethod
    def _down_block(in_channels, out_channels):
        """Spectrally-normalised 4x4 stride-2 convolution followed by LeakyReLU(0.1)."""
        return nn.Sequential(
            SpectralNorm(nn.Conv2d(in_channels, out_channels, 4, 2, 1)),
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        """Return ``(probabilities, attention_map_1, attention_map_2)``.

        ``probabilities`` always has shape ``(B,)``, including for ``B == 1``.
        """
        out = self.l1(x)
        out = self.l2(out)
        out = self.l3(out)
        out, p1 = self.attn1(out)
        out = self.l4(out)
        out, p2 = self.attn2(out)
        out = self.last(out)
        # squeeze(-1) drops only the trailing feature dimension; a bare
        # squeeze() would also drop the batch dimension of a single-sample
        # batch and break the shape match with the labels.
        return out.squeeze(-1), p1, p2

In [12]:
# https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
def make_weights_for_balanced_classes(images, nclasses):
    """Compute one sampling weight per image so that classes are drawn evenly.

    Args:
        images: Sequence of ``(item, class_index)`` pairs; only element ``[1]``
            of each entry is read.
        nclasses: Number of classes; every class index must be below it.

    Returns:
        A list with one float per entry of ``images``: the total number of
        images divided by the size of that image's class.

    The per-class counts and per-class weights are printed as a side effect.
    A class with no samples gets weight ``0.0`` instead of raising
    ``ZeroDivisionError``; no image refers to such a class, so the returned
    weights are unaffected.
    """
    count = [0] * nclasses
    for item in images:
        count[item[1]] += 1
    N = float(sum(count))
    print(count)
    weight_per_class = [N / float(c) if c else 0.0 for c in count]
    print(weight_per_class)
    return [weight_per_class[item[1]] for item in images]


In [4]:
# Instantiate the classifier with its default constructor arguments.
model = SelfAttention()

In [8]:
# Move the model to the GPU when CUDA is available and keep it on the CPU
# otherwise, matching the CPU fallback used for the input batches.
# A bare .cuda() fails on machines without CUDA.
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [10]:
# Training-time preprocessing and augmentation.
# Resize(256) with an int only fixes the SHORTER edge, so a non-square image
# would give a non-256x256 tensor that can neither be batched nor fed to the
# model's fixed-size head. CenterCrop(256) guarantees 256x256 and is a no-op
# for images that are already square.
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.RandomHorizontalFlip(p=0.5),
    # Half of the time: a small random rotation followed by a small affine jitter.
    transforms.RandomApply([
        transforms.RandomRotation(5),
        transforms.RandomAffine(degrees=5, scale=(0.95, 1.05)),
    ], p=0.5),
    transforms.ToTensor(),
    # NOTE(review): these look like the usual ImageNet channel statistics -- confirm
    # they are appropriate for this dataset.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
# ImageFolder derives the class labels from the sub-directory names.
train_data = datasets.ImageFolder('/data/tam/kaggle/extract_raw_img',
                                  transform=train_transforms)



In [13]:
# One weight per training image (total count / class count), as a double tensor.
weights = torch.DoubleTensor(
    make_weights_for_balanced_classes(train_data.imgs, len(train_data.classes))
)
# Draw as many samples per epoch as there are images, according to those weights.
sampler = torch.utils.data.sampler.WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
)

[594342, 548450]
[1.9227851977480979, 2.0836758136566687]


In [14]:
# Training batches of 16; the weighted sampler decides which images are drawn.
trainloader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=16,
    sampler=sampler,
    num_workers=1,
    pin_memory=True,
)

In [15]:
# Evaluation uses a deterministic pipeline: reusing the training transforms
# would apply random flips/rotations to the test images and make the reported
# test loss and accuracy fluctuate between runs.
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),  # guarantees 256x256; no-op for square images
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
test_data = datasets.ImageFolder('/data/tam/kaggle/extract_raw_img_test',
                                 transform=test_transforms)

# No sampler and no shuffling: the test set is iterated in dataset order.
testloader = torch.utils.data.DataLoader(test_data, batch_size=16, num_workers=1, pin_memory=True)

In [17]:
# Binary cross-entropy on probabilities (the model's head ends in a Sigmoid).
criterion = nn.BCELoss()
criterion = criterion.cuda()
# Adam over every model parameter.
optimizer = torch.optim.Adam(params=model.parameters(), lr=3e-3)

In [18]:
# Use the GPU when CUDA is available; fall back to the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
epochs = 1
steps = 0
running_loss = 0
print_every = 100
# One entry is appended to each list at every evaluation point.
train_losses, test_losses = [], []


def _evaluate(model, loader, criterion, device):
    """Return ``(mean batch loss, accuracy)`` of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled; the caller
    is responsible for switching back to train mode. Accuracy is the fraction
    of correctly classified samples (threshold 0.5), counted per sample so a
    smaller final batch is not over-weighted.
    """
    total_loss = 0.0
    n_correct = 0
    n_seen = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.float().to(device)
            probs, _, _ = model(inputs)
            # Force shape (B,) so a single-sample batch still matches the labels.
            probs = probs.reshape(-1)
            total_loss += criterion(probs, labels).item()
            # Cast the thresholded predictions to float before comparing with
            # the float labels, so the comparison never mixes dtypes.
            predictions = (probs > 0.5).float()
            n_correct += (predictions == labels).sum().item()
            n_seen += labels.numel()
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    return total_loss / max(len(loader), 1), n_correct / max(n_seen, 1)


for epoch in range(epochs):
    model.train()
    for inputs, labels in tqdm(trainloader):
        steps += 1
        inputs, labels = inputs.to(device), labels.float().to(device)

        optimizer.zero_grad()
        # Call the module (not .forward) so registered hooks run. The model
        # returns sigmoid probabilities plus two attention maps.
        probs, _, _ = model(inputs)
        probs = probs.reshape(-1)  # shape (B,), also for a single-sample batch
        loss = criterion(probs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            print(loss)
            # NOTE: this is a full pass over the test set every `print_every`
            # steps, which dominates the run time for a large test set.
            test_loss, accuracy = _evaluate(model, testloader, criterion, device)
            train_losses.append(running_loss / print_every)
            test_losses.append(test_loss)
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss:.3f}.. "
                  f"Test accuracy: {accuracy:.3f}")
            running_loss = 0
            model.train()




  0%|          | 0/71425 [00:00<?, ?it/s][A[A[A


  0%|          | 1/71425 [00:00<8:40:03,  2.29it/s][A[A[A


  0%|          | 4/71425 [00:00<6:19:11,  3.14it/s][A[A[A


  0%|          | 7/71425 [00:00<4:39:23,  4.26it/s][A[A[A


  0%|          | 10/71425 [00:00<3:28:58,  5.70it/s][A[A[A


  0%|          | 13/71425 [00:00<2:39:55,  7.44it/s][A[A[A


  0%|          | 16/71425 [00:01<2:06:36,  9.40it/s][A[A[A


  0%|          | 19/71425 [00:01<1:42:19, 11.63it/s][A[A[A


  0%|          | 22/71425 [00:01<1:25:20, 13.94it/s][A[A[A


  0%|          | 25/71425 [00:01<1:13:30, 16.19it/s][A[A[A


  0%|          | 28/71425 [00:01<1:04:51, 18.35it/s][A[A[A


  0%|          | 31/71425 [00:01<59:44, 19.92it/s]  [A[A[A


  0%|          | 34/71425 [00:01<55:59, 21.25it/s][A[A[A


  0%|          | 37/71425 [00:01<53:10, 22.37it/s][A[A[A


  0%|          | 40/71425 [00:01<51:20, 23.17it/s][A[A[A


  0%|          | 43/71425 [00:02<49:56, 23.82it/s][A[A

tensor(0.7338, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)





  0%|          | 97/71425 [00:19<46:10, 25.75it/s][A[A[A