In [364]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Bi-LSTM Component

In [365]:
class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output

# R-CNN Component

In [366]:
class R_CNN(nn.Module):

    def __init__(self):
        super(R_CNN, self).__init__()

        in_nc = 3
        nf = 64
        hdn = 300
        nclass = 23 #dekhabet class
        
        self.convs = nn.Sequential(
            
            nn.Conv2d(in_nc, nf, 3, 1, 1),
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d(2, 2), #64 filters, 32*64
            
            nn.Conv2d(nf, nf*2, 3, 1, 1), 
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d(2, 2), #128 filters, 16*32
            
            nn.Conv2d(nf*2, nf*4, 3, 1, 1), 
            nn.BatchNorm2d(nf*4),
            
            nn.Conv2d(nf*4, nf*4, 3, 1, 1), 
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d(2,2), #256 filters, 40*16
            
            
            nn.Conv2d(nf*4, nf*4, 3, 1, 1), 
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d((2, 2)),
            
            
            nn.Conv2d(nf*4, nf*8, 3, 1, 1), 
            nn.BatchNorm2d(nf*8),
            
            nn.Conv2d(nf*8, nf*8, 3, 1, 1), 
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d((2, 1)),
            
            
            nn.Conv2d(nf*8, nf*8, 3, 1, 1), 
            nn.LeakyReLU(0.2, True),
            nn.MaxPool2d((2, 1)),  
            
            nn.Conv2d(nf*8, nf*8, 2, 1, 0), 
            
        )
        
        self.bilstm = nn.Sequential(
                        BidirectionalLSTM(nf*8, hdn, hdn),
                        BidirectionalLSTM(hdn, hdn, nclass),
                    )
        
        self.lgsftMx = nn.LogSoftmax(dim=2)
        
    def forward(self, x):

        out = self.convs(x)
        out = out.squeeze(2)
        out = out.permute(2, 0, 1) #ctc expects [width,batch,label]
        
        
        out = self.bilstm(out)
        out = F.log_softmax(out, dim=2)
        
        
        return out


# Initiate Model And Loss

In [388]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = R_CNN()
model = model.to(device)

criterion = nn.CTCLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.5, 0.999))

cuda:0


# Getting The Data

In [386]:
input = torch.randn(5, 3, 128, 256)
input = input.to(device)

target =  [[5,1,5,3,0,2,1,7,20,11],
           [5,1,5,3,0,2,1,7,20,11],
           [5,1,5,3,0,2,1,7,20,11], 
           [5,1,5,3,0,2,1,7,20,11],
           [5,1,5,3,0,2,1,7,20,11]]



target = torch.FloatTensor(target)
target = target.to(device)



In [383]:
T = 15      # Input sequence length
C = 22      # Number of classes (including blank)
N = 5      # Batch size
S = 9      # Target sequence length of longest target in batch
S_min = 2  # Minimum target length, for demonstration purposes

# target = torch.randint(low=1, high=23, size=(N, S), dtype=torch.long)
target_lengths = torch.randint(low=S_min, high=S, size=(N,))
                               
# input = torch.randn(T, N, C).log_softmax(2).detach().requires_grad_()

print(target)
print(target_lengths)


tensor([[ 5.,  1.,  5.,  3.,  0.,  2.,  1.,  7., 20., 11.],
        [ 5.,  1.,  5.,  3.,  0.,  2.,  1.,  7., 20., 11.],
        [ 5.,  1.,  5.,  3.,  0.,  2.,  1.,  7., 20., 11.],
        [ 5.,  1.,  5.,  3.,  0.,  2.,  1.,  7., 20., 11.],
        [ 5.,  1.,  5.,  3.,  0.,  2.,  1.,  7., 20., 11.]], device='cuda:0')
tensor([7, 3, 6, 4, 2])


# Training

In [384]:
pred = model(input)

print(pred.shape)

preds_size = Variable(torch.LongTensor([pred.size(0)] * 5))

cost = criterion(pred, target, preds_size, target_lengths)

print(cost)


torch.Size([15, 5, 23])
tensor(10.3430, device='cuda:0', grad_fn=<MeanBackward1>)


In [None]:
total_step = len(trainloader_pixel)
ctc_loss_list = []
acc_list = []
batch_size= 25
num_epochs = 2500

for epoch in range(num_epochs):
    
    trainiter = iter(trainiter)
    
    for i in range(5):
        
        spectros, lbls, lbl_lens = trainIter_pixel.next()
        
        spectros = spectos.to(device)
        lbls = lbls.to(device)
        lbl_lens.to(device)
        
        pred = model(spectros)
        preds_size = Variable(torch.LongTensor([pred.size(0)] * batch_size))
        

        cost = criterion(pred, lbls, preds_size, lbl_lens)/batch_size
        
        #backprop and optimize!
        cost.backward()
        optimizer.step()
        
      
    if (epoch+1) % 100 == 0:
        print('Epoch No [{}/{}]  {:.4f}'.format(epoch+1,num_epochs,d_loss.item()))
        ctc_loss_list.append(d_loss.item())

    if (epoch+1) % 1000 == 0:
        print('Epoch No {}  reached saving model'.format(epoch+1)
        torch.save(model.state_dict(), 'outputModel/KDNet_epoch_{}.pkl'.format(epoch+1))