DataLoader class

In [1]:
import cv2
import os
import numpy as np
import av
import re
from itertools import cycle
from random import shuffle

class videoDataset():
    """Dataset Class for Loading Video"""

    def __init__(self, path):
        
        self.rootDir = path
        name=[]
        file=[]
        for _, dirnames, filenames in os.walk(path):
            name.append(dirnames)
            file.append(filenames)
        name = name[0]

        newpath = []

        for i in range(len(name)):    
            for files in file[i+1]:
                pathn = path + name[i]+'/' + files
                newpath.append(pathn)
                
        shuffle(newpath)        
        self.sequenceLength = 5
        self.classList=['(0_Basketball)','(1_Biking)','(2_Diving)','(3_GolfSwing)','(4_HorseRiding)','(5_SoccerJuggling)','(6_Swing)','(7_TennisSwing)','(8_TrampolineJumping)','(9_VolleyballSpiking)','(10_WalkingWithDog)']	# Word 1   
        self.Xaxis = 192
        self.Yaxis = 240
        self.minFrames = 31        
        self.pathList = cycle(newpath)
        self.testdata = []
        self.batchsize = 50
        self.k = 1
        self.current=0
        
        


 
    
    def frameLength(newpath):
        v = av.open(newpath)
        stream = next(s for s in v.streams if s.type == 'video')
        #X_data = []
        for packet in v.demux(stream):
            for frame in packet.decode():
                continue
        return frame.index
    
    def setK(self,num):
        self.k = num
    
 
    def regexBatchnum(self,path):
        re1='.*?'	# Non-greedy match on filler
        re2='g'	# Uninteresting: c
        re3='.*?'	# Non-greedy match on filler
        re4='g'	# Uninteresting: c
        re5='.*?'	# Non-greedy match on filler
        re6='g'	# Uninteresting: c
        re7='.*?'	# Non-greedy match on filler
        re8='(g)'	# Any Single Character 1        

        re9= '(' + str(self.k).zfill(2) + ')'	# Integer Number 1
        
        rg = re.compile(re1+re2+re3+re4+re5+re6+re7+re8+re9,re.IGNORECASE|re.DOTALL)
        m = rg.search(path)
        
        if(m==None):
            return False
        else:
            return True
    
    def regexClass(self,path):
        
        classnum = 0
                
        re1='.*?'	# Non-greedy match on filler
        ####################
        #self.numclasses
        i=-1
        for re2 in self.classList:
            i = i+1
            rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
            m = rg.search(path)
            if m:
                classnum = i
                break
        return classnum
            
                
        
    
    
    def getBatch(self):
        batchCount = 0
        X = np.zeros([self.sequenceLength,self.batchsize,int(self.Xaxis/2),int(self.Yaxis/2)])
        Y = np.zeros([self.batchsize])

        for pathname in self.pathList:
            
            
            v = av.open(pathname)
            
            self.current +=1
            
            if(videoDataset(self.rootDir).regexBatchnum(pathname)== True):
                if pathname not in self.testdata:
                    self.testdata.append(pathname)
                continue

            
            stream = next(s for s in v.streams if s.type == 'video')
            X_data = []
            for packet in v.demux(stream):
                for frame in packet.decode():
                    # some other formats gray16be, bgr24, rgb24
                    img = frame.to_nd_array(format='bgr24')
                    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    
                    X_data.append(gray_image)
            
            X_data = np.array(X_data)
            aa = np.floor(np.linspace(1,X_data.shape[0],self.sequenceLength,endpoint = False))
            sampledX = []
            
            for i in aa:
                sampledX.append(X_data[int(i),:,:])
            sampledX = np.array(sampledX)
            
            
            #Reduced dimensions in resize_X
            resize_X = []
            
            #Resizing the (sequence_length) number of images into half size. So that the output of CNN doesn't explode 
            for p in range(sampledX.shape[0]):
                height, width = sampledX[p,:,:].shape
                gray_image = cv2.resize(sampledX[p,:,:],(int(width/2), int(height/2)), interpolation = cv2.INTER_AREA)
                resize_X.append(gray_image)
            
            resize_X = np.array(resize_X)
            
            
            
            #Now load array into the final batch array
            X[:,batchCount,:,:] = resize_X
            Y[batchCount] = int(videoDataset(self.rootDir).regexClass(pathname))
            batchCount += 1
            
            if(batchCount == self.batchsize ):
                return X,Y
            
    
    def getTestData(self):
        
        TestData = []
        TestClass = []
        for test in self.testdata:
            
            v = av.open(test)
            
            stream = next(s for s in v.streams if s.type == 'video')
            X_test = []
            
            for packet in v.demux(stream):
                for frame in packet.decode():
                    # some other formats gray16be, bgr24, rgb24
                    img = frame.to_nd_array(format='bgr24')
                    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    
                    X_test.append(gray_image)
            
            X_test = np.array(X_test)
            aa = np.floor(np.linspace(1,X_test.shape[0],self.sequenceLength,endpoint = False))
            sampledXtest = []
            
            for i in aa:
                sampledXtest.append(X_test[int(i),:,:])
            sampledXtest = np.array(sampledXtest)
            
            
            #Reduced dimensions in resize_X
            resize_X = []
            
            #Resizing the (sequence_length) number of images into half size. So that the output of CNN doesn't explode 
            for p in range(sampledX.shape[0]):
                height, width = sampledXtest[p,:,:].shape
                gray_image = cv2.resize(sampledXtest[p,:,:],(int(width/2), int(height/2)), interpolation = cv2.INTER_AREA)
                resize_X.append(gray_image)
            
            resize_X = np.array(resize_X)
            
            TestData.append(resize_X)
            TestClass.append(int(videoDataset(self.rootDir).regexClass(test)))
            
        
        TestData = np.array(TestData)
        TestData = np.swapaxes(TestData,0,1)
        TestClass = np.array(TestClass)
        return TestData, TestClass
    
            
            

Variable declaration

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
from tensorboardX import SummaryWriter

import numpy as np

#torch.cuda.set_device(1)
#torch.set_default_tensor_type('torch.cuda.FloatTensor')

exp = 1
writer = SummaryWriter()
path = '/media/data/bimal/Arpan/original_data/'
batchsize = 50
seq_length = 5
height = 192
width = 240
num_iter = 400

#CNN parameters
learning_rate = 0.0005
kH = 5
kW = 5
noFilters1 = 16
noFilters2 = 8
padW = (kW-1)/2
padH = (kH-1)/2
cnn_output = 5760

#RNN parameters
hidden_size = 1200
num_layers_RNN = 1
num_classes = 11




DataLoader= videoDataset(path)

Model initialization

In [3]:
class CNN(nn.Module):
    def __init__(self, noFilters1, noFilters2, kH, width):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, noFilters1, kernel_size= kH, padding= int((kH-1)/2)),
            nn.BatchNorm2d(noFilters1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(noFilters1, noFilters2, kernel_size=kH, padding= int((kH-1)/2)),
            nn.BatchNorm2d(noFilters2),
            nn.ReLU(),
            nn.MaxPool2d(2))
        #self.fc = nn.Linear( int((width)/4 * (width)/4 *noFilters2), 11)
        #softmax
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        #out = self.fc(out)
        return out
        
cnn = CNN(noFilters1, noFilters2, kH, width)


# RNN Model (Many-to-One)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.soft = nn.Softmax()
    
    def forward(self, x):
        # Set initial states 
        h0 = Variable(torch.zeros(self.num_layers, x.size(1), self.hidden_size)) 
        c0 = Variable(torch.zeros(self.num_layers, x.size(1), self.hidden_size))
        
        # Forward propagate RNN
        out, _ = self.lstm(x, (h0, c0)) 
        
        
        # Decode hidden state of last time step
        output = self.fc(out.view(out.size(0)*out.size(1),out.size(2)))  
        output = self.soft(output)
        return output.view(out.size(0),out.size(1),output.size(1))

rnn = RNN(cnn_output, hidden_size, num_layers_RNN, num_classes)

# criterion = nn.MultiLabelSoftMarginLoss()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(list(rnn.parameters()) + list(cnn.parameters()), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.5)


Main code


In [None]:
for epoch in range(6):
    if((epoch + 1)%2 == 0 ):
        scheduler.step()
    for iteration in range(num_iter):
        X_data,Y_data = DataLoader.getBatch()
    #     Y_onehot = (np.arange(num_classes) == Y_data[:,None]).astype(np.float32)

        X_data = Variable(torch.FloatTensor(X_data))
        Y_data = Variable(torch.Tensor(Y_data).long(), requires_grad=False)

        RNNinput = np.zeros([seq_length,batchsize,cnn_output])
        RNNinput = Variable(torch.from_numpy(RNNinput).float())

    #     RNNoutput = np.zeros([seq_length,batchsize,num_classes])
    #     RNNoutput = Variable(torch.from_numpy(RNNoutput).float())

        T = []
        temp = []
        for i in range(seq_length):
            T.append(X_data[i,:,:,:].unsqueeze(1))

        for t in T:
            temp.append(cnn(t))

        RNNinput = temp[0].unsqueeze(0)
        for i in range(1, len(temp)):
            RNNinput = torch.cat((RNNinput, temp[i].unsqueeze(0)), 0)

    #     for k in range(batchsize):
    #         for m in range(len(temp)):
    #             TEMP = temp[m]
    #             RNNinput[m,k,:] = TEMP[k,:]

        Y_out = rnn(RNNinput)
        total_loss = 0
        for q in range(seq_length):

            optimizer.zero_grad()
            output = Y_out[q,:,:]
            loss = criterion(output, Y_data)
            loss.backward(retain_graph=True)
            optimizer.step()
            total_loss += loss.data[0]


        total_loss = total_loss/seq_length

        writer.add_scalar('DVS/train_loss', total_loss, iteration + epoch*num_iter)
        if ((iteration+1)%10 == 0 or iteration == 0):
            print("Iteration number %d , Loss value %f"%(iteration+1, total_loss))
    



Iteration number 1 , Loss value 2.398042
Iteration number 10 , Loss value 2.375924
Iteration number 20 , Loss value 2.394389
Iteration number 30 , Loss value 2.317404
Iteration number 40 , Loss value 2.401976
Iteration number 50 , Loss value 2.402094
Iteration number 60 , Loss value 2.321947
Iteration number 70 , Loss value 2.396771
Iteration number 80 , Loss value 2.289645
Iteration number 90 , Loss value 2.435684
Iteration number 100 , Loss value 2.415046
Iteration number 110 , Loss value 2.314993
Iteration number 120 , Loss value 2.434190
Iteration number 130 , Loss value 2.422462
Iteration number 140 , Loss value 2.351420
Iteration number 150 , Loss value 2.405107
Iteration number 160 , Loss value 2.333382
Iteration number 180 , Loss value 2.405294
Iteration number 190 , Loss value 2.379262
Iteration number 200 , Loss value 2.367678
Iteration number 210 , Loss value 2.386676
Iteration number 220 , Loss value 2.387442
Iteration number 230 , Loss value 2.443273
Iteration number 240 ,

KeyboardInterrupt: 

In [None]:
SAVE_PATH_CNN = './TrainModel/' + str(exp) + '-' + str(DataLoader.k) + '-CNN'
SAVE_PATH_RNN = './TrainModel/' + str(exp) + '-' + str(DataLoader.k) + '-RNN'
torch.save(cnn.state_dict(), SAVE_PATH_CNN)
torch.save(rnn.state_dict(), SAVE_PATH_RNN)

Test

In [None]:
current = 0
for path in X.pathList:
    print(int(X.regexClass(path)))
    current += 1
    
    if(current>=2000):
        break
    


In [None]:
name = []
file = []
path = '/media/data/bimal/Arpan/original_data/'
for _, dirnames, filenames in os.walk(path):
            name.append(dirnames)
            file.append(filenames)
name = name[0]
newpath = []

for i in range(len(name)):    
        for files in file[i+1]:
             pathn = path + name[i]+'/' + files
             newpath.append(pathn)


In [None]:
        classnum = 0
                
        re1='.*?'	# Non-greedy match on filler
        ####################
        #self.numclasses
        i=0
        for re2 in classList:
            i=i+1
            rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
            m = rg.search(newpath[200])
            if m:
                classnum = i
                break
        print(classnum)

In [None]:
a = [4,2,3,1,5,6]
if 7 not in a:
    a.append(7)

In [None]:
testtt = []

for p in range(5):
    testtt.append(np.random.rand(4,3,3))
    


In [None]:
np.swapaxes(ppp,0,1).shape