In [1]:
import cv2
import time
import torchvision
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import random_split, DataLoader
import torch
import transforms as T
from movinets.config import _C
import numpy as np
from movinets import MoViNet
import random
import gc

torch.manual_seed(97)
num_frames = 16 # 16
clip_steps = 2
Bs_Train = 16
Bs_Test = 16


In [2]:
class_dict=dict()
class_dict['normal']=0
class_dict['assault']=1
class_dict['fight']=2
class_dict['burglary']=3
class_dict['vandalism']=4
class_dict['swoon']=5
class_dict['wander']=6
class_dict['trespass']=7
class_dict['dump']=8
class_dict['robbery']=9
class_dict['datefight']=10
class_dict['kidnap']=11
class_dict['drunken']=12

In [3]:
root = '../iterdata/'

In [4]:
val_data = []
#test_data = np.array(test_data)

val_label = np.empty((0), 'float')

for class_num in range(1,11):
    for num in range(1):
        path = root + str(class_num) + '/' + str(num)
        cap = cv2.VideoCapture(path+'.mp4')
        if cap.isOpened():
            val_label = np.concatenate((val_label, np.load(path + '.npy')[num_frames-1:]))
            img_arr=[]
            while True:
                ret, img = cap.read()
                if ret:
                    img_arr.append(img)
                    if len(img_arr)==num_frames:
                        val_data.append(img_arr.copy())
                        del img_arr[0]
                else:
                    break

            cap.release()

        else:
            print('cannot open the file', out_path + '.mp4')
            break

In [5]:
train_data = []
#test_data = np.array(test_data)

train_label = np.empty((0), 'float')

for class_num in range(1,11):
    for num in range(1,4):
        path = root + str(class_num) + '/' + str(num)
        cap = cv2.VideoCapture(path+'.mp4')
        if cap.isOpened():
            train_label = np.concatenate((train_label, np.load(path + '.npy')[num_frames-1:]))
            img_arr=[]
            while True:
                ret, img = cap.read()
                if ret:
                    img_arr.append(img)
                    if len(img_arr)==num_frames:
                        train_data.append(img_arr.copy())
                        del img_arr[0]
                else:
                    break

            cap.release()

        else:
            print('cannot open the file', out_path + '.mp4')
            break

In [6]:
len(train_data)

46133

In [7]:
train_label.shape

(46133,)

In [8]:
len(val_data)

15577

In [9]:
val_label.shape

(15577,)

In [10]:
class cctv():

    def __init__(self, train_data, label_data):
        self.train = train_data
        self.label = torch.from_numpy(label_data)

    def __len__(self):
        return len(self.train)

    def __getitem__(self, idx):
        return torch.from_numpy(np.array(self.train[idx]).reshape(3, 16, 172, 172)).float()/255, self.label[idx].long()


In [11]:
cctv_train = cctv(train_data, train_label)

In [12]:
cctv_val = cctv(val_data, val_label)

In [13]:
train_loader = DataLoader(cctv_train, batch_size=Bs_Train, shuffle=True)

In [14]:
val_loader = DataLoader(cctv_val, batch_size=Bs_Train, shuffle=False)

In [15]:
def train_iter(model, optimz, data_load, loss_val):
    samples = len(data_load.dataset)
    model.train()
    model.cuda()
    model.clean_activation_buffers()
    optimz.zero_grad()
    for i, (data,target) in enumerate(data_load):
        out = F.log_softmax(model(data.cuda()), dim=1)
        loss = F.nll_loss(out, target.cuda())
        loss.backward()
        optimz.step()
        optimz.zero_grad()
        model.clean_activation_buffers()
        if i % 50 == 0:
            print('[' +  '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(samples) +
                  ' (' + '{:3.0f}'.format(100 * i / len(data_load)) + '%)]  Loss: ' +
                  '{:6.4f}'.format(loss.item()))
            loss_val.append(loss.item())

def evaluate(model, data_load, loss_val):
    model.eval()
    
    samples = len(data_load.dataset)
    csamp = 0
    tloss = 0
    model.clean_activation_buffers()
    with torch.no_grad():
        for data, target in data_load:
            output = F.log_softmax(model(data.cuda()), dim=1)
            loss = F.nll_loss(output, target.cuda(), reduction='sum')
            _, pred = torch.max(output, dim=1)
            
            tloss += loss.item()
            csamp += pred.eq(target.cuda()).sum()
            model.clean_activation_buffers()
    aloss = tloss / samples
    loss_val.append(aloss)
    print('\nAverage loss: ' + '{:.4f}'.format(aloss) +
          '  Accuracy:' + '{:5}'.format(csamp) + '/' +
          '{:5}'.format(samples) + ' (' +
          '{:4.2f}'.format(100.0 * csamp / samples) + '%)\n')
    
def train_iter_stream(model, optimz, data_load, loss_val, n_clips = 2, n_clip_frames=8):
    """
    In causal mode with stream buffer a single video is fed to the network
    using subclips of lenght n_clip_frames. 
    n_clips*n_clip_frames should be equal to the total number of frames presents
    in the video.
    
    n_clips : number of clips that are used
    n_clip_frames : number of frame contained in each clip
    """
    #clean the buffer of activations
    samples = len(data_load.dataset)
    model.cuda()
    model.train()
    model.clean_activation_buffers()
    optimz.zero_grad()
    
    for i, (data,target) in enumerate(data_load):
        data = data.cuda()
        target = target.cuda()
        l_batch = 0
        #backward pass for each clip
        for j in range(n_clips):
            output = F.log_softmax(model(data[:,:,(n_clip_frames)*(j):(n_clip_frames)*(j+1)]), dim=1)
            loss = F.nll_loss(output, target)
            _, pred = torch.max(output, dim=1)
            loss = F.nll_loss(output, target)/n_clips
            loss.backward()
        l_batch += loss.item()*n_clips
        optimz.step()
        optimz.zero_grad()
        
        #clean the buffer of activations
        model.clean_activation_buffers()
        if i % 50 == 0:
            print('[' +  '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(samples) +
                  ' (' + '{:3.0f}'.format(100 * i / len(data_load)) + '%)]  Loss: ' +
                  '{:6.4f}'.format(l_batch))
            loss_val.append(l_batch)

def evaluate_stream(model, data_load, loss_val, n_clips = 2, n_clip_frames=8):
    model.eval()
    model.cuda()
    samples = len(data_load.dataset)
    csamp = 0
    tloss = 0
    with torch.no_grad():
        for data, target in data_load:
            data = data.cuda()
            target = target.cuda()
            model.clean_activation_buffers()
            for j in range(n_clips):
                output = F.log_softmax(model(data[:,:,(n_clip_frames)*(j):(n_clip_frames)*(j+1)]), dim=1)
                loss = F.nll_loss(output, target)
            _, pred = torch.max(output, dim=1)
            tloss += loss.item()
            csamp += pred.eq(target).sum()

    aloss = tloss /  len(data_load)
    loss_val.append(aloss)
    print('Average loss: ' + '{:.4f}'.format(aloss) +
          '  Accuracy:' + '{:5}'.format(csamp) + '/' +
          '{:5}'.format(samples) + ' (' +
          '{:4.2f}'.format(100.0 * csamp / samples) + '%)')

In [16]:
gc.collect()

0

In [17]:
N_EPOCHS = 10
start_time = time.time()
model = MoViNet(_C.MODEL.MoViNetA0, causal = True, pretrained = True )


trloss_val, tsloss_val = [], []
model.classifier[3] = torch.nn.Conv3d(2048, 51, (1,1,1))
optimz = optim.Adam(model.parameters(), lr=0.00005)
for epoch in range(1, N_EPOCHS + 1):
    print('Epoch:', epoch)
    train_time = time.time()
    train_iter_stream(model, optimz, train_loader, trloss_val)
    print('Train time:', '{:5.2f}'.format(time.time() - train_time), 'seconds')
    print('\nTrain result')
    evaluate_stream(model, train_loader, tsloss_val)
    
    print('\nValidation result')
    test_time = time.time()
    evaluate_stream(model, val_loader, tsloss_val)
    print('Validation time:', '{:5.2f}'.format(time.time() - test_time), 'seconds\n')
    

print('Execution time:', '{:5.2f}'.format(time.time() - start_time), 'seconds')

Epoch: 1
Train time: 985.09 seconds

Train result
Average loss: 0.1028  Accuracy:44486/46133 (96.43%)

Validation result
Average loss: 0.7198  Accuracy:11478/15577 (73.69%)
Validation time: 164.17 seconds

Epoch: 2
Train time: 1011.63 seconds

Train result
Average loss: 0.0428  Accuracy:45476/46133 (98.58%)

Validation result
Average loss: 0.9669  Accuracy:10971/15577 (70.43%)
Validation time: 148.24 seconds

Epoch: 3


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\USER001\anaconda3\envs\tf24_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-2487deb5afbd>", line 12, in <module>
    train_iter_stream(model, optimz, train_loader, trloss_val)
  File "<ipython-input-15-8a1a2c2cf929>", line 66, in train_iter_stream
    output = F.log_softmax(model(data[:,:,(n_clip_frames)*(j):(n_clip_frames)*(j+1)]), dim=1)
  File "C:\Users\USER001\anaconda3\envs\tf24_gpu\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "E:\MoViNet\movinets\models.py", line 651, in forward
    return self._forward_impl(x)
  File "E:\MoViNet\movinets\models.py", line 642, in _forward_impl
    x = self.blocks(x)
  File "C:\Users\USER001\anaconda3\envs\tf24_gpu\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*i

TypeError: object of type 'NoneType' has no len()

In [None]:
N_EPOCHS = 10
start_time = time.time()
model = MoViNet(_C.MODEL.MoViNetA0, causal = False, pretrained = True )


trloss_val, tsloss_val = [], []
model.classifier[3] = torch.nn.Conv3d(2048, 51, (1,1,1))
optimz = optim.Adam(model.parameters(), lr=0.00005)
for epoch in range(1, N_EPOCHS + 1):
    print('Epoch:', epoch)
    train_time = time.time()
    train_iter_stream(model, optimz, train_loader, trloss_val)
    print('Train time:', '{:5.2f}'.format(time.time() - train_time), 'seconds')
    print('\nTrain result')
    #evaluate_stream(model, train_loader, tsloss_val)
    
    print('\nValidation result')
    test_time = time.time()
    evaluate_stream(model, val_loader, tsloss_val)
    print('Validation time:', '{:5.2f}'.format(time.time() - test_time), 'seconds\n')
    

print('Execution time:', '{:5.2f}'.format(time.time() - start_time), 'seconds')

Epoch: 1
Train time: 921.71 seconds

Train result

Validation result
Average loss: 0.6716  Accuracy:12699/15577 (81.52%)
Validation time: 118.28 seconds

Epoch: 2
Train time: 977.39 seconds

Train result

Validation result
Average loss: 0.9416  Accuracy:11514/15577 (73.92%)
Validation time: 141.30 seconds

Epoch: 3


In [None]:
arr=[1,2,3,4,5]

In [None]:
arr

In [None]:
arrlist=[]
for i in range(6,11):
    arrlist.append(np.array(arr))
    del arr[0]
    arr.append(i)

In [None]:
arrlist