In [1]:
%reload_ext watermark
%reload_ext autoreload
%autoreload 2
%watermark -v -p numpy,sklearn,pandas
%watermark -v -p cv2,PIL,matplotlib
%watermark -v -p torch,torchvision,torchaudio,pytorch_lightning
%matplotlib inline
%config InlineBackend.figure_format='retina'
%config IPCompleter.use_jedi = False

from IPython.display import display, HTML, Javascript
display(HTML('<style>.container { width:%d%% !important; }</style>' % 90))

def _IMPORT_(x):
    try:
        exec(x, globals())
    except:
        pass


CPython 3.6.9
IPython 7.16.1

numpy 1.18.5
sklearn 0.24.0
pandas 1.1.5
CPython 3.6.9
IPython 7.16.1

cv2 4.5.1
PIL 6.2.2
matplotlib 3.3.3
CPython 3.6.9
IPython 7.16.1

torch 1.8.0.dev20210103+cu101
torchvision 0.9.0.dev20210103+cu101
torchaudio not installed
pytorch_lightning 1.2.0


In [3]:
###
### Common ###
###

import sys, os, io, time, random, math
import json, base64, requests

_IMPORT_('import numpy as np')
_IMPORT_('import pandas as pd')
_IMPORT_('from tqdm.notebook import tqdm')

def print_progress_bar(x):
    print('\r', end='')
    print('Progress: {}%:'.format(x), '%s%s' % ('▋'*(x//2), '.'*((100-x)//2)), end='')
    sys.stdout.flush()

###
### Torch ###
###

_IMPORT_('import torch')
_IMPORT_('import torch.nn as nn')
_IMPORT_('import torch.nn.functional as F')
_IMPORT_('import torch.optim as O')
_IMPORT_('from torchvision import models as M')
_IMPORT_('from torchvision import transforms as T')
_IMPORT_('from torch.utils.data import Dataset, DataLoader')

###
### Display ###
###

_IMPORT_('import cv2')
_IMPORT_('from PIL import Image')
_IMPORT_('from torchvision.utils import make_grid')
_IMPORT_('import matplotlib.pyplot as plt')
_IMPORT_('import plotly')
_IMPORT_('import plotly.graph_objects as go')

# plotly.offline.init_notebook_mode(connected=False)

def show_video(video_path, width=None, height=None):
    W, H = '', ''
    if width:
        W = 'width=%d' % width
    if height:
        H = 'height=%d' % height
    if video_path.startswith('http'):
        data_url = video_path
    else:
        mp4 = open(video_path, 'rb').read()
        data_url = 'data:video/mp4;base64,' + base64.b64encode(mp4).decode()
    return HTML('<video %s %s controls src="%s" type="video/mp4"/>' % (W, H, data_url))

def show_image(image_path, width=None, height=None):
    W, H = '', ''
    if width:
        W = 'width=%d' % width
    if height:
        H = 'height=%d' % height
    if image_path.startswith('http'):
        data_url = image_path
    else:
        img = open(image_path, 'rb').read()
        data_url = 'data:image/jpg;base64,' + base64.b64encode(img).decode()
    return HTML('<img %s %s src="%s"/>' % (W, H, data_url))

In [25]:
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from random import randrange, randint
import glob

In [40]:
DATASET_ROOT = '/data/datasets/cv/repnet_datasets'

frame_per_vid = 64

synthvids = glob.glob(f'{DATASET_ROOT}/synthvids/train*.mp4')

In [52]:
class miniDataset(Dataset):
    
    def __init__(self, df, path_to_video):
        
        self.path = path_to_video
        self.df = df.reset_index()
        self.count = self.df.loc[0, 'count']
        print(self.count)

    def getFrames(self, path = None):
        """returns frames"""
    
        frames = []
        if path is None:
            path = self.path
        
        cap = cv2.VideoCapture(path)
        while cap.isOpened():
            ret, frame = cap.read()
            if ret is False:
                break
            
            img = Image.fromarray(frame)
            frames.append(img)
        
        cap.release()
        return frames

    def __getitem__(self, index):
        
        curFrames = self.getFrames()
        
        output_len = min(len(curFrames), randint(44, 64))
                
        newFrames = []
        for i in range(1, output_len + 1):
            newFrames.append(curFrames[i * len(curFrames)//output_len  - 1])

        a = randint(0, 64 - output_len)
        b = 64 - output_len - a
        
        randpath = random.choice(synthvids)
        randFrames = self.getFrames(randpath)
        newRandFrames = []
        for i in range(1, a + b + 1):
            newRandFrames.append(randFrames[i * len(randFrames)//(a+b)  - 1])
        
        same = np.random.choice([0, 1], p = [0.5, 0.5])
        if same:
            finalFrames = [newFrames[0] for i in range(a)]
            finalFrames.extend( newFrames )        
            finalFrames.extend([newFrames[-1] for i in range(b)] )
        else:
            finalFrames = newRandFrames[:a]
            finalFrames.extend( newFrames )        
            finalFrames.extend( newRandFrames[a:] )

        Xlist = []
        for img in finalFrames:
        
            preprocess = T.Compose([
                T.Resize((112, 112)),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
            frameTensor = preprocess(img).unsqueeze(0)
            Xlist.append(frameTensor)
        
        Xlist = [Xlist[i] if a<i<(64-b) else torch.nn.functional.dropout(Xlist[i], 0.2) for i in range(64)]  
        X = torch.cat(Xlist)
        y = [0 for i in range(0,a)]
        y.extend([output_len/self.count if 1<output_len/self.count<32 else 0 for i in range(0, output_len)])
        
        y.extend( [ 0 for i in range(0, b)] )
        y = torch.FloatTensor(y).unsqueeze(-1)
        
        return X, y
        
    def __len__(self):
        return 1
    
class dataset_with_indices(Dataset):

    """
    Modifies the given Dataset class to return a tuple data, target, index
    instead of just data, target.
    """
    
    def __init__(self, ds):
        self.ds = ds

    def __getitem__(self, index):
        X, y = self.ds[index]
        return X, y, index
    
    def getPeriodDist(self):
        arr = np.zeros(32,)
        
        for i in tqdm(range(self.__len__())):
            _, p,_ = self.__getitem__(i)
            per = max(p)
            arr[per] += 1
        return arr
    
    def __len__(self):
        return len(self.ds)

TEST_COUNT = 3

def getCombinedDataset(dfPath, videoDir, videoPrefix):
    df = pd.read_csv(dfPath)
    path_prefix = videoDir + '/' + videoPrefix
    
    files_present = []
    for i in range(0, len(df)):
        path_to_video = path_prefix + str(i) + '.mp4'
        if os.path.exists(path_to_video):
            files_present.append(i)
        if i > TEST_COUNT:
            break

    df = df.iloc[files_present]
    
    print(files_present)
    
    miniDatasetList = []
    for i in range(0, len(df)):
        dfi = df.iloc[[i]]
        path_to_video = path_prefix + str(dfi.index.item()) +'.mp4'
        miniDatasetList.append(miniDataset(dfi, path_to_video))
        
    megaDataset = dataset_with_indices(ConcatDataset(miniDatasetList))
    return megaDataset


In [53]:
trainDatasetC = getCombinedDataset(f'{DATASET_ROOT}/countix/countix_train.csv',
                                   f'{DATASET_ROOT}/trainvids',
                                   'train')

[0, 1, 2, 3, 4]
/data/datasets/cv/repnet_datasets/trainvids/train0.mp4
20
/data/datasets/cv/repnet_datasets/trainvids/train1.mp4
12
/data/datasets/cv/repnet_datasets/trainvids/train2.mp4
8
/data/datasets/cv/repnet_datasets/trainvids/train3.mp4
3
/data/datasets/cv/repnet_datasets/trainvids/train4.mp4
11


In [54]:
for a in trainDatasetC:
    print('aaa')

63
/data/datasets/cv/repnet_datasets/synthvids/train922.mp4
aaa
58
/data/datasets/cv/repnet_datasets/synthvids/train704.mp4
aaa
61
/data/datasets/cv/repnet_datasets/synthvids/train122.mp4
aaa
44
/data/datasets/cv/repnet_datasets/synthvids/train242.mp4
aaa
51
/data/datasets/cv/repnet_datasets/synthvids/train652.mp4
aaa
