In [None]:
import torch
from totensor import ToTensor
from PIL import Image
from torchvision import get_image_backend
import json
import os
import math
import copy

# Utilities

In [None]:
def video_loader(video_dir_path,frame_indices):
    """Load the requested frames of one video as RGB PIL images.

    Frames are read from ``<video_dir_path>/image_XXXXX.jpg`` where ``XXXXX``
    is the zero-padded frame index.

    Args:
        video_dir_path: Directory holding the extracted frame images.
        frame_indices: Iterable of integer frame numbers to load, in order.

    Returns:
        A list of RGB images. Loading stops at the first frame file that does
        not exist, so the list may be shorter than ``frame_indices``.
    """
    frames = []
    for frame_number in frame_indices:
        frame_path = os.path.join(
            video_dir_path, 'image_{:05d}.jpg'.format(frame_number))
        if not os.path.exists(frame_path):
            # A missing frame ends the clip; return what was collected so far.
            break
        with open(frame_path, 'rb') as frame_file, Image.open(frame_file) as frame:
            # convert() yields a new image, so it outlives the closed file.
            frames.append(frame.convert('RGB'))
    return frames
        
def load_value_file(file_path):
    """Read a text file containing a single number and return it as a float.

    Trailing newline / carriage-return characters are removed before the
    conversion.
    """
    with open(file_path, 'r') as input_file:
        raw_text = input_file.read()
    return float(raw_text.rstrip('\n\r'))



# Explore the JSON Annotation File Entries

In [None]:
# Parse the ActivityNet annotation file into `dataa` for the inspection below.
with open('activity_net.v1-3.min.json', mode='r') as data_file:
    dataa = json.loads(data_file.read())


In [None]:
# Show only the first database entry: its key and its first annotation.
for key in dataa['database']:
    value = dataa['database'][key]
    print(key)
    print(value['annotations'][0])
    break

In [None]:
# Parse the Kinetics annotation file into `data` for the inspection below.
with open('3drKinetics.json', mode='r') as data_file:
    data = json.loads(data_file.read())

In [None]:
# Show only the first database entry: its key and its annotations object.
for key in data['database']:
    value = data['database'][key]
    print(key)
    print(value['annotations'])
    break

# Build the Dataset Sample List from Extracted Video Frames

In [None]:
def _leaf_class_names(taxonomy):
    """Return the names of taxonomy nodes that no other node lists as its parent."""
    parent_ids = {node['parentId'] for node in taxonomy}
    return [node['nodeName'] for node in taxonomy
            if node['nodeId'] not in parent_ids]


def _count_frames(video_path):
    """Return the highest frame number found in ``video_path`` (0 if no frames)."""
    image_file_names = [x for x in os.listdir(video_path) if 'image' in x]
    if not image_file_names:
        return 0
    # Characters 6..10 hold the zero-padded number in 'image_XXXXX.jpg';
    # the lexicographically largest name therefore carries the last frame.
    return int(max(image_file_names)[6:11])


def make_dataset(root_path,
                 annotation_path,
                 subset,
                 n_samples_for_each_video,
                 sample_duration,
                 dataset_name):
    """Build the list of clip samples for one subset of a video dataset.

    Args:
        root_path: Directory containing one sub-directory of extracted frames
            (``image_XXXXX.jpg``) per video.
        annotation_path: Path to the JSON annotation file.
        subset: Value of each entry's ``'subset'`` field to select. The
            unlabeled subsets are ``'testing'`` (activitynet) and ``'test'``
            (kinetics).
        n_samples_for_each_video: ``1`` produces a single sample covering all
            frames; a value ``> 1`` spreads roughly that many windows over the
            video; any other value tiles the video with windows every
            ``sample_duration`` frames.
        sample_duration: Maximum number of frames per windowed sample.
        dataset_name: ``'activitynet'`` or ``'kinetics'``.

    Returns:
        ``(dataset, idx_to_class)``. ``dataset`` is a list of dicts with keys
        ``'video'``, ``'segment'``, ``'n_frames'``, ``'video_id'``, ``'label'``
        and ``'frame_indices'``; ``idx_to_class`` maps label index to name.
        Samples from unlabeled subsets carry ``'label' == -1``.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    with open(annotation_path, 'r') as data_file:
        data = json.load(data_file)

    # Collect video directory names and, for labeled subsets, the annotations
    # of each video at the same list position.
    video_names = []
    annotations = []

    if dataset_name == 'activitynet':
        for key, value in data['database'].items():
            if value['subset'] != subset:
                continue
            video_names.append('v_{}'.format(key))
            if subset != 'testing':
                annotations.append(value['annotations'])

        class_names = _leaf_class_names(data['taxonomy'])
        class_to_idx = {name: i for i, name in enumerate(class_names)}

    elif dataset_name == 'kinetics':
        for key, value in data['database'].items():
            if value['subset'] != subset:
                continue
            if subset == 'test':
                video_names.append('test/{}'.format(key))
            else:
                label = value['annotations']['label']
                video_names.append('{}/{}'.format(label, key))
                # Wrapped in a list so both datasets share one loop below.
                annotations.append([value['annotations']])

        class_to_idx = {label: i for i, label in enumerate(data['labels'])}

    else:
        raise ValueError(
            "unsupported dataset_name {!r}; expected 'activitynet' or "
            "'kinetics'".format(dataset_name))

    idx_to_class = {idx: name for name, idx in class_to_idx.items()}

    # Unlabeled subsets collect no annotations at all; indexing
    # annotations[i] for them would raise IndexError.
    has_labels = len(annotations) != 0

    dataset = []
    for i, video_name in enumerate(video_names):
        video_path = os.path.join(root_path, video_name)
        if not os.path.exists(video_path):
            continue

        n_frames = _count_frames(video_path)
        if n_frames <= 0:
            continue

        if dataset_name == 'activitynet':
            # Drop the 'v_' prefix added above.
            video_id = video_name[2:]
        else:
            # NOTE(review): presumably strips a 14-character
            # '_<start>_<end>' suffix from the key -- confirm against the
            # Kinetics frame directory naming.
            video_id = video_name[:-14].split('/')[1]

        # None stands in for "no ground truth" so an unlabeled video still
        # yields one sample.
        video_annotations = annotations[i] if has_labels else [None]

        for annotation in video_annotations:
            sample = {
                'video': video_path,
                # The whole video is used as the segment for both datasets.
                'segment': [1, n_frames],
                'n_frames': n_frames,
                'video_id': video_id,
            }
            if annotation is not None:
                sample['label'] = class_to_idx[annotation['label']]
            else:
                sample['label'] = -1

            if n_samples_for_each_video == 1:
                sample['frame_indices'] = list(range(1, n_frames + 1))
                dataset.append(sample)
                continue

            if n_samples_for_each_video > 1:
                step = max(1,
                           math.ceil((n_frames - 1 - sample_duration) /
                                     (n_samples_for_each_video - 1)))
            else:
                step = sample_duration

            for j in range(1, n_frames, step):
                # Deep copy so windows do not share the mutable 'segment' list.
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + sample_duration)))
                dataset.append(sample_j)

    return dataset, idx_to_class

# Video Dataset Class (ActivityNet / Kinetics)

In [None]:
class VideoDataset(torch.utils.data.Dataset):
    """Dataset of video clips read from directories of extracted frames.

    The sample list is produced by ``make_dataset``; each item is a pair
    ``(clip, target)`` where ``target`` is the sample's full metadata dict.
    """

    def __init__(self,
                root_path,
                annotation_path,
                subset,
                n_samples_for_each_video = 1,
                sample_duration = 16,
                dataset_name = 'activitynet',
                get_loader = video_loader):
        """Build the sample list and remember the frame loader.

        Args:
            root_path: Directory containing the per-video frame folders.
            annotation_path: Path to the JSON annotation file.
            subset: Subset name to select from the annotation file.
            n_samples_for_each_video: Forwarded to ``make_dataset``.
            sample_duration: Forwarded to ``make_dataset``.
            dataset_name: Forwarded to ``make_dataset``.
            get_loader: Callable ``(video_dir, frame_indices) -> list of
                images`` used to read the frames of one sample.
        """
        samples, idx_to_class = make_dataset(
            root_path,
            annotation_path,
            subset,
            n_samples_for_each_video,
            sample_duration,
            dataset_name,
        )
        self.data = samples
        # Despite the name, this is the index -> class-name mapping.
        self.class_names = idx_to_class
        self.loader = get_loader

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self,index):
        """Return ``(clip, target)`` for the sample at ``index``.

        ``clip`` is the stack of per-frame tensors with its first two axes
        swapped; ``target`` is the sample dict itself.
        """
        sample = self.data[index]
        frames = self.loader(sample['video'], sample['frame_indices'])

        frame_tensors = []
        for frame in frames:
            # NOTE(review): ToTensor is project-local; the meaning of its
            # argument 1 is not visible here -- confirm.
            frame_tensors.append(ToTensor(1)(frame))

        # Stack along a new leading axis, then swap it with the next one
        # (presumably (T, C, H, W) -> (C, T, H, W); confirm ToTensor's layout).
        clip = torch.stack(frame_tensors, 0).permute(1, 0, 2, 3)
        return clip, sample
        
        
            

# Usage 

In [None]:
# For ActivityNet: build the dataset from the 'validation' subset.
# TODO(review): the author reported that this call does not work; the cause
# is not recorded here -- capture the traceback and investigate.
training_data = VideoDataset(
    root_path='ActivityNet_JPG/',
    annotation_path='activity_net.v1-3.min.json',
    subset='validation',
)

# For Kinetics
# training_data = VideoDataset('Kinetics_JPG/','3drKinetics.json','training',dataset_name='kinetics')

In [None]:
# Preview the first few samples only; the full list holds one dict (with its
# frame index list) per sample and would flood the cell output.
training_data.data[:5]

In [None]:
# One clip per batch, shuffled, loaded by 4 worker processes into pinned memory.
train_loader = torch.utils.data.DataLoader(
    dataset=training_data,
    batch_size=1, shuffle=True,
    num_workers=4, pin_memory=True,
)

In [None]:
# Fetch a single batch to check what the loader yields.
for batch in train_loader:
    inputs, targets = batch
    print(inputs)
    print(targets)
    break