In [1]:
import json
import mmcv
from pathlib import Path
import os
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from itertools import chain

In [3]:
def scandir(root, recursive=False, suffix=['.jpg','.png','.jpeg']):
    root = Path(root)
    img_list = []
    pattern = '*'
    if recursive:
        pattern = '**/*'
    for sfx in suffix:
        img_list += list(root.glob(pattern+sfx))
    return img_list

In [4]:
def generate_ann_file(positive_dirs, negative_dirs, root='/', split_eval_ratio=0., class_balance=False):
    ann = {'metainfo':{
            'classes': ('negative', 'positive'), # the category index of 'cat' is 0 and 'dog' is 1.
        }}
    pos_list, neg_list = [],[]
    for d in positive_dirs:
        img_list = scandir(d, recursive=True)
        pos_list += [{'img_path': str(Path(p).relative_to(root)), 'gt_label': 1} for p in img_list]
    np.random.shuffle(pos_list)
    np.random.shuffle(pos_list)
    for d in negative_dirs:
        img_list = scandir(d, recursive=True)
        neg_list += [{'img_path': str(Path(p).relative_to(root)), 'gt_label': 0} for p in img_list]
    np.random.shuffle(neg_list)
    np.random.shuffle(neg_list)
        
    assert len(pos_list)>2 and len(neg_list)>2
    # print(len(pos_list), len(neg_list))
    
    if split_eval_ratio > 0:
        pos_eval_num = max(1, int(len(pos_list)*split_eval_ratio))
        neg_eval_num = max(1, int(len(neg_list)*split_eval_ratio))
        pos_eval_list = pos_list[-pos_eval_num:]
        pos_list = pos_list[:-pos_eval_num]
        neg_eval_list = neg_list[-neg_eval_num:]
        neg_list = neg_list[:-neg_eval_num]
        
    # repeat img data, as video data is more and similar
    pos_img_list = [p for p in pos_list if not 'video' in p['img_path']]
    pos_video_list = [p for p in pos_list if 'video' in p['img_path']]
    if len(pos_img_list)>0:
        pos_list = pos_img_list*max(1, int(len(pos_video_list)/len(pos_img_list)+0.5)) + pos_video_list
    
    neg_img_list = [p for p in neg_list if not 'video' in p['img_path']]
    neg_video_list = [p for p in neg_list if 'video' in p['img_path']]
    if len(neg_img_list)>0:
        neg_list = neg_img_list*max(1, int(len(neg_video_list)/len(neg_img_list)+0.5)) + neg_video_list
    # print(len(pos_list), len(pos_video_list))
    # print(len(neg_list), len(neg_video_list))
        
    if class_balance:
        if len(pos_list) > len(neg_list):
            ratio = max(1, int(np.ceil(len(pos_list)/len(neg_list))))
            neg_list *= ratio
        else:
            ratio = max(1, int(np.ceil(len(neg_list)/len(pos_list))))
            pos_list *= ratio
    # print(len(pos_list), len(neg_list))
    
    ann['data_list'] = pos_list + neg_list
    np.random.shuffle(ann['data_list'])
    if split_eval_ratio > 0:
        eval_ann = {'metainfo':{'classes': ('negative', 'positive')}}
        eval_ann['data_list'] = pos_eval_list + neg_eval_list
        np.random.shuffle(eval_ann['data_list'])
        return ann, eval_ann
    else:
        return ann

In [209]:
# playground
data_root = '/root/autodl-tmp/Data/larp/'
pos_dirs = ['/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/positive_video/',
            '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/positive/',
            # '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/neutral/',
            '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/playground/positive_video/',
]

_neg_roots = [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/reverse2/'
]
neg_dirs = list(chain(*[list(Path(d).glob('**/')) for d in _neg_roots]))
neg_dirs += ['/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/negative/',]

ann, eval_ann = generate_ann_file(pos_dirs, neg_dirs, root=data_root, split_eval_ratio=0.1, class_balance=True)
with open(os.path.join(data_root, 'playground_ann_train.json'), 'w', encoding='utf-8') as f:
    json.dump(ann, f, ensure_ascii=False, indent=4)
with open(os.path.join(data_root, 'playground_ann_val.json'), 'w', encoding='utf-8') as f:
    json.dump(eval_ann, f, ensure_ascii=False, indent=4)

In [210]:
# reverse2
data_root = '/root/autodl-tmp/Data/larp/'
pos_dirs = ['/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/positive_video/',
            # '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/positive_hard_video/',
            '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/reverse2/positive_video/',
            # '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/reverse2/positive_hard_video/',
]

_neg_roots = [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/playground/'
]
neg_dirs = list(chain(*[list(Path(d).glob('**/')) for d in _neg_roots]))
neg_dirs += [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/negative_video/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/negative/',
]

ann, eval_ann = generate_ann_file(pos_dirs, neg_dirs, root=data_root, split_eval_ratio=0.1, class_balance=True)
with open(os.path.join(data_root, 'reverse2_ann_train.json'), 'w', encoding='utf-8') as f:
    json.dump(ann, f, ensure_ascii=False, indent=4)
with open(os.path.join(data_root, 'reverse2_ann_val.json'), 'w', encoding='utf-8') as f:
    json.dump(eval_ann, f, ensure_ascii=False, indent=4)

In [211]:
# doorplate
data_root = '/root/autodl-tmp/Data/larp/'
pos_dirs = ['/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_doorplate/',
            '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_video/',
]

_neg_roots = [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/playground/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/reverse2/',
]
neg_dirs = list(chain(*[list(Path(d).glob('**/')) for d in _neg_roots]))
neg_dirs += [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/negative/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/other_doorplate/',
]

ann, eval_ann = generate_ann_file(pos_dirs, neg_dirs, root=data_root, split_eval_ratio=0.1, class_balance=True)
with open(os.path.join(data_root, 'doorplate_ann_train.json'), 'w', encoding='utf-8') as f:
    json.dump(ann, f, ensure_ascii=False, indent=4)
with open(os.path.join(data_root, 'doorplate_ann_val.json'), 'w', encoding='utf-8') as f:
    json.dump(eval_ann, f, ensure_ascii=False, indent=4)

In [7]:
# doorplate 504
data_root = '/root/autodl-tmp/Data/larp/'
pos_dirs = [
    # '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_doorplate/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_video/',
]

_neg_roots = [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/playground/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/playground/',
    '/root/autodl-tmp/Data/larp/larp_sjtu_sunny/reverse2/',
]
neg_dirs = list(chain(*[list(Path(d).glob('**/')) for d in _neg_roots]))
neg_dirs += [
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/negative/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/other_doorplate/',
    '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_doorplate/',
]

ann, eval_ann = generate_ann_file(pos_dirs, neg_dirs, root=data_root, split_eval_ratio=0.1, class_balance=True)
with open(os.path.join(data_root, 'doorplate504_ann_train.json'), 'w', encoding='utf-8') as f:
    json.dump(ann, f, ensure_ascii=False, indent=4)
with open(os.path.join(data_root, 'doorplate504_ann_val.json'), 'w', encoding='utf-8') as f:
    json.dump(eval_ann, f, ensure_ascii=False, indent=4)

In [5]:
# doorplate aspect ratio aug
data_root = '/home/pickittwice/data/larp/larp/'
pos_dirs = [
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/positive_doorplate/',
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/positive_video/',
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/positive_doorplate_cropaug/',
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/positive_video_cropaug/',
    '/home/pickittwice/data/larp/larp/larp_sjtu_sunny/doorplate504_add/',
]

_neg_roots = [
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/reverse2/',
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/playground/',
    '/home/pickittwice/data/larp/larp/larp_sjtu_sunny/playground/',
    '/home/pickittwice/data/larp/larp/larp_sjtu_sunny/reverse2/',
]
neg_dirs = list(chain(*[list(Path(d).glob('**/')) for d in _neg_roots]))
neg_dirs += [
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/negative/',
    '/home/pickittwice/data/larp/larp/sjtu_larp_cloudy/doorplate/other_doorplate/',
]

ann, eval_ann = generate_ann_file(pos_dirs, neg_dirs, root=data_root, split_eval_ratio=0.1, class_balance=True)
with open(os.path.join(data_root, 'doorplate_addaug_ann_train.json'), 'w', encoding='utf-8') as f:
    json.dump(ann, f, ensure_ascii=False, indent=4)
with open(os.path.join(data_root, 'doorplate_addaug_ann_val.json'), 'w', encoding='utf-8') as f:
    json.dump(eval_ann, f, ensure_ascii=False, indent=4)

In [None]:
'/root/autodl-tmp/larp/',

In [205]:
len(ann['data_list'])

15587

In [203]:
len(eval_ann['data_list'])

314

In [63]:
neg_video_dirs

['/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/doorplate/positive_video',
 '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/negative_video',
 '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/positive_hard_video',
 '/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/reverse2/positive_video']

In [38]:
ann = generate_ann_file(pos_dirs, neg_dirs, root='/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/')

In [None]:
ann

In [18]:
a = Path('/root/autodl-tmp/Data/larp/sjtu_larp_cloudy/')

In [22]:
scandir(a, recursive=False)

[]

In [7]:
str(a.relative_to('/root'))

'autodl-tmp/Data/larp/sjtu_larp_cloudy'