In [1]:
import cv2
import scipy.io as sio
import os

# Convert the datasets to the same format as DUTS-TR

* DUTS-TR uses the following directory layout:

```
.
├── DUTS-TR-Image
├── DUTS-TR-Mask
└── train_pair.lst
```

* Each line of *train_pair.lst* contains one pair of relative paths, `img_path label_path`, separated by a single space:

```
DUTS-TR-Image/ILSVRC2012_test_00000004.jpg DUTS-TR-Mask/ILSVRC2012_test_00000004.png
DUTS-TR-Image/ILSVRC2012_test_00000018.jpg DUTS-TR-Mask/ILSVRC2012_test_00000018.png
DUTS-TR-Image/ILSVRC2012_test_00000019.jpg DUTS-TR-Mask/ILSVRC2012_test_00000019.png
.
.
.
```

# HKUIS

* HKU-IS provides its train/val/test splits as *.mat* files, which normally require MATLAB to read
* However, `scipy.io` supports file I/O for *.mat* files, so MATLAB is not needed
* Refer to [File IO (scipy.io) — SciPy v1.7.1 Manual](https://docs.scipy.org/doc/scipy/tutorial/io.html#matlab-files)
* Let's make a *.lst* file based on those *.mat* files

In [2]:
def run_hkuis(data_dir='/data/swook/dataset/hkuis/HKU-IS', lst_dir=None):
    """Convert the HKU-IS ``*.mat`` split files into DUTS-TR style ``*.lst`` files.

    For each of ``trainImgSet.mat``, ``valImgSet.mat`` and ``testImgSet.mat``
    found in ``data_dir``, a list file with the same stem (e.g.
    ``trainImgSet.lst``) is written to ``lst_dir``. Every line has the form
    ``hkuis_imgs/<name> hkuis_gt/<name>``; the image and its ground truth share
    the same file name. All pairs are additionally appended, in
    train/val/test order, to ``unionImgSet.lst``.

    Args:
        data_dir: Directory containing the three ``*.mat`` files.
        lst_dir: Directory the ``*.lst`` files are written to. Defaults to
            ``data_dir``.

    Raises:
        OSError: If a ``*.mat`` file cannot be read or a list file cannot be
            written.
        KeyError: If a ``*.mat`` file does not contain a variable named after
            its own file stem.
    """
    if lst_dir is None:
        lst_dir = data_dir

    mat_names = dict(train='trainImgSet.mat', val='valImgSet.mat', test='testImgSet.mat')

    union_lst_path = os.path.join(lst_dir, 'unionImgSet.lst')

    # Context managers guarantee both files are closed even if loading or
    # indexing a .mat file raises part-way through.
    with open(union_lst_path, 'w') as union_lst_file:
        for mat_name in mat_names.values():
            mat_path = os.path.join(data_dir, mat_name)
            mat_name_wo_ext = os.path.splitext(mat_name)[0]
            lst_path = os.path.join(lst_dir, mat_name_wo_ext + '.lst')

            # Load (and look up the variable) before creating the per-split
            # list so a failure does not leave an empty .lst file behind.
            # The variable inside each .mat file is named after the file stem.
            entries = sio.loadmat(mat_path)[mat_name_wo_ext]

            with open(lst_path, 'w') as lst_file:
                for itm in entries:
                    # Each row wraps the file name two levels deep
                    # (presumably a MATLAB cell array of strings as loaded
                    # by scipy -- confirm against the dataset files).
                    img_gt_name = itm[0][0]
                    img_path = os.path.join('hkuis_imgs', img_gt_name)
                    gt_path = os.path.join('hkuis_gt', img_gt_name)
                    line = '{} {}\n'.format(img_path, gt_path)
                    lst_file.write(line)
                    union_lst_file.write(line)

# Run the HKU-IS conversion defined above (writes the .lst files to disk).
run_hkuis()

# MSRA-B

In [None]:
def run_msrab(src_lst_path='/data/swook/dataset/msra-b/MSRA-B/msrab.lst',
              dst_lst_path='/data/swook/dataset/msra-b/MSRA-B/msrab_compatible.lst',
              img_dir='', gt_dir=''):
    """Rewrite the MSRA-B pair list as a DUTS-TR style ``*.lst`` file.

    Each non-blank line of ``src_lst_path`` must hold exactly two
    whitespace-separated fields: an image name and a ground-truth name. The
    pairs are written to ``dst_lst_path`` as ``<img_path> <gt_path>``, one per
    line, separated by a single space.

    NOTE(review): the previous body of this function was an accidental copy of
    the HKU-IS conversion code. It referenced names that do not exist here
    and never wrote the parsed pairs. The directory prefixes intended for
    MSRA-B are not recoverable from this notebook, so they are exposed as
    ``img_dir`` / ``gt_dir`` and default to no prefix -- confirm the desired
    values against the dataset layout.

    Args:
        src_lst_path: Path of the original MSRA-B list file.
        dst_lst_path: Path of the list file to create (overwritten if present).
        img_dir: Relative directory prepended to every image name.
        gt_dir: Relative directory prepended to every ground-truth name.

    Raises:
        ValueError: If a non-blank line does not contain exactly two fields.
        OSError: If the source cannot be read or the destination written.
    """
    with open(src_lst_path, 'r') as f:
        src_lst = [x.strip() for x in f]

    # Parse and validate everything before opening the destination so that a
    # malformed source file never leaves a truncated output behind.
    pairs = []
    for line_no, src_itm in enumerate(src_lst, start=1):
        if not src_itm:
            continue  # tolerate blank lines (e.g. a trailing newline)
        fields = src_itm.split()
        if len(fields) != 2:
            raise ValueError(
                '{}:{}: expected "<image> <ground-truth>", got {!r}'.format(
                    src_lst_path, line_no, src_itm))
        im_name, gt_name = fields
        pairs.append((os.path.join(img_dir, im_name), os.path.join(gt_dir, gt_name)))

    with open(dst_lst_path, 'w') as f:
        for im_path, gt_path in pairs:
            f.write('{} {}\n'.format(im_path, gt_path))

# Run the MSRA-B conversion defined above.
run_msrab()