In [1]:
import os
import shutil

from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor, as_completed

In [2]:
# Label name -> YOLO class index. Indices span 0-10 (11 classes); several
# label names share one index.
yolo_classes = {
    "ACTINO": 9,
    "CC": 8,
    "VIRUS": 10,
    "FUNGI": 6,
    "TRI": 7,
    "AGC_A": 4,
    "AGC_B": 4,
    "EC": 5,
    "HSIL_B": 2,
    "HSIL_M": 2,
    "HSIL_S": 2,
    "SCC_G": 3,
    "ASCUS": 0,
    "LSIL_F": 0,
    "LSIL_E": 1,
    "SCC_R": 3,
}

# Label names that exclude an annotation file from selection.
not_choose = [
    "AGC_A", "AGC_B",
    "HSIL_B", "HSIL_M", "HSIL_S",
    "FUNGI",
    "SCC_G", "SCC_R",
]

# Class indices of the excluded labels, in the same order as `not_choose`
# (duplicates are kept because several names map to the same index).
not_choose_index = list(map(yolo_classes.__getitem__, not_choose))

In [3]:
def scan_files(directory, prefix=None, postfix=None):
    """Recursively collect the paths of files found under ``directory``.

    Filtering rules:
      * a truthy ``postfix`` wins: only names ending with it are kept and
        ``prefix`` is ignored;
      * otherwise a truthy ``prefix`` keeps only names starting with it;
      * with neither filter, every file is kept.

    Returns a list of paths, each built by joining the walked directory with
    the file name, in ``os.walk`` order.
    """
    # Pick the name predicate once instead of re-testing the filters per file.
    if postfix:
        def keep(name):
            return name.endswith(postfix)
    elif prefix:
        def keep(name):
            return name.startswith(prefix)
    else:
        def keep(name):
            return True

    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if keep(name)
    ]

def choose(txt_name):
    """Tell whether an annotation file contains none of the excluded classes.

    Every non-blank line is read as whitespace-separated fields whose first
    field is an integer class index. The function returns ``False`` as soon as
    one index is found in the module-level ``not_choose_index`` and ``True``
    otherwise (including for an empty file).

    Raises:
        ValueError: if the first field of a non-blank line is not an integer.
        OSError: if the file cannot be opened.
    """
    with open(txt_name, 'r') as f:
        # Iterate lazily; there is no need to load every line up front.
        for line in f:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines carry no label; indexing
                # fields[0] on them would raise IndexError.
                continue
            if int(fields[0]) in not_choose_index:
                return False
    return True

def move(txt, dst_dir):
    """Move a selected label file and its paired ``.bmp`` image into ``dst_dir``.

    The call is a no-op when ``txt`` ends with ``_hls09.txt`` (per the original
    note these are the "hl changed" copies and only original images are
    wanted) or when ``choose(txt)`` returns a falsy value.

    Args:
        txt: path of the label file; the image path is the same path with the
            extension replaced by ``.bmp``.
        dst_dir: destination directory; created if it does not exist.

    Raises:
        FileNotFoundError: if the paired image does not exist. Nothing is
            moved in that case, so the label is never separated from its image.
    """
    if txt.endswith("_hls09.txt") or not choose(txt):
        return

    bmp = os.path.splitext(txt)[0] + ".bmp"
    if not os.path.lexists(bmp):
        # Check before touching anything: failing after the label has been
        # moved would leave a half-moved pair behind.
        raise FileNotFoundError("paired image not found: %s" % bmp)

    # shutil.move() to a path that does not exist renames the source to that
    # path instead of placing it inside a directory, so later moves would
    # overwrite earlier ones. Make sure the destination is a real directory.
    os.makedirs(dst_dir, exist_ok=True)

    shutil.move(txt, dst_dir)
    shutil.move(bmp, dst_dir)

def batch_move(txts, dst_dir):
    """Call ``move`` on each label path in ``txts``, in order, with ``dst_dir``.

    An exception raised by ``move`` propagates and stops the rest of the batch.
    """
    for label_path in txts:
        move(label_path, dst_dir)
        
def main(src_dir, dst_dir, max_workers=4, batch_size=1000):
    """Move every selected label/image pair from ``src_dir`` into ``dst_dir``.

    All ``.txt`` files under ``src_dir`` are collected with ``scan_files``,
    split into consecutive batches and handed to ``batch_move`` in a pool of
    worker processes. Progress is printed as each batch finishes.

    Args:
        src_dir: directory scanned recursively for ``.txt`` label files.
        dst_dir: destination directory; created if it does not exist.
        max_workers: number of worker processes in the pool.
        batch_size: number of label files handed to each job (must be >= 1).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

    print("start work")
    txts = scan_files(src_dir, postfix=".txt")
    print("total files", len(txts))

    # Create the destination once up front: shutil.move() to a missing path
    # would rename the first file to that path instead of moving it into a
    # directory.
    os.makedirs(dst_dir, exist_ok=True)

    # The context manager shuts the pool down even if something below raises.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Each job gets ITS OWN slice. Submitting the full list to every job
        # makes all workers process (and race on) the same files.
        tasks = [
            executor.submit(batch_move, txts[i : i + batch_size], dst_dir)
            for i in range(0, len(txts), batch_size)
        ]

        job_count = len(tasks)
        for future in as_completed(tasks):
            job_count -= 1
            # Surface worker failures instead of dropping them silently, but
            # keep going so one bad batch does not abort the whole run.
            error = future.exception()
            if error is not None:
                print("One Job Failed (%r), Remaining Job Count: %s" % (error, job_count))
            else:
                print("One Job Done, Remaining Job Count: %s" % (job_count))

In [None]:
# Directory that main() scans recursively for .txt label files.
src_dir = "/home/hdd_array0/batch6_1216/train"
# Directory that the selected label/image pairs are moved into.
dst_dir = "/home/hdd_array0/batch6_1216/train_selected"

# NOTE: files are moved (not copied) out of src_dir, so running this cell
# again operates on whatever is left in src_dir.
main(src_dir, dst_dir)

start work
total files 1910769
One Job Done, Remaining Job Count: 1910
One Job Done, Remaining Job Count: 1909
One Job Done, Remaining Job Count: 1908
One Job Done, Remaining Job Count: 1907
One Job Done, Remaining Job Count: 1906
One Job Done, Remaining Job Count: 1905
One Job Done, Remaining Job Count: 1904
One Job Done, Remaining Job Count: 1903
One Job Done, Remaining Job Count: 1902
One Job Done, Remaining Job Count: 1901
One Job Done, Remaining Job Count: 1900
One Job Done, Remaining Job Count: 1899
One Job Done, Remaining Job Count: 1898
One Job Done, Remaining Job Count: 1897
One Job Done, Remaining Job Count: 1896
One Job Done, Remaining Job Count: 1895
One Job Done, Remaining Job Count: 1894
One Job Done, Remaining Job Count: 1893
One Job Done, Remaining Job Count: 1892
One Job Done, Remaining Job Count: 1891
One Job Done, Remaining Job Count: 1890
One Job Done, Remaining Job Count: 1889
One Job Done, Remaining Job Count: 1888
One Job Done, Remaining Job Count: 1887
One Job D

One Job Done, Remaining Job Count: 1705
One Job Done, Remaining Job Count: 1704
One Job Done, Remaining Job Count: 1703
One Job Done, Remaining Job Count: 1702
One Job Done, Remaining Job Count: 1701
One Job Done, Remaining Job Count: 1700
One Job Done, Remaining Job Count: 1699
One Job Done, Remaining Job Count: 1698
One Job Done, Remaining Job Count: 1697
One Job Done, Remaining Job Count: 1696
One Job Done, Remaining Job Count: 1695
One Job Done, Remaining Job Count: 1694
One Job Done, Remaining Job Count: 1693
One Job Done, Remaining Job Count: 1692
One Job Done, Remaining Job Count: 1691
One Job Done, Remaining Job Count: 1690
One Job Done, Remaining Job Count: 1689
One Job Done, Remaining Job Count: 1688
One Job Done, Remaining Job Count: 1687
One Job Done, Remaining Job Count: 1686
One Job Done, Remaining Job Count: 1685
One Job Done, Remaining Job Count: 1684
One Job Done, Remaining Job Count: 1683
One Job Done, Remaining Job Count: 1682
One Job Done, Remaining Job Count: 1681


One Job Done, Remaining Job Count: 1500
One Job Done, Remaining Job Count: 1499
One Job Done, Remaining Job Count: 1498
One Job Done, Remaining Job Count: 1497
One Job Done, Remaining Job Count: 1496
One Job Done, Remaining Job Count: 1495
One Job Done, Remaining Job Count: 1494
One Job Done, Remaining Job Count: 1493
One Job Done, Remaining Job Count: 1492
One Job Done, Remaining Job Count: 1491
One Job Done, Remaining Job Count: 1490
One Job Done, Remaining Job Count: 1489
One Job Done, Remaining Job Count: 1488
One Job Done, Remaining Job Count: 1487
One Job Done, Remaining Job Count: 1486
One Job Done, Remaining Job Count: 1485
One Job Done, Remaining Job Count: 1484
One Job Done, Remaining Job Count: 1483
One Job Done, Remaining Job Count: 1482
One Job Done, Remaining Job Count: 1481
One Job Done, Remaining Job Count: 1480
One Job Done, Remaining Job Count: 1479
One Job Done, Remaining Job Count: 1478
One Job Done, Remaining Job Count: 1477
One Job Done, Remaining Job Count: 1476


One Job Done, Remaining Job Count: 1295
One Job Done, Remaining Job Count: 1294
One Job Done, Remaining Job Count: 1293
One Job Done, Remaining Job Count: 1292
One Job Done, Remaining Job Count: 1291
One Job Done, Remaining Job Count: 1290
One Job Done, Remaining Job Count: 1289
One Job Done, Remaining Job Count: 1288
One Job Done, Remaining Job Count: 1287
One Job Done, Remaining Job Count: 1286
One Job Done, Remaining Job Count: 1285
One Job Done, Remaining Job Count: 1284
One Job Done, Remaining Job Count: 1283
One Job Done, Remaining Job Count: 1282
One Job Done, Remaining Job Count: 1281
One Job Done, Remaining Job Count: 1280
One Job Done, Remaining Job Count: 1279
One Job Done, Remaining Job Count: 1278
One Job Done, Remaining Job Count: 1277
One Job Done, Remaining Job Count: 1276
One Job Done, Remaining Job Count: 1275
One Job Done, Remaining Job Count: 1274
One Job Done, Remaining Job Count: 1273
One Job Done, Remaining Job Count: 1272
One Job Done, Remaining Job Count: 1271


One Job Done, Remaining Job Count: 1090
One Job Done, Remaining Job Count: 1089
One Job Done, Remaining Job Count: 1088
One Job Done, Remaining Job Count: 1087
One Job Done, Remaining Job Count: 1086
One Job Done, Remaining Job Count: 1085
One Job Done, Remaining Job Count: 1084
One Job Done, Remaining Job Count: 1083
One Job Done, Remaining Job Count: 1082
One Job Done, Remaining Job Count: 1081
One Job Done, Remaining Job Count: 1080
One Job Done, Remaining Job Count: 1079
One Job Done, Remaining Job Count: 1078
One Job Done, Remaining Job Count: 1077
One Job Done, Remaining Job Count: 1076
One Job Done, Remaining Job Count: 1075
One Job Done, Remaining Job Count: 1074
One Job Done, Remaining Job Count: 1073
One Job Done, Remaining Job Count: 1072
One Job Done, Remaining Job Count: 1071
One Job Done, Remaining Job Count: 1070
One Job Done, Remaining Job Count: 1069
One Job Done, Remaining Job Count: 1068
One Job Done, Remaining Job Count: 1067
One Job Done, Remaining Job Count: 1066


One Job Done, Remaining Job Count: 881
One Job Done, Remaining Job Count: 880
One Job Done, Remaining Job Count: 879
One Job Done, Remaining Job Count: 878
One Job Done, Remaining Job Count: 877
One Job Done, Remaining Job Count: 876
One Job Done, Remaining Job Count: 875
One Job Done, Remaining Job Count: 874
One Job Done, Remaining Job Count: 873
One Job Done, Remaining Job Count: 872
One Job Done, Remaining Job Count: 871
One Job Done, Remaining Job Count: 870
One Job Done, Remaining Job Count: 869
One Job Done, Remaining Job Count: 868
One Job Done, Remaining Job Count: 867
One Job Done, Remaining Job Count: 866
One Job Done, Remaining Job Count: 865
One Job Done, Remaining Job Count: 864
One Job Done, Remaining Job Count: 863
One Job Done, Remaining Job Count: 862
One Job Done, Remaining Job Count: 861
One Job Done, Remaining Job Count: 860
One Job Done, Remaining Job Count: 859
One Job Done, Remaining Job Count: 858
One Job Done, Remaining Job Count: 857
One Job Done, Remaining J

One Job Done, Remaining Job Count: 670
One Job Done, Remaining Job Count: 669
One Job Done, Remaining Job Count: 668
One Job Done, Remaining Job Count: 667
One Job Done, Remaining Job Count: 666
One Job Done, Remaining Job Count: 665
One Job Done, Remaining Job Count: 664
One Job Done, Remaining Job Count: 663
One Job Done, Remaining Job Count: 662
One Job Done, Remaining Job Count: 661
One Job Done, Remaining Job Count: 660
One Job Done, Remaining Job Count: 659
One Job Done, Remaining Job Count: 658
One Job Done, Remaining Job Count: 657
One Job Done, Remaining Job Count: 656
One Job Done, Remaining Job Count: 655
One Job Done, Remaining Job Count: 654
One Job Done, Remaining Job Count: 653
One Job Done, Remaining Job Count: 652
One Job Done, Remaining Job Count: 651
One Job Done, Remaining Job Count: 650
One Job Done, Remaining Job Count: 649
One Job Done, Remaining Job Count: 648
One Job Done, Remaining Job Count: 647
One Job Done, Remaining Job Count: 646
One Job Done, Remaining J

One Job Done, Remaining Job Count: 458
One Job Done, Remaining Job Count: 457
One Job Done, Remaining Job Count: 456
One Job Done, Remaining Job Count: 455
One Job Done, Remaining Job Count: 454
One Job Done, Remaining Job Count: 453
One Job Done, Remaining Job Count: 452
One Job Done, Remaining Job Count: 451
One Job Done, Remaining Job Count: 450
One Job Done, Remaining Job Count: 449
One Job Done, Remaining Job Count: 448
One Job Done, Remaining Job Count: 447
One Job Done, Remaining Job Count: 446
One Job Done, Remaining Job Count: 445
One Job Done, Remaining Job Count: 444
One Job Done, Remaining Job Count: 443
One Job Done, Remaining Job Count: 442
One Job Done, Remaining Job Count: 441
One Job Done, Remaining Job Count: 440
One Job Done, Remaining Job Count: 439
One Job Done, Remaining Job Count: 438
One Job Done, Remaining Job Count: 437
One Job Done, Remaining Job Count: 436
One Job Done, Remaining Job Count: 435
One Job Done, Remaining Job Count: 434
One Job Done, Remaining J

One Job Done, Remaining Job Count: 247
One Job Done, Remaining Job Count: 246
One Job Done, Remaining Job Count: 245
One Job Done, Remaining Job Count: 244
One Job Done, Remaining Job Count: 243
One Job Done, Remaining Job Count: 242
One Job Done, Remaining Job Count: 241
One Job Done, Remaining Job Count: 240
One Job Done, Remaining Job Count: 239
One Job Done, Remaining Job Count: 238
One Job Done, Remaining Job Count: 237
One Job Done, Remaining Job Count: 236
One Job Done, Remaining Job Count: 235
One Job Done, Remaining Job Count: 234
One Job Done, Remaining Job Count: 233
One Job Done, Remaining Job Count: 232
One Job Done, Remaining Job Count: 231
One Job Done, Remaining Job Count: 230
One Job Done, Remaining Job Count: 229
One Job Done, Remaining Job Count: 228
One Job Done, Remaining Job Count: 227
One Job Done, Remaining Job Count: 226
One Job Done, Remaining Job Count: 225
One Job Done, Remaining Job Count: 224
One Job Done, Remaining Job Count: 223
One Job Done, Remaining J

One Job Done, Remaining Job Count: 35
One Job Done, Remaining Job Count: 34
One Job Done, Remaining Job Count: 33
One Job Done, Remaining Job Count: 32
One Job Done, Remaining Job Count: 31
One Job Done, Remaining Job Count: 30
One Job Done, Remaining Job Count: 29
One Job Done, Remaining Job Count: 28
One Job Done, Remaining Job Count: 27
One Job Done, Remaining Job Count: 26
One Job Done, Remaining Job Count: 25
One Job Done, Remaining Job Count: 24
One Job Done, Remaining Job Count: 23
One Job Done, Remaining Job Count: 22
One Job Done, Remaining Job Count: 21
One Job Done, Remaining Job Count: 20
One Job Done, Remaining Job Count: 19
One Job Done, Remaining Job Count: 18
One Job Done, Remaining Job Count: 17
One Job Done, Remaining Job Count: 16
One Job Done, Remaining Job Count: 15
One Job Done, Remaining Job Count: 14
One Job Done, Remaining Job Count: 13
One Job Done, Remaining Job Count: 12
One Job Done, Remaining Job Count: 11
One Job Done, Remaining Job Count: 10
One Job Done