In [31]:
import numpy as np
import cv2 

def gen_he_image(image):
    ''' generate histogram equalization image
        Input:
            image - cv2.imread
        Output:
            image_he - image of histogram equalization
    '''
    # convert to gray scale before doing a histogram equalization
    img_gray = cv2.cvtColor( image, cv2.COLOR_BGR2GRAY)
    # doing a histogram equalization
    image_he = cv2.equalizeHist(img_gray)

    return image_he

In [32]:
import pandas as pd
mapping = pd.read_excel('liver-ultrasound-detection-unzip/mapping.xlsx')


In [74]:
mapping['assign'] = ''
name_list = ['folk', 'o', 'thee', 'bb', 'diw', 'jj', 'kp', 'tc', 'ton']
for name, i in enumerate(range(0,17200,2150)):
    mapping.loc[i:i+2150,'assign'] = name_list[name]

In [75]:
mapping['assign'][mapping['assign'] == ''] = name_list[-1]

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  mapping['assign'][mapping['assign'] == ''] = name_list[-1]


In [76]:
mapping

Unnamed: 0,Image File,Annotation File,Source,Type,assign
0,24084.jpg,24084.txt,machine,machine_positive,folk
1,97528.jpg,97528.txt,machine,machine_positive,folk
2,54563.jpg,54563.txt,machine,machine_positive,folk
3,115211.jpg,115211.txt,machine,machine_positive,folk
4,71913.jpg,71913.txt,machine,machine_positive,folk
...,...,...,...,...,...
19341,1160.jpg,,mobile,mobile_negative,ton
19342,153.jpg,,mobile,mobile_negative,ton
19343,130705.jpg,,mobile,mobile_negative,ton
19344,812.jpg,,mobile,mobile_negative,ton


In [1]:
import os 
from tqdm import tqdm

for i in tqdm(os.listdir('pre_hisep/images/val/')):
    src_path = 'pre_hisep/images/val/' + i 
    des_path = 'pre_hisep/images/val/' + 'hiseq' + i[2:]
    !mv {src_path} {des_path}
for i in tqdm(os.listdir('pre_hisep/images/train/')):
    src_path = 'pre_hisep/images/train/' + i
    des_path = 'pre_hisep/images/train/' + 'hiseq' + i[2:]
    !mv {src_path} {des_path}

100%|██████████| 4899/4899 [09:42<00:00,  8.40it/s]
100%|██████████| 14448/14448 [28:39<00:00,  8.40it/s]


In [95]:
image_ass = mapping['Image File'][mapping['assign']== 'jj']


In [96]:
print(len(image_ass))

2150


In [97]:
print(image_ass)

10750    146857.jpg
10751     39013.jpg
10752     94781.jpg
10753     85854.jpg
10754     49292.jpg
            ...    
12895    141632.jpg
12896     52160.jpg
12897     22487.jpg
12898     12544.jpg
12899     65914.jpg
Name: Image File, Length: 2150, dtype: object


In [98]:
os.listdir('liver-ultrasound-detection-unzip/'+ folder +'/' + folder + '/images')

['136759.jpg',
 '97719.jpg',
 '97310.jpg',
 '94.jpg',
 '149559.jpg',
 '92764.jpg',
 '9602.jpg',
 '34445.jpg',
 '136999.jpg',
 '14124.jpg',
 '90616.jpg',
 '27800.jpg',
 '47600.jpg',
 '127681.jpg',
 '117397.jpg',
 '66687.jpg',
 '111958.jpg',
 '138253.jpg',
 '95664.jpg',
 '22982.jpg',
 '28982.jpg',
 '6339.jpg',
 '11922.jpg',
 '66428.jpg',
 '134183.jpg',
 '16803.jpg',
 '34184.jpg',
 '117440.jpg',
 '98627.jpg',
 '145267.jpg',
 '15095.jpg',
 '137836.jpg',
 '26118.jpg',
 '2370.jpg',
 '100285.jpg',
 '112339.jpg',
 '70015.jpg',
 '54582.jpg',
 '107075.jpg',
 '143562.jpg',
 '23742.jpg',
 '86132.jpg',
 '99701.jpg',
 '73175.jpg',
 '138052.jpg',
 '140249.jpg',
 '21418.jpg',
 '133593.jpg',
 '128426.jpg',
 '65558.jpg',
 '48565.jpg',
 '29159.jpg',
 '40266.jpg',
 '73685.jpg',
 '123554.jpg',
 '11700.jpg',
 '17192.jpg',
 '75324.jpg',
 '71275.jpg',
 '41648.jpg',
 '10704.jpg',
 '112153.jpg',
 '49236.jpg',
 '83598.jpg',
 '114777.jpg',
 '2240.jpg',
 '72612.jpg',
 '44900.jpg',
 '54171.jpg',
 '25303.jpg',
 '987

In [99]:
image_ass = [i.strip() for i in image_ass]

In [100]:
from tqdm import tqdm
import os

for folder in ['val', 'train']:
    for img in tqdm(os.listdir('liver-ultrasound-detection-unzip/'+ folder +'/' + folder + '/images')): #list image in each folder

        if img in image_ass:
            
            src_img = 'liver-ultrasound-detection-unzip/'+ folder +'/' + folder + '/images'+'/'+ img #source of image/ Path
            #if mapping['Source'][mapping['Image File'] == img].values[0] == 'machine':
            image = cv2.imread(src_img)
            # Generate the flare and apply it to the image
            hiseq = gen_he_image(image)
            cv2.imwrite('pre_hisep/images/' + folder + '/he' + img, hiseq)
    
            if ~mapping['Annotation File'][mapping['Image File'] == img].isna().values[0]: #check annotate
                src_label = 'liver-ultrasound-detection-unzip/'+ folder +'/' + folder + '/annotations' + '/' + img[:-4] + '.txt' #last annotate
                des_label = 'pre_hisep/labels/' + folder + '/hiseq' + img[:-4] + '.txt' #copy anno
                !cp {src_label} {des_label}
    
            #des_img = 'data/data_preprocess/images/' + folder + '/' + img 
            #!cp {src_img} {des_img}
    
            #if ~mapping['Annotation File'][mapping['Image File'] == img].isna().values[0]:
                #src_label = 'data/liver-ultrasound-detection-unzip/labels/' + folder + '/' + img[:-4] + '.txt'
                #des_label = 'data/data_preprocess/labels/' + folder + '/' + img[:-4] + '.txt'
                #!cp {src_label} {des_label}

100%|██████████| 4898/4898 [00:17<00:00, 285.22it/s]
100%|██████████| 14448/14448 [01:03<00:00, 227.69it/s]


In [102]:
len(os.listdir('pre_hisep/images/train'))

14448

In [103]:
len(os.listdir('pre_hisep/images/val'))

4899

In [104]:
import glob

In [105]:
len(glob.glob('pre_hisep/images/val/*.jpg'))

4898

In [106]:
len(os.listdir('pre_hisep/labels/train'))

7222

In [107]:
len(os.listdir('pre_hisep/labels/val'))

2446

In [110]:
len(os.listdir('liver-ultrasound-detection-unzip/val/val/annotations'))

2445

In [111]:
len(glob.glob('pre_hisep/labels/val/*.txt'))

2445

In [112]:
for img in tqdm(os.listdir('liver-ultrasound-detection-unzip/test/test/images')): #list image in each folder

    src_img = 'liver-ultrasound-detection-unzip/test/test/images'+'/'+ img #source of image/ Path
    #if mapping['Source'][mapping['Image File'] == img].values[0] == 'machine':
    image = cv2.imread(src_img)
    # Generate the flare and apply it to the image
    hiseq = gen_he_image(image)
    cv2.imwrite('pre_hisep/images/' + 'test/' + img, hiseq),

100%|██████████| 5153/5153 [18:29<00:00,  4.64it/s]
