In [57]:
import pandas as pd
import os
import shutil

In [58]:
def list_all_folders(directory):
    """Return the names of the immediate sub-directories of ``directory``.

    Args:
        directory: Path of the directory to inspect (not searched recursively).

    Returns:
        list[str]: Sub-directory names (not full paths), with any backslash
        replaced by a forward slash. If anything goes wrong while listing
        (for example the directory does not exist) the error is printed and
        an empty list is returned.
    """
    try:
        subdirs = []
        # Walk the direct children and keep only those that are directories.
        for entry in os.listdir(directory):
            if not os.path.isdir(os.path.join(directory, entry)):
                continue
            subdirs.append(entry.replace('\\', '/'))
        return subdirs
    except Exception as e:
        print(f"發生錯誤：{e}")
        return []

In [59]:
def find_jpg_files(root_folder, foldername, extension):
    """Recursively list the files under ``root_folder/foldername`` with a given extension.

    Despite the name, the function is not limited to JPEG files: any suffix
    can be requested.

    Args:
        root_folder: Directory that contains ``foldername``.
        foldername: Name of the sub-folder to walk; also stored verbatim in
            the ``foldername`` column of the result.
        extension: File-name suffix to match, e.g. ``'.webp'``, or a tuple of
            suffixes. Matching ignores case on both the file name and the
            suffix.

    Returns:
        pandas.DataFrame: Columns ``filename``, ``full_path`` and
        ``foldername``, one row per matching file. ``full_path`` uses forward
        slashes only. The frame is empty (but keeps the three columns) when
        nothing matches.

    Side effects:
        Prints a blank line followed by ``"<foldername>: <shape>"``.
    """
    # File names are lower-cased before the comparison, so the suffix has to
    # be lower-cased as well; otherwise an upper-case suffix such as '.WEBP'
    # could never match anything.
    if isinstance(extension, str):
        suffixes = extension.lower()
    else:
        suffixes = tuple(ext.lower() for ext in extension)

    folder = os.path.join(root_folder, foldername).replace('\\', '/')
    file_list = []
    for dirpath, _, filenames in os.walk(folder):
        for filename in filenames:
            if filename.lower().endswith(suffixes):
                # Normalise Windows separators so paths look the same on every OS.
                full_path = os.path.join(dirpath, filename).replace('\\', '/')
                file_list.append({'filename': filename, 'full_path': full_path, 'foldername': foldername})

    # Build the frame once; explicit columns keep the schema stable when no file matched.
    df = pd.DataFrame(file_list, columns=['filename', 'full_path', 'foldername'])
    print('')
    print(f'{foldername}: {df.shape}')
    return df

In [60]:
def delete_file(files, path):
    """Delete every file under ``path`` whose name is listed in ``files``.

    Args:
        files: Multi-line string with one file name (not a path) per line.
            Blank lines and leading/trailing whitespace on each line are
            ignored.
        path: Root directory; it is searched recursively, and every file
            whose name matches a listed name is deleted, whichever
            sub-directory it is in.

    Returns:
        None. A summary (number deleted, number failed, and the list of
        failures with their error messages) is printed.
    """
    # Strip each line individually: a name pasted with trailing whitespace
    # would otherwise silently never match. A set gives O(1) lookups.
    wanted = {line.strip() for line in files.splitlines() if line.strip()}

    # Collect all matches first so nothing is removed while os.walk is still
    # iterating over the tree.
    file_paths = [
        os.path.join(root, filename)
        for root, _, filenames in os.walk(path)
        for filename in filenames
        if filename in wanted
    ]

    success_count = 0
    failed_files = []
    for full_path in file_paths:
        try:
            os.remove(full_path)
            success_count += 1
        except OSError as e:
            # Best effort: remember the failure and carry on with the rest.
            failed_files.append((full_path, str(e)))

    print(f"成功刪除 {success_count} 個檔案")
    print(f"刪除失敗 {len(failed_files)} 個檔案")
    if failed_files:
        print("失敗檔案列表：")
        for failed_file, error in failed_files:
            print(f"- {failed_file}: {error}")

In [61]:
def data_to_csv(df, output_dir='./data/test', batch_size=200):
    """Write the rows of ``df`` to CSV files, one series of batches per folder.

    For every distinct value in the ``foldername`` column the matching rows
    are sorted by ``filename`` (case-insensitively) and written in chunks of
    ``batch_size`` rows to ``<output_dir>/<folder>_<n>.csv``, where ``n``
    starts at 1.

    Args:
        df: DataFrame with at least the columns ``foldername`` and
            ``filename``.
        output_dir: Directory that receives the CSV files; created if it
            does not exist. Defaults to ``'./data/test'``.
        batch_size: Maximum number of rows per CSV file. Defaults to 200.

    Returns:
        None. Prints ``"<output file>:<row count>"`` for each file written.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    os.makedirs(output_dir, exist_ok=True)  # make sure the output directory exists

    for folder in df['foldername'].unique():
        df_tmp = df[df['foldername'] == folder]
        # Sort by file name, ignoring case.
        df_tmp = df_tmp.sort_values('filename', key=lambda x: x.str.lower()).reset_index(drop=True)

        # Step through the sorted rows one batch at a time; files are numbered from 1.
        for batch_number, start in enumerate(range(0, len(df_tmp), batch_size), start=1):
            batch_df = df_tmp.iloc[start:start + batch_size]

            # File name pattern: folder_1.csv, folder_2.csv, ...
            output_file = os.path.join(output_dir, f'{folder}_{batch_number}.csv')
            batch_df.to_csv(output_file, index=False)
            print(f'{output_file}:{batch_df.shape[0]}')

In [62]:
def delete_files_except(csv_path, images_folder, extension):
    """Delete the files in ``images_folder`` that are not listed in a CSV.

    Only entries directly inside ``images_folder`` (no recursion) whose name
    ends with ``extension`` are considered; everything else is left alone.

    Args:
        csv_path: CSV file without a header row; its first column holds the
            names of the files to keep.
        images_folder: Folder whose files are checked.
        extension: File-name suffix (case-sensitive) selecting which files
            are candidates for deletion, e.g. ``'.webp'``.

    Returns:
        None. Prints the number of keep-list entries read, then the number
        of files kept and deleted. Any error is printed rather than raised.
    """
    try:
        # Read the keep-list as text, assuming the CSV has no header row.
        keep_column = pd.read_csv(csv_path, header=None, dtype=str)[0]
        print(len(keep_column))

        # Normalise the entries before comparing: an entry with stray
        # whitespace would otherwise not match its file, and that file would
        # be deleted. A set makes each lookup O(1).
        files_to_keep = set(keep_column.dropna().str.strip())

        deleted_count = 0
        kept_count = 0

        # Visit every entry directly inside the folder.
        for filename in os.listdir(images_folder):
            if not filename.endswith(extension):
                continue  # only files with the requested extension are handled

            if filename in files_to_keep:
                kept_count += 1
                continue

            try:
                os.remove(os.path.join(images_folder, filename))
                deleted_count += 1
            except Exception as e:
                print(f"刪除 {filename} 時發生錯誤: {str(e)}")

        print(f"\n處理完成！")
        print(f"保留的圖片數量: {kept_count}")
        print(f"刪除的圖片數量: {deleted_count}")

    except Exception as e:
        print(f"發生錯誤: {str(e)}")

---

In [173]:
# List the immediate sub-folders of the folder being checked; the bare last
# expression displays the resulting list.
root_folder = './images/check'
foldernames = list_all_folders(root_folder)
foldernames

['TTaeny09']

In [174]:
# For each sub-folder, collect its .webp files and write the listing to
# batched CSV files through data_to_csv.
# Manual override of the folder list, left disabled:
# foldernames = ['check']
for foldername in foldernames:
    df = find_jpg_files(root_folder, foldername, '.webp')
    data_to_csv(df)


TTaeny09: (62, 3)
./data/test\TTaeny09_1.csv:62


### 刪除photo

In [175]:
# Directory tree to search, and the names of the files to delete from it
# (one file name per line). Both are passed to delete_file in the next cell.
path = './images/check'
files = '''
TTaeny09_1788851537751744880_2024-05-10 08-40_1.webp
TTaeny09_1812762867999117637_2024-07-15 08-15_1.webp
TTaeny09_1812856511850529183_2024-07-15 14-27_2.webp
TTaeny09_1815279483534073863_2024-07-22 06-55_1.webp
TTaeny09_1822237419967512907_2024-08-10 11-43_1.webp
TTaeny09_1822514043053109717_2024-08-11 06-03_2.webp
'''

In [176]:
# Destructive: removes every file under `path` whose name is listed in `files`.
delete_file(files, path)

成功刪除 6 個檔案
刪除失敗 0 個檔案


### 保留photo

In [172]:
# Destructive: deletes every '.webp' file directly inside the folder whose name
# is not in the first column of files_to_preserve.csv.
# NOTE(review): this cell's execution count (172) is lower than those of the
# cells above it (173-176), so it was run before them; confirm the intended
# order before re-running the notebook top to bottom.
delete_files_except('files_to_preserve.csv', './images/check/TTaeny09', '.webp')

62

處理完成！
保留的圖片數量: 62
刪除的圖片數量: 368


---

In [207]:
def df_process(df, new_path, old_prefix='/Users/linnianyi/Library/CloudStorage/GoogleDrive-qaz51465146@gmail.com/我的雲端硬碟/LingOrm圖片/活動/'):
    """Build a ``filename`` -> ``new_path`` lookup table from the photo index.

    Keeps only the rows whose ``root_path`` equals ``'活動'``, rewrites every
    ``.jpg`` occurrence in ``filename`` and ``full_path`` to ``.webp``, and
    replaces ``old_prefix`` in the path with ``new_path``. The input frame is
    left untouched.

    Args:
        df: DataFrame with the columns ``filename``, ``full_path`` and
            ``root_path``.
        new_path: Replacement for ``old_prefix`` in each path.
        old_prefix: Path prefix to be replaced. The default is the absolute
            location the index was originally built from.
            NOTE(review): the default is a machine-specific absolute path;
            consider moving it to a configuration constant.

    Returns:
        pandas.DataFrame: Columns ``filename`` and ``new_path``, keeping the
        index labels of the selected rows.
    """
    # Filter first and take an explicit copy, so the caller's frame is never
    # mutated and the assignments below do not write into a slice of it.
    selected = df.loc[df['root_path'] == '活動', ['filename', 'full_path']].copy()

    # Literal (non-regex) replacements: '.' must not act as a wildcard.
    selected['filename'] = selected['filename'].str.replace('.jpg', '.webp', regex=False)
    selected['full_path'] = (
        selected['full_path']
        .str.replace('.jpg', '.webp', regex=False)
        .str.replace(old_prefix, new_path, regex=False)
    )
    return selected.rename(columns={'full_path': 'new_path'})

In [208]:
# Load the photo index from photo_data.xlsx and convert it with df_process into
# a filename -> new_path table.
# NOTE(review): new_path starts with './image/' while the other cells use
# './images/'; confirm which spelling is intended.
new_path = './image/活動/'
df_photo_data = pd.read_excel('photo_data.xlsx')
df_photo_data = df_process(df_photo_data, new_path)

In [209]:
# Size of the processed photo index as (rows, columns).
df_photo_data.shape

(38048, 2)

In [210]:
# Preview the first rows of the processed photo index.
df_photo_data.head()

Unnamed: 0,filename,new_path
3591,muimui_za_1831621706043289845_2024-09-05 09-13...,./image/活動/202311/20231101_試鏡/muimui_za_183162...
3592,muimui_za_1831621706043289845_2024-09-05 09-13...,./image/活動/202311/20231101_試鏡/muimui_za_183162...
3593,muimui_za_1831621706043289845_2024-09-05 09-13...,./image/活動/202311/20231101_試鏡/muimui_za_183162...
3594,muimui_za_1831621706043289845_2024-09-05 09-13...,./image/活動/202311/20231101_試鏡/muimui_za_183162...
3595,muimui_za_1831900664223674858_2024-09-06 03-42...,./image/活動/202311/20231101_試鏡/muimui_za_183190...


In [189]:
# Gather the .webp listing of every user folder into a single frame. The
# per-folder frames are collected first and concatenated once, instead of
# growing df_all with pd.concat on every iteration.
root_folder = './images/活動/user'
foldernames = list_all_folders(root_folder)
user_frames = [find_jpg_files(root_folder, foldername, '.webp') for foldername in foldernames]
# pd.concat raises on an empty list, so fall back to an empty frame when no
# folder was found.
df_all = pd.concat(user_frames) if user_frames else pd.DataFrame()


apollowny: (90, 3)

Cherished_M0511: (260, 3)

cherry0525_: (137, 3)

Convallaria_LO: (259, 3)

GasChill: (752, 3)

Honeylattexx: (26, 3)

ikkkkkkkG: (174, 3)

JungsJinns: (855, 3)

Koiiz9: (515, 3)

LilyRose_38: (97, 3)

LOgallery38: (9, 3)

luv168cm: (85, 3)

mollie8119: (71, 3)

MONSOON_LLO: (39, 3)

ooks51127: (37, 3)

RoyalHaven1127: (232, 3)

TTaeny09: (56, 3)

TYTFSGIR4EVA: (104, 3)


In [211]:
# Size of the combined file listing as (rows, columns).
df_all.shape

(3798, 3)

In [212]:
# Preview the first two rows of the combined file listing.
df_all.head(2)

Unnamed: 0,filename,full_path,foldername
0,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny
1,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny


In [213]:
# Attach the new_path of each photo from df_photo_data by file name; the left
# join keeps every row of df_all, including names with no match.
# NOTE(review): df_all is overwritten, so running this cell a second time merges
# again and yields suffixed new_path_x / new_path_y columns; rebuild df_all
# before re-running. A filename listed more than once in df_photo_data also
# produces one output row per match - confirm that is intended.
df_all = df_all.merge(df_photo_data, on='filename', how='left')

In [224]:
# Size of the listing after the merge as (rows, columns).
df_all.shape

(3798, 4)

In [219]:
# Spot check: show every row for this file name (it appears with more than one new_path).
df_all[df_all['filename']=='Cherished_M0511_1837726603583148054_2024-09-22 05-32_2.webp']

Unnamed: 0,filename,full_path,foldername,new_path
117,Cherished_M0511_1837726603583148054_2024-09-22...,./images/活動/user/Cherished_M0511/Cherished_M05...,Cherished_M0511,./image/活動/202409/20240913_DestinyClinicOrm/20...
118,Cherished_M0511_1837726603583148054_2024-09-22...,./images/活動/user/Cherished_M0511/Cherished_M05...,Cherished_M0511,./image/活動/202409/20240921_LingOrm1stMeetMacau...


In [220]:
# Spot check: show every row for this file name (it appears with more than one new_path).
df_all[df_all['filename']=='JungsJinns_1837456770014576857_2024-09-21 11-40_1.webp']

Unnamed: 0,filename,full_path,foldername,new_path
2202,JungsJinns_1837456770014576857_2024-09-21 11-4...,./images/活動/user/JungsJinns/JungsJinns_1837456...,JungsJinns,./image/活動/202409/20240913_DestinyClinicOrm/20...
2203,JungsJinns_1837456770014576857_2024-09-21 11-4...,./images/活動/user/JungsJinns/JungsJinns_1837456...,JungsJinns,./image/活動/202409/20240921_LingOrm1stMeetMacau...


In [215]:
# Preview the first 20 rows of the merged listing.
df_all.head(20)

Unnamed: 0,filename,full_path,foldername,new_path
0,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
1,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
2,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
3,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
4,apollowny_1853501578134835415_2024-11-04 18-16...,./images/活動/user/apollowny/apollowny_185350157...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...
5,apollowny_1853501578134835415_2024-11-04 18-16...,./images/活動/user/apollowny/apollowny_185350157...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...
6,apollowny_1853501578134835415_2024-11-04 18-16...,./images/活動/user/apollowny/apollowny_185350157...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...
7,apollowny_1853501578134835415_2024-11-04 18-16...,./images/活動/user/apollowny/apollowny_185350157...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...
8,apollowny_1853503709411733860_2024-11-04 18-24...,./images/活動/user/apollowny/apollowny_185350370...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...
9,apollowny_1853504186476060739_2024-11-04 18-26...,./images/活動/user/apollowny/apollowny_185350418...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
