In [1]:
import pandas as pd
import os
import shutil

In [2]:
def list_all_folders(directory):
    """Return the names of the immediate sub-directories of ``directory``.

    Only entry names are returned (not full paths), in the order
    ``os.listdir`` yields them, with any backslash in a name replaced by a
    forward slash. If anything goes wrong (for example the directory does
    not exist), the error is printed and an empty list is returned.
    """
    subfolders = []
    try:
        for entry in os.listdir(directory):
            candidate = os.path.join(directory, entry)
            if not os.path.isdir(candidate):
                continue
            subfolders.append(entry.replace('\\', '/'))
    except Exception as err:
        print(f"發生錯誤：{err}")
        return []
    return subfolders

In [3]:
def find_jpg_files(root_folder, foldername, extensions=('.webp',)):
    """Collect the image files found under ``root_folder/foldername``.

    Despite its historical name, this function looks for ``.webp`` files by
    default; pass ``extensions`` to search for other suffixes.

    Parameters
    ----------
    root_folder : str
        Directory that contains ``foldername``.
    foldername : str
        Sub-directory to walk recursively; also stored in every row.
    extensions : str or iterable of str, optional
        File-name suffixes to keep, compared case-insensitively.
        Defaults to ``('.webp',)``.

    Returns
    -------
    pandas.DataFrame
        One row per matching file with columns ``filename``, ``full_path``
        (forward slashes only) and ``foldername``. Empty, but with the same
        columns, when nothing matches.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith needs a tuple; lower-case so the comparison ignores case.
    suffixes = tuple(ext.lower() for ext in extensions)

    folder = os.path.join(root_folder, foldername).replace('\\', '/')
    records = []
    for dirpath, _, filenames in os.walk(folder):
        for filename in filenames:
            if filename.lower().endswith(suffixes):
                # Normalise Windows separators so the stored paths are uniform.
                full_path = os.path.join(dirpath, filename).replace('\\', '/')
                records.append({'filename': filename, 'full_path': full_path, 'foldername': foldername})

    # Build the frame once; explicit columns keep the schema when no file matches.
    df = pd.DataFrame(records, columns=['filename', 'full_path', 'foldername'])
    print('')
    print(f'{foldername}: {df.shape}')
    return df

In [4]:
def delete_file(files, path):
    """Delete every file under ``path`` whose name appears in ``files``.

    Parameters
    ----------
    files : str
        Newline-separated file names (base names, not paths). Surrounding
        whitespace on each line and blank lines are ignored.
    path : str
        Directory that is searched recursively; every file with a matching
        name is removed, whichever sub-directory it is in.

    Returns
    -------
    None
        A summary (deleted / failed / not found) is printed instead.
    """
    # A set gives constant-time membership tests; stripping each line
    # tolerates stray spaces left over from copy-pasting the list.
    wanted = {line.strip() for line in files.splitlines() if line.strip()}
    success_count = 0
    failure_count = 0
    failed_files = []
    found_names = set()

    # Find every file in the tree whose name was requested.
    file_paths = []
    for root, _, filenames in os.walk(path):
        for filename in filenames:
            if filename in wanted:
                file_paths.append(os.path.join(root, filename))
                found_names.add(filename)

    # Delete the matches, recording failures instead of aborting.
    for full_path in file_paths:
        try:
            os.remove(full_path)
            success_count += 1
        except Exception as e:
            failure_count += 1
            failed_files.append((full_path, str(e)))

    print(f"成功刪除 {success_count} 個檔案")
    print(f"刪除失敗 {failure_count} 個檔案")
    if failed_files:
        print("失敗檔案列表：")
        for failed_file, error in failed_files:
            print(f"- {failed_file}: {error}")

    # Requested names that matched nothing would otherwise go unnoticed.
    missing = sorted(wanted - found_names)
    if missing:
        print(f"找不到 {len(missing)} 個檔案：")
        for name in missing:
            print(f"- {name}")

In [5]:
def data_to_csv(df, output_dir='./data/test', batch_size=200):
    """Write ``df`` to CSV files, one series of batches per folder.

    Rows are grouped by ``foldername``, sorted by ``filename`` ignoring
    case, and written in chunks of at most ``batch_size`` rows to
    ``<output_dir>/<folder>_<n>.csv`` with ``n`` starting at 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``foldername`` and ``filename``.
    output_dir : str, optional
        Destination directory, created if missing. Defaults to
        ``'./data/test'``.
    batch_size : int, optional
        Maximum number of rows per CSV file. Defaults to 200.

    Raises
    ------
    ValueError
        If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be at least 1, got {batch_size!r}')
    os.makedirs(output_dir, exist_ok=True)  # make sure the output directory exists

    for folder in df['foldername'].unique():
        df_tmp = df[df['foldername'] == folder]
        # Sort by file name, ignoring case.
        df_tmp = df_tmp.sort_values('filename', key=lambda x: x.str.lower()).reset_index(drop=True)

        # Step through the rows one batch at a time; batch numbers start at 1.
        for batch_number, start in enumerate(range(0, len(df_tmp), batch_size), start=1):
            batch_df = df_tmp.iloc[start:start + batch_size]

            # File name pattern: folder_1.csv, folder_2.csv, ...
            output_file = os.path.join(output_dir, f'{folder}_{batch_number}.csv')
            batch_df.to_csv(output_file, index=False)
            print(f'{output_file}:{batch_df.shape[0]}')

---

In [6]:
# Root folder whose immediate sub-directories are scanned by the next cell.
root_folder = './images/check'
foldernames = list_all_folders(root_folder)
# Display the sub-directory names that were found.
foldernames

['20240509_LingOrmCh3FanGreet',
 'apollowny',
 'cherry0525_',
 'Honeylattexx',
 'luv168cm',
 'mollie8119',
 'MONSOON_LLO',
 'ooks51127',
 'TYTFSGIR4EVA']

In [7]:
# Optional manual override of the folder list (left disabled):
# foldernames = ['check']
# For every folder: collect its image files, then write them out as batched CSVs.
# Note that `df` is overwritten on each iteration, so only the last folder's
# frame remains in the kernel afterwards.
for foldername in foldernames:
    df = find_jpg_files(root_folder, foldername)
    data_to_csv(df)


20240509_LingOrmCh3FanGreet: (299, 3)
./data/test\20240509_LingOrmCh3FanGreet_1.csv:200
./data/test\20240509_LingOrmCh3FanGreet_2.csv:99

apollowny: (98, 3)
./data/test\apollowny_1.csv:98

cherry0525_: (163, 3)
./data/test\cherry0525__1.csv:163

Honeylattexx: (91, 3)
./data/test\Honeylattexx_1.csv:91

luv168cm: (91, 3)
./data/test\luv168cm_1.csv:91

mollie8119: (95, 3)
./data/test\mollie8119_1.csv:95

MONSOON_LLO: (83, 3)
./data/test\MONSOON_LLO_1.csv:83

ooks51127: (155, 3)
./data/test\ooks51127_1.csv:155

TYTFSGIR4EVA: (555, 3)
./data/test\TYTFSGIR4EVA_1.csv:200
./data/test\TYTFSGIR4EVA_2.csv:200
./data/test\TYTFSGIR4EVA_3.csv:155


In [14]:
# Directory tree that delete_file searches recursively.
path = './images/check'
# File names to delete, one per line (base names only, no directories).
files = '''
apollowny_1857244021778993222_2024-11-15 02-07_3.webp
apollowny_1854401674724757539_2024-11-07 05-53_3.webp
apollowny_1857244021778993222_2024-11-15 02-07_4.webp
apollowny_1857001965277855998_2024-11-14 10-05_1.webp
apollowny_1856593562173092256_2024-11-13 07-02_1.webp
apollowny_1858058942213243146_2024-11-17 08-05_1.webp
apollowny_1855255834911244669_2024-11-09 14-27_1.webp
apollowny_1857244021778993222_2024-11-15 02-07_1.webp
'''

In [15]:
# Destructive: permanently removes every file under `path` whose name is listed in `files`.
delete_file(files, path)

成功刪除 8 個檔案
刪除失敗 0 個檔案


In [120]:
# Load the image index CSV into `df` (this replaces the `df` left over from the export loop).
# NOTE(review): this cell's execution count (120) is out of sequence with its
# neighbours; restart the kernel and run all cells to confirm the notebook still works.
df = pd.read_csv('./data/images_活動.csv')

In [40]:
# Rows x columns of the image index.
# NOTE(review): the recorded output is (3767, 2), while the same CSV loaded into
# `df2` further down reports (5523, 2) - this output looks stale; re-run to confirm.
df.shape

(3767, 2)

---

In [2]:
# Photo metadata, one row per file.
df_no_dup = pd.read_excel('photo_data.xlsx')
# Swap the extension to .webp (presumably to match the names in the image
# index - confirm). Only a trailing ".jpg" is replaced, so a ".jpg" elsewhere
# in a name is left alone, and missing file names stay NaN instead of raising.
df_no_dup['filename'] = df_no_dup['filename'].str.replace(r'\.jpg$', '.webp', regex=True)
# Keep only the rows whose root_path is 雜誌 or 拍攝.
df_no_dup = df_no_dup[df_no_dup['root_path'].isin(['雜誌', '拍攝'])]
# Image index: one row per file with its path on disk.
df2 = pd.read_csv('./data/images_活動.csv')

In [3]:
# Rows x columns of the metadata after keeping only the 雜誌 / 拍攝 rows.
df_no_dup.shape

(1229, 10)

In [4]:
# Number of metadata rows per root_path value.
df_no_dup.root_path.value_counts()

root_path
雜誌    617
拍攝    612
Name: count, dtype: int64

In [5]:
# Rows x columns of the image index loaded from ./data/images_活動.csv.
df2.shape

(5523, 2)

In [6]:
# Peek at the first two rows of the image index.
df2.head(2)

Unnamed: 0,filename,full_path
0,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...
1,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...


In [7]:
# Peek at the first two rows of the filtered metadata.
df_no_dup.head(2)

Unnamed: 0,filename,full_path,username,relative_path,file_date,file_size,extension,root_path,children_path,month_path
2850,cocochicwedding_3485939477002385180_2024-10-24...,/Users/linnianyi/Library/CloudStorage/GoogleDr...,cocochicwedding,/雜誌/Bride雜誌,2024-10-24,706477,jpg,雜誌,Bride雜誌,
2851,PP_0566_1819186007704850790_2024-08-02 01-38_1...,/Users/linnianyi/Library/CloudStorage/GoogleDr...,PP_0566,/雜誌/Bride雜誌,2024-08-02,405683,jpg,雜誌,Bride雜誌,


In [8]:
# Keep only the merge key (filename) and the label to attach (root_path).
df_no_dup1 = df_no_dup[['filename', 'root_path']]

In [9]:
# Peek at the two-column lookup table.
df_no_dup1.head(2)

Unnamed: 0,filename,root_path
2850,cocochicwedding_3485939477002385180_2024-10-24...,雜誌
2851,PP_0566_1819186007704850790_2024-08-02 01-38_1...,雜誌


In [10]:
# Attach root_path to every index row whose filename appears in the metadata;
# rows without a match get NaN (left join).
# NOTE(review): this cell overwrites df2, so running it twice on the merged
# frame yields root_path_x / root_path_y columns - reload df2 before re-running.
# NOTE(review): repeated filenames in df_no_dup1 would multiply matching rows;
# compare the row count before and after the merge to check.
df2 = df2.merge(df_no_dup1, on='filename', how='left')

In [11]:
# Rows x columns after the merge; the row count should equal the pre-merge
# count if the join did not duplicate any row.
df2.shape

(5523, 3)

In [12]:
# Peek at the merged frame, including the new root_path column.
df2.head()

Unnamed: 0,filename,full_path,root_path
0,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...,
1,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...,
2,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...,
3,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/activity/20231101_試鏡/muimui_za_183...,
4,muimui_za_1831900664223674858_2024-09-06 03-42...,./images/活動/activity/20231101_試鏡/muimui_za_183...,


In [13]:
# Count the index rows that matched a metadata row, per root_path
# (unmatched rows are NaN and are left out by value_counts).
df2['root_path'].value_counts()

root_path
拍攝    25
Name: count, dtype: int64

In [18]:
# Index rows whose filename also appears in the 拍攝 metadata; these are the
# files that the following cells copy to ./del and then delete.
aa = df2[df2['root_path']=='拍攝']

In [19]:
# Peek at the first matched rows.
aa.head()

Unnamed: 0,filename,full_path,root_path
888,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
889,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
890,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
891,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
892,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝


In [20]:
# Show every matched row so the list can be reviewed before anything is deleted.
aa

Unnamed: 0,filename,full_path,root_path
888,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
889,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
890,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
891,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
892,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
893,fabrique.co_3451107824765875200_2024-09-06 12-...,./images/活動/notion/fabrique.co_345110782476587...,拍攝
1989,monday_21_5075378859416603_2024-09-05 14-54_2....,./images/活動/notion/monday_21_5075378859416603_...,拍攝
1990,monday_21_5075378859416603_2024-09-05 14-54_4....,./images/活動/notion/monday_21_5075378859416603_...,拍攝
2213,orm.kornnaphat_3447105999466233263_2024-08-31 ...,./images/活動/notion/orm.kornnaphat_344710599946...,拍攝
2214,orm.kornnaphat_3449685543620646636_2024-09-04 ...,./images/活動/notion/orm.kornnaphat_344968554362...,拍攝


In [21]:
# Number of matched rows, i.e. how many files are about to be copied and deleted.
aa.shape

(25, 3)

In [22]:
# Back up every selected file before it is deleted.
backup_dir = './del'
# shutil.copy treats a missing destination as a *file* name, so without this
# every copy would overwrite one single file called "del".
os.makedirs(backup_dir, exist_ok=True)
for image_path in aa['full_path']:
    try:
        shutil.copy(image_path, backup_dir)
    except Exception as e:
        # Keep going, but show which file could not be backed up and why.
        print(image_path, str(e))

In [23]:
# Permanently delete the selected files, but only those that have a backup.
backup_dir = './del'
success_count = 0
failure_count = 0
failed_files = []
for image_path in aa['full_path']:
    backup_path = os.path.join(backup_dir, os.path.basename(image_path))
    try:
        # Refuse to delete a file whose backup copy is missing: the copy
        # step only prints its errors, so a failed backup would otherwise
        # go unnoticed and the image would be lost for good.
        if not os.path.isfile(backup_path):
            raise FileNotFoundError(f'no backup found at {backup_path}')
        os.remove(image_path)
        success_count += 1
    except Exception as e:
        failure_count += 1
        failed_files.append((image_path, str(e)))

In [24]:
# Summarise the deletion run.
print(f"成功刪除 {success_count} 個檔案")
print(f"刪除失敗 {failure_count} 個檔案")
# Also list what could not be deleted and why; the failures are collected by
# the deletion loop but were never shown.
if failed_files:
    print("失敗檔案列表：")
    for failed_file, error in failed_files:
        print(f"- {failed_file}: {error}")

成功刪除 25 個檔案
刪除失敗 0 個檔案
