In [2]:
import pandas as pd
import os
import shutil

In [3]:
def list_all_folders(directory):
    try:
        # 使用 os.listdir 遍歷目錄
        folders = [name.replace('\\', '/') for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]
        return folders
    except Exception as e:
        print(f"發生錯誤：{e}")
        return []

In [4]:
def find_jpg_files(root_folder, foldername, extension):
    df = pd.DataFrame()

    # 遍历目录
    folder = os.path.join(root_folder, foldername).replace('\\', '/')
    file_list = []
    for dirpath, _, filenames in os.walk(folder):
        for filename in filenames:
            if filename.lower().endswith(extension):
                full_path = os.path.join(dirpath, filename)
                file_list.append({'filename': filename, 'full_path': full_path, 'foldername':foldername})
    tmp_df = pd.DataFrame(file_list, columns=['filename', 'full_path', 'foldername'])
    df = pd.concat([df, tmp_df])
    print('')
    print(f'{foldername}: {tmp_df.shape}')

    df['full_path'] = df['full_path'].apply(lambda x: x.replace('\\', '/'))
    return df

In [5]:
def delete_file(files, path):
    file_list = files.strip().splitlines()
    success_count = 0
    failure_count = 0
    failed_files = []

    # 搜尋目錄中所有的檔案
    file_paths = []
    for root, _, filenames in os.walk(path):
        for filename in filenames:
            if filename in file_list:
                file_paths.append(os.path.join(root, filename))

    # 刪除檔案
    for full_path in file_paths:
        try:
            os.remove(full_path)
            success_count += 1
        except Exception as e:
            failure_count += 1
            failed_files.append((full_path, str(e)))

    print(f"成功刪除 {success_count} 個檔案")
    print(f"刪除失敗 {failure_count} 個檔案")
    if failed_files:
        print("失敗檔案列表：")
        for failed_file, error in failed_files:
            print(f"- {failed_file}: {error}")

In [6]:
def data_to_csv(df):
    output_dir = './data/test'
    os.makedirs(output_dir, exist_ok=True)  # 確保輸出目錄存在

    for folder in df['foldername'].unique():
        df_tmp = df[df['foldername'] == folder]
        # 按檔名排序（忽略大小寫）
        df_tmp = df_tmp.sort_values('filename', key=lambda x: x.str.lower()).reset_index(drop=True)

        # 計算總批次數
        batch_size = 200
        num_batches = (len(df_tmp) + batch_size - 1) // batch_size

        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            end = start + batch_size
            batch_df = df_tmp.iloc[start:end]
            
            # 檔名格式：folder_1.csv、folder_2.csv...
            output_file = os.path.join(output_dir, f'{folder}_{batch_idx + 1}.csv')
            batch_df.to_csv(output_file, index=False)
            print(f'{output_file}:{batch_df.shape[0]}')

In [7]:
def delete_files_except(csv_path, images_folder, extension):
    try:
        # Read CSV file, assuming no header
        files_to_keep = pd.read_csv(csv_path, header=None)[0].tolist()
        
        # Counters
        deleted_count = 0
        kept_count = 0
        print(len(files_to_keep))
        
        # Traverse all files in the folder and its subfolders
        for dirpath, _, filenames in os.walk(images_folder):
            for filename in filenames:
                if filename.endswith(extension):
                    file_path = os.path.join(dirpath, filename)
                    
                    if filename in files_to_keep:
                        kept_count += 1
                    else:
                        try:
                            os.remove(file_path)
                            deleted_count += 1
                        except Exception as e:
                            print(f"Error deleting {filename}: {str(e)}")
        
        print("\nProcessing completed!")
        print(f"Images kept: {kept_count}")
        print(f"Images deleted: {deleted_count}")
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")

---

In [18]:
# root_folder = './images/check/'
root_folder = './image_process/compress/'
foldernames = list_all_folders(root_folder)
foldernames

['20240519_LingOrmCharityBirthday',
 '20240526_ORM_CHARITY_FASHION_SHOW',
 '20240621_SWUPridexOrmKornnaphat']

In [19]:
# foldernames = ['check']
for foldername in foldernames:
    df = find_jpg_files(root_folder, foldername, '.webp')
    data_to_csv(df)


20240519_LingOrmCharityBirthday: (462, 3)
./data/test\20240519_LingOrmCharityBirthday_1.csv:200
./data/test\20240519_LingOrmCharityBirthday_2.csv:200
./data/test\20240519_LingOrmCharityBirthday_3.csv:62

20240526_ORM_CHARITY_FASHION_SHOW: (28, 3)
./data/test\20240526_ORM_CHARITY_FASHION_SHOW_1.csv:28

20240621_SWUPridexOrmKornnaphat: (46, 3)
./data/test\20240621_SWUPridexOrmKornnaphat_1.csv:46


### 刪除photo

In [35]:
files = '''
Abears19_1802376259059720302_2024-06-16 16-22_1.webp
LilyRose_38_1802350130882109889_2024-06-16 14-38_2.webp
LilyRose_38_1802350130882109889_2024-06-16 14-38_3.webp
LilyRose_38_1802340001642151957_2024-06-16 13-58_4.webp
TYTFSGIR4EVA_1802359740686889310_2024-06-16 15-17_2.webp
'''

In [36]:
path_check = './images/check/'
delete_file(files, path_check)

成功刪除 5 個檔案
刪除失敗 0 個檔案


In [37]:
path_activity = './images/活動/'
delete_file(files, path_activity)

成功刪除 1 個檔案
刪除失敗 0 個檔案


### 保留photo

In [9]:
root_folder = './images/check/photo_activity_1201-1'
delete_files_except('files_to_preserve.csv', root_folder, '.jpg')

11

Processing completed!
Images kept: 11
Images deleted: 94


---

In [9]:
def df_process(df, new_path):
    df['filename'] = df['filename'].apply(lambda x:x.replace('.jpg', '.webp'))
    df['full_path'] = df['full_path'].apply(lambda x:x.replace('.jpg', '.webp'))
    df = df[df['root_path']=='活動']
    df = df[['filename', 'full_path']]
    df = df.rename(columns={"full_path": "new_path"})
    df['new_path'] = df['new_path'].apply(lambda x:x.replace('/Users/linnianyi/Library/CloudStorage/GoogleDrive-qaz51465146@gmail.com/我的雲端硬碟/LingOrm圖片/活動/', new_path))
    return df  

In [10]:
new_path = './images/活動/'
df_photo_data = pd.read_excel('photo_data.xlsx')
df_photo_data = df_process(df_photo_data, new_path)

In [11]:
df_photo_data.shape

(38048, 2)

In [12]:
df_photo_data.head()

Unnamed: 0,filename,new_path
3591,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/202311/20231101_試鏡/muimui_za_18316...
3592,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/202311/20231101_試鏡/muimui_za_18316...
3593,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/202311/20231101_試鏡/muimui_za_18316...
3594,muimui_za_1831621706043289845_2024-09-05 09-13...,./images/活動/202311/20231101_試鏡/muimui_za_18316...
3595,muimui_za_1831900664223674858_2024-09-06 03-42...,./images/活動/202311/20231101_試鏡/muimui_za_18319...


In [189]:
df_all = pd.DataFrame()
root_folder = './images/活動/user'
foldernames = list_all_folders(root_folder)
for foldername in foldernames:
    df2 = find_jpg_files(root_folder, foldername, '.webp')
    df_all = pd.concat([df_all, df2])


apollowny: (90, 3)

Cherished_M0511: (260, 3)

cherry0525_: (137, 3)

Convallaria_LO: (259, 3)

GasChill: (752, 3)

Honeylattexx: (26, 3)

ikkkkkkkG: (174, 3)

JungsJinns: (855, 3)

Koiiz9: (515, 3)

LilyRose_38: (97, 3)

LOgallery38: (9, 3)

luv168cm: (85, 3)

mollie8119: (71, 3)

MONSOON_LLO: (39, 3)

ooks51127: (37, 3)

RoyalHaven1127: (232, 3)

TTaeny09: (56, 3)

TYTFSGIR4EVA: (104, 3)


In [211]:
df_all.shape

(3798, 3)

In [212]:
df_all.head(2)

Unnamed: 0,filename,full_path,foldername
0,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny
1,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny


In [213]:
df_all = df_all.merge(df_photo_data, on='filename', how='left')

In [224]:
df_all.shape

(3798, 4)

In [223]:
df_all.filename.value_counts()

filename
TYTFSGIR4EVA_1856669867317047430_2024-11-13 12-06_1.webp    1
apollowny_1853495118952820966_2024-11-04 17-50_1.webp       1
apollowny_1853495118952820966_2024-11-04 17-50_2.webp       1
apollowny_1853495118952820966_2024-11-04 17-50_3.webp       1
apollowny_1853495118952820966_2024-11-04 17-50_4.webp       1
                                                           ..
apollowny_1853504577418744272_2024-11-04 18-28_2.webp       1
apollowny_1853504186476060739_2024-11-04 18-26_1.webp       1
apollowny_1853503709411733860_2024-11-04 18-24_2.webp       1
apollowny_1853501578134835415_2024-11-04 18-16_4.webp       1
apollowny_1853501578134835415_2024-11-04 18-16_3.webp       1
Name: count, Length: 3798, dtype: int64

In [222]:
df_all.drop(117, inplace=True)
df_all.drop(2202, inplace=True)

In [219]:
df_all[df_all['filename']=='Cherished_M0511_1837726603583148054_2024-09-22 05-32_2.webp']

Unnamed: 0,filename,full_path,foldername,new_path
117,Cherished_M0511_1837726603583148054_2024-09-22...,./images/活動/user/Cherished_M0511/Cherished_M05...,Cherished_M0511,./image/活動/202409/20240913_DestinyClinicOrm/20...
118,Cherished_M0511_1837726603583148054_2024-09-22...,./images/活動/user/Cherished_M0511/Cherished_M05...,Cherished_M0511,./image/活動/202409/20240921_LingOrm1stMeetMacau...


In [220]:
df_all[df_all['filename']=='JungsJinns_1837456770014576857_2024-09-21 11-40_1.webp']

Unnamed: 0,filename,full_path,foldername,new_path
2202,JungsJinns_1837456770014576857_2024-09-21 11-4...,./images/活動/user/JungsJinns/JungsJinns_1837456...,JungsJinns,./image/活動/202409/20240913_DestinyClinicOrm/20...
2203,JungsJinns_1837456770014576857_2024-09-21 11-4...,./images/活動/user/JungsJinns/JungsJinns_1837456...,JungsJinns,./image/活動/202409/20240921_LingOrm1stMeetMacau...


In [225]:
df_all.head()

Unnamed: 0,filename,full_path,foldername,new_path
0,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
1,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
2,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
3,apollowny_1853495118952820966_2024-11-04 17-50...,./images/活動/user/apollowny/apollowny_185349511...,apollowny,./image/活動/202411/20241102_Orm1stFMNanning/apo...
4,apollowny_1853501578134835415_2024-11-04 18-16...,./images/活動/user/apollowny/apollowny_185350157...,apollowny,./image/活動/202411/20241102_LinglingNNGFirstMee...


In [226]:
for _, row in df_all.iterrows():
    source = row['full_path']
    destination = row['new_path']
    
    # 创建目标文件夹（如果不存在）
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    
    # 复制文件
    shutil.copy(source, destination)

---

In [18]:
df_all = pd.DataFrame()
root_folder = './images/活動'
foldernames = ['notion']
for foldername in foldernames:
    df2 = find_jpg_files(root_folder, foldername, '.webp')
    df_all = pd.concat([df_all, df2])


notion: (3035, 3)


In [19]:
df_all.shape

(3035, 3)

In [20]:
df_all = df_all.merge(df_photo_data, on='filename', how='left')

In [21]:
df_all.shape

(3042, 4)

In [32]:
df_all.head(2)

Unnamed: 0,filename,full_path,foldername,new_path
0,-小王-w_5069823839044298_2024-08-21 07-00_1.webp,./images/活動/notion/-小王-w_5069823839044298_2024...,notion,./images/活動/202408/20240817_LingOrm1stFMinHK/-...
1,-小王-w_5069823839044298_2024-08-21 07-00_7.webp,./images/活動/notion/-小王-w_5069823839044298_2024...,notion,./images/活動/202408/20240817_LingOrm1stFMinHK/-...


In [43]:
aa = df_all[df_all.new_path.isna()]

In [48]:
aa['filename'][77]

'AiMenig_1830607983505789415_2024-09-02 14-05_1.webp'

In [51]:
n=0
for _, row in df_all.iterrows():
    try:
        source = row['full_path']
        destination = row['new_path']
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        shutil.move(source, destination)
    except:
        n+=1
        source = row['full_path']
        print(source)
        print(destination)
        print()
        # os.makedirs(os.path.dirname(destination), exist_ok=True)
        # shutil.copy(source, destination)

./images/活動/notion/AiMenig_1830607983505789415_2024-09-02 14-05_1.webp
nan

./images/活動/notion/buzzzz_425_1849091349288722900_2024-10-23 14-11_2.webp
nan

./images/活動/notion/Fang_1225_5074984823166268_2024-09-04 12-48_6.webp
nan

./images/活動/notion/Fang_1225_5074984823166268_2024-09-04 12-48_7.webp
nan

./images/活動/notion/fanoii_1845111430502285590_2024-10-12 14-37_1.webp
nan

./images/活動/notion/FiloROSIE_1832746817072836671_2024-09-08 11-44_1.webp
nan

./images/活動/notion/FiloROSIE_1837029334202339749_2024-09-20 07-21_2.webp
nan

./images/活動/notion/frogiewana_1822943163771953514_2024-08-12 10-28_2.webp
nan

./images/活動/notion/frogiewana_1837487672971219307_2024-09-21 13-42_3.webp
nan

./images/活動/notion/frogiewana_1837511219802263632_2024-09-21 15-16_2.webp
nan

./images/活動/notion/frogiewana_1837746209198690814_2024-09-22 06-50_2.webp
nan

./images/活動/notion/frogiewana_1837746912105320522_2024-09-22 06-53_2.webp
nan

./images/活動/notion/frogiewana_1837805731372061012_2024-09-22 10-46_4.

In [50]:
n

96