### 0. 取出好的 outfits

In [1]:
# Directory prefixes; get_outfits() appends the style name to each one to get
# the directory it lists.
# DATA_PATH_PREFIX + STYLE holds the images named "<index>.<ext>";
# CUT_DATA_PATH_PREFIX + STYLE holds the "<style number>_<index>_E_.jpg" and
# "<style number>_<index>_Q_.jpg" images that get_outfits() looks for.
DATA_PATH_PREFIX = "../new_data/style:"  # + STYLE
CUT_DATA_PATH_PREFIX = "../new_data/cut_style:"  # + STYLE

In [2]:
import os
import csv

def get_outfits(STYLE, STYLE_NUMBER, data_path_prefix=None, cut_data_path_prefix=None):
    """Collect the outfits of one style for which both cut images exist.

    Every file in ``data_path_prefix + STYLE`` whose name (without extension)
    is an integer is treated as outfit number ``index``.  The outfit is kept
    only if both ``"<STYLE_NUMBER>_<index>_E_.jpg"`` and
    ``"<STYLE_NUMBER>_<index>_Q_.jpg"`` are present in
    ``cut_data_path_prefix + STYLE`` (the original comments describe this as
    "top and bottom garment both exist").

    Side effects: writes the kept outfits to ``good_outfits_<STYLE>.csv`` in
    the current working directory and prints a summary, including the indices
    of the outfits that were rejected.

    Args:
        STYLE: style name; appended to both directory prefixes and used in
            the CSV file name.
        STYLE_NUMBER: style identifier used as the prefix of the cut-image
            file names and of the returned ``"index"`` values.
        data_path_prefix: prefix of the directory with the full outfit
            images.  Defaults to the module-level ``DATA_PATH_PREFIX``.
        cut_data_path_prefix: prefix of the directory with the cut images.
            Defaults to the module-level ``CUT_DATA_PATH_PREFIX``.

    Returns:
        A list of dicts with the keys ``"index"`` (the string
        ``"<STYLE_NUMBER>_<index>"``), ``"img_path"``, ``"img_pathE"`` and
        ``"img_pathQ"``, sorted by the numeric outfit index.

    Raises:
        OSError: if either directory cannot be listed or the CSV file cannot
            be written.
    """
    if data_path_prefix is None:
        data_path_prefix = DATA_PATH_PREFIX
    if cut_data_path_prefix is None:
        cut_data_path_prefix = CUT_DATA_PATH_PREFIX

    DATA_PATH = data_path_prefix + STYLE
    CUT_DATA_PATH = cut_data_path_prefix + STYLE

    data_files = os.listdir(DATA_PATH)
    # A set gives constant-time membership tests; the loop below performs two
    # lookups per image, which was quadratic against the plain directory list.
    cut_data_files = set(os.listdir(CUT_DATA_PATH))

    outfits = []  # both cut images exist
    failed = []  # everything else (numeric index only)

    # get outfits
    for data_file in data_files:
        # "1.jpg" --> "1"
        index_str = os.path.splitext(data_file)[0]
        try:
            index = int(index_str)
        except ValueError:
            # Not an outfit image (e.g. ".DS_Store"): report the name and skip it.
            print(index_str)
            continue

        e_filename = f"{STYLE_NUMBER}_{index}_E_.jpg"
        q_filename = f"{STYLE_NUMBER}_{index}_Q_.jpg"

        # keep the outfit only when both cut images are present
        if e_filename in cut_data_files and q_filename in cut_data_files:
            outfits.append({
                "index": index,
                "img_path": os.path.join(DATA_PATH, data_file),
                "img_pathE": os.path.join(CUT_DATA_PATH, e_filename),
                "img_pathQ": os.path.join(CUT_DATA_PATH, q_filename)
            })
        else:
            failed.append(index)

    # Sort while "index" is still an int so the order is numeric (2 before 10),
    # then turn it into the "<STYLE_NUMBER>_<index>" identifier.
    outfits.sort(key=lambda x: x["index"])
    for outfit in outfits:
        outfit["index"] = f"{STYLE_NUMBER}_{outfit['index']}"
    failed.sort()

    # write
    good_outfits_file = "good_outfits_" + STYLE + ".csv"

    with open(good_outfits_file, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=["index", "img_path", "img_pathE", "img_pathQ"])
        writer.writeheader()
        writer.writerows(outfits)

    print(f"\n好的 outfits 儲存在 {good_outfits_file} 中，共 {len(outfits)} 筆資料。")
    print(f"\n不完全的 outfits index 有 {len(failed)} 筆：{failed}")

    return outfits

In [3]:
# Build the complete-outfit list for each style, in the order america, japan,
# korea; the second value is the style number used in the cut-image file names.
outfits_a, outfits_j, outfits_k = (
    get_outfits(style_name, style_number)
    for style_name, style_number in (("america", "0"), ("japan", "1"), ("korea", "2"))
)


好的 outfits 儲存在 good_outfits_america.csv 中，共 1013 筆資料。

不完全的 outfits index 有 46 筆：[25, 29, 32, 43, 98, 102, 108, 121, 124, 130, 140, 144, 146, 148, 175, 198, 258, 265, 271, 318, 374, 380, 384, 392, 488, 541, 553, 555, 586, 591, 600, 603, 648, 652, 698, 763, 782, 791, 826, 864, 951, 957, 975, 1021, 1023, 1051]
.DS_Store

好的 outfits 儲存在 good_outfits_japan.csv 中，共 849 筆資料。

不完全的 outfits index 有 156 筆：[12, 29, 34, 39, 42, 58, 64, 65, 66, 74, 75, 82, 108, 117, 128, 134, 148, 154, 163, 170, 171, 178, 191, 192, 198, 207, 211, 221, 231, 241, 263, 269, 277, 305, 312, 314, 316, 317, 322, 325, 330, 332, 334, 336, 348, 350, 355, 358, 360, 363, 369, 373, 376, 379, 380, 382, 384, 385, 394, 405, 406, 412, 452, 458, 466, 470, 475, 481, 492, 495, 496, 506, 507, 511, 514, 522, 533, 553, 560, 565, 570, 616, 628, 643, 661, 677, 678, 684, 693, 704, 705, 719, 720, 723, 730, 733, 737, 738, 740, 742, 747, 751, 754, 755, 762, 763, 764, 765, 769, 770, 771, 772, 774, 777, 782, 791, 792, 794, 804, 805, 806, 809, 

### 1. 每個 style 各取 698 個 outfits

In [4]:
import random
random.seed(42)  # fixed seed: the same outfits are drawn on every run


def sort_key(outfit):
    """Return (style number, outfit number) parsed from an "<style>_<outfit>" index."""
    fields = outfit["index"].split('_')
    style_part, outfit_part = fields[0], fields[1]
    return (int(style_part), int(outfit_part))


# number of outfits drawn from each style
n = 698

# Draw in the order america, japan, korea so the random stream is consumed
# in a fixed sequence.
sampled_outfits_a, sampled_outfits_j, sampled_outfits_k = (
    random.sample(pool, n) for pool in (outfits_a, outfits_j, outfits_k)
)

# Merge the three samples and order them by style, then by outfit number.
all_sampled_outfits = sorted(
    sampled_outfits_a + sampled_outfits_j + sampled_outfits_k,
    key=sort_key,
)

# Save the combined sample as one CSV file.
good_outfits_file = "good_outfits.csv"

with open(good_outfits_file, mode='w', newline='') as csv_out:
    csv_writer = csv.DictWriter(csv_out, fieldnames=["index", "img_path", "img_pathE", "img_pathQ"])
    csv_writer.writeheader()
    csv_writer.writerows(all_sampled_outfits)

print(f"好的 outfits 儲存在 {good_outfits_file} 中，共 {len(all_sampled_outfits)} 筆資料。")

好的 outfits 儲存在 good_outfits.csv 中，共 2094 筆資料。


### 2. 追加台風（taiwan）outfits

In [13]:
import csv
import random

# Reset the generator right before the draw so this cell's sample is
# determined by the seed and the input list alone.
random.seed(42)


def sort_key(outfit):
    """Return (style number, outfit number) parsed from an "<style>_<outfit>" index."""
    fields = outfit["index"].split('_')
    style_part, outfit_part = fields[0], fields[1]
    return (int(style_part), int(outfit_part))


# Step 0: collect the complete Taiwan-style outfits.
outfits_t = get_outfits("taiwan", "14")

# Step 1: draw 698 of them and order the draw by style, then outfit number.
n = 698
sampled_outfits_t = sorted(random.sample(outfits_t, n), key=sort_key)


好的 outfits 儲存在 good_outfits_taiwan.csv 中，共 880 筆資料。

不完全的 outfits index 有 245 筆：[9, 11, 22, 25, 26, 27, 28, 31, 33, 39, 41, 46, 49, 53, 55, 56, 57, 58, 65, 70, 71, 75, 76, 78, 79, 80, 82, 85, 86, 87, 89, 95, 96, 97, 100, 107, 108, 127, 129, 140, 142, 151, 154, 156, 158, 159, 167, 176, 177, 179, 183, 187, 191, 192, 201, 202, 206, 207, 223, 225, 226, 229, 232, 233, 245, 247, 250, 251, 252, 253, 261, 265, 267, 268, 291, 293, 299, 302, 305, 306, 307, 325, 332, 338, 346, 350, 353, 358, 359, 367, 368, 369, 372, 373, 378, 381, 383, 385, 386, 387, 390, 395, 397, 399, 402, 404, 405, 406, 407, 408, 414, 415, 420, 421, 422, 428, 434, 435, 441, 442, 456, 457, 458, 461, 462, 464, 469, 471, 473, 474, 480, 485, 488, 490, 495, 500, 501, 506, 507, 509, 515, 527, 528, 532, 538, 541, 555, 558, 560, 564, 576, 610, 620, 621, 626, 629, 630, 634, 637, 641, 645, 655, 659, 661, 663, 665, 673, 693, 700, 706, 710, 723, 733, 735, 748, 752, 755, 760, 761, 764, 766, 769, 781, 782, 785, 790, 800, 818, 822, 823, 830

In [27]:
import shutil

# Step 2: append the Taiwan-style outfits.
# Work on a copy so the existing good_outfits.csv is left unchanged.
good_outfits_file = "good_outfits.csv"
good4_outfits_file = "good4_outfits.csv"
shutil.copyfile(good_outfits_file, good4_outfits_file)

# The copy already begins with the header row, so only data rows are appended.
# Writing the header again here would leave a stray header line in the middle
# of the file, which every later reader would see as a bogus data row.
with open(good4_outfits_file, mode='a', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["index", "img_path", "img_pathE", "img_pathQ"])
    writer.writerows(sampled_outfits_t)

# Re-read the file to report how many outfits it now holds.  DictReader
# consumes the header line itself, so every row it yields is a data row.
with open(good4_outfits_file, mode='r', newline='') as file:
    row_count = sum(1 for _ in csv.DictReader(file))

print(f"追加好的 outfits 儲存在 {good4_outfits_file} 中，共 {row_count} 筆資料。")

追加好的 outfits 儲存在 good4_outfits.csv 中，共 2792 筆資料。


### 3. end