In [26]:
import json
import os
from collections import Counter

import cv2
import numpy as np
from tqdm import tqdm

# Load the class-name -> BGR colour table from class_bgr.json
with open('class_bgr.json', 'r') as file:
    class_bgr = json.load(file)

# Reverse lookup: (B, G, R) tuple -> class name
bgr_to_class = {tuple(bgr): class_name for class_name, bgr in class_bgr.items()}

# Directory holding the colour-coded segmentation images
inference_output_dir = './inference_data/ground_truth'

# One "document" per image: the class name of every pixel, in row-major order
documents = []

# Convert every image into a document.
# sorted() makes the document order reproducible; os.listdir returns entries in arbitrary order.
for inference_filename in tqdm(sorted(os.listdir(inference_output_dir)), desc="Processing images"):
    inference_path = os.path.join(inference_output_dir, inference_filename)
    segmentation_result = cv2.imread(inference_path)

    # cv2.imread signals failure by returning None instead of raising
    # (sub-directories, hidden files, non-image files), so skip those entries.
    if segmentation_result is None:
        tqdm.write(f"[WARN] Skipping unreadable file: {inference_path}")
        continue

    # Pack the three 8-bit channels of each pixel into one integer so the distinct
    # colours of the image can be found with a single 1-D np.unique call.
    # (With its default flag cv2.imread yields a 3-channel BGR array.)
    pixels = segmentation_result.reshape(-1, 3).astype(np.uint32)
    packed = (pixels[:, 0] << 16) | (pixels[:, 1] << 8) | pixels[:, 2]
    unique_codes, inverse = np.unique(packed, return_inverse=True)

    # Look up the class name once per distinct colour rather than once per pixel;
    # colours missing from class_bgr.json are labelled "unknown".
    unique_names = [
        bgr_to_class.get(((code >> 16) & 0xFF, (code >> 8) & 0xFF, code & 0xFF), "unknown")
        for code in unique_codes.tolist()
    ]

    # Expand back to one class name per pixel (same row-major order as a y/x double loop)
    class_names = np.array(unique_names, dtype=object)[inverse].tolist()

    # The per-pixel class names form the "document" of this image
    documents.append(class_names)


Processing images: 100%|██████████| 242/242 [00:55<00:00,  4.39it/s]


In [27]:
# Count how often each class name occurs in every document.
# Counter tallies a whole document in one pass, which avoids incrementing a
# NumPy element from Python once per pixel.
doc_counts = [Counter(doc) for doc in documents]

# Build the vocabulary (set of distinct class names) and a class -> column index map
vocab = set().union(*doc_counts)
vocab_index = {word: i for i, word in enumerate(vocab)}

# Term-frequency matrix: tf_matrix[d, c] = number of pixels of class c in document d
tf_matrix = np.zeros((len(documents), len(vocab)))

for doc_idx, counts in enumerate(doc_counts):
    for word, count in counts.items():
        tf_matrix[doc_idx, vocab_index[word]] = count

In [37]:
# Document frequency: fraction of documents in which each class occurs at least once
n_documents = len(documents)
df = np.count_nonzero(tf_matrix, axis=0) / n_documents

# Inverse term frequency, weighted by the squared document frequency (df, not IDF).
# Only non-zero counts are inverted: adding an epsilon to every entry would turn each
# (document, class) pair where the class is absent into a ~1e9 term that swamps the
# real inverse frequencies. Absent pairs contribute 0 instead.
tf_inverse = np.divide(
    1.0,
    tf_matrix,
    out=np.zeros(tf_matrix.shape, dtype=float),
    where=tf_matrix > 0,
)
weights = tf_inverse * df * df

# Sum over documents to get one raw weight per class
class_weights = np.sum(weights, axis=0)

# L2-normalise the weight vector; an all-zero vector is left untouched to avoid 0/0 -> NaN
print("[INFO] Normalizing...")
weight_norm = np.linalg.norm(class_weights)
if weight_norm > 0:
    class_weights_normalized = class_weights / weight_norm
else:
    class_weights_normalized = class_weights

# Map each class name to its weight through the explicit column index, so the pairing
# does not depend on the iteration order of vocab
class_weights_dict = {word: class_weights_normalized[idx] for word, idx in vocab_index.items()}

[INFO] Normalizing...


In [38]:
# Re-read class_bgr.json: the key order of the parsed object defines the output order
with open('class_bgr.json', 'r') as file:
    class_order = json.load(file)

# Walk the classes in class_bgr.json order, keeping only those present in class_weights_dict
sorted_class_weights = dict(
    (name, class_weights_dict[name])
    for name in class_order.keys()
    if name in class_weights_dict
)
# Round every weight to two decimal places for the saved file
simplified_class_weights = {
    name: round(sorted_class_weights[name], 2) for name in sorted_class_weights
}

"""
class_weight_ver1:
    weights = tf_inverse

class_weight_ver2:
    weights = tf_inverse * df

class_weight_ver3:
    weights = tf_inverse * df * df
"""
# Write the ordered, rounded weights to <saved_filename>.json
saved_filename = "class_weight_ver3"
with open(f'{saved_filename}.json', 'w') as json_file:
    json.dump(simplified_class_weights, json_file, indent=4)

print(f"Sorted class weights have been saved to {saved_filename}.json.")

Sorted class weights have been saved to class_weight_ver6.json.
