In [1]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


## Py顏色提取

In [26]:
pip install webcolors scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [27]:
pip install threadpoolctl

Note: you may need to restart the kernel to use updated packages.


In [34]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from ultralytics import YOLO
from collections import Counter
import numpy as np
from sklearn.metrics import pairwise_distances
import os
import glob

# Pretrained models shared by every function below: a YOLOv8 detector for the
# bounding boxes and a CLIP model/processor pair for image and text embeddings.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

yolo_model = YOLO("yolov8n.pt")  # swap in other YOLOv8 weights if appropriate

clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)

# Object detection
def detect_objects(image_path):
    """Run the YOLO model on one image and return its bounding boxes.

    Args:
        image_path: value passed straight to ``yolo_model`` (the callers in
            this notebook pass a file path).

    Returns:
        ``(boxes, results)`` where ``boxes`` is the ``xyxy`` box array of the
        first result, moved to the CPU and converted to NumPy, and ``results``
        is the raw object returned by ``yolo_model``.
    """
    detections = yolo_model(image_path)
    first_result_boxes = detections[0].boxes
    boxes_xyxy = first_result_boxes.xyxy.cpu().numpy()
    return boxes_xyxy, detections

# Extract the dominant colour
def get_dominant_color(image):
    """Return the most frequent RGB colour of ``image`` as a tuple of ints.

    The image is converted to RGB and shrunk to 50x50 before counting, so the
    result is the most common exact ``(r, g, b)`` triple among at most 2500
    pixels.

    Args:
        image: an image object providing ``convert`` and ``resize`` and
            convertible to a NumPy array (a PIL image in this notebook).

    Returns:
        ``(r, g, b)`` as plain Python ``int`` values.
    """
    # Force three channels first: RGBA, grayscale or palette images would
    # otherwise break (or silently scramble) the reshape(-1, 3) below.
    thumbnail = image.convert("RGB").resize((50, 50))
    pixels = np.asarray(thumbnail).reshape(-1, 3)
    # tolist() yields built-in ints, so the returned tuple formats as
    # "(12, 34, 56)" regardless of how NumPy reprs its scalar types.
    counts = Counter(map(tuple, pixels.tolist()))
    return counts.most_common(1)[0][0]

# Generate an English description with CLIP
def generate_clip_description(image, objects):
    """Label every detected box with its best-matching CLIP prompt and colour.

    For each box the crop's dominant colour is computed, eight prompts of the
    form ``"a <colour> <object>"`` are built, and the prompt whose CLIP text
    embedding has the highest cosine similarity with the crop's image
    embedding is kept.

    Args:
        image: PIL image the boxes refer to.
        objects: iterable of boxes whose first four values are the
            ``x1, y1, x2, y2`` corner coordinates.

    Returns:
        A list of ``(prompt, dominant_color)`` tuples, one per box that has a
        non-empty crop.
    """
    descriptions = []
    for obj in objects:
        x1, y1, x2, y2 = map(int, obj[:4])
        # Truncating to int can collapse a very thin box to zero width or
        # height; such a crop has no pixels to analyse, so skip it.
        if x2 <= x1 or y2 <= y1:
            continue
        cropped_image = image.crop((x1, y1, x2, y2))
        dominant_color = get_dominant_color(cropped_image)
        # Format from built-in ints so the prompt reads "(12, 34, 56)" even
        # when the colour tuple holds NumPy scalars (NumPy 2 reprs those as
        # "np.uint8(12)").
        color_name = f"{tuple(int(channel) for channel in dominant_color)}"

        # Candidate prompts, each carrying the colour information.
        texts = [f"a {color_name} room", f"a {color_name} chair", f"a {color_name} table", f"a {color_name} bed",
                 f"a {color_name} lamp", f"a {color_name} window", f"a {color_name} door", f"a {color_name} sofa"]
        image_inputs = clip_processor(images=cropped_image, return_tensors="pt")
        text_inputs = clip_processor(text=texts, return_tensors="pt", padding=True)

        # Inference only: run both encoders under no_grad so no autograd graph
        # is recorded for either the image or the text features.
        with torch.no_grad():
            image_features = clip_model.get_image_features(**image_inputs)
            text_features = clip_model.get_text_features(**text_inputs)

        # Cosine similarity between the single image embedding and each prompt.
        similarities = torch.nn.functional.cosine_similarity(image_features, text_features)
        best_match = similarities.argmax().item()
        descriptions.append((texts[best_match], dominant_color))
    return descriptions

def _color_cosine_similarity(color1, color2):
    """Cosine similarity of two RGB triples, defined for black as well."""
    # Cast to float first: the channels may be uint8 and would overflow in the
    # dot product / norm otherwise.
    first = np.asarray(color1, dtype=float)
    second = np.asarray(color2, dtype=float)
    first_norm = np.linalg.norm(first)
    second_norm = np.linalg.norm(second)
    if first_norm == 0 or second_norm == 0:
        # Pure black is the zero vector and has no direction: treat two blacks
        # as identical and black versus any other colour as dissimilar.
        return 1.0 if first_norm == second_norm else 0.0
    cosine = np.dot(first, second) / (first_norm * second_norm)
    return float(np.clip(cosine, -1.0, 1.0))


# Compute description similarity, including a colour comparison
def calculate_similarity(desc1, desc2):
    """Score every pair of descriptions from two images.

    Args:
        desc1, desc2: lists of ``(prompt, color)`` tuples, where ``prompt`` is
            a string ending in the object word (e.g. ``"a (1, 2, 3) chair"``)
            and ``color`` is an RGB triple.

    Returns:
        A flat list with one score per ``(desc1 item, desc2 item)`` pair,
        ordered with ``desc1`` as the outer loop. Each score is
        ``0.5 * text_similarity + 0.5 * color_similarity``.
    """
    similarities = []
    for d1, color1 in desc1:
        # The colour inside the prompt contains spaces itself, so only the
        # last word is the object label; compare that and nothing else.
        label1 = d1.rsplit(" ", 1)[-1]
        for d2, color2 in desc2:
            text_similarity = 1 if label1 == d2.rsplit(" ", 1)[-1] else 0
            color_similarity = _color_cosine_similarity(color1, color2)
            # Equal weighting of the object match and the colour match.
            combined_similarity = 0.5 * text_similarity + 0.5 * color_similarity
            similarities.append(combined_similarity)
    return similarities

def process_folder(folder_path):
    """Describe every ``*.jpg`` image directly inside ``folder_path``.

    Each image is run through ``detect_objects`` and the resulting boxes are
    labelled with ``generate_clip_description``. A progress line is printed
    per image.

    Args:
        folder_path: directory to scan (not recursive).

    Returns:
        A dict mapping each image path to its list of descriptions, in sorted
        path order.
    """
    # Escape the folder so characters such as "[" in its name are matched
    # literally; sort so results do not depend on directory listing order.
    pattern = os.path.join(glob.escape(folder_path), "*.jpg")
    image_paths = sorted(glob.glob(pattern))
    all_descriptions = {}
    for image_path in image_paths:
        objects, _ = detect_objects(image_path)
        # Image.open keeps the file open until the data is read; convert()
        # loads the pixels, and the context manager then releases the handle.
        # RGB also guarantees three channels for the colour extraction.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        descriptions = generate_clip_description(image, objects)
        all_descriptions[image_path] = descriptions
        print(f"Processed {image_path}")
    return all_descriptions

# Folders to compare.
folder1 = "C:\\Users\\user\\OneDrive\\桌面\\爬蟲\\gold_house\\8713071 - 複製"
folder2 = "C:\\Users\\user\\OneDrive\\桌面\\爬蟲\\gold_house\\8713071"

descriptions1 = process_folder(folder1)
descriptions2 = process_folder(folder2)

# Score every image of folder1 against every image of folder2. Images without
# any detected object have nothing to compare and are left out.
all_similarities = []
for path1, desc1 in descriptions1.items():
    for path2, desc2 in descriptions2.items():
        if len(desc1) > 0 and len(desc2) > 0:
            similarities = calculate_similarity(desc1, desc2)
            # calculate_similarity returns one value per (object1, object2)
            # pair with desc1 as the outer loop, i.e. a row-major matrix.
            pair_scores = np.asarray(similarities, dtype=float).reshape(len(desc1), len(desc2))
            # Each object of the smaller set contributes its best match in the
            # larger set; unmatched extras in the larger set count as zero via
            # the max(...) denominator. This keeps the score within [0, 1]
            # (summing all pairs and dividing by max(...) could exceed 1).
            best_axis = 1 if len(desc1) <= len(desc2) else 0
            best_matches = pair_scores.max(axis=best_axis)
            similarity_score = float(best_matches.sum() / max(len(desc1), len(desc2)))
            all_similarities.append((path1, path2, similarity_score, desc1, desc2))

# Thresholds: an image pair above image_threshold counts as "same room"; the
# folders count as "same house" when the share of such pairs exceeds
# folder_threshold.
image_threshold = 0.8
folder_threshold = 0.6
similar_count = 0

if all_similarities:
    for path1, path2, score, desc1, desc2 in all_similarities:
        print(f"Comparing {path1} and {path2}")
        print(f"CLIP Descriptions for {path1}: {desc1}")
        print(f"CLIP Descriptions for {path2}: {desc2}")
        print(f"Similarity Score: {score:.2f}")
        if score > image_threshold:
            print("The images are likely from the same room.")
            similar_count += 1
        else:
            print("The images are likely from different rooms.")
        print()

    folder_similarity_ratio = similar_count / len(all_similarities)
    print(f"Similarity ratio between the two folders: {folder_similarity_ratio:.2f}")
    if folder_similarity_ratio > folder_threshold:
        print("The folders are likely from the same house.")
    else:
        print("The folders are likely from different houses.")
else:
    print("No similarities found between the two folders.")



image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image1.jpg: 480x640 1 couch, 1 tv, 1 refrigerator, 229.0ms
Speed: 5.0ms preprocess, 229.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image1.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image2.jpg: 640x480 1 bed, 136.0ms
Speed: 4.5ms preprocess, 136.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image2.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image3.jpg: 640x480 (no detections), 142.3ms
Speed: 5.0ms preprocess, 142.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image3.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image4.jpg: 640x480 1 toilet, 2 sinks, 131.9ms
Speed: 4.0ms preprocess, 131.9ms inference, 2.0ms postprocess per 

## 資料夾間比對

In [36]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from ultralytics import YOLO
from collections import Counter
import numpy as np
from sklearn.metrics import pairwise_distances
import os
import glob

# Pretrained models shared by every function below: a YOLOv8 detector for the
# bounding boxes and a CLIP model/processor pair for image and text embeddings.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

yolo_model = YOLO("yolov8n.pt")  # swap in other YOLOv8 weights if appropriate

clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)

# Object detection
def detect_objects(image_path):
    """Run the YOLO model on one image and return its bounding boxes.

    Args:
        image_path: value passed straight to ``yolo_model`` (the callers in
            this notebook pass a file path).

    Returns:
        ``(boxes, results)`` where ``boxes`` is the ``xyxy`` box array of the
        first result, moved to the CPU and converted to NumPy, and ``results``
        is the raw object returned by ``yolo_model``.
    """
    detections = yolo_model(image_path)
    first_result_boxes = detections[0].boxes
    boxes_xyxy = first_result_boxes.xyxy.cpu().numpy()
    return boxes_xyxy, detections

# Extract the dominant colour
def get_dominant_color(image):
    """Return the most frequent RGB colour of ``image`` as a tuple of ints.

    The image is converted to RGB and shrunk to 50x50 before counting, so the
    result is the most common exact ``(r, g, b)`` triple among at most 2500
    pixels.

    Args:
        image: an image object providing ``convert`` and ``resize`` and
            convertible to a NumPy array (a PIL image in this notebook).

    Returns:
        ``(r, g, b)`` as plain Python ``int`` values.
    """
    # Force three channels first: RGBA, grayscale or palette images would
    # otherwise break (or silently scramble) the reshape(-1, 3) below.
    thumbnail = image.convert("RGB").resize((50, 50))
    pixels = np.asarray(thumbnail).reshape(-1, 3)
    # tolist() yields built-in ints, so the returned tuple formats as
    # "(12, 34, 56)" regardless of how NumPy reprs its scalar types.
    counts = Counter(map(tuple, pixels.tolist()))
    return counts.most_common(1)[0][0]

# Generate an English description with CLIP
def generate_clip_description(image, objects):
    """Label every detected box with its best-matching CLIP prompt and colour.

    For each box the crop's dominant colour is computed, eight prompts of the
    form ``"a <colour> <object>"`` are built, and the prompt whose CLIP text
    embedding has the highest cosine similarity with the crop's image
    embedding is kept.

    Args:
        image: PIL image the boxes refer to.
        objects: iterable of boxes whose first four values are the
            ``x1, y1, x2, y2`` corner coordinates.

    Returns:
        A list of ``(prompt, dominant_color)`` tuples, one per box that has a
        non-empty crop.
    """
    descriptions = []
    for obj in objects:
        x1, y1, x2, y2 = map(int, obj[:4])
        # Truncating to int can collapse a very thin box to zero width or
        # height; such a crop has no pixels to analyse, so skip it.
        if x2 <= x1 or y2 <= y1:
            continue
        cropped_image = image.crop((x1, y1, x2, y2))
        dominant_color = get_dominant_color(cropped_image)
        # Format from built-in ints so the prompt reads "(12, 34, 56)" even
        # when the colour tuple holds NumPy scalars (NumPy 2 reprs those as
        # "np.uint8(12)").
        color_name = f"{tuple(int(channel) for channel in dominant_color)}"

        # Candidate prompts, each carrying the colour information.
        texts = [f"a {color_name} room", f"a {color_name} chair", f"a {color_name} table", f"a {color_name} bed",
                 f"a {color_name} lamp", f"a {color_name} window", f"a {color_name} door", f"a {color_name} sofa"]
        image_inputs = clip_processor(images=cropped_image, return_tensors="pt")
        text_inputs = clip_processor(text=texts, return_tensors="pt", padding=True)

        # Inference only: run both encoders under no_grad so no autograd graph
        # is recorded for either the image or the text features.
        with torch.no_grad():
            image_features = clip_model.get_image_features(**image_inputs)
            text_features = clip_model.get_text_features(**text_inputs)

        # Cosine similarity between the single image embedding and each prompt.
        similarities = torch.nn.functional.cosine_similarity(image_features, text_features)
        best_match = similarities.argmax().item()
        descriptions.append((texts[best_match], dominant_color))
    return descriptions

def _color_cosine_similarity(color1, color2):
    """Cosine similarity of two RGB triples, defined for black as well."""
    # Cast to float first: the channels may be uint8 and would overflow in the
    # dot product / norm otherwise.
    first = np.asarray(color1, dtype=float)
    second = np.asarray(color2, dtype=float)
    first_norm = np.linalg.norm(first)
    second_norm = np.linalg.norm(second)
    if first_norm == 0 or second_norm == 0:
        # Pure black is the zero vector and has no direction: treat two blacks
        # as identical and black versus any other colour as dissimilar.
        return 1.0 if first_norm == second_norm else 0.0
    cosine = np.dot(first, second) / (first_norm * second_norm)
    return float(np.clip(cosine, -1.0, 1.0))


# Compute description similarity, including a colour comparison
def calculate_similarity(desc1, desc2):
    """Score every pair of descriptions from two images.

    Args:
        desc1, desc2: lists of ``(prompt, color)`` tuples, where ``prompt`` is
            a string ending in the object word (e.g. ``"a (1, 2, 3) chair"``)
            and ``color`` is an RGB triple.

    Returns:
        A flat list with one score per ``(desc1 item, desc2 item)`` pair,
        ordered with ``desc1`` as the outer loop. Each score is
        ``0.5 * text_similarity + 0.5 * color_similarity``.
    """
    similarities = []
    for d1, color1 in desc1:
        # The colour inside the prompt contains spaces itself, so only the
        # last word is the object label; compare that and nothing else.
        label1 = d1.rsplit(" ", 1)[-1]
        for d2, color2 in desc2:
            text_similarity = 1 if label1 == d2.rsplit(" ", 1)[-1] else 0
            color_similarity = _color_cosine_similarity(color1, color2)
            # Equal weighting of the object match and the colour match.
            combined_similarity = 0.5 * text_similarity + 0.5 * color_similarity
            similarities.append(combined_similarity)
    return similarities

def process_folder(folder_path):
    """Describe every ``*.jpg`` image directly inside ``folder_path``.

    Each image is run through ``detect_objects`` and the resulting boxes are
    labelled with ``generate_clip_description``. A progress line is printed
    per image.

    Args:
        folder_path: directory to scan (not recursive).

    Returns:
        A dict mapping each image path to its list of descriptions, in sorted
        path order.
    """
    # Escape the folder so characters such as "[" in its name are matched
    # literally; sort so results do not depend on directory listing order.
    pattern = os.path.join(glob.escape(folder_path), "*.jpg")
    image_paths = sorted(glob.glob(pattern))
    all_descriptions = {}
    for image_path in image_paths:
        objects, _ = detect_objects(image_path)
        # Image.open keeps the file open until the data is read; convert()
        # loads the pixels, and the context manager then releases the handle.
        # RGB also guarantees three channels for the colour extraction.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        descriptions = generate_clip_description(image, objects)
        all_descriptions[image_path] = descriptions
        print(f"Processed {image_path}")
    return all_descriptions

# Folders to compare.
folder1 = "C:\\Users\\user\\OneDrive\\桌面\\爬蟲\\gold_house\\8713071 - 複製"
folder2 = "C:\\Users\\user\\OneDrive\\桌面\\爬蟲\\gold_house\\8713071"

descriptions1 = process_folder(folder1)
descriptions2 = process_folder(folder2)

# Score every image of folder1 against every image of folder2. Images without
# any detected object have nothing to compare and are left out.
all_similarities = []
for path1, desc1 in descriptions1.items():
    for path2, desc2 in descriptions2.items():
        if len(desc1) > 0 and len(desc2) > 0:
            similarities = calculate_similarity(desc1, desc2)
            # calculate_similarity returns one value per (object1, object2)
            # pair with desc1 as the outer loop, i.e. a row-major matrix.
            pair_scores = np.asarray(similarities, dtype=float).reshape(len(desc1), len(desc2))
            # Each object of the smaller set contributes its best match in the
            # larger set; unmatched extras in the larger set count as zero via
            # the max(...) denominator. This keeps the score within [0, 1]
            # (summing all pairs and dividing by max(...) could exceed 1).
            best_axis = 1 if len(desc1) <= len(desc2) else 0
            best_matches = pair_scores.max(axis=best_axis)
            similarity_score = float(best_matches.sum() / max(len(desc1), len(desc2)))
            all_similarities.append((path1, path2, similarity_score, desc1, desc2))

# Thresholds: an image pair above image_threshold counts as "same room"; the
# folders count as "same house" when the share of such pairs exceeds
# folder_threshold.
image_threshold = 0.8
folder_threshold = 0.6
similar_count = 0

if all_similarities:
    for path1, path2, score, desc1, desc2 in all_similarities:
        print(f"Comparing {path1} and {path2}")
        print(f"CLIP Descriptions for {path1}: {desc1}")
        print(f"CLIP Descriptions for {path2}: {desc2}")
        print(f"Similarity Score: {score:.2f}")
        if score > image_threshold:
            print("The images are likely from the same room.")
            similar_count += 1
        else:
            print("The images are likely from different rooms.")
        print()

    folder_similarity_ratio = similar_count / len(all_similarities)
    print(f"Similarity ratio between the two folders: {folder_similarity_ratio:.2f}")
    if folder_similarity_ratio > folder_threshold:
        print("The folders are likely from the same house.")
    else:
        print("The folders are likely from different houses.")
else:
    print("No similarities found between the two folders.")



image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image1.jpg: 480x640 1 couch, 1 tv, 1 refrigerator, 190.0ms
Speed: 5.1ms preprocess, 190.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image1.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image2.jpg: 640x480 1 bed, 134.9ms
Speed: 4.1ms preprocess, 134.9ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 480)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image2.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image3.jpg: 640x480 (no detections), 135.7ms
Speed: 4.9ms preprocess, 135.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)
Processed C:\Users\user\OneDrive\桌面\爬蟲\gold_house\8713071 - 複製\image3.jpg

image 1/1 C:\Users\user\OneDrive\\\gold_house\8713071 - \image4.jpg: 640x480 1 toilet, 2 sinks, 131.1ms
Speed: 4.0ms preprocess, 131.1ms inference, 2.0ms postprocess per 