In [209]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import norm
from PIL import Image

In [210]:
# Select the panoptic segmentation backend and derive the picture / mask / label paths.
# Only the result directory depends on the mode; the two input pictures are the same files.
RESULT_DIRS = {1: './DETR_result/', 2: './SETR_result/'}

panoptic_mode = int(input("Select the mode of panoptic: 1 for DETR, 2 for SETR: "))
if panoptic_mode not in RESULT_DIRS:
    # An unknown mode used to fall through silently and leave every variable below
    # undefined, which only surfaced as a NameError in a later cell.
    raise ValueError(
        f"Unsupported panoptic mode {panoptic_mode!r}; expected one of {sorted(RESULT_DIRS)}"
    )

Filepath = RESULT_DIRS[panoptic_mode]
image1 = cv2.imread('image1.jpg')
image2 = cv2.imread('image2.jpg')

# cv2.imread does not raise on a missing/unreadable file, it returns None instead.
for image_name, loaded_image in (('image1.jpg', image1), ('image2.jpg', image2)):
    if loaded_image is None:
        raise FileNotFoundError(f"Could not read image file: {image_name}")

mask_label_path1 = Filepath + 'image1/'
mask_label_path2 = Filepath + 'image2/'

Select the mode of panoptic: 1 for DETR, 2 for SETR:  1


In [211]:
# create a dictionary to store the mask and label and category
def read_masks_and_labels(mask_label_path):
    """Load the label / category list and the matching mask images of one picture.

    ``image_categories.txt`` inside ``mask_label_path`` is read line by line.
    Each non-blank line is split on ``,``; the value after the ``:`` of the first
    field is parsed as the integer label and the value after the ``:`` of the
    second field is kept as the category string. For every label the file
    ``mask_class_<label>.png`` is read from the same directory as a grayscale image.

    Args:
        mask_label_path: Directory holding ``image_categories.txt`` and the mask
            PNGs. A trailing path separator is optional.

    Returns:
        dict mapping a running index (0, 1, ...) to
        ``{'labels': int, 'categories': str, 'masks': numpy array}``.

    Raises:
        FileNotFoundError: if a mask image listed in the category file is
            missing or cannot be decoded.
    """
    labels = []
    categories = []

    with open(os.path.join(mask_label_path, 'image_categories.txt'), 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            parts = line.split(',')
            label = parts[0].split(':')[1].strip()
            category = parts[1].split(':')[1].strip()
            labels.append(int(label))
            categories.append(category)

    mask_dict = {}

    for i, (label, category) in enumerate(zip(labels, categories)):
        mask_file = os.path.join(mask_label_path, 'mask_class_' + str(label) + '.png')
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread returns None instead of raising; fail here rather than
            # storing an unusable object array in the dictionary.
            raise FileNotFoundError(f"Could not read mask image: {mask_file}")
        mask_dict[i] = {'labels': label, 'categories': category, 'masks': np.array(mask)}

    return mask_dict

# Load the labels, categories and grayscale masks of each picture from its
# result directory (mask_label_path1 / mask_label_path2).
mask_dict1 = read_masks_and_labels(mask_label_path1)
mask_dict2 = read_masks_and_labels(mask_label_path2)

In [212]:
# print(mask_dict1)
# print(mask_dict2)

In [213]:
def get_id_from_dict(mask_dict):
    """Return the set of 'labels' values stored in ``mask_dict``.

    The dictionary is expected to be keyed by consecutive integers starting
    at 0; entries are looked up by position, not by iterating the keys.
    """
    return {mask_dict[index]['labels'] for index in range(len(mask_dict))}

def invert_intersection_masks(modified_dict):
    """Append an 'others' entry covering every pixel not claimed by any mask.

    All masks in ``modified_dict`` are combined into one union (a pixel is
    covered when it is non-zero in at least one mask) and the complement of
    that union is stored under the next free integer key with label 99 and
    category 'others'. The dictionary is modified in place and also returned.
    An empty dictionary is returned unchanged.

    The complement is built from a boolean test and written as 0 / 255 uint8.
    A plain bitwise NOT only gives a usable mask when foreground pixels are
    exactly 255: for e.g. 0 / 1 masks it yields 254 / 255, which is non-zero
    everywhere and is therefore treated as "all foreground" by the ``> 0``
    test in ``process_image``.

    Args:
        modified_dict: dict of entries that each hold a 'masks' array; all
            masks must share one shape so they can be stacked.

    Returns:
        The same dictionary object, with the extra 'others' entry when it was
        not empty.
    """
    if len(modified_dict) > 0:
        intersection_label = 99
        # Boolean foreground per mask, independent of the stored value scale.
        foreground_stack = np.stack(
            [np.asarray(item['masks']) > 0 for item in modified_dict.values()]
        )
        covered = foreground_stack.any(axis=0)
        intersection_mask = np.where(covered, 0, 255).astype(np.uint8)
        modified_dict[len(modified_dict)] = {
            'labels': intersection_label,
            'categories': 'others',
            'masks': intersection_mask
        }
    return modified_dict

def modified_dict_by_class(mask_dict1, mask_dict2):
    """Reduce both mask dictionaries to the labels they have in common.

    Entries of each dictionary are kept, in their original order and re-keyed
    from 0, when their 'labels' value also occurs in the other dictionary.
    Each filtered dictionary is then passed through
    ``invert_intersection_masks``.

    Returns:
        Tuple ``(modified_dict1, modified_dict2)``.
    """
    labels_in_first = get_id_from_dict(mask_dict1)
    labels_in_second = get_id_from_dict(mask_dict2)

    # Entries of the first dictionary whose label also exists in the second.
    shared_first = dict(enumerate(
        entry for entry in mask_dict1.values() if entry['labels'] in labels_in_second
    ))

    # Entries of the second dictionary whose label also exists in the first.
    shared_second = dict(enumerate(
        entry for entry in mask_dict2.values() if entry['labels'] in labels_in_first
    ))

    result_first = invert_intersection_masks(shared_first)
    result_second = invert_intersection_masks(shared_second)
    return result_first, result_second

# Keep only the entries whose label occurs in both pictures; each resulting
# dictionary is post-processed by invert_intersection_masks.
modified_dict1, modified_dict2 = modified_dict_by_class(mask_dict1, mask_dict2)

In [214]:
# print(modified_dict1)
# print(modified_dict2)

In [215]:
# Preparation for SIFT feature extraction:
# convert both pictures from BGR to single-channel grayscale.
gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

In [216]:
def process_image(gray, mask):
    """Black out everything in ``gray`` that lies outside the (grown) mask.

    The mask is resized to the width/height of ``gray``, turned into a 0/1
    array (non-zero pixels become 1), dilated with a 3x3 kernel for five
    iterations and multiplied element-wise with the grayscale array.

    Returns:
        The masked grayscale picture as a PIL image.
    """
    target_size = (gray.shape[1], gray.shape[0])  # cv2.resize takes (width, height)
    resized_mask = cv2.resize(mask, target_size)

    # Any non-zero pixel counts as foreground.
    foreground = (np.array(resized_mask) > 0).astype(np.uint8)

    # Grow the foreground region before cropping.
    grown = cv2.dilate(foreground, np.ones((3, 3), np.uint8), iterations=5)

    # Multiplying by the 0/1 mask zeroes every pixel outside of it.
    return Image.fromarray(np.array(gray) * grown)


def process_images_and_store(gray, mask_dict):
    """Apply every mask of ``mask_dict`` to ``gray`` and collect the results.

    ``mask_dict`` is read by position (keys 0 .. len-1). For each entry the
    masked picture from ``process_image`` is converted to a numpy array.

    Returns:
        dict mapping the same index to
        ``{'label': ..., 'category': ..., 'image': numpy array}``.
    """
    processed_images = {}

    for index in range(len(mask_dict)):
        entry = mask_dict[index]

        # Masked picture as an array rather than a PIL image.
        masked = np.array(process_image(gray, entry['masks']))

        category, label = entry['categories'], entry['labels']
        processed_images[index] = {'label': label, 'category': category, 'image': masked}

    return processed_images

In [217]:
# Use the function to process the images and store them in dictionaries.
# Each grayscale picture must be masked with its OWN mask dictionary:
# gray1 with modified_dict1 and gray2 with modified_dict2.
processed_images1 = process_images_and_store(gray1, modified_dict1)
processed_images2 = process_images_and_store(gray2, modified_dict2)

In [222]:
# plt.imshow(processed_images1[0]['image'], cmap='gray')

In [223]:
# plt.imshow(processed_images1[1]['image'], cmap='gray')

In [224]:
# plt.imshow(processed_images1[2]['image'], cmap='gray')

In [233]:
# Inspect the masked picture stored under key 2 of processed_images1.
processed_images1[2]['image']

array([[145, 142, 128, ...,  99,  74,  89],
       [153, 134, 118, ..., 101,  84, 106],
       [140, 140, 132, ..., 110,  97,  95],
       ...,
       [ 19,  19,  17, ..., 162, 160, 162],
       [ 17,  22,  18, ..., 180, 176, 162],
       [ 14,  24,  19, ..., 169, 164, 156]], dtype=uint8)

In [231]:
# Inspect the unmasked grayscale version of image1 for comparison.
gray1

array([[145, 142, 128, ...,  99,  74,  89],
       [153, 134, 118, ..., 101,  84, 106],
       [140, 140, 132, ..., 110,  97,  95],
       ...,
       [ 19,  19,  17, ..., 162, 160, 162],
       [ 17,  22,  18, ..., 180, 176, 162],
       [ 14,  24,  19, ..., 169, 164, 156]], dtype=uint8)

In [168]:
# Intended: use the masks of matching labels to separate the feature points per label.
# NOTE(review): the masks are not applied here yet - None is passed as the mask
# argument, so keypoints are detected on the full grayscale pictures.
# Initialize the feature detector and extractor (SIFT)
sift = cv2.SIFT_create()
# Detect keypoints and compute descriptors for both images
keypoints1, descriptors1 = sift.detectAndCompute(gray1, None)
keypoints2, descriptors2 = sift.detectAndCompute(gray2, None)

In [3]:
# Match the SIFT descriptors of both pictures with a brute-force matcher.
bf = cv2.BFMatcher()

# Number of best matches to keep
num_matches = 50

# Sort all matches by ascending descriptor distance and keep the closest ones
matches = sorted(
    bf.match(descriptors1, descriptors2),
    key=lambda m: m.distance,
)[:num_matches]

# Coordinates of the matched keypoints, shaped (N, 1, 2):
# queryIdx indexes keypoints1, trainIdx indexes keypoints2.
src_points = np.array(
    [keypoints1[m.queryIdx].pt for m in matches], dtype=np.float32
).reshape(-1, 1, 2)
dst_points = np.array(
    [keypoints2[m.trainIdx].pt for m in matches], dtype=np.float32
).reshape(-1, 1, 2)

In [4]:
# TODO: summarize all the matches in one picture


In [5]:
# Estimate the homography matrix mapping src_points onto dst_points
# (RANSAC with a reprojection threshold of 5.0).
homography, _ = cv2.findHomography(src_points, dst_points, cv2.RANSAC, 5.0)

# findHomography returns None for the matrix when no model could be estimated;
# stop here with a clear message instead of failing later inside warpPerspective.
if homography is None:
    raise RuntimeError(
        "Homography estimation failed: no valid model found from the matched keypoints"
    )

In [None]:
# Warp the first image using the homography; the output canvas has the
# (width, height) of the second image so both can be blended afterwards.
result = cv2.warpPerspective(image1, homography, (image2.shape[1], image2.shape[0]))

In [None]:
# Blending the warped image with the second image using alpha blending
alpha = 0.5  # blending factor
blended_image = cv2.addWeighted(result, alpha, image2, 1 - alpha, 0)

# Display the blended image inline with matplotlib. cv2.imshow / cv2.waitKey open
# a native window and block the notebook kernel (and fail on headless machines).
# OpenCV stores pictures as BGR while matplotlib expects RGB, hence the conversion.
plt.figure()
plt.imshow(cv2.cvtColor(blended_image, cv2.COLOR_BGR2RGB))
plt.title('Blended Image')
plt.axis('off')
plt.show()