In [None]:

import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import torch
import torchvision
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from tqdm import tqdm

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Device: ", device)

Device:  cpu


In [1]:
import config as global_config

# Working on Google Street View images
In this notebook, we show a proof of concept for the following idea: compensated foreground-object removal using multi-view images.

## Segmentation using Mask R-CNN

In [None]:
# Directory settings read from the project's `config` module (imported as global_config).
# NOTE(review): presumably the parent folder of the image data; it is not used in the cells visible here.
DATA_DIR_PATH = global_config.IMAGES_FILEPATH_PARENT_DIR
# Folder that get_masks_file_paths scans for "<index>.jpg" mask files.
MASKS_DIR_PATH = global_config.MODEL_MASKS_IMGS_PATH
# Folder handed to list_files to locate the input frames.
IMAGES_FILEPATH_DIR = global_config.IMAGES_FILEPATH_DIR

In [None]:
# Output location for the stitched result images written by generate_warped_images.
# NOTE(review): the value is assumed to end with a path separator — confirm in config.
STICHED_IMAGES_PATH = global_config.STITCHED_IMGS_PATH

## Perspective projection, using a bounding box (for inpainting)

In [None]:
def filenames_to_img_arrays(filenames_list):
    """Load image files as quarter-resolution RGB arrays.

    Each file is read with OpenCV, shrunk to a quarter of its width and height
    (integer division, ``INTER_AREA`` interpolation) and converted from
    OpenCV's BGR channel order to RGB.

    Args:
        filenames_list: Iterable of image file paths.

    Returns:
        list: One RGB array per input path, in the same order.

    Raises:
        OSError: If OpenCV cannot read one of the files.
    """
    video_frames = []
    for filename in filenames_list:
        frame_bgr = cv2.imread(filename)
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail here with the offending path.
        if frame_bgr is None:
            raise OSError(f"Could not read image file: {filename}")
        rows, cols = frame_bgr.shape[:2]
        # cv2.resize expects the target size as (width, height).
        frame_small = cv2.resize(frame_bgr, (cols // 4, rows // 4), interpolation=cv2.INTER_AREA)
        video_frames.append(cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB))
    return video_frames

def list_files(directory_path):
    """Return the numbered files of ``directory_path`` in numeric order.

    Only files located directly in ``directory_path`` whose name (without
    extension) is a decimal number are returned, sorted by that number, so
    ``2.jpg`` comes before ``10.jpg``. Sub-directories (for example
    ``.ipynb_checkpoints``) and files with non-numeric names are ignored.

    Args:
        directory_path: Directory that holds files named ``<index>.<ext>``.

    Returns:
        list[str]: Paths of the numbered files, ordered by index. Empty when
        the directory has no such files or cannot be walked.
    """
    # The first entry yielded by os.walk describes directory_path itself.
    root, _, file_names = next(os.walk(directory_path), (directory_path, [], []))
    print("file_names:", file_names)

    numbered_files = []
    for file_name in file_names:
        stem = os.path.splitext(file_name)[0]
        if stem.isdecimal():
            numbered_files.append((int(stem), file_name))
    numbered_files.sort()

    paths = [os.path.join(root, file_name) for _, file_name in numbered_files]
    print("paths:",paths)
    return paths
# Example usage: collect the frame paths from the configured image directory,
# then load them as downscaled RGB arrays. `equally_spaced_frames_array` is
# read as a module-level variable by generate_warped_images.
images_filenames_list = list_files(IMAGES_FILEPATH_DIR)
equally_spaced_frames_array = filenames_to_img_arrays(images_filenames_list)

file_names: ['2.jpg', '3.jpg', '4.jpg', '5.jpg', '6.jpg', '7.jpg', '8.jpg', '9.jpg', '10.jpg', '11.jpg', '12.jpg', '13.jpg', '14.jpg', '15.jpg', '16.jpg', '17.jpg', '18.jpg', '19.jpg', '1.jpg', '0.jpg']
paths: ['/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/0.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/1.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/2.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/3.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/4.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/5.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/6.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/7.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/8.jpg', '/content/drive/MyDrive/ACV_Project/sample_data/DATA/streetviewdataset/9.jpg',

In [None]:
def get_masks_file_paths(path = MASKS_DIR_PATH):
    """List the ``.jpg`` mask files in ``path`` with the index each one encodes.

    Mask files are expected to be named ``<index>.jpg``; the text before the
    first dot is parsed as the integer index.

    Args:
        path: Directory containing the mask images. Defaults to
            ``MASKS_DIR_PATH``.

    Returns:
        tuple[list[str], list[int]]: Mask file paths and, position by
        position, the index parsed from each file name.

    Raises:
        ValueError: If a ``.jpg`` file name does not start with an integer.
    """
    # Honour the `path` argument (it used to be ignored in favour of the global).
    mask_file_names = [name for name in os.listdir(path) if name.split(".")[-1] == "jpg"]
    # os.path.join works whether or not `path` ends with a separator.
    paths = [os.path.join(path, name) for name in mask_file_names]
    indexes = [int(name.split(".")[0]) for name in mask_file_names]
    return paths, indexes
# Module-level lookup lists read by generate_warped_images: the mask file paths
# and, at the same positions, the frame index parsed from each mask file name.
masks_paths, masks_indexes = get_masks_file_paths()

In [None]:
def find_bounding_box(mask):
    """Return the tightest box around all zero-valued pixels of a 2-D mask.

    Args:
        mask: 2-D array; pixels equal to 0 mark the region of interest.

    Returns:
        list: ``[x1, y1, x2, y2]`` where ``(x1, y1)`` is the smallest and
        ``(x2, y2)`` the largest column/row index holding a zero pixel
        (both inclusive).

    Raises:
        ValueError: If ``mask`` is not 2-D or contains no zero pixel.
    """
    mask = np.asarray(mask)
    if mask.ndim != 2:
        raise ValueError(f"mask must be 2-D, got an array with {mask.ndim} dimension(s)")

    # Each row of zero_indices is the (row, column) position of one zero pixel.
    zero_indices = np.argwhere(mask == 0)
    if zero_indices.size == 0:
        raise ValueError("mask contains no zero pixels, so it has no bounding box")

    y1, x1 = zero_indices.min(axis=0)
    y2, x2 = zero_indices.max(axis=0)
    return [x1, y1, x2, y2]

In [None]:
def _load_binary_mask(mask_path):
    """Read a mask image and return it as a single-channel uint8 array of 0/255."""
    mask = plt.imread(mask_path)
    if mask.ndim == 3:
        # cv2 mask arguments must be single-channel; use the first channel.
        mask = mask[..., 0]
    # Masks are stored as lossy JPEGs, so values near 0 / 255 are snapped to
    # exactly 0 / 255. NOTE(review): assumes 8-bit masks where dark = region
    # to replace — confirm against the mask generator.
    return np.where(mask > 127, 255, 0).astype(np.uint8)


def _expand_and_clip_box(box, image_shape, tolerance):
    """Grow ``box`` by ``tolerance`` x image size per side and clip it to the image."""
    height, width = image_shape[:2]
    pad_h = int(height * tolerance)
    pad_w = int(width * tolerance)
    x1, y1, x2, y2 = (int(v) for v in box)
    # Clipping at 0 matters: a negative slice start would wrap around.
    return (max(x1 - pad_w, 0), max(y1 - pad_h, 0),
            min(x2 + pad_w, width), min(y2 + pad_h, height))


def _keep_only_box(gray, box):
    """Return a copy of ``gray`` that is zero everywhere outside ``box``."""
    x1, y1, x2, y2 = box
    isolated = np.zeros_like(gray)
    isolated[y1:y2, x1:x2] = gray[y1:y2, x1:x2]
    return isolated


def _fill_masked_region(target_rgb, reference_rgb, mask, detector, matcher,
                        tight_tolerance=0.01, wide_tolerance=0.1,
                        min_matches=4, max_matches=2500):
    """Fill the zero region of ``mask`` in ``target_rgb`` from the aligned reference.

    Returns ``target_rgb`` unchanged when too few features match or no
    homography can be estimated.
    """
    hole_mask = cv2.bitwise_not(mask)  # 255 exactly where mask is 0
    target_without_patch = cv2.bitwise_and(target_rgb, target_rgb, mask=mask)

    bounding_box = find_bounding_box(mask)
    target_box = _expand_and_clip_box(bounding_box, target_rgb.shape, tight_tolerance)
    reference_box = _expand_and_clip_box(bounding_box, reference_rgb.shape, wide_tolerance)

    # Frames are RGB (see filenames_to_img_arrays), hence RGB2GRAY.
    target_gray = cv2.cvtColor(target_without_patch, cv2.COLOR_RGB2GRAY)
    reference_gray = cv2.cvtColor(reference_rgb, cv2.COLOR_RGB2GRAY)

    # Features are only searched around the hole: a tight window in the target
    # and a wider one in the reference frame.
    kp_target, desc_target = detector.detectAndCompute(
        _keep_only_box(target_gray, target_box), None)
    kp_reference, desc_reference = detector.detectAndCompute(
        _keep_only_box(reference_gray, reference_box), None)
    if desc_target is None or desc_reference is None:
        return target_rgb
    if len(desc_target) == 0 or len(desc_reference) == 0:
        return target_rgb

    matches = sorted(matcher.match(desc_target, desc_reference), key=lambda m: m.distance)
    if len(matches) < min_matches:  # a homography needs at least 4 point pairs
        return target_rgb
    matches = matches[:max_matches]

    src_pts = np.float32([kp_target[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp_reference[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # Homography mapping reference-frame points onto target-frame points.
    homography, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5)
    if homography is None:
        return target_rgb

    height, width = target_rgb.shape[:2]
    reference_warped = cv2.warpPerspective(reference_rgb, homography, (width, height))

    # Take the hole pixels from the warped reference and everything else from
    # the target; the two masks are exact complements, so no pixel is summed twice.
    reference_patch = cv2.bitwise_and(reference_warped, reference_warped, mask=hole_mask)
    return cv2.add(target_without_patch, reference_patch)


def generate_warped_images(STICHED_IMAGES_PATH = STICHED_IMAGES_PATH):
    """Replace each frame's masked region with content warped from a neighbouring frame.

    Iterates over the module-level ``equally_spaced_frames_array``. For a frame
    whose index appears in ``masks_indexes``, the mask is loaded from
    ``masks_paths``, SIFT features around the masked region are matched against
    the previous frame (frame 1 is used for frame 0), a RANSAC homography is
    estimated, and the warped reference fills the masked pixels. Frames with no
    mask, no masked pixels, no reference frame, or a failed alignment are saved
    unchanged.

    Args:
        STICHED_IMAGES_PATH: Directory that receives one ``<index>.jpg`` per frame.

    Returns:
        None. Results are written to disk with ``plt.imsave``.
    """
    frames = equally_spaced_frames_array

    # Built once: neither object depends on the frame being processed.
    detector = cv2.SIFT_create(5000)
    # SIFT descriptors are float vectors, so they are compared with the L2
    # norm; Hamming distance is only meaningful for binary descriptors.
    matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)

    for idx, image in tqdm(enumerate(frames), total=len(frames)):
        result = image  # fallback: save the frame as-is

        reference_idx = idx - 1 if idx != 0 else 1
        has_reference = reference_idx < len(frames)

        if has_reference and idx in masks_indexes:
            mask = _load_binary_mask(masks_paths[masks_indexes.index(idx)])
            if (mask == 0).any():  # nothing to fill when the mask has no hole
                result = _fill_masked_region(image, frames[reference_idx], mask,
                                             detector, matcher)

        plt.imsave(os.path.join(STICHED_IMAGES_PATH, f"{idx}.jpg"), result)


In [None]:
# Run the fill-in pipeline over every loaded frame; one "<index>.jpg" per frame
# is written under STICHED_IMAGES_PATH.
generate_warped_images(STICHED_IMAGES_PATH = STICHED_IMAGES_PATH)

20it [02:34,  7.74s/it]
