## Description
We are experimenting with Image to Video generation and to benchmark the performance, need to flag the abnormalities in the animation. Below lie two approaches to do so. </br>  1) Iteratively calculating IOUs of masks of each frames.  </br> 2) Track the translation of object on the image canvas using the bbox coords.




- No installations and dependencies as such bcoz its just manipulation of coords and mask images.

#### Imports

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import random
import shutil


#### Select required folders

In [2]:
def find_matching_folders(root_dir):
    matching_folders = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        if "harmonized_image" in dirpath and "I2V" in dirpath:
            matching_folders.append(os.path.join(dirpath, ))
            dirnames.clear()
    
    return matching_folders

base_folders = find_matching_folders("./Data/")

mask_folders = [os.path.join(i, "2/mask/") for i in base_folders]

print("Matching folders:")
print(random.choices(base_folders, k = 3))



Matching folders:
['./Data/img_to_vid_samples/2/06f37289-2aeb-40c2-9b03-a8fd16f79b46/I2V/harmonized_image_1', './Data/img_to_vid_samples/3/3d3723d3-edc7-438e-ab2a-198a345ffb9f/I2V/harmonized_image_3', './Data/img_to_vid_samples/3/0e1b4f54-0387-413e-99d7-305f6cb9ed4b/I2V/harmonized_image_3']


## 1.1  Calculate_iou iteratively over each mask


In [104]:
ACCEPTED_IOU_THRESH = 0.92

def read_masks(file_path):
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    if image is not None:
        _, thresholded_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    else:
        image = None
        print(f"Failed to read {file_path}")
    return image

def calculate_iou(mask1, mask2):
    mask1 = (mask1 > 0).astype(np.uint8)
    mask2 = (mask2 > 0).astype(np.uint8)
    
    # Calculate intersection and union
    intersection = np.logical_and(mask1, mask2)
    union = np.logical_or(mask1, mask2)
    
    # Calculate IoU
    iou = np.sum(intersection) / np.sum(union)
    return iou


In [105]:
mask_analysis_df = pd.DataFrame({})

for relative_path in mask_folders:
    file_names = os.listdir(relative_path)
    
    ref_mask_path_0  = os.path.join(relative_path, "0.png")

    reference_mask = read_masks(ref_mask_path_0)

    valid_animation = "True"

    for file_idx in range(1, len(file_names[1:])):
        file = str(file_idx) + ".png"

        file_path = os.path.join(relative_path, file)
        
        current_mask = read_masks(file_path)

        iou_val = calculate_iou(reference_mask, current_mask)

        if iou_val > ACCEPTED_IOU_THRESH:
            reference_mask = current_mask.copy()
        else:
            valid_animation = "False"
            break




    result_dict = {
        "relative_path": relative_path,
        "valid_animation": valid_animation,
        "ref_file" : str(file_idx - 1) + ".png",
        "current_file" : file,
        "iou" : str(iou_val)
    }

    print(result_dict)

    row = pd.DataFrame(result_dict, index = [0])
    mask_analysis_df = pd.concat([mask_analysis_df, row], ignore_index=True)
    

{'relative_path': './Data/a14487e1-7656-4edb-ac6c-e457aea64b1b/I2V/harmonized_image_3/2/mask/', 'valid_animation': 'True', 'ref_file': '302.png', 'current_file': '303.png', 'iou': '0.9966237942122187'}
{'relative_path': './Data/a14487e1-7656-4edb-ac6c-e457aea64b1b/I2V/harmonized_image_2/2/mask/', 'valid_animation': 'True', 'ref_file': '302.png', 'current_file': '303.png', 'iou': '0.9965859209884571'}
{'relative_path': './Data/a14487e1-7656-4edb-ac6c-e457aea64b1b/I2V/harmonized_image_4/2/mask/', 'valid_animation': 'True', 'ref_file': '302.png', 'current_file': '303.png', 'iou': '0.9985260399606944'}
{'relative_path': './Data/a14487e1-7656-4edb-ac6c-e457aea64b1b/I2V/harmonized_image_1/2/mask/', 'valid_animation': 'True', 'ref_file': '302.png', 'current_file': '303.png', 'iou': '0.9964584674822924'}
{'relative_path': './Data/0c52cc3c-c2fb-4e87-ae20-4ecfe327eaa5/I2V/harmonized_image_3/2/mask/', 'valid_animation': 'False', 'ref_file': '138.png', 'current_file': '139.png', 'iou': '0.91436619

## 1.2 Calculate displacement of mask bboxes

In [3]:
print(random.choices(base_folders, k=2))
print(len(base_folders))

['./Data/img_to_vid_samples/4/a0a89946-df77-4caf-8f7b-cb5c40e898c1/I2V/harmonized_image_3', './Data/img_to_vid_samples/3/2d16c514-d1e4-4b5d-938d-b5bac7e5569e/I2V/harmonized_image_3']
315


In [4]:
def calculate_displacement(coords1, coords2):
    if len(coords1) != len(coords2):
        return np.inf
    
    displacement = np.sqrt(np.sum((coords1 - coords2) ** 2, axis=1))
    return displacement

In [5]:
abs_path = "2/mask_coords.npy"
coords_file_paths = [os.path.join(directory, abs_path) for directory in base_folders]

print(len(coords_file_paths))

defaulters = [i for i in coords_file_paths if not(os.path.exists(i))]
len(defaulters)

315


0

##### Test Displacement

In [131]:
abrupt_changes = []
DISPLACEMENT_THRESHOLD = 10

np_array = np.load("./Data/a14487e1-7656-4edb-ac6c-e457aea64b1b/I2V/harmonized_image_1/2/mask_coords.npy")
reference_coord = np_array[0]
[[ref_x1, ref_y1], [ref_x2, ref_y2], [ref_x3, ref_y3], [ref_x4, ref_y4]] = reference_coord

for idx, mask_coord in enumerate(np_array[1:]):
    [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] = mask_coord


    
    if reference_coord is not None:
        displacement = calculate_displacement(reference_coord, mask_coord)
        
        if np.any(displacement > DISPLACEMENT_THRESHOLD):
            abrupt_changes.append({idx+1 : list(displacement)})
        
        reference_coord = mask_coord.copy()


print(abrupt_changes)
    

[]


#### Displacement calculation router 

In [43]:
bbox_analysis_df = pd.DataFrame({})

DISPLACEMENT_THRESHOLD = 10

for numpy_file in coords_file_paths:

    np_array = np.load(numpy_file)

    abrupt_changes = []


    reference_coord = np_array[0]
    [[ref_x1, ref_y1], [ref_x2, ref_y2], [ref_x3, ref_y3], [ref_x4, ref_y4]] = reference_coord

    for idx, mask_coord in enumerate(np_array[1:]):
        [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] = mask_coord
        
        if reference_coord is not None:
            displacement = calculate_displacement(reference_coord, mask_coord)
            
            if np.any(displacement > DISPLACEMENT_THRESHOLD):
                abrupt_changes.append({idx+1 : list(displacement)})
            
        reference_coord = mask_coord.copy()
    
    valid_animation = "False" if len(abrupt_changes)>1 else "True"


    result_dict = {
        "numpy_file_path": numpy_file,
        "valid_animation": valid_animation,
        "pixel_displacements" : str(list(abrupt_changes)),
    }

    row = pd.DataFrame(result_dict, index = [0])
    bbox_analysis_df = pd.concat([bbox_analysis_df, row], ignore_index=True)



In [44]:
print(bbox_analysis_df.shape)

(315, 3)


In [45]:
bbox_analysis_df["valid_animation"].value_counts()


valid_animation
True     209
False    106
Name: count, dtype: int64

#### 1.2.0 Move wrong Animations to a new folder for eyeballing

In [46]:
false_rows = bbox_analysis_df[bbox_analysis_df["valid_animation"] == "False"]
false_rows.head()

Unnamed: 0,numpy_file_path,valid_animation,pixel_displacements
0,./Data/img_to_vid_samples/4/3f3eab9e-6e6d-4673...,False,"[{74: [135.0, 135.0, 0.0, 0.0]}, {75: [135.0, ..."
1,./Data/img_to_vid_samples/4/3f3eab9e-6e6d-4673...,False,"[{72: [153.0, 153.0, 0.0, 0.0]}, {85: [154.0, ..."
2,./Data/img_to_vid_samples/4/3f3eab9e-6e6d-4673...,False,"[{181: [148.0, 148.0, 0.0, 0.0]}, {182: [39.0,..."
3,./Data/img_to_vid_samples/4/3f3eab9e-6e6d-4673...,False,"[{217: [192.0, 192.0, 0.0, 0.0]}, {221: [192.0..."
4,./Data/img_to_vid_samples/4/1b67354c-b057-44e2...,False,"[{41: [79.0, 79.0, 0.0, 0.0]}, {52: [24.0, 24...."


In [47]:
no_vid_files = []


for index, row in false_rows.iterrows():
    defaulter_numpy_file_path = dict(row)["numpy_file_path"]
    base_uuid = defaulter_numpy_file_path.split("/")[-5]
    sub_folder = "_hi_" + defaulter_numpy_file_path.split("/")[-3][-1]
    final_name = base_uuid + sub_folder
    final_path = os.path.join("/home/pixis/Desktop/eyeballing_vid/", final_name)
    video_path = defaulter_numpy_file_path.replace("mask_coords.npy", "video_M_mat_butter_low_pass.mp4")

    if not os.path.exists(video_path):
        no_vid_files.append(video_path)
    
    shutil.copy(video_path, final_path)

In [42]:
no_vid_files

[]

#### 1.2.1 Testing another set

In [56]:
#### 1.2.1 new generations with affine
new_test_samples = "./Data/img_to_vid_samples/affine_transformation"

def get_abs_file_paths(root_dir):
    all_files = []
    
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.startswith("m") and "mask_coords.npy" in filename:
                all_files.append(os.path.join(dirpath, filename))
    return all_files

new_test_sample_paths = get_abs_file_paths(new_test_samples)



new_bbox_test_df = pd.DataFrame({})

DISPLACEMENT_THRESHOLD = 10

for numpy_file in new_test_sample_paths:

    np_array = np.load(numpy_file)

    abrupt_changes = []


    reference_coord = np_array[0]
    [[ref_x1, ref_y1], [ref_x2, ref_y2], [ref_x3, ref_y3], [ref_x4, ref_y4]] = reference_coord

    for idx, mask_coord in enumerate(np_array[1:]):
        [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] = mask_coord
        
        if reference_coord is not None:
            displacement = calculate_displacement(reference_coord, mask_coord)
            
            if np.any(displacement > DISPLACEMENT_THRESHOLD):
                abrupt_changes.append({idx+1 : list(displacement)})
            
        reference_coord = mask_coord.copy()
    
    valid_animation = "False" if len(abrupt_changes)>1 else "True"


    result_dict = {
        "numpy_file_path": numpy_file,
        "valid_animation": valid_animation,
        "pixel_displacements" : str(list(abrupt_changes)),
    }

    row = pd.DataFrame(result_dict, index = [0])
    new_bbox_test_df = pd.concat([new_bbox_test_df, row], ignore_index=True)

In [58]:
new_bbox_test_df.tail(20)

Unnamed: 0,numpy_file_path,valid_animation,pixel_displacements
0,./Data/img_to_vid_samples/affine_transformatio...,True,[]
1,./Data/img_to_vid_samples/affine_transformatio...,True,[]
2,./Data/img_to_vid_samples/affine_transformatio...,True,[]
3,./Data/img_to_vid_samples/affine_transformatio...,True,[]
4,./Data/img_to_vid_samples/affine_transformatio...,True,[]
5,./Data/img_to_vid_samples/affine_transformatio...,True,[]
6,./Data/img_to_vid_samples/affine_transformatio...,True,[]
7,./Data/img_to_vid_samples/affine_transformatio...,True,[]
8,./Data/img_to_vid_samples/affine_transformatio...,True,[]
9,./Data/img_to_vid_samples/affine_transformatio...,True,[]


In [62]:
false_rows = new_bbox_test_df[new_bbox_test_df["valid_animation"] == "False"]
false_rows.head()

Unnamed: 0,numpy_file_path,valid_animation,pixel_displacements


In [63]:
no_vid_files = []
for index, row in false_rows.iterrows():
    defaulter_numpy_file_path = dict(row)["numpy_file_path"]
    base_uuid = defaulter_numpy_file_path.split("/")[-5]
    sub_folder = "_hi_" + defaulter_numpy_file_path.split("/")[-3][-1]
    final_name = base_uuid + sub_folder
    final_path = os.path.join("/home/pixis/Desktop/eyeballing_vid/new_test/", final_name)
    video_path = defaulter_numpy_file_path.replace("mask_coords.npy", "video_butter_low_pass.mp4")

    if not os.path.exists(video_path):
        no_vid_files.append(video_path)
    
    shutil.copy(video_path, final_path)


# Miscellaneous snippets below

### Solving for irregular masks incase of part of elements are left while masking it.

In [None]:
def get_white_pixel_coords_and_bounds(mask_path):

    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    
    white_pixel_coords = np.column_stack(np.where(mask == 255))
    
    min_x, min_y = np.min(white_pixel_coords, axis=0)
    max_x, max_y = np.max(white_pixel_coords, axis=0)
    
    return white_pixel_coords, (min_x, min_y), (max_x, max_y)

## Clean the unwanted folders.

In [155]:
# for i in defaulters:
#     del_folder = i.replace("/2/mask_coords.npy", "")
#     shutil.rmtree(del_folder)