In [None]:
import sys
sys.path.append('RAFT')
from PIL import Image
import argparse
import os
import time
import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

import datasets
from utils import flow_viz
from utils import frame_utils

from raft import RAFT
from utils.utils import InputPadder, forward_interpolate
from IPython.display import clear_output
from scipy.spatial.transform import Rotation as R
import copy
import torchvision.models as models
import torch.nn as nn
import cv2

In [2]:
class PoseTransformer2(nn.Module):
    """Transformer encoder that pools a token sequence into a single vector.

    ``forward`` views its input as ``(B, embed_dim, T)``, swaps the last two
    axes to obtain ``T`` tokens of width ``embed_dim``, encodes them with a
    pre-norm Transformer, averages over the tokens and projects the result
    to ``out_dim`` with a linear layer.

    ``pos_embedding`` is registered as a parameter (and therefore appears in
    the state dict) but it is not added to the tokens in ``forward``.
    """

    def __init__(self, num_layers=12, embed_dim=512, num_heads=8, ff_dim=512,
                 dropout=0.1, out_dim=512):
        super().__init__()

        # Shape (1, 361, embed_dim); 361 = 19 x 19 tokens.
        self.pos_embedding = nn.Parameter(torch.randn(1, 361, embed_dim))

        layer_cfg = dict(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True,  # pre-layer normalization
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_cfg), num_layers=num_layers)

        # Linear head applied to the mean-pooled token.
        self.fc1 = nn.Linear(embed_dim, out_dim)
        self.embed_dim = embed_dim

    def forward(self, x):
        """Return a ``(B, out_dim)`` tensor for an input with ``B`` leading rows."""
        batch = x.size(0)
        tokens = x.view(batch, self.embed_dim, -1).transpose(1, 2)  # (B, T, embed_dim)
        encoded = self.transformer(tokens)
        pooled = encoded.mean(dim=1)
        return self.fc1(pooled)

In [3]:
class PoseTransformer(nn.Module):
    """Linear embedding, Transformer encoder and linear head.

    ``forward`` maps the last input dimension from ``in_dim`` to
    ``embed_dim`` with ``fc0``, views the result as ``(B, embed_dim, T)``,
    swaps the last two axes to obtain ``T`` tokens, encodes them with a
    pre-norm Transformer, averages over the tokens and projects to
    ``out_dim``.

    NOTE(review): for a 2-D input ``(B, in_dim)`` this yields exactly one
    token per row. For inputs with more dimensions the ``view`` reinterprets
    the memory of the ``fc0`` output instead of transposing it -- confirm
    that this is intended before feeding such inputs.
    """

    def __init__(self, in_dim=2, num_layers=12, embed_dim=512, num_heads=8, ff_dim=512,
                 dropout=0.1, out_dim=512):
        super().__init__()

        # Input projection: in_dim -> embed_dim.
        self.fc0 = nn.Linear(in_dim, embed_dim)

        layer_cfg = dict(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True,  # pre-layer normalization
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_cfg), num_layers=num_layers)

        # Linear head applied to the mean-pooled token.
        self.fc1 = nn.Linear(embed_dim, out_dim)
        self.embed_dim = embed_dim

    def forward(self, x):
        """Return a ``(B, out_dim)`` tensor, where ``B = x.shape[0]``."""
        embedded = self.fc0(x)
        batch = embedded.size(0)
        tokens = embedded.view(batch, self.embed_dim, -1).transpose(1, 2)
        encoded = self.transformer(tokens)
        pooled = encoded.mean(dim=1)
        return self.fc1(pooled)

In [4]:
class ModResNet2(nn.Module):
    """ResNet-101 feature extractor with a custom stem and output layer.

    The torchvision ResNet-101 is loaded with pretrained weights, then:

    * ``conv1`` is replaced by a freshly initialised convolution taking
      ``in_chans`` input channels (same kernel size, stride, padding and
      number of output channels as the original stem);
    * the final fully connected layer is replaced by a freshly initialised
      ``nn.Linear`` producing ``out`` features.

    Args:
        in_chans: number of channels of the input images.
        out: size of the output feature vector.
    """

    def __init__(self, in_chans, out):
        super(ModResNet2, self).__init__()
        original_model = models.resnet101(pretrained=True)
        stem = original_model.conv1
        original_model.conv1 = nn.Conv2d(
                    in_channels=in_chans,  # number of input channels is configurable
                    out_channels=stem.out_channels,
                    kernel_size=stem.kernel_size,
                    stride=stem.stride,
                    padding=stem.padding,
                    # nn.Conv2d expects a bool here. Passing the bias tensor
                    # itself only worked because the ResNet stem has none.
                    bias=stem.bias is not None)
        self.features = nn.Sequential(
            original_model.conv1,
            original_model.bn1,
            original_model.relu,
            original_model.maxpool,
            original_model.layer1,
            original_model.layer2,
            original_model.layer3,
            original_model.layer4
        )
        self.avgpool = original_model.avgpool
        num_features = original_model.fc.in_features
        num_out_feas = out
        original_model.fc = nn.Linear(num_features, num_out_feas)
        self.fc = original_model.fc

    def forward(self, x):
        """Run the backbone, pool, flatten and project to ``out`` features."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        out_fc = self.fc(x)
        return out_fc

In [5]:
class SiamesePoseNet3b_trans2d(nn.Module):
    """Siamese network that regresses three outputs from a feature difference.

    Channel 0 and the remaining channel(s) of the input are passed through
    the same ``ModResNet2`` backbone. The difference of the two feature
    vectors is fed to three independent ``PoseTransformer2`` heads with
    output sizes 2, 2 and 4.
    """

    def __init__(self):
        super(SiamesePoseNet3b_trans2d, self).__init__()
        self.model = ModResNet2(1, 512)
        self.model2a = PoseTransformer2(embed_dim=512, out_dim=2, num_layers=2)
        self.model2b = PoseTransformer2(embed_dim=512, out_dim=2, num_layers=2)
        self.model2d = PoseTransformer2(embed_dim=512, out_dim=4, num_layers=2)

    def forward(self, rgbd1):
        """Return ``(pfocal, pcenters, [], pquat)`` for the stacked image pair.

        The third element is always an empty list.
        """
        first_view = rgbd1[:, 0:1, :, :]
        second_view = rgbd1[:, 1:, :, :]
        feat_first = self.model(first_view)
        feat_second = self.model(second_view)
        feat_diff = feat_first - feat_second  # shared input of all three heads
        pfocal = self.model2a(feat_diff)
        pcenters = self.model2b(feat_diff)
        pquat = self.model2d(feat_diff)
        return pfocal, pcenters, [], pquat

In [6]:
class homo_opt2(nn.Module):
    """Predicts a 2-vector per query point, conditioned on an image pair.

    The two input channels are encoded by a shared ``ModResNet2`` backbone.
    Every query point is embedded by ``model2e``, offset by the difference of
    the two image features of its sample, and mapped by ``model2f``.
    """

    def __init__(self):
        super(homo_opt2, self).__init__()
        self.model = ModResNet2(1,512)
        self.model2e = PoseTransformer(in_dim=2, out_dim=512, num_layers=2) #2
        self.model2f = PoseTransformer(in_dim=512, out_dim=2, num_layers=2)#8

    def forward(self, rgbd1, uvc=None):
        """Return one prediction tensor per entry of ``uvc``.

        Args:
            rgbd1: batch whose channel 0 and channels 1+ are the two images.
            uvc: optional sequence with one tensor per batch sample; only the
                first two columns of each tensor are used. Points are
                processed in chunks of at most 20000 rows.

        Returns:
            A list with one vertically stacked prediction tensor per entry of
            ``uvc``; an empty list when ``uvc`` is ``None``.
        """
        f1_rgb = self.model(rgbd1[:,0:1,:,:])
        f2_rgb = self.model(rgbd1[:,1:,:,:])

        p_all_del_uv = []
        if uvc is None:
            return p_all_del_uv

        Nmax = 20000  # maximum number of points per forward pass
        for ii in range(len(uvc)):
            points = uvc[ii][:,0:2]
            inter_p = []
            # range() with a step yields the full chunks followed by the
            # (possibly shorter) remainder chunk.
            for start in range(0, points.shape[0], Nmax):
                # Follow the device of the image features instead of forcing
                # CUDA, so the module also runs on CPU or a non-default GPU.
                chunk = points[start:start + Nmax, :].to(f1_rgb.device)
                f_uv = self.model2e(chunk)
                p_del_uv_ = self.model2f(f_uv + f1_rgb[ii,:] - f2_rgb[ii,:])
                inter_p.append(p_del_uv_)

            p_del_uv = torch.vstack(inter_p)
            p_all_del_uv.append(p_del_uv)
        return p_all_del_uv

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
## RAFT
# Builds the RAFT model from its checkpoint, two independent copies of it
# (mod0, modref) and the homography network modh.
sys.path.append('core')
parser = argparse.ArgumentParser()
parser.add_argument('--model', help="restore checkpoint")
parser.add_argument('--dataset', help="dataset for evaluation")
parser.add_argument('--small', action='store_true', help='use small model')
parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation')
# parse_known_args ignores the extra arguments injected by the notebook kernel.
args, unknown = parser.parse_known_args()
model = torch.nn.DataParallel(RAFT(args))
# map_location keeps the load working when `device` falls back to the CPU.
model.load_state_dict(torch.load('./raft-things.pth', map_location=device))
model = model.eval().to(device)
mod0 = copy.deepcopy(model).eval()
modref = copy.deepcopy(model).eval()
modh = SiamesePoseNet3b_trans2d().to(device)

In [None]:
## Flow_former
import sys
sys.path.append('FlowFormer-Official')
sys.path.append('core_f')
from configs.default import get_cfg
from configs.things_eval import get_cfg as get_things_cfg
from configs.small_things_eval import get_cfg as get_small_things_cfg
from core_f.utils.misc import process_cfg
import datasets
from core_f.utils import flow_viz
from core_f.utils import frame_utils
from core_f.FlowFormer import build_flowformer
from core_f.utils.utils import InputPadder, forward_interpolate
import argparse
import torch

# Build the FlowFormer model from the "things" evaluation config and load
# its checkpoint.
parser = argparse.ArgumentParser()
parser.add_argument('--model', help="restore checkpoint")
parser.add_argument('--dataset', help="dataset for evaluation")
parser.add_argument('--small', action='store_true', help='use small model')
parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation')
# parse_known_args ignores the extra arguments injected by the notebook kernel.
args, unknown = parser.parse_known_args()
cfg = get_things_cfg()
cfg.update(vars(args))
cfg.model = "./things.pth"
model_ff = torch.nn.DataParallel(build_flowformer(cfg))
# map_location keeps the load working when `device` falls back to the CPU.
model_ff.load_state_dict(torch.load(cfg.model, map_location=device))
model_ff = model_ff.eval().to(device)

In [11]:
# 3x3 matrix used as the camera matrix in the projections below.
K_mm = np.array([[508.3997, 0, 316.0652],[0,677.8663,254.0068],[0, 0, 1]])
# Indices of the images (and matching pose files) to load.
nimg_0 = [13,19,22,21,48,65,52,54,55,31,32,7,14]
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
klb_dir = '/home/thomas/ScoreFunc/smart_bridge2/'
IMGS, TRNS, QUTS = [], [], []
for idx in nimg_0:
    img_path = klb_dir+'img'+str(idx)+'.png'
    img_idx = cv2.imread(img_path)
    if img_idx is None:
        # cv2.imread signals an unreadable file by returning None; fail with
        # a clear message instead of an opaque cvtColor error.
        raise FileNotFoundError('could not read image: ' + img_path)
    IMGS.append(cv2.cvtColor(img_idx,cv2.COLOR_BGR2RGB))
    try:
        trs = np.load(klb_dir+'trans_'+str(idx)+'.npy').flatten()
        qts = np.load(klb_dir+'quat_'+str(idx)+'.npy').flatten()
    except (OSError, ValueError, EOFError):
        # Pose files are optional: a missing or unreadable file is recorded
        # as None for both the translation and the quaternion.
        trs, qts = None, None
    TRNS.append(trs)
    QUTS.append(qts)

def calc_ffbx(K_mm, Rts, xyz_0, trs, th=100):
    """Project 3-D points into the image and return integer pixel coordinates.

    Args:
        K_mm: 3x3 camera matrix.
        Rts: 3x3 rotation matrix applied after subtracting ``trs``.
        xyz_0: ``(N, 3)`` array of points.
        trs: translation with 3 elements, subtracted from every point.
        th: unused.

    Returns:
        ``(2, N)`` ``int16`` array holding the (u, v) coordinates.
    """
    centred = xyz_0.T - trs.reshape(-1, 1)
    projected = K_mm @ Rts @ centred
    pixels = projected[0:2, :] / projected[2:, :]  # perspective division
    return pixels.astype(np.int16)

# For every image, project the four corners of each of the four patches into
# the image. BXMs[i][p] is the (4, 2) integer pixel polygon of patch p in
# image i.
BXMs = []
n_patches = 4
# Patch corners (one row per corner). They do not depend on the image, so
# they are built once instead of on every loop iteration.
xyz_0 = np.array([[-1.03, -10.92, 16.96],[-1.03,2.85,16.96],[-1.03,2.85,13.97],[-1.03,-10.66,13.97]])
xyz_1 = np.array([[-1.03, 2.85, 16.96],[-1.03,16.62,16.96],[-1.03,16.36,13.97],[-1.03,2.85,13.97]])
xyz_2 = np.array([[-1.03, 2.85, 13.97],[-1.03,16.36,13.97],[-1.03,16.1,10.97],[-1.03,2.85,10.97]])
xyz_3 = np.array([[-1.03,-10.66, 13.97],[-1.03,2.85,13.97],[-1.03,2.85,10.97],[-1.03,-10.39,10.97]])
for idx in nimg_0:
    trs = np.load(klb_dir+'trans_'+str(idx)+'.npy').flatten()
    qts = np.load(klb_dir+'quat_'+str(idx)+'.npy').flatten()
    # Element 0 is moved to the end because scipy's from_quat expects the
    # scalar component last (presumably the files store it first -- confirm).
    Rts = np.array(R.from_quat([qts[1],qts[2],qts[3],qts[0]]).as_matrix())
    uv_0 = calc_ffbx(K_mm, Rts, xyz_0, trs)
    uv_1 = calc_ffbx(K_mm, Rts, xyz_1, trs)
    uv_2 = calc_ffbx(K_mm, Rts, xyz_2, trs)
    uv_3 = calc_ffbx(K_mm, Rts, xyz_3, trs)
    BXMs.append([uv_0.T, uv_1.T, uv_2.T, uv_3.T])

In [12]:
def comp_mskk(im, uv_1):
    """Return ``im`` with every pixel outside a polygon set to zero.

    Args:
        im: image (2-D, or 3-D with a trailing channel axis); converted to
            ``uint8``.
        uv_1: array whose first two rows hold the polygon's u and v
            coordinates (one column per vertex).

    Returns:
        The ``uint8`` image multiplied by the filled-polygon mask.
    """
    image = np.array(im).astype(np.uint8)
    height, width = image.shape[0:2]
    polygon = np.array(uv_1[0:2, :]).T.astype(np.int32)  # (N, 2) vertices
    mask = np.zeros((height, width), dtype=np.uint8)
    cv2.fillPoly(mask, polygon[np.newaxis, :, :], 1)
    if len(image.shape) > 2:
        # Add a channel axis so the mask broadcasts over the image channels.
        mask = mask[:, :, None]
    return mask * image

In [13]:
def remap_homo(img1, Hp_1_2, ww=640, hh=480, mxlim=3500, inv=False):
    """Warp ``img1`` with a homography, shifted and scaled to fit the output.

    The four image corners are mapped through ``Hp_1_2``. The homography is
    then translated so the warped corners have no negative coordinates and
    scaled so their maximum extent maps to ``ww`` x ``hh``.

    Args:
        img1: source image.
        Hp_1_2: 3x3 homography.
        ww, hh: width and height of the output image.
        mxlim, inv: unused.

    Returns:
        ``(warped_image, H_final)`` where ``H_final`` is the homography that
        was actually applied.
    """
    src_h, src_w = img1.shape[:2]
    corners = np.array(
        [[0, 0, 1],
         [src_w - 1, 0, 1],
         [src_w - 1, src_h - 1, 1],
         [0, src_h - 1, 1]],
        dtype=np.float32,
    ).T

    warped = Hp_1_2 @ corners
    warped /= warped[2]  # back to inhomogeneous coordinates
    min_x, min_y = np.min(warped[:2], axis=1)
    max_x, max_y = np.max(warped[:2], axis=1)

    # Translate only along the axes where a corner became negative.
    if min_x < 0:
        shift_x = -min_x
    else:
        shift_x = 0
    if min_y < 0:
        shift_y = -min_y
    else:
        shift_y = 0

    T = np.eye(3, dtype=np.float32)
    T[0, 2] = shift_x
    T[1, 2] = shift_y
    H_shifted = T @ Hp_1_2

    new_w = max_x + shift_x
    new_h = max_y + shift_y
    S = np.diag([ww / new_w, hh / new_h, 1]).astype(np.float32)

    H_final = S @ H_shifted
    blk_img = cv2.warpPerspective(img1, H_final, (ww, hh))
    return blk_img, H_final


In [14]:
def construct_homo(img, K1, K2, R1, R2, R2_, params, inv=False, Hp=None, ori=False, dft=False):
    """Warp ``img`` with the homography ``(K2 R2) (K1 R1)^-1``.

    ``params = (sf, sc)`` is added to a copy of ``K2`` first: ``sf`` to the
    two diagonal entries ``[0,0]``/``[1,1]`` and ``sc`` to ``[0,2]``/``[1,2]``.

    Args:
        img: image to warp (converted to ``uint8``).
        K1, R1: camera matrix and rotation of the source view.
        K2, R2: camera matrix (before the offsets) and rotation of the target view.
        R2_: alternative target rotation, used only for the extra returned
            homography.
        params: ``(sf, sc)`` offsets described above.
        inv: when true, compose with ``Hp`` and warp with the inverse.
        Hp: homography composed on the right when ``inv`` is true.
        ori: unused.
        dft: when true, warp directly into a ``(w, h)`` canvas; otherwise
            use ``remap_homo``.

    Returns:
        ``inv=True``: ``(warped_image, composed_Hp)``.
        ``inv=False``: ``(warped_image, K2, R2, Hf, H_o_)`` where ``Hf`` is
        the un-normalised homography when ``dft`` is true and the homography
        returned by ``remap_homo`` otherwise, and ``H_o_`` is the homography
        built from ``R2_``.
    """
    img = np.array(img).astype(np.uint8)
    h, w = img.shape[:2]
    focal_delta, centre_delta = params

    K2 = np.array(K2)  # work on a copy
    offsets = (
        ((0, 0), focal_delta[0]),
        ((1, 1), focal_delta[1]),
        ((0, 2), centre_delta[0]),
        ((1, 2), centre_delta[1]),
    )
    for (row, col), delta in offsets:
        K2[row, col] = delta + K2[row, col]

    source_inv = np.linalg.inv(K1 @ R1)
    H_12 = (K2 @ R2) @ source_inv

    if inv:
        Hp = H_12 @ Hp
        Hinv = np.linalg.inv(Hp)
        Hinv /= Hinv[2, 2]
        return cv2.warpPerspective(img, Hinv, (w, h)), Hp

    H_o_ = (K2 @ R2_) @ source_inv
    H = H_12 / H_12[2, 2]
    if dft:
        rotated_image = cv2.warpPerspective(img, H, (w, h))
        Hf = H_12
    else:
        rotated_image, Hf = remap_homo(img, H)
    return rotated_image, K2, R2, Hf, H_o_

In [15]:
def fratio(uv_1, uv_2, th=0.2, w=640, h=480):
    """Check that a warped polygon keeps enough of its bounding-box area.

    ``uv_2`` is clamped to the image (``[0, w-1]`` x ``[0, h-1]``) and the
    area of its axis-aligned bounding box, normalised by the image size, is
    compared with the same quantity for ``uv_1``.

    The clamping is done on a copy; the caller's ``uv_2`` is left untouched.

    Args:
        uv_1: reference points, row 0 = u, row 1 = v.
        uv_2: warped points, same layout.
        th: minimum accepted ratio ``area(uv_2) / area(uv_1)``.
        w, h: image width and height.

    Returns:
        ``True`` when the ratio is at least ``th``, otherwise ``None``.
    """
    uv_2 = np.asarray(uv_2)
    # np.clip returns new arrays, so the input is never modified.
    u2 = np.clip(uv_2[0, :], 0, w - 1)
    v2 = np.clip(uv_2[1, :], 0, h - 1)

    vmin, vmax = np.nanmin(v2)/h, np.nanmax(v2)/h
    umin, umax = np.nanmin(u2)/w, np.nanmax(u2)/w
    a2 = (vmax-vmin)*(umax-umin)

    vmin1, vmax1 = np.nanmin(uv_1[1,:])/h, np.nanmax(uv_1[1,:])/h
    umin1, umax1 = np.nanmin(uv_1[0,:])/w, np.nanmax(uv_1[0,:])/w
    a1 = (vmax1-vmin1)*(umax1-umin1)

    if (a2/a1) >= th:
        return True
    # Callers test `is not None`, so failure is reported as None, not False.
    return None

In [16]:
import random

In [17]:
def gen_data(image1, uv_, R1_, K_mm, level):
    """Create a masked image and a randomly warped copy of it.

    The image is resized to 640x480 and masked to the polygon ``uv_``. A
    second view is synthesised by perturbing the camera matrix and the
    rotation; the size of the perturbation depends on ``level``:

    * 0: camera offsets in [-20, 20], angles in [-5, 5] degrees
    * 1: camera offsets in +/-[20, 60], angles in +/-[5, 15] degrees
    * 2: camera offsets in +/-[60, 100], angles in +/-[15, 30] degrees

    For levels 1 and 2 one sign is drawn for each whole offset/angle vector.

    Args:
        image1: source image.
        uv_: polygon with row 0 = u and row 1 = v.
        R1_: 3x3 rotation matrix of the source view.
        K_mm: 3x3 camera matrix.
        level: difficulty level, one of 0, 1 or 2.

    Returns:
        ``(image1, image2, Hp, Hp_)``: the masked source image, its warped
        version and the last two values returned by ``construct_homo``.

    Raises:
        ValueError: if ``level`` is not 0, 1 or 2.
    """
    # Validate first; otherwise an unknown level surfaces much later as an
    # UnboundLocalError on `df`.
    if level not in (0, 1, 2):
        raise ValueError(f"level must be 0, 1 or 2, got {level!r}")

    image1 = cv2.resize(image1, (640, 480), interpolation=cv2.INTER_LINEAR)
    ru = R.from_matrix(R1_).as_euler('xyz',degrees=True)
    K1 = np.array(K_mm)
    K2 = np.array(K_mm)
    if level==0:
        df = np.random.uniform(-20.0,20.0,2)
        dc = np.random.uniform(-20.0,20.0,2)
        d_ang = np.random.uniform(-5.0,5.0,3)
    elif level==1:
        df = random.choice([np.random.uniform(-60.0,-20,2), np.random.uniform(20,60,2)])
        dc = random.choice([np.random.uniform(-60.0,-20,2), np.random.uniform(20,60,2)])
        d_ang = random.choice([np.random.uniform(-15,-5,3), np.random.uniform(5,15,3)])
    else:  # level == 2
        df = random.choice([np.random.uniform(-100,-60,2), np.random.uniform(60,100,2)])
        dc = random.choice([np.random.uniform(-100,-60,2), np.random.uniform(60,100,2)])
        d_ang = random.choice([np.random.uniform(-30,-15,3), np.random.uniform(15,30,3)])

    # Extra offset, added to all three Euler angles of the alternative rotation.
    ang_ = np.random.uniform(-3,3)
    params = (df, dc)
    R2 = R.from_euler('xyz', d_ang+ru, degrees=True).as_matrix()
    R2_ = R.from_euler('xyz', d_ang+ru+ang_, degrees=True).as_matrix()
    image1 = comp_mskk(image1, uv_.astype(np.int16))
    image2, _, _, Hp, Hp_ = construct_homo(image1, K1, K2, R1_, R2, R2_, params, dft=False)
    return image1, image2, Hp, Hp_

In [18]:
def compute_optical_flow(image1, image2, UV1):  # dense Farneback optical flow
    """Move the points ``UV1`` from ``image1`` to ``image2`` with dense flow.

    A dense flow field is computed with OpenCV's Farneback method and
    sampled at ``UV1`` with bilinear interpolation.

    Args:
        image1, image2: images of equal size; 3-channel inputs are converted
            to grayscale first.
        UV1: ``(N, 2)`` array of (u, v) pixel coordinates in ``image1``.

    Returns:
        ``(N, 2)`` array with the predicted coordinates in ``image2``.
    """
    if image1.ndim == 3:
        image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    if image2.ndim == 3:
        image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

    flow = cv2.calcOpticalFlowFarneback(
        image1, image2, None,
        pyr_scale=0.5, levels=5, winsize=15,
        iterations=5, poly_n=7, poly_sigma=1.5,
        flags=cv2.OPTFLOW_FARNEBACK_GAUSSIAN
    ).astype(np.float32)

    H, W = flow.shape[:2]
    UV1 = UV1.astype(np.float32)
    u = UV1[:,0]
    v = UV1[:,1]

    # Sample the flow at coordinates clamped to the image. Without this a
    # negative coordinate silently wraps to the opposite border and a
    # coordinate >= W/H raises IndexError, while the +1 neighbours below
    # were already clamped. In-range points are unaffected.
    us = np.clip(u, 0, W-1)
    vs = np.clip(v, 0, H-1)
    x0 = np.floor(us).astype(int)
    y0 = np.floor(vs).astype(int)
    x1 = np.clip(x0+1, 0, W-1)
    y1 = np.clip(y0+1, 0, H-1)

    # Bilinear interpolation weights.
    wx = us - x0
    wy = vs - y0
    dx = (1-wx)*(1-wy)*flow[y0,x0,0] + wx*(1-wy)*flow[y0,x1,0] + \
         (1-wx)*wy*flow[y1,x0,0]     + wx*wy*flow[y1,x1,0]
    dy = (1-wx)*(1-wy)*flow[y0,x0,1] + wx*(1-wy)*flow[y0,x1,1] + \
         (1-wx)*wy*flow[y1,x0,1]     + wx*wy*flow[y1,x1,1]
    # The displacement is added to the original, unclamped coordinates.
    UV2 = np.column_stack([u+dx, v+dy])
    return UV2

In [19]:
def compute_raft_flow(mod, image1, image2, UV1):
    """Move the points ``UV1`` from ``image1`` to ``image2`` using ``mod``.

    Args:
        mod: flow model called as ``mod(img1, img2, iters=12, test_mode=True)``
            and returning a pair whose second element is the flow used here.
        image1, image2: channel-last images (assumed H x W x C -- TODO confirm).
        UV1: ``(N, 2)`` integer array of (u, v) pixel coordinates.

    Returns:
        ``(N, 2)`` NumPy array: ``UV1`` plus the flow sampled at ``UV1``.
    """
    def _to_batch(img):
        # channel-last image -> (1, C, H, W) tensor on the global device
        return torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(device)

    with torch.no_grad():
        batch1 = _to_batch(image1)
        batch2 = _to_batch(image2)
        _, flow_full = mod(batch1, batch2, iters=12, test_mode=True)
        flow_hw2 = flow_full.squeeze(0).permute(1, 2, 0)  # (H, W, 2)
        rows, cols = UV1[:, 1], UV1[:, 0]
        sampled = flow_hw2[rows, cols, :]
        return sampled.cpu().numpy() + UV1

In [32]:
def compute_flowf_flow(mod, image1, image2, UV1):
    """Move the points ``UV1`` from ``image1`` to ``image2`` using ``mod``.

    The images are padded with ``InputPadder`` before the model call and the
    first model output is unpadded again before sampling.

    Args:
        mod: flow model called as ``mod(img1, img2)``; its first output is
            used as the flow.
        image1, image2: channel-last images (assumed H x W x C -- TODO confirm).
        UV1: ``(N, 2)`` integer array of (u, v) pixel coordinates.

    Returns:
        ``(N, 2)`` NumPy array: ``UV1`` plus the flow sampled at ``UV1``.
    """
    with torch.no_grad():
        batch1 = torch.tensor(image1).permute(2, 0, 1).unsqueeze(0).to(device)
        batch2 = torch.tensor(image2).permute(2, 0, 1).unsqueeze(0).to(device)

        padder = InputPadder(batch1.shape)
        batch1, batch2 = padder.pad(batch1, batch2)
        outputs = mod(batch1, batch2)
        flow_chw = padder.unpad(outputs[0]).cpu()[0]
        flow_hw2 = flow_chw.squeeze(0).permute(1, 2, 0)
        rows, cols = UV1[:, 1], UV1[:, 0]
        sampled = flow_hw2[rows, cols, :]
        return sampled.cpu().numpy() + UV1

In [21]:
def update_k(fs, cs, Kn):
    """Return a copy of ``Kn`` with offsets added to four of its entries.

    ``fs[0]`` and ``fs[1]`` are added to ``Kn[0,0]`` and ``Kn[1,1]``;
    ``cs[0]`` and ``cs[1]`` are added to ``Kn[0,2]`` and ``Kn[1,2]``.
    The input matrix is not modified.
    """
    updated = np.array(Kn)
    offsets = (
        ((0, 0), fs[0]),
        ((1, 1), fs[1]),
        ((0, 2), cs[0]),
        ((1, 2), cs[1]),
    )
    for (row, col), delta in offsets:
        updated[row, col] = delta + updated[row, col]
    return updated

In [22]:
def calRfq(qn):
    """Convert a quaternion given as ``[q0, q1, q2, q3]`` to a 3x3 rotation matrix.

    Element 0 is moved to the end before calling ``Rotation.from_quat``,
    which expects the scalar component last.
    """
    scalar_last = [qn[1], qn[2], qn[3], qn[0]]
    rotation = R.from_quat(scalar_last)
    return np.array(rotation.as_matrix())

In [23]:
def homo_unwarp(img, Hn, w=640, h=480):
    """Warp ``img`` with the inverse of the homography ``Hn``.

    The inverse is normalised so its bottom-right entry is 1, and the result
    is rendered into a ``w`` x ``h`` canvas. The image is converted to
    ``uint8`` first.
    """
    frame = np.array(img).astype(np.uint8)
    inverse = np.linalg.inv(Hn)
    inverse /= inverse[2, 2]
    return cv2.warpPerspective(frame, inverse, (w, h))

In [24]:
from skimage.feature import hog
from sklearn.metrics.pairwise import cosine_similarity
def hog_similarity(img1, img2):
    """Return the cosine similarity of two HOG descriptors, rescaled to [0, 1].

    Both images are converted to ``uint8``, described with HOG (8x8 pixel
    cells, 2x2 cell blocks, 9 orientations, L2-Hys block normalisation), and
    the cosine similarity ``s`` of the descriptors is returned as
    ``(s + 1) / 2``.
    """
    hog_kwargs = dict(
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        orientations=9,
        block_norm='L2-Hys',
        feature_vector=True,
    )
    descriptors = [
        hog(np.array(image).astype(np.uint8), **hog_kwargs)
        for image in (img1, img2)
    ]
    cosine = cosine_similarity([descriptors[0]], [descriptors[1]])[0, 0]
    return (cosine + 1) / 2.0

In [31]:
def homo_align(modhn, img1, img2, Kmm, sz=300, iters=1, Hp=None):
    """Align ``img2`` to ``img1`` with an iteratively predicted homography.

    When ``Hp`` is given, ``img2`` is simply un-warped with it. Otherwise,
    for ``iters`` rounds, the current aligned image is scored against
    ``img1`` with ``hog_similarity``, ``modhn`` predicts camera-matrix
    offsets and a quaternion from the resized grayscale pair, and the
    resulting homography ``K12 @ R12 @ inv(K1)`` is accumulated and applied
    to the original ``img2``. The candidate with the best score is returned.

    Args:
        modhn: model returning ``(fscale, cscale, _, quat)``.
        img1, img2: 3-channel images.
        Kmm: 3x3 camera matrix.
        sz: side length the grayscale images are resized to for the model.
        iters: number of refinement rounds.
        Hp: optional known homography; skips the prediction loop.

    Returns:
        ``(aligned_img2, homography, score)``; ``score`` is ``None`` when
        ``Hp`` was supplied.
    """
    with torch.no_grad():
        img1_0 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
        img2_0p = np.array(img2).astype(np.uint8)
        img2_0r = np.array(img2).astype(np.uint8)
        img2_0 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
        img1_00, img2_00 = np.array(img1_0).astype(np.uint8), np.array(img2_0).astype(np.uint8)
        img_tsr10 = F.interpolate(torch.tensor(img1_00/255.0).unsqueeze(0).unsqueeze(0), size=(sz,sz), mode='bilinear', align_corners=False).squeeze(0).squeeze(0)
        K1 = np.array(Kmm)
        H12_c = np.eye(3)  # homography accumulated over the rounds so far
        mtx1 = []
        Hs = []
        img2_trans = []
        if Hp is not None:
            img2_0r = homo_unwarp(img2_0p, Hp)
            return img2_0r, Hp, None

        for j in range(iters):
            # Record the current candidate: image, score and homography.
            img2_trans.append(img2_0r.astype(np.uint8))
            mtrc10 = hog_similarity(img1_00, img2_0)
            mtx1.append(mtrc10)
            Hs.append(H12_c)
            img_tsr2 = F.interpolate(torch.tensor(img2_0/255.0).unsqueeze(0).unsqueeze(0), size=(sz,sz), mode='bilinear', align_corners=False).squeeze(0).squeeze(0)
            comb_tsr1 = torch.stack([img_tsr10, img_tsr2],0).float()
            pred_homo1 = modhn(comb_tsr1.unsqueeze(0).to(device))
            fscale1, cscale1, _, quat1 = pred_homo1
            fs1, cs1  = fscale1.cpu().numpy(), cscale1.cpu().numpy()
            qtn1 = quat1/quat1.norm(p=2,dim=1,keepdim=True)
            qtn1 = qtn1.cpu().numpy()
            K12 = update_k(fs1[0,:], cs1[0,:], K1)
            R12 = calRfq(qtn1[0,:].flatten().tolist())
            try:
                H12 = K12 @ R12 @ np.linalg.inv(K1)
                H12_c = np.array(H12 @ H12_c)
                # Always re-warp the original images with the accumulated
                # homography rather than warping an already warped image.
                img2_0 = homo_unwarp(img2_00, np.array(H12_c))
                img2_0r = homo_unwarp(img2_0p, np.array(H12_c))
            except Exception:
                # Best effort: stop refining when the update fails, e.g. on a
                # singular matrix. `Exception` (not a bare except) lets
                # KeyboardInterrupt propagate.
                break

        b_ind = np.argmax(mtx1)
        b_sc = np.max(mtx1)
        print('b_sc: ', b_sc)
        return img2_trans[b_ind], Hs[b_ind], b_sc #

In [26]:
def rect_pred_flow(p_uv2_0, H12_c):
    """Map ``(N, 2)`` points through the homography ``H12_c``.

    The points are lifted to homogeneous coordinates, multiplied by
    ``H12_c`` and divided by the third coordinate (plus ``1e-12`` to avoid a
    division by exactly zero).

    Returns:
        ``(N, 2)`` array of transformed points.
    """
    points = p_uv2_0.T
    homogeneous = np.vstack((points, np.ones_like(points[0, :])))
    mapped = H12_c @ homogeneous
    return (mapped[0:2, :] / (mapped[2:, :] + 1e-12)).T

In [27]:
def compute_errs(preds, UV2):
    """Return the mean absolute error of every prediction against ``UV2``.

    Args:
        preds: iterable of arrays broadcastable against ``UV2``.
        UV2: reference array.

    Returns:
        A list with one mean absolute difference per prediction.
    """
    return [np.mean(np.abs(prediction - UV2)) for prediction in preds]

In [28]:
def plot_progress3(apt, tit):
    """Draw a grouped bar chart of the mean error per method and difficulty.

    Args:
        apt: 3-D array-like; it is averaged over axis 1, and the resulting
            rows are the difficulty levels and the columns the methods.
        tit: figure title.

    The previous cell output is cleared before plotting.
    """
    clear_output(wait=True)

    labs = [
        'Pure_Homo', 'LK.', 'RAFT', 'RAFT_FT', 'F.Former',
        'LK_RMR', 'RAFT_RMR', 'RAFT_FT_RMR', 'F.Former_RMR'
    ]
    difficulties = ['Simple', 'Medium', 'Hard']
    colors = [
        "#4E79A7",  # blue
        "#F28E2B",  # orange
        "#E15759",  # red
        "#76B7B2",  # teal
        "#59A14F",  # green
        "#EDC948",  # yellow
        "#B07AA1",  # purple
        "#FF9DA7",  # pink
        "#9C755F",  # brown
    ]

    apt = np.asarray(apt)
    assert apt.ndim == 3, "Expected apt shape (3, 9, num_models)"
    data = apt.mean(axis=1)
    num_diff, num_models = data.shape
    x = np.arange(num_diff)
    bar_width = 0.8 / num_models

    plt.figure(figsize=(13, 5))
    for i in range(num_models):
        xpos = x - 0.4 + i * bar_width
        heights = data[:, i]
        plt.bar(xpos, heights, width=bar_width,
                color=colors[i % len(colors)], label=labs[i])
        # Print the value above every bar of this method.
        for bar_x, bar_h in zip(xpos, heights):
            plt.text(bar_x, bar_h, f"{bar_h:.2f}",
                     ha='center', va='bottom', fontsize=10, rotation=90)

    plt.xticks(x, difficulties)
    plt.xlabel("Difficulty Level")
    plt.ylabel("Average Pixel Error")
    plt.title(tit, color="#1f3a5f", fontsize=14, fontweight='semibold')
    plt.ylim(0, np.max(data) * 1.15)
    plt.legend(title="Models", bbox_to_anchor=(1.02, 0.95),
               loc='upper left', fontsize=9)
    plt.grid(axis='y', linestyle='--', alpha=0.4)
    plt.tight_layout()
    plt.show()


In [29]:
import warnings
# Suppress every FutureWarning for the remainder of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Evaluation: for each of the three difficulty levels, collect 1000 accepted
# samples. Each sample is a random image/patch pair that is synthetically
# warped; all flow methods (with and without homography rectification) are
# scored against the ground-truth homography.
opt_models = './PATCH_OPT_MODELS/'
homo_models = './PATCH_HOMO_MODELS/'

# Checkpoint files depend only on the patch index, so each one is read from
# disk once and reused instead of being reloaded for every sample.
_ckpt_cache = {}


def _cached_state_dict(path):
    """Return the state dict stored at ``path``, reading the file only once."""
    if path not in _ckpt_cache:
        # Keep the cached copy on the CPU; load_state_dict copies the values
        # onto whatever device the model lives on.
        _ckpt_cache[path] = torch.load(path, map_location='cpu')
    return _ckpt_cache[path]


modh = modh.eval()
metxs = []
for lev in range(3):
    print('current_level: ', lev)
    agt, apt = [], []
    while len(apt)<1000:
        idx = np.random.randint(len(nimg_0))
        image1 = IMGS[idx]
        try:
            R1_ = QUTS[idx]
            if len(R1_.shape)<2:
                R1_ = np.array(R.from_quat([R1_[1],R1_[2],R1_[3],R1_[0]]).as_matrix())
        except Exception:
            # No usable pose for this image (e.g. the entry is None).
            R1_ = np.eye(3)
        pz = np.random.randint(len(BXMs[idx]))
        modh.load_state_dict(_cached_state_dict(homo_models+'/homo_match_comb_' +str(pz)+ '.pth'))
        modref.load_state_dict(_cached_state_dict(opt_models+'/raft_match_comb_' +str(pz)+ '.pth'))
        uv_ = BXMs[idx][pz].T

        image1, image2, Hp, Hp_ = gen_data(image1, uv_, R1_, K_mm, level=lev)
        ones_ = np.ones_like(uv_[0,:])
        uv_1 = np.vstack((uv_,ones_))
        uv_2 = Hp @ uv_1
        uv_2 = uv_2[0:2,:]/uv_2[2:,:]
        flg_ = fratio(np.array(uv_), uv_2, th=0.3)
        if flg_ is None:
            # Too little of the warped patch remains visible: draw a new sample.
            continue

        comb_imgs = np.hstack((image1,image2))
        pts_1 = np.array(uv_.T).astype(np.int32)
        msk__1 = np.zeros_like(image1)
        cv2.fillPoly(msk__1, pts_1[np.newaxis,:,:], 1)
        vv_1, uu_1, _ = np.where(msk__1 == 1)
        ones_1 = np.ones_like(vv_1)
        uv_op = np.vstack((uu_1, vv_1, ones_1))
        # Ground-truth position of every patch pixel in the warped image.
        uv_2op = Hp @ uv_op
        uv_2op = uv_2op[0:2,:]/uv_2op[2:,:]
        hhn, wwn = image1.shape[0:2]
        # Keep only the pixels whose target lies inside the image.
        viz_flg = (uv_2op[0,:]>=0) * (uv_2op[0,:]<wwn) * (uv_2op[1,:]>=0) * (uv_2op[1,:]<hhn)
        UV1 = np.vstack((uu_1[viz_flg],vv_1[viz_flg])).T
        UV2 = uv_2op[:,viz_flg].T

        pred_UV2_opt = compute_optical_flow(image1, image2, UV1)
        pred_UV2_raft = compute_raft_flow(mod0, image1, image2, UV1)
        pred_UV2_flowf = compute_flowf_flow(model_ff, image1, image2, UV1)
        pred_UV2_raft_ft = compute_raft_flow(modref, image1, image2, UV1)

        # Rectify image2 with the predicted homography, run each flow method
        # on the rectified pair and map the result back with the same
        # homography.
        img2_rect, H12_, _ = homo_align(modh, image1, image2, K_mm, iters=25, Hp=None)
        pred_UV2_opt_rect = compute_optical_flow(image1, img2_rect, UV1)
        pred_UV2_opt_rect_algn = rect_pred_flow(pred_UV2_opt_rect, H12_)
        pred_UV2_raft_rect = compute_raft_flow(mod0, image1, img2_rect, UV1)
        pred_UV2_raft_rect_algn = rect_pred_flow(pred_UV2_raft_rect, H12_)

        pred_UV2_flowf_rect = compute_flowf_flow(model_ff, image1, img2_rect, UV1)
        pred_UV2_flowf_rect_algn = rect_pred_flow(pred_UV2_flowf_rect, H12_)

        pred_UV2_raft_ft_rect = compute_raft_flow(modref, image1, img2_rect, UV1)
        pred_UV2_raft_ft_rect_algn = rect_pred_flow(pred_UV2_raft_ft_rect, H12_)
        pred_pure_homo = rect_pred_flow(UV1, H12_)
        # Order matches the labels used by plot_progress3.
        preds = [pred_pure_homo, pred_UV2_opt, pred_UV2_raft, pred_UV2_raft_ft, pred_UV2_flowf, pred_UV2_opt_rect_algn,
                pred_UV2_raft_rect_algn, pred_UV2_raft_ft_rect_algn, pred_UV2_flowf_rect_algn]
        errs_i = compute_errs(preds, UV2)
        apt.append(errs_i)
    metxs.append(apt)
plot_progress3(metxs,tit="Average Pixel Error Across Difficulty Levels for KLB Bridge") 