In [1]:
! pip install tensorflow numpy matplotlib pandas scikit-learn opencv-python ipywidgets

import numpy as np
import matplotlib.pyplot as plt
import os
from glob import glob
import tensorflow as tf
import keras
from keras import Input, Model
from keras.layers import Conv2D, Concatenate, Flatten, Dense, MaxPooling2D
import keras.optimizers
import sklearn
import sklearn.metrics
from PIL import Image
import time
from IPython.display import clear_output, display
import math
import ipywidgets as widgets
from IPython.display import display, clear_output
import cv2






In [2]:
def load_images_from_folder(folder, image_size=(500, 500), numImgs = (0, 100)):
    """Load a slice of the numerically named images in `folder` as one float32 array.

    Files matching ``*.png`` or ``*.jpg`` directly inside `folder` are ordered
    by the integer that precedes the first ``.`` of their file name (ties are
    broken by the full path string). The slice ``numImgs[0]:numImgs[1]`` of
    that ordering is then read through Keras.

    Args:
        folder: Directory to search.
        image_size: Forwarded to Keras ``load_img`` as ``target_size``.
        numImgs: ``(start, stop)`` slice bounds into the ordered file list.

    Returns:
        ``np.float32`` array built from the loaded images, each divided by 255.

    Raises:
        ValueError: if a matching file name does not begin with an integer.
    """
    def numeric_order(path):
        # Primary key: leading integer of the file name; secondary key: the path itself.
        leading_number = os.path.basename(path).split('.')[0]
        return int(leading_number), path

    def read_normalized(path):
        pixels = keras.preprocessing.image.load_img(path, target_size=image_size)
        pixels = keras.preprocessing.image.img_to_array(pixels).astype(np.float32)
        return pixels / 255.0  # Normalize to [0,1]

    candidates = [
        match
        for pattern in ('*.png', '*.jpg')
        for match in glob(os.path.join(folder, pattern))
    ]
    selected = sorted(candidates, key=numeric_order)[numImgs[0]:numImgs[1]]

    return np.array([read_normalized(path) for path in selected], dtype=np.float32)


# [[distance, pitch, yaw, vehicle_id_string],...]
def load_transforms(folder, numImgs = (0, 100)):
    """Load a slice of the numerically named ``.npy`` files in `folder` into one array.

    Files are ordered by the integer that precedes the first ``.`` of their
    file name (ties broken by the full path string); the slice
    ``numImgs[0]:numImgs[1]`` of that ordering is loaded with ``np.load``.

    Args:
        folder: Directory containing the ``*.npy`` files.
        numImgs: ``(start, stop)`` slice bounds into the ordered file list.

    Returns:
        ``np.array`` stacking the loaded arrays in file order. Per the note
        accompanying this loader each entry is expected to look like
        ``[distance, pitch, yaw, vehicle_id_string]`` (not checked here).

    Raises:
        ValueError: if a matching file name does not begin with an integer.
    """
    def numeric_order(path):
        return int(os.path.basename(path).split('.')[0]), path

    ordered = sorted(glob(os.path.join(folder, '*.npy')), key=numeric_order)
    chosen = ordered[numImgs[0]:numImgs[1]]

    return np.array([np.load(path) for path in chosen])


dataset_folder = 'sample_dataset'
imgsToLoad = (101, 209)  # (start, stop) slice applied to every sub-folder's ordered file list

# The three image folders are read with the same slice, in this order:
# reference -> masks -> overlays.
sample_references, sample_masks, sample_overlays = (
    load_images_from_folder(f"{dataset_folder}/{subfolder}", numImgs = imgsToLoad)
    for subfolder in ("reference", "masks", "overlays")
)
sample_transforms = load_transforms(f"{dataset_folder}/transforms", numImgs = imgsToLoad)



In [3]:

def calcTransforms(pitch, yaw):
    """Map a raw (pitch, yaw) pair to the (pitch, yaw, roll) triple used for rendering.

    The yaw is first reduced to its signed offset from the nearest multiple of
    90 degrees, in the range [-45, 45). Two output shapes exist:

    * "yaw" form  -> ``(-pitch, -offset, 0)``: used when ``pitch > 35``, or
      when ``15 <= pitch <= 35`` and ``145 < yaw < 215``.
    * "roll" form -> ``(pitch, 0, -offset)``: used in every other case.

    Args:
        pitch: Raw pitch in degrees.
        yaw: Raw yaw in degrees.

    Returns:
        Tuple ``(pitch, yaw, roll)``.
    """
    # Signed difference between yaw and the nearest 90-degree multiple.
    offset = ((yaw + 45) % 90) - 45

    # Top face is predominant.
    if pitch > 35:
        return -pitch, -offset, 0

    # Lowest orbit angle, or mid orbit angle away from the front face.
    if pitch < 15 or not (145 < yaw < 215):
        return pitch, 0, -offset

    # Mid orbit angle, on the front face.
    return -pitch, -offset, 0


       

In [10]:
def get_rotation_matrix_tf(pitch, yaw, roll):
    """Build the combined rotation ``R_pitch @ R_roll @ R_yaw`` from angles in degrees.

    Axis assignment used by this function:
      * pitch -> rotation about the X axis (1 in the top-left entry),
      * roll  -> rotation about the Y axis (1 in the centre entry),
      * yaw   -> rotation about the Z axis (1 in the bottom-right entry).

    Args:
        pitch, yaw, roll: Angles in degrees (scalars or scalar tensors).

    Returns:
        The 3x3 product ``R_pitch @ (R_roll @ R_yaw)`` as a TensorFlow tensor.
    """
    to_radians = np.pi / 180.0
    pitch_rad = pitch * to_radians
    yaw_rad = yaw * to_radians
    roll_rad = roll * to_radians

    cos_p, sin_p = tf.cos(pitch_rad), tf.sin(pitch_rad)
    cos_r, sin_r = tf.cos(roll_rad), tf.sin(roll_rad)
    cos_y, sin_y = tf.cos(yaw_rad), tf.sin(yaw_rad)

    # Pitch: rotation about X.
    about_x = tf.stack([
        [1.0, 0.0, 0.0],
        [0.0, cos_p, -sin_p],
        [0.0, sin_p, cos_p],
    ])

    # Roll: rotation about Y.
    about_y = tf.stack([
        [cos_r, 0.0, sin_r],
        [0.0, 1.0, 0.0],
        [-sin_r, 0.0, cos_r],
    ])

    # Yaw: rotation about Z.
    about_z = tf.stack([
        [cos_y, -sin_y, 0.0],
        [sin_y, cos_y, 0.0],
        [0.0, 0.0, 1.0],
    ])

    # Composition order: pitch @ roll @ yaw.
    return tf.matmul(about_x, tf.matmul(about_y, about_z))


def bilinear_sampler_tf(img, x, y):
    """Sample `img` at fractional pixel coordinates with bilinear interpolation.

    Coordinates are clamped to ``[0, size - 1.001]`` so that the upper
    neighbour (``floor + 1``) stays inside the image.

    Args:
        img: Image tensor indexed as ``img[row, col]``; documented by the
            original author as (H, W, C).
        x: Column coordinates (floats), documented as (OutputH, OutputW).
        y: Row coordinates (floats), same shape as `x`.

    Returns:
        Weighted sum of the four neighbouring texels for every (x, y) pair.
    """
    img_shape = tf.shape(img)
    height_f = tf.cast(img_shape[0], tf.float32)
    width_f = tf.cast(img_shape[1], tf.float32)

    x = tf.clip_by_value(x, 0.0, width_f - 1.001)
    y = tf.clip_by_value(y, 0.0, height_f - 1.001)

    col_lo = tf.cast(tf.floor(x), tf.int32)
    col_hi = col_lo + 1
    row_lo = tf.cast(tf.floor(y), tf.int32)
    row_hi = row_lo + 1

    def texel(rows, cols):
        # Look up img[rows, cols] for every output position.
        return tf.gather_nd(img, tf.stack([rows, cols], axis=-1))

    # Distances from the sample point to the upper / lower integer neighbours.
    to_hi_x = tf.cast(col_hi, tf.float32) - x
    to_lo_x = x - tf.cast(col_lo, tf.float32)
    to_hi_y = tf.cast(row_hi, tf.float32) - y
    to_lo_y = y - tf.cast(row_lo, tf.float32)

    # Each corner is weighted by the area of the opposite sub-rectangle.
    weighted_corners = [
        (to_hi_x * to_hi_y, texel(row_lo, col_lo)),
        (to_hi_x * to_lo_y, texel(row_hi, col_lo)),
        (to_lo_x * to_hi_y, texel(row_lo, col_hi)),
        (to_lo_x * to_lo_y, texel(row_hi, col_hi)),
    ]

    return tf.add_n([tf.expand_dims(weight, -1) * value for weight, value in weighted_corners])



def solve_homography_forward_and_invert(src_pts, dst_pts):
    """Fit a 4-point homography src -> dst and return its inverse (dst -> src).

    Steps:
      1. Mean-centre the destination points (a shift only; no rescaling is
         applied).
      2. Build the 8x8 DLT system for the forward map (x, y) -> (u, v) with
         the bottom-right homography entry fixed to 1, and solve it with
         ``tf.linalg.lstsq`` (``l2_regularizer=1e-5``, ``fast=True``).
      3. Invert the forward matrix and right-multiply by the centring
         translation so the result accepts un-centred destination coordinates.

    Args:
        src_pts: Source points, indexed as ``src_pts[:, 0]`` (x) and
            ``src_pts[:, 1]`` (y); the first four rows are used.
        dst_pts: Destination points with the same layout (u, v).

    Returns:
        3x3 tensor mapping homogeneous destination coordinates to source
        coordinates.
    """
    dst_u, dst_v = dst_pts[:, 0], dst_pts[:, 1]
    u_mean, v_mean = tf.reduce_mean(dst_u), tf.reduce_mean(dst_v)
    centred_u = dst_u - u_mean
    centred_v = dst_v - v_mean

    src_x, src_y = src_pts[:, 0], src_pts[:, 1]

    # Two equations per correspondence: one for u, one for v.
    rows, rhs = [], []
    for i in range(4):
        x, y = src_x[i], src_y[i]
        u, v = centred_u[i], centred_v[i]
        rows += [
            [x, y, 1.0, 0.0, 0.0, 0.0, -x*u, -y*u],
            [0.0, 0.0, 0.0, x, y, 1.0, -x*v, -y*v],
        ]
        rhs += [u, v]

    A = tf.stack(rows)
    b = tf.expand_dims(tf.stack(rhs), -1)

    h = tf.reshape(tf.linalg.lstsq(A, b, l2_regularizer=1e-5, fast=True), [8])

    # Forward matrix: source -> centred destination.
    forward = tf.stack([
        [h[0], h[1], h[2]],
        [h[3], h[4], h[5]],
        [h[6], h[7], 1.0],
    ])

    # Translation that centres raw destination coordinates before the inverse is applied.
    centring = tf.stack([
        [1.0, 0.0, -u_mean],
        [0.0, 1.0, -v_mean],
        [0.0, 0.0, 1.0],
    ])

    return tf.matmul(tf.linalg.inv(forward), centring)

@tf.function
def render_texture_tf(texture, pitch, yaw, roll, distance, uv_scale=50.0, shift_u=0.0, shift_v=0.0, image_size=(500, 500)):
    """Render `texture` tiled across a rotated plane by casting one ray per output pixel.

    Every pixel's ray (all starting at a single camera point) is intersected
    with a plane oriented by ``get_rotation_matrix_tf(pitch, yaw, roll)``. The
    hit point is expressed in the plane's local axes, divided by `uv_scale`,
    shifted, wrapped into [0, 1) and used to bilinearly sample the texture.

    Args:
        texture: Image tensor to tile; assumed to be (H, W, C) float — TODO
            confirm against callers. It is resized to 256x256 before sampling.
        pitch, yaw, roll: Angles forwarded unchanged to
            ``get_rotation_matrix_tf``.
        distance: Multiplied by 100 to position the plane along its normal.
        uv_scale: Divisor applied to plane-local coordinates, i.e. one texture
            tile covers `uv_scale` plane units.
        shift_u, shift_v: Offsets added to the tile coordinates before
            wrapping, in units of one tile.
        image_size: ``(height, width)`` of the rendered output.

    Returns:
        Tensor of sampled values for each output pixel, set to zero wherever
        the ray parameter ``t`` is not positive.
    """
    out_h, out_w = image_size
    f = 500.0  # used both as the ray z-component and as the camera's distance behind z=0
    cx, cy = out_w / 2.0, out_h / 2.0  # pixel coordinates are measured from the image centre
    
    # --- 1. SHARPNESS TRICK ---
    # Upsample with nearest-neighbour so the later bilinear lookup operates on a 256x256 grid.
    high_res_tex = tf.image.resize(texture, [256, 256], method='nearest')
    
    # --- 2. CAMERA GEOMETRY ---
    R = get_rotation_matrix_tf(pitch, yaw, roll)
    plane_normal = R[:, 2]  # third column of R is used as the plane normal

    # --- 3. PERSPECTIVE LOGIC ---
    # NOTE(review): the name suggests `distance` is in metres — confirm units with the dataset.
    pixels_per_meter = 100.0 
    center_dist_units = distance * pixels_per_meter
    # A point on the plane: the origin pushed along the normal by the scaled distance.
    plane_point = tf.constant([0.0, 0.0, 0.0]) + (plane_normal * center_dist_units)

    # --- 4. RAY CASTING ---
    # One direction vector (rx, ry, f) per output pixel.
    grid_x, grid_y = tf.meshgrid(tf.range(out_w), tf.range(out_h))
    rx = tf.cast(grid_x, tf.float32) - cx
    ry = tf.cast(grid_y, tf.float32) - cy
    rz = tf.ones_like(rx) * f
    ray_dir = tf.stack([rx, ry, rz], axis=-1)

    # --- 5. INTERSECTION ---
    # Ray/plane intersection: t = ((plane_point - camera_pos) . n) / (ray_dir . n).
    camera_pos = tf.constant([0.0, 0.0, -f])
    numerator = tf.tensordot(plane_point - camera_pos, plane_normal, axes=1)
    denominator = tf.tensordot(ray_dir, plane_normal, axes=1)
    # Avoid division by ~0 for rays nearly parallel to the plane; note that the
    # replacement value is always +1e-5, regardless of the original sign.
    denominator = tf.where(tf.abs(denominator) < 1e-5, 1e-5, denominator)
    t = numerator / denominator
    hit_point = camera_pos + (ray_dir * tf.expand_dims(t, -1))

    # --- 6. UV MAPPING ---
    hit_point_flat = tf.reshape(hit_point, [-1, 3])
    relative_hit = hit_point_flat - plane_point
    # Row-vector times R: projects each offset onto the columns of R (the plane's local axes).
    p_local_flat = tf.matmul(relative_hit, R) 
    p_local = tf.reshape(p_local_flat, [out_h, out_w, 3])
    
    # Only the first two local components are used as texture coordinates.
    raw_u = p_local[:, :, 0]
    raw_v = p_local[:, :, 1]
    
    # Scale
    u = raw_u / uv_scale
    v = raw_v / uv_scale
    
    # --- SHIFTING (Translation) ---
    # Add the shift offset. 
    # 0.0 = No shift, 0.5 = Half tile shift, 1.0 = Full tile shift (Same as 0.0)
    u = u + shift_u
    v = v + shift_v
    
    # Tiling
    # floormod keeps the result in [0, 1) for negative coordinates as well.
    u = tf.math.floormod(u, 1.0)
    v = tf.math.floormod(v, 1.0)
    
    # --- 7. SAMPLING ---
    tex_h = tf.cast(tf.shape(high_res_tex)[0], tf.float32)
    tex_w = tf.cast(tf.shape(high_res_tex)[1], tf.float32)
    
    # Map [0, 1) tile coordinates onto texel indices [0, size - 1).
    sample_x = u * (tex_w - 1.0)
    sample_y = v * (tex_h - 1.0)
    
    output = bilinear_sampler_tf(high_res_tex, sample_x, sample_y)
    
    # --- 8. MASKING ---
    # Pixels whose intersection parameter is not positive are blanked to zero.
    valid_mask = t > 0.0
    output = tf.where(tf.expand_dims(valid_mask, -1), output, tf.zeros_like(output))

    return output

In [11]:
# Starting texture: random 8-bit RGB noise, rescaled to [0, 1].
textureResolution = 16
initial_tex = np.random.randint(
    0,
    256,
    (textureResolution, textureResolution, 3),
    dtype=np.uint8,
)
# Held in a tf.Variable so it can be used for TF optimization.
tf_texture = tf.Variable(initial_tex.astype(np.float32) / 255.0, dtype=tf.float32)


# --- UI Setup ---
display_widget = widgets.Image(format='jpeg', width=1000)
info_label = widgets.HTML(value="<b>Initializing...</b>")

sample_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=max(0, len(sample_references)-1),
    description='Sample:',
    continuous_update=False,
)
tex_scale_slider = widgets.FloatSlider(
    value=330.0,
    min=150.0,
    max=1000.0,
    step=1.0,
    description='Tex Scale:',
    continuous_update=False,
)


def _make_shift_slider(label):
    """Create a 0..1 tile-shift slider (step 0.05) that updates continuously."""
    return widgets.FloatSlider(
        value=0.0,
        min=0.0,
        max=1.0,
        step=0.05,
        description=label,
        continuous_update=True,
    )


shift_u_slider = _make_shift_slider('Shift X:')
shift_v_slider = _make_shift_slider('Shift Y:')

def to_uint8(img):
    """Clamp `img` to [0, 1], scale by 255 and cast to ``np.uint8`` (truncating)."""
    clamped = np.clip(img, 0, 1)
    scaled = clamped * 255
    return scaled.astype(np.uint8)

def fast_render(sampleNo, texScale, shiftU, shiftV):
    """Render the current texture for one sample and push a preview to the widgets.

    Reads the sample's reference image, mask, overlay and transform from the
    module-level ``sample_*`` arrays, renders ``tf_texture`` with
    ``render_texture_tf``, composites it onto the reference where the mask is
    set and the overlay is not, and updates ``display_widget`` (JPEG of
    reference | rendered texture | composite) and ``info_label``.

    Args:
        sampleNo: Index into the ``sample_*`` arrays; indices at or beyond
            ``len(sample_references)`` are ignored.
        texScale: Passed to ``render_texture_tf`` as ``uv_scale``.
        shiftU: Passed to ``render_texture_tf`` as ``shift_u``.
        shiftV: Passed to ``render_texture_tf`` as ``shift_v``.

    Returns:
        None. The function works purely through widget side effects.
    """
    if sampleNo >= len(sample_references):
        return

    # 1. Get Data
    ref_img   = sample_references[sampleNo] # Already float 0-1
    mask_img  = sample_masks[sampleNo]
    overlay   = sample_overlays[sampleNo]
    transforms = sample_transforms[sampleNo] 
    # [distance, pitch, yaw, vehicle_id]

    # 2. Pre-process Angles (Numpy)
    # Parse with float() rather than int(): int() silently truncates fractional
    # distances/angles and raises ValueError on decimal strings such as "12.5"
    # (the transform rows may be stored as strings next to the vehicle id).
    # Integral inputs yield the same numeric values as before.
    raw_dist = float(transforms[0])
    raw_pitch = float(transforms[1])
    raw_yaw = float(transforms[2])
    pitch, yaw, roll = calcTransforms(raw_pitch, raw_yaw)
    
    # 3. Differentiable Render (TensorFlow)
    # Convert scalar inputs to tensors so the tf.function is not retraced per value.
    t_pitch = tf.convert_to_tensor(pitch, dtype=tf.float32)
    t_yaw   = tf.convert_to_tensor(yaw, dtype=tf.float32)
    t_roll  = tf.convert_to_tensor(roll, dtype=tf.float32)
    t_dist  = tf.convert_to_tensor(raw_dist, dtype=tf.float32)

    t_scale = tf.convert_to_tensor(texScale, dtype=tf.float32)

    t_shift_u = tf.convert_to_tensor(shiftU, dtype=tf.float32)
    t_shift_v = tf.convert_to_tensor(shiftV, dtype=tf.float32)
    
    # Run the graph function (default image_size is 500x500).
    tf_output = render_texture_tf(
        tf_texture, t_pitch, t_yaw, t_roll, t_dist, 
        uv_scale=t_scale, 
        shift_u=t_shift_u, 
        shift_v=t_shift_v
    )
    
    # Convert back to Numpy for display
    transformed_tex = tf_output.numpy()

    # 4. Composite (Numpy)
    # A pixel counts as "set" when any channel exceeds 0.01; keepdims lets the
    # single-channel boolean broadcast against the colour images below.
    road_mask = np.any(mask_img > 0.01, axis=-1, keepdims=True)
    car_mask  = np.any(overlay  > 0.01, axis=-1, keepdims=True)
    apply_tex = road_mask & ~car_mask

    final_comp = np.where(apply_tex, transformed_tex, ref_img)

    # 5. Display
    combined = np.hstack((
        to_uint8(ref_img),
        to_uint8(transformed_tex),
        to_uint8(final_comp)
    ))

    # OpenCV expects BGR channel order when encoding.
    _, encoded = cv2.imencode(
        '.jpg',
        cv2.cvtColor(combined, cv2.COLOR_RGB2BGR),
        [int(cv2.IMWRITE_JPEG_QUALITY), 80]
    )
    display_widget.value = encoded.tobytes()

    info_label.value = f"""
    <div style="font-family: monospace; font-size: 14px;">
        <b>Original:</b> Dist:{raw_dist:.1f}, P:{raw_pitch:.1f}, Y:{raw_yaw:.1f} <br>
        <b>Adjusted:</b> P:{pitch:.1f}, Y:{yaw:.1f}, R:{roll:.1f}
    </div>
    """

# Re-run fast_render whenever a control changes; the keys must match its parameter names.
out = widgets.interactive_output(
    fast_render,
    dict(
        sampleNo=sample_slider,
        texScale=tex_scale_slider,
        shiftU=shift_u_slider,
        shiftV=shift_v_slider,
    ),
)
ui_controls = widgets.VBox([
    sample_slider,
    tex_scale_slider,
    shift_u_slider,
    shift_v_slider,
    info_label,
])
display(ui_controls, display_widget, out)

VBox(children=(IntSlider(value=0, continuous_update=False, description='Sample:', max=107), FloatSlider(value=…

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x06\x04\x0…

Output()