In [None]:
import pandas as pd 

csv_names = ["Drop_yolo.csv", "Grasp_yolo.csv", "ReachPick_yolo.csv", "ReachDrop_yolo.csv"]
#csv_names = ["ReachDrop_yolo.csv"]

# Load and merge CSV files
dataframes = [pd.read_csv(name) for name in csv_names]
merged_df = pd.concat(dataframes, ignore_index=True)

# Keep only the rows where "cls" is "red cube"
#filtered_df = merged_df[merged_df['cls'] == 'blue cube']

In [22]:
#filtered_df.head(5)

In [24]:
merged_df.head(5)

Unnamed: 0,px,py,w,h,conf,cls,world_x,world_y,world_z
0,47.0,121.5,30.0,29.0,0.912683,red cube,0.000116,-0.180255,0.871774
1,244.5,93.5,23.0,31.0,0.901834,blue cube,0.012437,0.233266,0.946969
2,51.0,136.5,30.0,31.0,0.879418,green cube,6.8e-05,-0.180102,0.824637
3,47.0,121.5,30.0,29.0,0.912683,red cube,0.000116,-0.180255,0.871774
4,51.0,136.5,30.0,31.0,0.879418,green cube,6.8e-05,-0.180102,0.824637


In [8]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# Load your dataframe (df)
# columns: px, py, w, h, conf, cls, world_x, world_y, world_z

df = merged_df

# Input features (image space)
X = df[["px", "py"]].values

# Targets (world space)
Y = df[["world_x", "world_y", "world_z"]].values

# Fit affine mapping for each world coordinate
reg_x = LinearRegression().fit(np.c_[X, np.ones(len(X))], Y[:,0])
reg_y = LinearRegression().fit(np.c_[X, np.ones(len(X))], Y[:,1])
reg_z = LinearRegression().fit(np.c_[X, np.ones(len(X))], Y[:,2])

def pixel_to_world(px, py):
    vec = np.array([px, py, 1.0])
    wx = reg_x.predict([vec])[0]
    wy = reg_y.predict([vec])[0]
    wz = reg_z.predict([vec])[0]
    return wx, wy, wz

# Test on one sample
print(pixel_to_world(47.0, 121.5))



(0.0007619680808784637, -0.17993275737890543, 0.8657178686889888)


In [29]:
import joblib

joblib.dump({"reg_x": reg_x, "reg_y": reg_y, "reg_z": reg_z}, "calibration_models.pkl")

['calibration_models.pkl']

In [27]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler


# --- Features and targets ---
X = df[["px", "py", "w", "h"]].values
Y = df[["world_x", "world_y", "world_z"]].values

# --- Polynomial regression (degree 2 works well as first test) ---
def make_reg():
    return make_pipeline(StandardScaler(),
                         PolynomialFeatures(degree=2, include_bias=False),
                         LinearRegression())

reg_x = make_reg()
reg_y = make_reg()
reg_z = make_reg()

# Fit each regressor
reg_x.fit(X, Y[:, 0])
reg_y.fit(X, Y[:, 1])
reg_z.fit(X, Y[:, 2])

# --- Prediction function ---
def pixel_to_world(px, py, w, h):
    features = np.array([[px, py, w, h]])
    x = reg_x.predict(features)[0]
    y = reg_y.predict(features)[0]
    z = reg_z.predict(features)[0]
    return x, y, z

# --- Example usage ---
print(pixel_to_world(47.0, 121.5, 30.0, 29.0))

(-8.703270604412672e-07, -0.17993528249209165, 0.8715439857456941)


In [20]:
joblib.dump({"reg_x": reg_x, "reg_y": reg_y, "reg_z": reg_z}, "poly_models.pkl")

['poly_models.pkl']

In [39]:
import joblib

#joblib.dump({"reg_x": reg_x, "reg_y": reg_y, "reg_z": reg_z}, "calibration_models.pkl")

# Load
models = joblib.load("calibration_models.pkl")
reg_x, reg_y, reg_z = models["reg_x"], models["reg_y"], models["reg_z"]


In [40]:
# Find all the csv files that Start with a name from the given csv_names list and ends with ".csv" knowing that the files are in the same directory as this script
# file names are of the type csv_names[0] + "_*.csv"
import os
import glob
import pandas as pd

csv_names = ["Drop", "Grasp", "ReachPick", "ReachDrop"]

files = []
for name in csv_names:
    files.extend(glob.glob(f"{name}_*.csv"))
# Load and merge the files found
dataframes = [pd.read_csv(file) for file in files]
merged_df = pd.concat(dataframes, ignore_index=True)


In [41]:
# add new columns "pred_x", "pred_y", "pred_z" to the merged_df dataframe

merged_df["pred_x"] = merged_df.apply(lambda row: pixel_to_world(row["px"], row["py"])[0], axis=1)
merged_df["pred_y"] = merged_df.apply(lambda row: pixel_to_world(row["px"], row["py"])[1], axis=1)
merged_df["pred_z"] = merged_df.apply(lambda row: pixel_to_world(row["px"], row["py"])[2], axis=1)



In [42]:
# Remove the noisy rows where the world pos (world_x, world_y, world_z) is far away from the predicted position (pred_x, pred_y, pred_z)
# This is done via statistical analysis of the differences and removing the rows where the difference is greater than 3 standard deviations from the mean

def remove_noisy_rows(df):
    # Calculate the differences
    df["diff_x"] = df["world_x"] - df["pred_x"]
    df["diff_y"] = df["world_y"] - df["pred_y"]
    df["diff_z"] = df["world_z"] - df["pred_z"]

    # Calculate mean and std for each difference
    mean_x, std_x = df["diff_x"].mean(), df["diff_x"].std()
    mean_y, std_y = df["diff_y"].mean(), df["diff_y"].std()
    mean_z, std_z = df["diff_z"].mean(), df["diff_z"].std()

    # Filter out rows where the difference is greater than 3 standard deviations from the mean
    filtered_df = df[
        (np.abs(df["diff_x"] - mean_x) <= 3 * std_x) &
        (np.abs(df["diff_y"] - mean_y) <= 3 * std_y) &
        (np.abs(df["diff_z"] - mean_z) <= 3 * std_z)
    ]

    return filtered_df
clean_df = remove_noisy_rows(merged_df)

In [None]:
# Create a copy of the merged_df dataframe with only the columns "px", "py", "w", "h", "world_x", "world_y", "world_z", "pred_x", "pred_y", "pred_z"
filtered_df = clean_df[["px", "py", "w", "h", "world_x", "world_y", "world_z", "pred_x", "pred_y", "pred_z"]].copy()


#filtered_df["world_x"] -= 0.005
#filtered_df["world_y"] += 0.0
#[-10,14,0]
# Add a small offset to the world_z values (2mm)
filtered_df["world_z"] += 0.008


In [44]:
merged_df.head(5)

Unnamed: 0,px,py,w,h,conf,cls,world_x,world_y,world_z,pred_x,pred_y,pred_z,diff_x,diff_y,diff_z
0,47.0,121.5,30.0,29.0,0.914762,red cube,0.000131,-0.180318,0.871774,-0.000543,-0.180069,0.868439,0.000674,-0.000249,0.003335
1,51.0,136.5,30.0,31.0,0.880062,green cube,7.3e-05,-0.180118,0.824637,0.00206,-0.177679,0.830204,-0.001987,-0.002439,-0.005567
2,244.0,95.0,24.0,30.0,0.858274,blue cube,0.012234,0.233919,0.943752,0.002635,0.219203,0.934911,0.009599,0.014716,0.008842
3,47.0,121.5,30.0,29.0,0.914762,red cube,0.000131,-0.180318,0.871774,-0.000543,-0.180069,0.868439,0.000674,-0.000249,0.003335
4,244.0,96.5,24.0,31.0,0.890489,blue cube,0.012234,0.233919,0.943752,0.00288,0.218651,0.931089,0.009354,0.015268,0.012663


In [45]:
filtered_df.head(5)

Unnamed: 0,px,py,w,h,world_x,world_y,world_z,pred_x,pred_y,pred_z
0,47.0,121.5,30.0,29.0,0.000131,-0.158318,0.879774,-0.000543,-0.180069,0.868439
1,51.0,136.5,30.0,31.0,7.3e-05,-0.158118,0.832637,0.00206,-0.177679,0.830204
2,244.0,95.0,24.0,30.0,0.012234,0.255919,0.951752,0.002635,0.219203,0.934911
3,47.0,121.5,30.0,29.0,0.000131,-0.158318,0.879774,-0.000543,-0.180069,0.868439
4,244.0,96.5,24.0,31.0,0.012234,0.255919,0.951752,0.00288,0.218651,0.931089


In [46]:
# Retrain linear regression with the filtered data
X_filtered = filtered_df[["px", "py"]].values
Y_filtered = filtered_df[["world_x", "world_y", "world_z"]].values
reg_x_filtered = LinearRegression().fit(np.c_[X_filtered, np.ones(len(X_filtered))], Y_filtered[:,0])
reg_y_filtered = LinearRegression().fit(np.c_[X_filtered, np.ones(len(X_filtered))], Y_filtered[:,1])
reg_z_filtered = LinearRegression().fit(np.c_[X_filtered, np.ones(len(X_filtered))], Y_filtered[:,2])
def pixel_to_world_filtered(px, py):
    vec = np.array([px, py, 1.0])
    wx = reg_x_filtered.predict([vec])[0]
    wy = reg_y_filtered.predict([vec])[0]
    wz = reg_z_filtered.predict([vec])[0]
    return wx, wy, wz

# Test on one sample
print(pixel_to_world_filtered(47.0, 121.5))


(-0.00032867255568820586, -0.16053407417998278, 0.8741101523804948)


In [47]:
# Replace pred columns with the filtered predictions

filtered_df["pred_x"] = filtered_df.apply(lambda row: pixel_to_world_filtered(row["px"], row["py"])[0], axis=1)
filtered_df["pred_y"] = filtered_df.apply(lambda row: pixel_to_world_filtered(row["px"], row["py"])[1], axis=1)
filtered_df["pred_z"] = filtered_df.apply(lambda row: pixel_to_world_filtered(row["px"], row["py"])[2], axis=1)

# Compute the differences between the world and predicted positions
filtered_df["diff_x"] = filtered_df["world_x"] - filtered_df["pred_x"]
filtered_df["diff_y"] = filtered_df["world_y"] - filtered_df["pred_y"]
filtered_df["diff_z"] = filtered_df["world_z"] - filtered_df["pred_z"]

# Compute the error metrics
mean_error_x = filtered_df["diff_x"].mean()
mean_error_y = filtered_df["diff_y"].mean()
mean_error_z = filtered_df["diff_z"].mean()
std_error_x = filtered_df["diff_x"].std()
std_error_y = filtered_df["diff_y"].std()
std_error_z = filtered_df["diff_z"].std()
print(f"Mean Error X: {mean_error_x}, Std Error X: {std_error_x}")
print(f"Mean Error Y: {mean_error_y}, Std Error Y: {std_error_y}")
print(f"Mean Error Z: {mean_error_z}, Std Error Z: {std_error_z}")


Mean Error X: 2.194672507663093e-18, Std Error X: 0.004136811470553184
Mean Error Y: 3.395657342956362e-17, Std Error Y: 0.010720729096946739
Mean Error Z: 6.456671076033535e-17, Std Error Z: 0.005916039767177171


In [48]:
# Save the filtered models
joblib.dump({"reg_x": reg_x_filtered, "reg_y": reg_y_filtered, "reg_z": reg_z_filtered}, "filtered_calibration_models.pkl")

['filtered_calibration_models.pkl']

In [9]:
# Load base regression models
import joblib
models = joblib.load("calibration_models.pkl")
reg_x, reg_y, reg_z = models["reg_x"], models["reg_y"], models["reg_z"]

In [10]:
#### LEARN FROM DUAL CAMS ####

# Find all the csv files that Start with a name from the given csv_names list and ends with ".csv" knowing that the files are in the same directory as this script
# file names are of the type csv_names[0] + "_*.csv"
import os
import glob
import pandas as pd
import numpy as np

csv_names = ["Drop", "Grasp", "ReachPick", "ReachDrop", "yolo"]

files = []
for name in csv_names:
    files.extend(glob.glob(f"{name}_*.csv"))
print(files)
# Load and merge the files found
dataframes = [pd.read_csv(file) for file in files]
merged_df = pd.concat(dataframes, ignore_index=True)


['yolo_data_4.csv']


In [11]:
# add new columns "pred_x", "pred_y", "pred_z" to the merged_df dataframe
def pixel_to_world(px, py):
    vec = np.array([px, py, 1.0])
    wx = reg_x.predict([vec])[0]
    wy = reg_y.predict([vec])[0]
    wz = reg_z.predict([vec])[0]
    return wx, wy, wz

merged_df["pred_x"] = merged_df.apply(lambda row: pixel_to_world(row["px_cam1"], row["py_cam1"])[0], axis=1)
merged_df["pred_y"] = merged_df.apply(lambda row: pixel_to_world(row["px_cam1"], row["py_cam1"])[1], axis=1)
merged_df["pred_z"] = merged_df.apply(lambda row: pixel_to_world(row["px_cam1"], row["py_cam1"])[2], axis=1)
len(merged_df)

51987

In [12]:
# Remove the noisy rows where the world pos (world_x, world_y, world_z) is far away from the predicted position (pred_x, pred_y, pred_z)
# This is done via statistical analysis of the differences and removing the rows where the difference is greater than 3 standard deviations from the mean

def remove_noisy_rows(df):
    # Calculate the differences
    df["diff_x"] = df["world_x"] - df["pred_x"]
    df["diff_y"] = df["world_y"] - df["pred_y"]
    df["diff_z"] = df["world_z"] - df["pred_z"]

    # Calculate mean and std for each difference
    mean_x, std_x = df["diff_x"].mean(), df["diff_x"].std()
    mean_y, std_y = df["diff_y"].mean(), df["diff_y"].std()
    mean_z, std_z = df["diff_z"].mean(), df["diff_z"].std()

    # Filter out rows where the difference is greater than 3 standard deviations from the mean
    filtered_df = df[
        (np.abs(df["diff_x"] - mean_x) <= 3 * std_x) &
        (np.abs(df["diff_y"] - mean_y) <= 3 * std_y) &
        (np.abs(df["diff_z"] - mean_z) <= 3 * std_z)
    ]

    return filtered_df
clean_df = remove_noisy_rows(merged_df)

In [13]:
# Create a copy of the merged_df dataframe with only the columns "px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1", "px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2", "world_x", "world_y", "world_z", "ee_x", "ee_y", "ee_z", "pred_x", "pred_y", "pred_z"

filtered_df = clean_df[["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1", "px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2", "world_x", "world_y", "world_z", "ee_x", "ee_y", "ee_z", "pred_x", "pred_y", "pred_z"]].copy()

filtered_df["world_z"] += 0.008

In [14]:
filtered_df.head(5)

Unnamed: 0,px_cam1,py_cam1,w_cam1,h_cam1,conf_cam1,px_cam2,py_cam2,w_cam2,h_cam2,conf_cam2,world_x,world_y,world_z,ee_x,ee_y,ee_z,pred_x,pred_y,pred_z
0,136,137,20,25,0.940421,97,108,30,29,0.845666,1.698589e-19,0.02,0.827784,-0.07885,-0.0254,0.936416,0.00538,-0.009795,0.828482
1,50,135,30,32,0.92358,0,0,0,0,0.0,-1.330247e-19,-0.18,0.832784,-0.07885,-0.0254,0.936416,0.001777,-0.179105,0.834031
2,222,136,30,29,0.908191,0,0,0,0,0.0,-1.06143e-19,0.22,0.830284,-0.07885,-0.0254,0.936416,0.008494,0.160618,0.830577
3,136,137,20,25,0.940767,98,111,29,29,0.862378,1.7501839999999998e-19,0.02,0.827784,-0.074212,-0.02343,0.936162,0.00538,-0.009795,0.828482
4,50,135,30,32,0.923963,237,111,36,37,0.571598,-1.230897e-19,-0.18,0.832784,-0.074212,-0.02343,0.936162,0.001777,-0.179105,0.834031


In [15]:
len(filtered_df)

50765

In [77]:
# Train a new polynomial regression model with the dual cam data + ee pos
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# --- Features and targets ---
X_dual = filtered_df[["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1", "px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2", "ee_x", "ee_y", "ee_z"]].values
Y_dual = filtered_df[["world_x", "world_y", "world_z"]].values
# --- Polynomial regression (degree 2 works well as first test) ---
def make_reg():
    return make_pipeline(StandardScaler(),
                         PolynomialFeatures(degree=5, include_bias=False),
                         LinearRegression())
reg_x_dual = make_reg()
reg_y_dual = make_reg()
reg_z_dual = make_reg()

# Fit each regressor
reg_x_dual.fit(X_dual, Y_dual[:, 0])
reg_y_dual.fit(X_dual, Y_dual[:, 1])
reg_z_dual.fit(X_dual, Y_dual[:, 2])
def pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z):
    features = np.array([[px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z]])
    x = reg_x_dual.predict(features)[0]
    y = reg_y_dual.predict(features)[0]
    z = reg_z_dual.predict(features)[0]
    return x, y, z

# --- Example usage ---
# Get a prediction for the first row of the filtered_df dataframe
px1, py1, w1, h1, conf1 = filtered_df.iloc[0][["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1"]]
px2, py2, w2, h2, conf2 = filtered_df.iloc[0][["px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2"]]
ee_x, ee_y, ee_z = filtered_df.iloc[0][["ee_x", "ee_y", "ee_z"]]
print(pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z))
print(filtered_df.iloc[0][["world_x", "world_y", "world_z"]].values)

KeyboardInterrupt: 

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
X_dual = filtered_df[["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1", "px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2", "ee_x", "ee_y", "ee_z"]].values
Y_dual = filtered_df[["world_x", "world_y", "world_z"]].values

reg_x_dual = GradientBoostingRegressor(n_estimators=200, max_depth=3, learning_rate=0.1)
reg_y_dual = GradientBoostingRegressor(n_estimators=200, max_depth=3, learning_rate=0.1)
reg_z_dual = GradientBoostingRegressor(n_estimators=200, max_depth=3, learning_rate=0.1)

reg_x_dual.fit(X_dual, Y_dual[:, 0])
reg_y_dual.fit(X_dual, Y_dual[:, 1])
reg_z_dual.fit(X_dual, Y_dual[:, 2])
def pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z):
    features = np.array([[px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z]])
    x = reg_x_dual.predict(features)[0]
    y = reg_y_dual.predict(features)[0]
    z = reg_z_dual.predict(features)[0]
    return x, y, z
# --- Example usage ---
# Get a prediction for the first row of the filtered_df dataframe
px1, py1, w1, h1, conf1 = filtered_df.iloc[0][["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1"]]
px2, py2, w2, h2, conf2 = filtered_df.iloc[0][["px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2"]]
ee_x, ee_y, ee_z = filtered_df.iloc[0][["ee_x", "ee_y", "ee_z"]]
print(pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z))
print(filtered_df.iloc[0][["world_x", "world_y", "world_z"]].values)

(-2.9732311645179773e-07, 0.01994981820511498, 0.8278254660471982)
[1.69858918e-19 2.00000000e-02 8.27784489e-01]


In [None]:
# Replace pred columns with the dual cam predictions
filtered_df["pred_x"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[0], axis=1)
filtered_df["pred_y"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[1], axis=1)
filtered_df["pred_z"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[2], axis=1)

# Compute the differences between the world and predicted positions
filtered_df["diff_x"] = filtered_df["world_x"] - filtered_df["pred_x"]
filtered_df["diff_y"] = filtered_df["world_y"] - filtered_df["pred_y"]
filtered_df["diff_z"] = filtered_df["world_z"] - filtered_df["pred_z"]

# Compute the error metrics
mean_error_x = filtered_df["diff_x"].mean()
mean_error_y = filtered_df["diff_y"].mean()
mean_error_z = filtered_df["diff_z"].mean()
std_error_x = filtered_df["diff_x"].std()
std_error_y = filtered_df["diff_y"].std()
std_error_z = filtered_df["diff_z"].std()
print(f"Mean Error X: {mean_error_x}, Std Error X: {std_error_x}")
print(f"Mean Error Y: {mean_error_y}, Std Error Y: {std_error_y}")
print(f"Mean Error Z: {mean_error_z}, Std Error Z: {std_error_z}")

In [80]:
# Replace pred columns with the dual cam predictions
filtered_df["pred_x"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[0], axis=1)
filtered_df["pred_y"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[1], axis=1)
filtered_df["pred_z"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[2], axis=1)

# Compute the differences between the world and predicted positions
filtered_df["diff_x"] = filtered_df["world_x"] - filtered_df["pred_x"]
filtered_df["diff_y"] = filtered_df["world_y"] - filtered_df["pred_y"]
filtered_df["diff_z"] = filtered_df["world_z"] - filtered_df["pred_z"]

# Compute the error metrics
mean_error_x = filtered_df["diff_x"].mean()
mean_error_y = filtered_df["diff_y"].mean()
mean_error_z = filtered_df["diff_z"].mean()
std_error_x = filtered_df["diff_x"].std()
std_error_y = filtered_df["diff_y"].std()
std_error_z = filtered_df["diff_z"].std()
print(f"Mean Error X: {mean_error_x}, Std Error X: {std_error_x}")
print(f"Mean Error Y: {mean_error_y}, Std Error Y: {std_error_y}")
print(f"Mean Error Z: {mean_error_z}, Std Error Z: {std_error_z}")

Mean Error X: 7.741512536792649e-20, Std Error X: 0.0008793294829363763
Mean Error Y: -1.5314735350672477e-17, Std Error Y: 0.0010732572754690844
Mean Error Z: -5.846042994875362e-17, Std Error Z: 0.0008155256801207981


In [81]:
# Save the dual cam models
joblib.dump({"reg_x": reg_x_dual, "reg_y": reg_y_dual, "reg_z": reg_z_dual}, "dual_cam_calibration_models.pkl")

['dual_cam_calibration_models.pkl']

In [17]:
from sklearn.ensemble import RandomForestRegressor
X_dual = filtered_df[["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1", "px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2", "ee_x", "ee_y", "ee_z"]].values
Y_dual = filtered_df[["world_x", "world_y", "world_z"]].values

reg_x_dual = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
reg_y_dual = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
reg_z_dual = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)

reg_x_dual.fit(X_dual, Y_dual[:, 0])
reg_y_dual.fit(X_dual, Y_dual[:, 1])
reg_z_dual.fit(X_dual, Y_dual[:, 2])
def pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z):
    features = np.array([[px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z]])
    x = reg_x_dual.predict(features)[0]
    y = reg_y_dual.predict(features)[0]
    z = reg_z_dual.predict(features)[0]
    return x, y, z
# --- Example usage ---
# Get a prediction for the first row of the filtered_df dataframe
px1, py1, w1, h1, conf1 = filtered_df.iloc[0][["px_cam1", "py_cam1", "w_cam1", "h_cam1", "conf_cam1"]]
px2, py2, w2, h2, conf2 = filtered_df.iloc[0][["px_cam2", "py_cam2", "w_cam2", "h_cam2", "conf_cam2"]]
ee_x, ee_y, ee_z = filtered_df.iloc[0][["ee_x", "ee_y", "ee_z"]]
print(pixel_to_world_dual(px1, py1, w1, h1, conf1, px2, py2, w2, h2, conf2, ee_x, ee_y, ee_z))
print(filtered_df.iloc[0][["world_x", "world_y", "world_z"]].values)

(1.4790676546961885e-05, 0.019987686773308935, 0.8277852845283147)
[1.69858918e-19 2.00000000e-02 8.27784489e-01]


In [18]:
# Replace pred columns with the dual cam predictions
filtered_df["pred_x"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[0], axis=1)
filtered_df["pred_y"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[1], axis=1)
filtered_df["pred_z"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[2], axis=1)

# Compute the differences between the world and predicted positions
filtered_df["diff_x"] = filtered_df["world_x"] - filtered_df["pred_x"]
filtered_df["diff_y"] = filtered_df["world_y"] - filtered_df["pred_y"]
filtered_df["diff_z"] = filtered_df["world_z"] - filtered_df["pred_z"]

# Compute the error metrics
mean_error_x = filtered_df["diff_x"].mean()
mean_error_y = filtered_df["diff_y"].mean()
mean_error_z = filtered_df["diff_z"].mean()
std_error_x = filtered_df["diff_x"].std()
std_error_y = filtered_df["diff_y"].std()
std_error_z = filtered_df["diff_z"].std()
print(f"Mean Error X: {mean_error_x}, Std Error X: {std_error_x}")
print(f"Mean Error Y: {mean_error_y}, Std Error Y: {std_error_y}")
print(f"Mean Error Z: {mean_error_z}, Std Error Z: {std_error_z}")

Mean Error X: -1.5636196816180205e-06, Std Error X: 0.0005645848941660973
Mean Error Y: 3.3442651240772504e-07, Std Error Y: 0.0007365071757306728
Mean Error Z: -7.759937644472823e-07, Std Error Z: 0.0003579350943676746


In [59]:
# Replace pred columns with the dual cam predictions
filtered_df["pred_x"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[0], axis=1)
filtered_df["pred_y"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[1], axis=1)
filtered_df["pred_z"] = filtered_df.apply(lambda row: pixel_to_world_dual(row["px_cam1"], row["py_cam1"], row["w_cam1"], row["h_cam1"], row["conf_cam1"], row["px_cam2"], row["py_cam2"], row["w_cam2"], row["h_cam2"], row["conf_cam2"], row["ee_x"], row["ee_y"], row["ee_z"])[2], axis=1)

# Compute the differences between the world and predicted positions
filtered_df["diff_x"] = filtered_df["world_x"] - filtered_df["pred_x"]
filtered_df["diff_y"] = filtered_df["world_y"] - filtered_df["pred_y"]
filtered_df["diff_z"] = filtered_df["world_z"] - filtered_df["pred_z"]

# Compute the error metrics
mean_error_x = filtered_df["diff_x"].mean()
mean_error_y = filtered_df["diff_y"].mean()
mean_error_z = filtered_df["diff_z"].mean()
std_error_x = filtered_df["diff_x"].std()
std_error_y = filtered_df["diff_y"].std()
std_error_z = filtered_df["diff_z"].std()
print(f"Mean Error X: {mean_error_x}, Std Error X: {std_error_x}")
print(f"Mean Error Y: {mean_error_y}, Std Error Y: {std_error_y}")
print(f"Mean Error Z: {mean_error_z}, Std Error Z: {std_error_z}")

Mean Error X: 5.207707027316054e-16, Std Error X: 0.0013200825027191241
Mean Error Y: 1.1677341442863136e-15, Std Error Y: 0.0018794034669458873
Mean Error Z: 2.02529678151545e-15, Std Error Z: 0.0020466680696900378


In [60]:
# Save the dual cam models
joblib.dump({"reg_x": reg_x_dual, "reg_y": reg_y_dual, "reg_z": reg_z_dual}, "dual_cam_calibration_models.pkl")

['dual_cam_calibration_models.pkl']

In [None]:
# Load the dual cam models
models_dual = joblib.load("dual_cam_calibration_models.pkl")
reg_x_dual, reg_y_dual, reg_z_dual = models_dual["reg_x"], models_dual["reg_y"], models_dual["reg_z"]