In [None]:
# Mount Google Drive (Colab only) so the model and dataset paths under
# /content/drive used later in this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import numpy as np
# Component-wise angular offset between two gaze directions
def angular_difference(theta1, phi1, theta2, phi2):
    """Return the signed per-axis differences between two (theta, phi) pairs.

    Works on scalars or NumPy arrays (element-wise, with normal broadcasting).

    Returns:
        tuple: ``(theta1 - theta2, phi1 - phi2)``.
    """
    return theta1 - theta2, phi1 - phi2

# Per-sample angular error between two sets of (theta, phi) points, plus its average
def AngularError(data1, data2):
  """Compute the Euclidean distance in (theta, phi) space for every row.

  Args:
      data1: 2-D array; column 0 is read as theta, column 1 as phi.
      data2: 2-D array indexed the same way; a single row broadcasts against data1.

  Returns:
      tuple: ``(full_errors, mean_error)`` - the per-row errors and their mean.
  """
  theta_diff, phi_diff = angular_difference(data1[:,0], data1[:,1], data2[:,0], data2[:,1])
  # Overall angular error: Euclidean distance in angular space
  per_sample_error = np.sqrt(theta_diff**2 + phi_diff**2)
  return per_sample_error, np.mean(per_sample_error)

In [None]:
def denormalize(data, true_min, true_max):
  """Undo a min-max scaling: map ``data`` from [0, 1] back onto [true_min, true_max].

  Works on scalars or NumPy arrays (element-wise).
  """
  value_span = true_max - true_min
  return data * value_span + true_min


def denormalize_full(data, true_minX, true_maxX, true_minY, true_maxY):  #data = (x,y)
  """Denormalise both columns of an (x, y) array, each with its own min/max.

  Args:
      data: 2-D array; column 0 is X, column 1 is Y.
      true_minX, true_maxX: range used to denormalise column 0.
      true_minY, true_maxY: range used to denormalise column 1.

  Returns:
      2-D array with the denormalised X and Y values as columns 0 and 1.
  """
  axis_ranges = ((true_minX, true_maxX), (true_minY, true_maxY))
  denormalized_columns = [
      np.array(denormalize(data[:, column], low, high))
      for column, (low, high) in enumerate(axis_ranges)
  ]
  return np.column_stack(denormalized_columns)     #data shape (x,y)


# Min/max bounds passed to denormalize_full() to map normalised X / Y gaze values
# back to their original range.
# NOTE(review): presumably the extrema of the training labels, in degrees - confirm
# they match the values used when the dataset was normalised.
Xmin = -19.067780871643322
Xmax = 21.46873726490827

Ymin = -38.03606803389029
Ymax = 27.631278570342523


In [4]:
import tensorflow as tf
from tensorflow import keras
from keras import models
import h5py

In [5]:
#Report (and optionally remove) outliers of a one-dimensional array using the IQR rule
#input: 1d array-like
#output: the input unchanged (default), or a 1d array without outliers when remove=True
def removeOutliers1D(data, remove=False):
  """Count the IQR outliers in ``data`` and optionally drop them.

  A value is an outlier when it is >= Q3 + 1.5*IQR or <= Q1 - 1.5*IQR, with the
  quartiles computed by ``np.percentile(..., method='midpoint')``. The number of
  outliers is printed as ``Outliers: <count>``.

  Args:
      data: 1-D array-like of numbers.
      remove: when False (default, the historical behaviour) ``data`` is returned
          untouched; when True a new NumPy array without the outliers is returned.

  Returns:
      ``data`` itself, or a filtered copy when ``remove`` is True.
  """
  # IQR
  Q1 = np.percentile(data, 25, method='midpoint')
  Q3 = np.percentile(data, 75, method='midpoint')
  IQR = Q3 - Q1

  # Upper and lower limits (inclusive, as before)
  upper = Q3 + 1.5 * IQR
  lower = Q1 - 1.5 * IQR

  # One combined mask, so a value is never counted twice (this could happen when IQR == 0
  # and the two index lists were concatenated)
  values = np.asarray(data)
  outliers = np.flatnonzero((values >= upper) | (values <= lower))
  print("Outliers:" , outliers.size)

  if remove:
    return np.delete(values, outliers)
  return data



In [None]:
#Parameters
model = models.load_model("/content/drive/MyDrive/Thesis/Models/Full_10.h5")

final_path1 = "/content/drive/MyDrive/Thesis/Datasets2/final_dataset.hdf5"
final_path2 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset2.hdf5"
final_path3 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset3.hdf5"
final_path4 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset4.hdf5"

# The files are treated as one concatenated dataset, in this order
filenames = [final_path1, final_path2, final_path3, final_path4]


#MEAN baseline: constant (theta, phi) prediction, shaped (1, 2) so it broadcasts against a batch
# NOTE(review): hard-coded means, said to be computed from the ground-truth data (curr mean: 1.1891, -5.2595) - confirm they match the dataset in use
mean_theta = 1.1891425755626226
mean_phi = -5.259580611647258
mean_baseline = np.full((1,2), (mean_theta, mean_phi))
print(f"Mean Baseline: {mean_baseline}")

#CENTER baseline: constant prediction at (0,0)
center_theta = center_phi = 0
center_baseline = np.full((1,2),0)
print(f"Center baseline: {center_baseline}")


# Evaluation window [startPoint, endPoint), as global sample indices across all files
batch_size = 1000
startPoint = 154863
endPoint = 172071
if not 0 <= startPoint < endPoint:
    raise ValueError(f"Invalid evaluation range: startPoint={startPoint}, endPoint={endPoint}")

#init prediction list
full_predictions = []
full_groundtruth = []

#init full error list
full_errors= []
full_error_mean = []
full_error_center = []

# Init list of file sizes
file_sizes = []
total_samples = 0

# Calculate size of each file
for filename in filenames:
    with h5py.File(filename, 'r') as f:
        file_size = len(f['X1_dataset'])
        file_sizes.append(file_size)
        total_samples += file_size


# Start from startPoint
current_position = startPoint

# Walk through the files until the one containing `current_position` is found;
# `added_file_sizes_position` is the global index of that file's first sample.
file_index = 0
added_file_sizes_position = 0
while file_index < len(file_sizes) and current_position >= added_file_sizes_position + file_sizes[file_index]:
    added_file_sizes_position += file_sizes[file_index]
    file_index += 1

# Fail with a clear message instead of an IndexError on filenames[file_index]
if file_index >= len(filenames):
    raise ValueError(
        f"startPoint ({startPoint}) is beyond the {total_samples} samples available in the dataset files"
    )

# Local position within the current file
local_position = current_position - added_file_sizes_position
filename = filenames[file_index]

# Print the current filename being used
print(f"\nUsing file with index {file_index}: {filename}")

# The evaluation loop reads a single file, so make a truncated window visible
file_end = added_file_sizes_position + file_sizes[file_index]
if endPoint > file_end:
    print(f"Warning: endPoint ({endPoint}) lies beyond the end of this file (global index {file_end}); "
          f"only samples up to {file_end} will be evaluated.")


# Evaluate the model and both constant baselines over [startPoint, endPoint) inside the selected file.
with h5py.File(filename, 'r') as f:
    X1_dataset = f['X1_dataset']
    X2_dataset = f['X2_dataset']
    Y_dataset = f['Y_dataset']
    num_samples = file_sizes[file_index]

    while local_position < num_samples and current_position < endPoint:
        # Clamp the batch to the end of the file AND to endPoint; previously only the file end
        # was considered, so the last batch ran past endPoint and extra samples were evaluated.
        batch_len = min(batch_size, num_samples - local_position, endPoint - current_position)
        batch_end = local_position + batch_len

        # Contiguous range -> plain slices (same rows as an index array, cheaper for h5py)
        X1batch = X1_dataset[local_position:batch_end]
        X2batch = X2_dataset[local_position:batch_end]
        Ybatch  = Y_dataset[local_position:batch_end]
        batch_predictions = model.predict((X1batch,X2batch))        #predict

        # Bring predictions and labels back to their original (denormalised) range
        final_batch_predictions = denormalize_full(batch_predictions, Xmin, Xmax, Ymin, Ymax)
        final_batch_groundTruth = denormalize_full(Ybatch, Xmin, Xmax, Ymin, Ymax)
        full_predictions.append(final_batch_predictions)
        full_groundtruth.append(final_batch_groundTruth)

        # Per-sample errors of the model ...
        batch_errors, batch_average = AngularError(final_batch_groundTruth, final_batch_predictions)
        full_errors.append(batch_errors)

        # ... of the mean baseline ...
        mean_batch_error, mean_batch_average = AngularError(final_batch_groundTruth,mean_baseline)
        full_error_mean.append(mean_batch_error)

        # ... and of the center baseline (its average no longer overwrites mean_batch_average)
        center_batch_error, center_batch_average = AngularError(final_batch_groundTruth,center_baseline)
        full_error_center.append(center_batch_error)

        # Advance by the number of samples actually processed
        local_position += batch_len
        current_position += batch_len


# Flatten the per-batch results into flat Python lists (one entry per sample).

# Denormalised predictions and ground truth: one (x, y) row per sample
final_predictions = []
for batch in full_predictions:
  final_predictions.extend(batch)

final_groundtruth = []
for batch in full_groundtruth:
  final_groundtruth.extend(batch)

###########################################################################

# Per-sample errors of the model
final_prediction_error = []
for batch in full_errors:
  final_prediction_error.extend(batch)
print("Final num of predictions: ",len(final_prediction_error))

# Per-sample errors of the mean baseline
final_mean_error = []
for batch in full_error_mean:
  final_mean_error.extend(batch)

# Per-sample errors of the center baseline
final_center_error = []
for batch in full_error_center:
  final_center_error.extend(batch)




In [None]:
# Error series in the order center, mean, predictions
data = [final_center_error, final_mean_error, final_prediction_error]

# Average error of each predictor
avg_mean_baseline = np.mean(final_mean_error)
avg_center_baseline = np.mean(final_center_error)
avg_predictions = np.mean(final_prediction_error)
print("Mean Baseline   - Average Error: ", avg_mean_baseline,
      "\nCenter Baseline - Average Error: ", avg_center_baseline,
      "\nPredictions     - Average Error:  ", avg_predictions, "\n")

# Median error of each predictor
median_mean_baseline = np.median(final_mean_error)
median_center_baseline = np.median(final_center_error)
median_predictions = np.median(final_prediction_error)
print("Mean Baseline   - Median Value: ", median_mean_baseline,
      "\nCenter Baseline - Median Value: ", median_center_baseline,
      "\nPredictions     - Median Value:  ", median_predictions)


In [None]:
import seaborn as sns
# Box plot comparing the three error distributions held in `data`
sns.set_style("whitegrid")
box_labels = ['Center Baseline', 'Mean Baseline', 'Predictions']  # one label per series drawn from `data`
ax = sns.boxplot(data=data, palette='flare', width=0.5, showmeans=True)
ax.set_xticklabels(box_labels)
ax.set_ylabel('Angular Error (degrees)')  # Y-axis label


In [None]:
# Report the number of IQR outliers in each error series.
# Looping (rather than ending the cell on a bare removeOutliers1D(...) call) stops the notebook
# from echoing the last call's return value - the entire error list - as the cell output.
for banner, errors in (
    ("==========================Predictions==============================", final_prediction_error),
    ("==========================Mean baseline=============================", final_mean_error),
    ("==========================Center baseline===========================", final_center_error),
):
    print(banner)
    removeOutliers1D(errors)


In [None]:
#Parameters
model = models.load_model("/content/drive/MyDrive/Thesis/Models/Full_10.h5")

final_path1 = "/content/drive/MyDrive/Thesis/Datasets2/final_dataset.hdf5"
final_path2 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset2.hdf5"
final_path3 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset3.hdf5"
final_path4 = "/content/drive/MyDrive/Thesis/Datasets2/final_dtset/final_dataset4.hdf5"

# The files are treated as one concatenated dataset, in this order
filenames = [final_path1, final_path2, final_path3, final_path4]


#MEAN baseline: constant (theta, phi) prediction, shaped (1, 2) so it broadcasts against a batch
# NOTE(review): hard-coded means, said to be computed from the ground-truth data (curr mean: 1.1891, -5.2595) - confirm they match the dataset in use
mean_theta = 1.1891425755626226
mean_phi = -5.259580611647258
mean_baseline = np.full((1,2), (mean_theta, mean_phi))
print(f"Mean Baseline: {mean_baseline}")

#CENTER baseline: constant prediction at (0,0)
center_theta = center_phi = 0
center_baseline = np.full((1,2),0)
print(f"Center baseline: {center_baseline}")


# Evaluation window [startPoint, endPoint), as global sample indices across all files
batch_size = 1000
startPoint = 154863
endPoint = 172071
if not 0 <= startPoint < endPoint:
    raise ValueError(f"Invalid evaluation range: startPoint={startPoint}, endPoint={endPoint}")


#init full error list
full_errors= []
full_error_mean = []
full_error_center = []

# Init list of file sizes
file_sizes = []
total_samples = 0

# Calculate size of each file
for filename in filenames:
    with h5py.File(filename, 'r') as f:
        file_size = len(f['X1_dataset'])
        file_sizes.append(file_size)
        total_samples += file_size


# Start from startPoint
current_position = startPoint

# Walk through the files until the one containing `current_position` is found;
# `added_file_sizes_position` is the global index of that file's first sample.
file_index = 0
added_file_sizes_position = 0
while file_index < len(file_sizes) and current_position >= added_file_sizes_position + file_sizes[file_index]:
    added_file_sizes_position += file_sizes[file_index]
    file_index += 1

# Fail with a clear message instead of an IndexError on filenames[file_index]
if file_index >= len(filenames):
    raise ValueError(
        f"startPoint ({startPoint}) is beyond the {total_samples} samples available in the dataset files"
    )

# Local position within the current file
local_position = current_position - added_file_sizes_position
filename = filenames[file_index]

# Print the current filename being used
print(f"\nUsing file with index {file_index}: {filename}")

# The evaluation loop reads a single file, so make a truncated window visible
file_end = added_file_sizes_position + file_sizes[file_index]
if endPoint > file_end:
    print(f"Warning: endPoint ({endPoint}) lies beyond the end of this file (global index {file_end}); "
          f"only samples up to {file_end} will be evaluated.")



In [None]:
# Parameters for region radius (both in degrees).
# NOTE: in the evaluation loop below, model distances are compared against
# ground_truth_radius while both baselines are compared against predicted_radius;
# the two values are currently equal.
ground_truth_radius = 15  # degrees; threshold applied to the model's distances
predicted_radius = 15     # degrees; threshold applied to the mean and center baselines' distances

# Running hit counters (integers) for the recall computation
model_hits = 0
mean_hits = 0
center_hits = 0

# Row-wise Euclidean distance (in degrees) between ground truth and predicted points
def calculate_distance(gt, pred):
    """Return the distance between matching rows of ``gt`` and ``pred``.

    Both arguments are 2-D arrays whose columns 0 and 1 hold the two coordinates;
    a single-row ``pred`` broadcasts against every row of ``gt``.
    """
    delta_x = pred[:, 0] - gt[:, 0]
    delta_y = pred[:, 1] - gt[:, 1]
    return np.sqrt(delta_x**2 + delta_y**2)


# Recall = share of evaluated samples whose distance to the ground truth is within the radius.

# Restart from the beginning of the window so this cell gives the same result when re-run on its own
current_position = startPoint
local_position = startPoint - added_file_sizes_position
evaluated_samples = 0

with h5py.File(filename, 'r') as f:
    X1_dataset = f['X1_dataset']
    X2_dataset = f['X2_dataset']
    Y_dataset = f['Y_dataset']
    num_samples = file_sizes[file_index]

    while local_position < num_samples and current_position < endPoint:
        # Clamp the batch to the end of the file AND to endPoint; previously the last batch
        # ran past endPoint, so hits were counted on samples outside the window.
        batch_len = min(batch_size, num_samples - local_position, endPoint - current_position)
        batch_end = local_position + batch_len

        # Contiguous range -> plain slices (same rows as an index array, cheaper for h5py)
        X1batch = X1_dataset[local_position:batch_end]
        X2batch = X2_dataset[local_position:batch_end]
        Ybatch  = Y_dataset[local_position:batch_end]
        batch_predictions = model.predict((X1batch,X2batch))        #predict

        final_batch_predictions = denormalize_full(batch_predictions, Xmin, Xmax, Ymin, Ymax)
        final_batch_groundTruth = denormalize_full(Ybatch, Xmin, Xmax, Ymin, Ymax)

        # Calculate distances for model predictions, mean baseline, and center baseline
        distances_model = calculate_distance(final_batch_groundTruth, final_batch_predictions)
        distances_mean = calculate_distance(final_batch_groundTruth, mean_baseline)
        distances_center = calculate_distance(final_batch_groundTruth, center_baseline)

        # Count hits based on the ground truth and prediction radii
        model_hits += np.sum(distances_model <= ground_truth_radius)
        mean_hits += np.sum(distances_mean <= predicted_radius)
        center_hits += np.sum(distances_center <= predicted_radius)

        # Advance by the number of samples actually processed
        evaluated_samples += batch_len
        local_position += batch_len
        current_position += batch_len


# Total number of samples evaluated: counted, not assumed to be endPoint - startPoint,
# so the denominator stays correct even if the window is cut short by the end of the file
total_samples = evaluated_samples
if total_samples == 0:
    raise ValueError("No samples were evaluated - check startPoint/endPoint and the selected file.")

# Calculate recall rates
model_recall_rate = model_hits / total_samples
mean_recall_rate = mean_hits / total_samples
center_recall_rate = center_hits / total_samples

# Print out the recall rates
print(f"Model Recall Rate: {model_recall_rate:.4f}")
print(f"Mean Baseline Recall Rate: {mean_recall_rate:.4f}")
print(f"Center Baseline Recall Rate: {center_recall_rate:.4f}")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
# No of Data points (not used by the computations in this cell)
N = 17208

# Histogram of each error series; with density=True the counts are densities
# (they integrate to 1 over the bin range)
count_mean, bins_count_mean = np.histogram(final_mean_error, bins=10, density = True)
count_center, bins_count_center = np.histogram(final_center_error, bins=10, density = True)
count_predictions, bins_count_predictions= np.histogram(final_prediction_error, bins=10, density = True)

# Probability mass per bin = density * bin width (sums to 1).
# The previous version divided by the sum of the bin EDGES, which is not a normalisation.
pdf_mean = count_mean * np.diff(bins_count_mean)
pdf_center = count_center * np.diff(bins_count_center)
pdf_predictions = count_predictions * np.diff(bins_count_predictions)

# CDF = running sum of the per-bin probability mass (also valid for non-uniform bins)
cdf_mean = np.cumsum(pdf_mean)
cdf_center = np.cumsum(pdf_center)
cdf_predictions = np.cumsum(pdf_predictions)


# Plot the CDFs against the right edge of each bin
plt.plot(bins_count_mean[1:], cdf_mean, label="Mean")
plt.plot(bins_count_center[1:], cdf_center, color="red", label="Center")
plt.plot(bins_count_predictions[1:], cdf_predictions, color="magenta", label="Predictions")
plt.legend()
plt.xlabel("Prediction error (deg)")
plt.ylabel("Data Proportion")
plt.show()


In [None]:
# Compare the model's mean error with each baseline using 95% confidence intervals.

# Calculate means
mean_model = np.mean(final_prediction_error)
mean_mean = np.mean(final_mean_error)
mean_center = np.mean(final_center_error)

# Calculate sample standard deviations (ddof=1)
std_model = np.std(final_prediction_error, ddof=1)
std_mean = np.std(final_mean_error, ddof=1)
std_center = np.std(final_center_error, ddof=1)


# Calculate SEMs - each series uses its OWN standard deviation
# (the baselines previously reused std_model, which made their intervals wrong)
sem_model = std_model / np.sqrt(len(final_prediction_error))
sem_mean = std_mean / np.sqrt(len(final_mean_error))
sem_center = std_center / np.sqrt(len(final_center_error))

# Calculate 95% confidence intervals (normal approximation, z = 1.96)
ci_model = [mean_model - 1.96 * sem_model, mean_model + 1.96 * sem_model]
ci_mean = [mean_mean - 1.96 * sem_mean, mean_mean + 1.96 * sem_mean]
ci_center = [mean_center - 1.96 * sem_center, mean_center + 1.96 * sem_center]


# NOTE(review): non-overlapping intervals do indicate a significant difference, but overlapping
# intervals do not by themselves prove its absence - a paired test on the per-sample errors
# would be the stricter check.

# Model vs. mean baseline: do the intervals overlap?
do_intervals_overlap = not (ci_model[1] < ci_mean[0] or ci_mean[1] < ci_model[0])

# Print results
print(f"Model Mean Error:         {mean_model},               95% CI: {ci_model}")
print(f"Mean Baseline Mean Error: {mean_mean},               95% CI: {ci_mean}")
if do_intervals_overlap:
    print("The confidence intervals overlap, so the difference is NOT statistically significant.")
else:
    print("The confidence intervals do NOT overlap, so the difference IS statistically significant.")


# Model vs. center baseline: do the intervals overlap?
do_intervals_overlap = not (ci_model[1] < ci_center[0] or ci_center[1] < ci_model[0])

# Print results
print(f"\nModel Mean Error:           {mean_model},             95% CI: {ci_model}")
print(f"Center Baseline Mean Error: {mean_center},            95% CI: {ci_center}")
if do_intervals_overlap:
    print("The confidence intervals overlap, so the difference is NOT statistically significant.")
else:
    print("The confidence intervals do NOT overlap, so the difference IS statistically significant.")



print(f"\nModel SEM:  {sem_model} \nMean SEM:   {sem_mean} \nCenter SEM: {sem_center}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import entropy



# Compare the 2-D distribution of predicted gaze points with that of the ground truth.

# x1/y1 and x2/y2 were never defined in this notebook, so the cell failed with a NameError on a
# fresh kernel. Derive them from the denormalised per-sample results of the evaluation cell.
predictions_xy = np.asarray(final_predictions)
groundtruth_xy = np.asarray(final_groundtruth)
x1, y1 = predictions_xy[:, 0], predictions_xy[:, 1]   # model predictions
x2, y2 = groundtruth_xy[:, 0], groundtruth_xy[:, 1]   # ground truth

# Create 2D histograms on a common grid
bins = 50
range_limits = [[-50, 50], [-50, 50]]

pred_hist, xedges, yedges = np.histogram2d(x1, y1, bins=bins, range=range_limits)
gt_hist, _, _ = np.histogram2d(x2, y2, bins=bins, range=range_limits)

# Normalize histograms to sum to 1 (for statistical comparison)
pred_hist_norm = pred_hist / np.sum(pred_hist)
gt_hist_norm = gt_hist / np.sum(gt_hist)

# Calculate statistical metrics
# 1. KL Divergence of predictions relative to ground truth (flattened histograms)
pred_flat = pred_hist_norm.flatten() + 1e-10  # Add small constant to avoid division by zero
gt_flat = gt_hist_norm.flatten() + 1e-10
kl_div = entropy(pred_flat, gt_flat)

# 2. Mean Squared Error (MSE) between the normalised histograms
mse = np.mean((pred_hist_norm - gt_hist_norm) ** 2)

# One figure per point set, drawn with identical settings
# NOTE(review): hist2d is called without density=True, so the colour scale shows raw bin counts
# even though the colorbar is labelled "Density" - confirm which one is intended.
for title, xs, ys in (("Predictions", x1, y1), ("Groundtruth", x2, y2)):
    plt.figure(figsize=(8, 6))
    plt.hist2d(xs, ys, bins=bins, range=range_limits, cmap="viridis")
    plt.title(title)
    plt.xlabel("X (degrees)")
    plt.ylabel("Y (degrees)")
    plt.colorbar(label="Density")
    plt.tight_layout()
    plt.show()

# Display metrics (the MSE was computed before but never reported)
print(f"KL Divergence: {kl_div:.4f}")
print(f"Histogram MSE: {mse:.3e}")
