In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from scipy.stats import gaussian_kde
import matplotlib.patches as patches
from matplotlib.patches import Ellipse
from scipy.stats import chi2
from glob import glob

from physhapes.plotting import *
from physhapes.mcmc import load_mcmc_results

# Visualize results


In [None]:
# --- Experiment configuration ---
# Fraction of each chain treated as burn-in by the cells below.
burnin_percent = 0.3

# Directory layout: <sim_path>/mcmc/id=<run id>/{results_*.pkl, plots/}
sim_path = "hercules_root_sigma=0.4_alpha=0.03_dt=0.05/seed=1055283113"  # Change to your experiment path
mcmc_path = f"{sim_path}/mcmc/id=164632041"
results_path = f"{mcmc_path}/results_*.pkl"  # Adjust pattern as needed
save_path = f"{mcmc_path}/plots"

# Figures are written below save_path, so make sure it exists up front.
os.makedirs(save_path, exist_ok=True)

param_names = ["sigma", "alpha"]  # Replace with your actual parameter names
chain_results = load_mcmc_results(results_path)

# Number of entries returned by the loader (displayed as the cell output).
len(chain_results)

In [None]:
# Save the log-posterior figure; the burn-in fraction is encoded in the file name.
plot_log_posterior(
    chain_results,
    burnin_percent,
    save_path=f'{save_path}/log_posterior_burnin_percent={burnin_percent}.png',
)

In [None]:
# Mean of the 'acceptsigma' record for every chain that loaded (None entries are skipped).
# Left as the bare last expression, like the 'acceptalpha' / 'acceptpath' cells below,
# so all three acceptance summaries are displayed the same way.
[np.mean(chain['acceptsigma']) for chain in chain_results if chain is not None]

In [None]:
# Mean of the 'acceptalpha' record per loaded chain (None entries are skipped).
[np.mean(chain['acceptalpha']) for chain in chain_results if chain is not None]

In [None]:
# Mean of the 'acceptpath' record per loaded chain (None entries are skipped).
[np.mean(chain['acceptpath']) for chain in chain_results if chain is not None]

In [None]:
# Save the parameter trace figure; the burn-in fraction is encoded in the file name.
plot_parameter_traces(
    chain_results,
    param_names,
    burnin_percent,
    savepath=f'{save_path}/parameter_traces_burnin_percent={burnin_percent}.png',
)

In [None]:
# Pooled post-burn-in means of sigma and alpha over all loaded chains.
# Drop failed chains (None) *before* reading the chain length, so a missing first
# chain no longer raises a TypeError on chain_results[0].
valid_chains = [chain for chain in chain_results if chain is not None]
if not valid_chains:
    raise ValueError("chain_results contains no successfully loaded chains")
burnin_end = int(valid_chains[0]['sigma'].shape[0] * burnin_percent)
# Concatenate instead of stacking so chains of unequal length can still be pooled.
print(np.mean(np.concatenate([np.ravel(chain['sigma'][burnin_end:]) for chain in valid_chains])))
print(np.mean(np.concatenate([np.ravel(chain['alpha'][burnin_end:]) for chain in valid_chains])))

In [None]:
# Show the settings stored with the run. Read them from the first chain that actually
# loaded: chain_results may contain None entries (the other cells filter them out),
# and indexing a None first entry would raise a TypeError.
next(chain for chain in chain_results if chain is not None)['settings']

In [None]:
# Diagnostics for the loaded chains at the chosen burn-in fraction; the result is
# passed to plot_traces through its `diagnostics` argument in the following cell.
# NOTE(review): compute_diagnostics is not imported by name in this notebook —
# presumably it arrives via `from physhapes.plotting import *`; confirm.
diagnostics = compute_diagnostics(chain_results, burnin_percent)

In [None]:
# Trace plots for a hand-picked set of nodes, annotated with the diagnostics computed above.
plot_traces(
    chain_results,
    burnin_percent,
    node_idx=[0, 1, 2, 6],
    save_path=save_path,
    diagnostics=diagnostics,
)

In [None]:
# Posterior samples for the same nodes, thinned via sample_every; no true values are overlaid.
plot_samples_from_posterior(
    chain_results,
    burnin_percent=burnin_percent,
    node_idx=[0, 1, 2, 6],
    sample_every=50,
    savepath=save_path,
    true_values=None,
)

## Plot confidence ellipses from fastAnc

In [None]:
# define functions 

def plot_confidence_ellipse(mean, cov, ax, confidence=0.95, edgecolor='red', facecolor='none', **kwargs):
    """
    Plots a confidence ellipse of a bivariate Gaussian distribution.

    Parameters:
    -----------
    mean : array-like, shape (2,)
        The mean vector [x, y]; used as the ellipse centre.
    cov : array-like, shape (2, 2)
        The 2x2 covariance matrix (assumed symmetric, as required by eigh).
    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.
    confidence : float
        Confidence level (e.g., 0.95 for 95%).
    edgecolor : str
        Color of the ellipse edge.
    facecolor : str
        Color of the ellipse face.
    kwargs : dict
        Additional arguments for Ellipse. The line width defaults to 2 but can
        be overridden with ``lw=...`` or ``linewidth=...``.

    Returns:
    --------
    matplotlib.patches.Ellipse
        The patch that was added to ``ax``.
    """
    # Squared Mahalanobis radius enclosing `confidence` of a 2-D Gaussian
    chi2_val = chi2.ppf(confidence, df=2)

    # Principal axes of the covariance, largest variance first
    vals, vecs = np.linalg.eigh(cov)
    order = vals.argsort()[::-1]
    vals, vecs = vals[order], vecs[:, order]

    # Round-off can make a (near-)singular covariance slightly negative, which
    # would turn the axis lengths into NaN; treat such eigenvalues as zero.
    vals = np.clip(vals, 0.0, None)

    # Orientation of the major axis in degrees: atan2(y, x) of its eigenvector
    major_axis = vecs[:, 0]
    theta = np.degrees(np.arctan2(major_axis[1], major_axis[0]))

    # Full axis lengths: 2 * sqrt(eigenvalue * chi2_val)
    width, height = 2 * np.sqrt(vals * chi2_val)

    # Apply the default line width only when the caller did not choose one;
    # hard-coding lw=2 next to **kwargs raised a TypeError for lw/linewidth.
    if 'lw' not in kwargs and 'linewidth' not in kwargs:
        kwargs['lw'] = 2

    ellipse = Ellipse(xy=mean, width=width, height=height, angle=theta,
                      edgecolor=edgecolor, facecolor=facecolor, **kwargs)
    ax.add_patch(ellipse)
    return ellipse


def read_fastAnc_var(directory):
    """
    Read all CSV files matching 'vars_*.csv' in the specified directory.

    Each file is parsed with its first row as header and its first column as
    index. Files that cannot be parsed are reported on stdout and skipped.

    Parameters:
    -----------
    directory : str
        Path to the directory containing the CSV files

    Returns:
    --------
    dict
        Dictionary with file names (without directory) as keys and pandas
        DataFrames as values, inserted in lexicographic file-name order.
        Empty if no file matches.
    """
    # glob returns matches in arbitrary, platform-dependent order; sort so the
    # resulting dict is ordered the same way on every run.
    pattern = os.path.join(directory, "vars_*.csv")
    files = sorted(glob(pattern))

    if not files:
        print(f"No files matching 'vars_*.csv' found in {directory}")
        return {}

    result = {}
    for file_path in files:
        file_name = os.path.basename(file_path)
        try:
            result[file_name] = pd.read_csv(file_path, header=0, index_col=0)
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest
            print(f"Error loading {file_name}: {e}")

    return result

In [None]:
# Load the fastAnc reconstruction table (first row = header, first column = index)
fastAnc_recon = pd.read_csv(f"{sim_path}/fastAnc/fastAnc_recon.csv", sep=',', header=0, index_col=0)

# Repeat the first two rows at the end; the plotting cells draw rows 0::2 against
# rows 1::2 as a line, so this makes the drawn line return to its first point.
fastAnc_recon_for_plotting = pd.concat(
    [fastAnc_recon, fastAnc_recon.head(2)],
    ignore_index=True,
)

In [None]:
# read variances from fastAnc
N_TRAITS = 40  # number of vars_trait<k>.csv files expected (k = 1..N_TRAITS)

# Named fastanc_vars instead of `vars` so the Python builtin vars() is not shadowed.
fastanc_vars = read_fastAnc_var(sim_path + "/fastAnc/")

trait_names = [f'vars_trait{k}.csv' for k in range(1, N_TRAITS + 1)]

# Fail with an explicit message instead of a bare KeyError when files are absent.
missing_files = [name for name in trait_names if name not in fastanc_vars]
if missing_files:
    raise FileNotFoundError(
        f"Missing fastAnc variance files in {sim_path}/fastAnc/: {missing_files}"
    )

# One flattened (row-major) array of values per trait file
rows = [fastanc_vars[name].values.flatten() for name in trait_names]

# Create DataFrame: rows = traits (in trait-number order), columns = flattened file values
cvars_df = pd.DataFrame(rows, index=trait_names)

print(cvars_df.shape)
print(cvars_df.head())

In [None]:
confidence_level = 0.95  # Set your desired confidence level here
node_indices = [0, 1, 2, 3]  # Adjust if you have a different number of inner nodes
fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)

# One panel per inner node. Consecutive row pairs (2k, 2k+1) of the fastAnc tables
# are used as the two coordinates of one landmark.
for ax, node_idx in zip(axes.flat, node_indices):
    for landmark_id in range(0, 40, 2):
        first_row, second_row = landmark_id, landmark_id + 1
        mean = [
            fastAnc_recon_for_plotting.iloc[first_row, node_idx],
            fastAnc_recon_for_plotting.iloc[second_row, node_idx],
        ]
        # Diagonal covariance: the two coordinate variances, no cross term
        variances = [
            cvars_df.iloc[first_row, node_idx],
            cvars_df.iloc[second_row, node_idx],
        ]
        cov = np.diag(variances)
        ax.scatter(mean[0], mean[1], s=50, color='blue')
        plot_confidence_ellipse(mean, cov, ax, confidence=confidence_level, edgecolor='red')
    ax.set_aspect('equal')
    ax.set_title(f'Inner node {node_idx}')

plt.tight_layout()
plt.savefig(f"{sim_path}/fastAnc_confidence_ellipse_all.pdf")
plt.show()

In [None]:
# prepare posterior for plotting
nidx = 2  # node whose posterior samples are extracted

# Drop failed chains (None) before reading the chain length, so a missing first
# chain no longer raises a TypeError on chain_results[0].
valid_chains = [chain for chain in chain_results if chain is not None]
if not valid_chains:
    raise ValueError("chain_results contains no successfully loaded chains")
burnin_end = int(valid_chains[0]['trees'].shape[0] * burnin_percent)

# nchain x mcmc iter x nnodes x ndim
all_posterior_samples = np.array([chain['trees'][burnin_end:, :, :] for chain in valid_chains])

# Pool chains and iterations for the selected node -> (nchain * iter, ndim)
sele_node = all_posterior_samples[:, :, nidx, :].reshape(-1, all_posterior_samples.shape[3])
print(sele_node.shape)

# reshape for nicer plotting: append the first two columns again at the end
first_landmark = sele_node[:, :2]
sele_node_plot = np.concatenate([sele_node, first_landmark], axis=1)
print(sele_node_plot.shape)

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(18, 8), sharex=True, sharey=True)
ax_fastanc, ax_posterior = axes

# Common axis limits for both panels
xlim_min, xlim_max = -0.25, 0.35
ylim_min, ylim_max = -0.25, 0.35

# Left panel: fastAnc reconstruction for node `nidx` with confidence ellipses
ax_fastanc.plot(
    fastAnc_recon_for_plotting.iloc[0::2, nidx],
    fastAnc_recon_for_plotting.iloc[1::2, nidx],
    '--o',
)
ax_fastanc.set_xlim(xlim_min, xlim_max)
ax_fastanc.set_ylim(ylim_min, ylim_max)
for i in range(0, fastAnc_recon.shape[0], 2):
    mean = [
        fastAnc_recon_for_plotting.iloc[i, nidx],
        fastAnc_recon_for_plotting.iloc[i + 1, nidx],
    ]
    # Diagonal covariance built from the two per-coordinate variances
    cov = np.diag([cvars_df.iloc[i, nidx], cvars_df.iloc[i + 1, nidx]])
    ax_fastanc.scatter(mean[0], mean[1], s=50, color='blue')
    plot_confidence_ellipse(mean, cov, ax_fastanc, confidence=confidence_level, edgecolor='red')
ax_fastanc.tick_params(axis='both', which='major', labelsize=18)
ax_fastanc.set_title('Brownian motion confidence sets', fontsize=22)

# Right panel: every 100th pooled posterior sample, drawn faintly on top of each other
for sample in sele_node_plot[::100]:
    ax_posterior.plot(sample[0::2], sample[1::2], '--o', alpha=0.1, color='steelblue')
ax_posterior.set_xlim(xlim_min, xlim_max)
ax_posterior.set_ylim(ylim_min, ylim_max)
ax_posterior.tick_params(axis='both', which='major', labelsize=18)
ax_posterior.set_title('Posterior samples', fontsize=22)

plt.tight_layout()
plt.savefig(f"{sim_path}/combined_landmark_plots_ellipsis_nidx={nidx}.pdf")
plt.show()

## Plot posterior distances between the landmarks that seem to be close

In [None]:
# Histogram of the distance between landmarks 12 and 13 over the pooled posterior samples
fig, ax = plt.subplots(figsize=(10, 6))

# Pool all chains and iterations for node `nidx`
sele_node = all_posterior_samples[:, :, nidx, :].reshape(-1, all_posterior_samples.shape[3])

# Landmark 12 is read from columns 24, 25 and landmark 13 from columns 26, 27;
# one Euclidean distance per pooled sample.
landmark12_13_distances = [
    np.sqrt((sample[24] - sample[26])**2 + (sample[25] - sample[27])**2)
    for sample in sele_node
]

# Summary statistics shown in the legend, the text box and the printout
mean_dist, median_dist = np.mean(landmark12_13_distances), np.median(landmark12_13_distances)
min_dist, max_dist = np.min(landmark12_13_distances), np.max(landmark12_13_distances)

ax.hist(landmark12_13_distances, bins=30, alpha=0.7, color='steelblue')
ax.set_title('Distribution of Distances Between Landmarks 12 and 13')
ax.set_xlabel('Distance Between Landmarks 12 and 13')
ax.set_ylabel('Frequency')

# Mark mean (solid red) and median (dashed green)
ax.axvline(x=mean_dist, color='red', linestyle='-',
           label=f'Mean: {mean_dist:.4f}')
ax.axvline(x=median_dist, color='green', linestyle='--',
           label=f'Median: {median_dist:.4f}')

# Statistics box in the upper-right corner (axes coordinates)
stats_text = (f"Min: {min_dist:.4f}\nMax: {max_dist:.4f}\n"
              f"Mean: {mean_dist:.4f}\nMedian: {median_dist:.4f}")
box_style = dict(boxstyle='round', facecolor='white', alpha=0.8)
ax.text(0.95, 0.95, stats_text, transform=ax.transAxes,
        verticalalignment='top', horizontalalignment='right',
        bbox=box_style)

ax.legend()
plt.tight_layout()
plt.savefig(f"{save_path}/landmark12_13_distance_distribution.pdf")
plt.show()

# Print basic statistics
print("Distance between landmarks 12 and 13:")
print(f"Mean: {mean_dist:.4f}")
print(f"Median: {median_dist:.4f}")
print(f"Min: {min_dist:.4f}")
print(f"Max: {max_dist:.4f}")
print(f"Standard deviation: {np.std(landmark12_13_distances):.4f}")

In [None]:
# Histogram of the distance between landmarks 0 and 1 over the pooled posterior samples.
# This cell was copied from the landmark 12/13 cell: its comments and variable names
# still referred to 12/13 and it saved over landmark12_13_distance_distribution.pdf.
# It now uses its own names and its own output file.
fig, ax = plt.subplots(figsize=(10, 6))

# Pool all chains and iterations for node `nidx`
sele_node = all_posterior_samples[:, :, nidx, :].reshape(-1, all_posterior_samples.shape[3])

# Landmark 0 is read from columns 0, 1 and landmark 1 from columns 2, 3;
# one Euclidean distance per pooled sample (vectorised over samples).
landmark0_1_distances = np.sqrt(
    (sele_node[:, 0] - sele_node[:, 2])**2 + (sele_node[:, 1] - sele_node[:, 3])**2
)

# Plot histogram of the distances
ax.hist(landmark0_1_distances, bins=30, alpha=0.7, color='steelblue')
ax.set_xlabel('Distance Between Landmarks 0 and 1')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Distances Between Landmarks 0 and 1')

# Summary statistics shown in the legend, the text box and the printout
mean_dist = np.mean(landmark0_1_distances)
median_dist = np.median(landmark0_1_distances)
min_dist = np.min(landmark0_1_distances)
max_dist = np.max(landmark0_1_distances)

# Add vertical line at mean
ax.axvline(x=mean_dist, color='red', linestyle='-',
           label=f'Mean: {mean_dist:.4f}')

# Add vertical line at median
ax.axvline(x=median_dist, color='green', linestyle='--',
           label=f'Median: {median_dist:.4f}')

# Add text with statistics (upper-right corner, axes coordinates)
stats_text = (f"Min: {min_dist:.4f}\nMax: {max_dist:.4f}\n"
              f"Mean: {mean_dist:.4f}\nMedian: {median_dist:.4f}")
ax.text(0.95, 0.95, stats_text, transform=ax.transAxes,
        verticalalignment='top', horizontalalignment='right',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

ax.legend()
plt.tight_layout()
# Separate file name so the landmark 12/13 figure is not overwritten
plt.savefig(save_path + "/landmark0_1_distance_distribution.pdf")
plt.show()

# Print basic statistics
print("Distance between landmarks 0 and 1:")
print(f"Mean: {mean_dist:.4f}")
print(f"Median: {median_dist:.4f}")
print(f"Min: {min_dist:.4f}")
print(f"Max: {max_dist:.4f}")
print(f"Standard deviation: {np.std(landmark0_1_distances):.4f}")

In [None]:
confidence_level = 0.95  # Set your desired confidence level here
node_indices = [0, 1, 2, 3]  # Adjust if you have a different number of inner nodes
fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)

# Same grid as the unlabelled ellipse figure, with each point annotated by its
# landmark number (row index // 2). This version is only shown, not saved.
for ax, node_idx in zip(axes.flat, node_indices):
    for landmark_id in range(0, 40, 2):
        pair = [landmark_id, landmark_id + 1]
        mean = [fastAnc_recon_for_plotting.iloc[row, node_idx] for row in pair]
        # Diagonal covariance from the two per-coordinate variances
        cov = np.diag([cvars_df.iloc[row, node_idx] for row in pair])
        ax.scatter(mean[0], mean[1], s=50, color='blue')
        # Plot landmark index next to the point
        label = str(landmark_id // 2)
        ax.text(mean[0], mean[1], label, fontsize=8, color='black', ha='right', va='bottom')
        plot_confidence_ellipse(mean, cov, ax, confidence=confidence_level, edgecolor='red')
    ax.set_aspect('equal')
    ax.set_title(f'Inner node {node_idx}')

plt.tight_layout()
plt.show()