In [1]:
import pickle
from pathlib import Path
import numpy as np
import statistics
import numpy as np
import sympy
import matplotlib.pyplot as plt
import base64
import pickle
from sympy import symbols, MatrixSymbol, lambdify, Matrix, pprint

from scipy.optimize import minimize
from matplotlib.ticker import FuncFormatter
from sympy import symbols, MatrixSymbol, lambdify
from matplotlib import cm
import random
import matplotlib.colors as mcolors
import scipy
import time
from pathlib import Path
import os
import ast
import pandas as pd
from pathlib import Path
from matplotlib.ticker import ScalarFormatter


import pennylane as qml
from functools import partial
from qiskit.circuit.library import *
from qiskit import *
from qiskit.quantum_info import *
import pickle
from pathlib import Path
import numpy as np
import statistics
from pennylane.wires import Wires
import matplotlib.cm as cm
import base64
from qiskit import *
from qiskit.quantum_info import *
import math

In [2]:
from ANALYSIS_SPECIFIC_CONFIG import is_valid_pickle_file,spread_per_sample_vectorized,spread_pooling_vectorized, load_and_clean_pickle, extract_Nr, extract_trotter_step

def compute_single_draw_stats(
    eigvals,
    full_qfim_mat,
    threshold=1e-10,
    spread_methods=("variance", "mad"),
    ddof=1,
    scale="normal",
    gamma=1.0,
    n=1,
    V_theta=1.0,
    n_ctrl=None,
    n_reserv=None,
    trotter_step=None,
):
    """
    Compute QFIM (or DQFIM) statistics for a SINGLE set of eigenvalues (one draw),
    and also compute an effective dimension from the provided full QFIM matrix.

    Returned dictionary includes:
      - "draw_rank"                          number of eigenvalues kept after thresholding
      - "var_all_eigenvalues"                variance over the thresholded spectrum
      - "var_nonzero_eigenvalues"            variance over the kept eigenvalues only
      - "trace_eigenvalues"                  sum of the thresholded spectrum
      - "var_all_normalized_by_param_count"  var_all / len(spectrum)
      - "var_all_normalized_by_rank"         var_all / draw_rank (0.0 at rank 0)
      - "var_nonzero_normalized_by_rank"     var_nonzero / draw_rank (0.0 at rank 0)
      - "trace_normalized_by_rank"           trace / draw_rank (0.0 at rank 0)
      - "trace_normalized_by_param_count"    trace / len(spectrum)
      - "var_nonzero_log"                    log(var_nonzero), -inf when var_nonzero <= 0
      - "ipr_deff_raw"                       trace^2 / sum(lambda^2)
      - "ipr_deff_norm"                      1 / sum(p^2) with p = lambda / trace
      - "abbas_deff_raw"                     sum(log(1 + alpha*lambda)) on raw eigenvalues
      - "abbas_deff_norm"                    sum(log(1 + alpha*p)) on trace-normalized ones
      - "d_eff"                              effective dimension of the trace-normalized
                                             full QFIM
      - "min_nonzero_eigenvalue"             smallest kept eigenvalue, NaN when none is kept
      - "spread_metric_{method}"             one entry per method in spread_methods

    Parameters
    ----------
    eigvals : array-like
        Eigenvalues for this single QFIM (or DQFIM) draw; flattened to 1D.
    full_qfim_mat : array-like (2D)
        The full QFIM matrix for this draw.
    threshold : float
        Eigenvalues strictly below this value are replaced by 0.0.
    spread_methods : tuple of str
        Methods forwarded to ``spread_per_sample_vectorized``.
    ddof : int
        Delta degrees of freedom for the variance computations.
    scale : str
        Forwarded unchanged to ``spread_per_sample_vectorized``.
    gamma : float
        Scaling parameter in alpha = gamma * n / (2 * log(n)).
    n : int
        Number of data samples; for n <= 1 alpha is 0 and d_eff uses sum(p / (1 + p)).
    V_theta, n_ctrl, n_reserv, trotter_step : optional
        Accepted for interface compatibility; not used in the computation.

    Returns
    -------
    stats_dict : dict
        Dictionary of computed statistics (keys listed above).
    """
    # Flatten to 1D and zero out every eigenvalue below the threshold.
    arr = np.array(eigvals, dtype=float).ravel()
    arr = np.where(arr < threshold, 0.0, arr)

    # --- 1) Basic stats ---
    draw_rank = np.count_nonzero(arr)
    # Use the same "kept" criterion as draw_rank so the two can never disagree
    # (a value exactly equal to the threshold is kept by the np.where above).
    nonzero = arr[arr != 0.0]
    var_all_eigenvalues = np.var(arr, ddof=ddof)
    var_nonzero_eigenvalues = np.var(nonzero, ddof=ddof) if nonzero.size > 1 else 0.0
    var_nonzero_log = np.log(var_nonzero_eigenvalues) if var_nonzero_eigenvalues > 0 else -np.inf
    trace_eigenvalues = np.sum(arr)
    # np.min raises on an empty array, so an all-zero spectrum reports NaN instead.
    min_nonzero_eigenvalue = np.min(nonzero) if nonzero.size > 0 else np.nan

    var_normalized_by_param_count = var_all_eigenvalues / len(arr)
    trace_normalized_by_param_count = trace_eigenvalues / len(arr)
    # Rank-normalized quantities are defined as 0.0 for a rank-0 spectrum; without
    # the guard a Python-float numerator would raise ZeroDivisionError.
    if draw_rank > 0:
        var_nonzero_normalized_by_rank = var_nonzero_eigenvalues / draw_rank
        var_normalized_by_rank = var_all_eigenvalues / draw_rank
        trace_normalized_by_rank = trace_eigenvalues / draw_rank
    else:
        var_nonzero_normalized_by_rank = 0.0
        var_normalized_by_rank = 0.0
        trace_normalized_by_rank = 0.0

    # --- 2) IPR-based dimensions ---
    # Raw IPR: (trace^2) / (sum of squares)
    sum_of_squares = np.sum(arr**2)
    ipr_deff_raw = (trace_eigenvalues**2) / sum_of_squares if sum_of_squares > 0 else 0.0

    # Normalized IPR: using normalized eigenvalues (p = lambda / trace)
    if trace_eigenvalues > 0:
        arr_norm = arr / trace_eigenvalues
        sum_norm_sq = np.sum(arr_norm**2)
        ipr_deff_norm = 1.0 / sum_norm_sq if sum_norm_sq > 0 else 0.0
    else:
        arr_norm = None
        ipr_deff_norm = 0.0

    # --- 3) Abbas-based dimensions ---
    # alpha = (gamma * n) / (2 * log(n)) for n > 1; for n <= 1 alpha is set to 0,
    # which makes both Abbas sums evaluate to 0.
    log_n = math.log(n) if n > 1 else 0.0
    alpha = (gamma * n) / (2.0 * log_n) if log_n != 0.0 else 0.0
    abbas_deff_raw = np.sum(np.log(np.maximum(1.0 + alpha * arr, 1e-15)))
    if arr_norm is not None:
        abbas_deff_norm = np.sum(np.log(np.maximum(1.0 + alpha * arr_norm, 1e-15)))
    else:
        abbas_deff_norm = 0.0

    # --- 4) Effective dimension from the full QFIM ---
    # Normalize the full QFIM by its trace BEFORE diagonalizing.
    F = np.array(full_qfim_mat, dtype=complex)
    # Compare the real part only: ordering a complex scalar against 0 is not a
    # meaningful positivity test.
    trF = float(np.real(np.trace(F)))
    if trF > 0:
        eigs_F = np.linalg.eigvalsh(F / trF)  # spectrum p_i of the normalized QFIM
        eps = 1e-12
        if log_n != 0.0:
            z = 0.5 * np.sum(np.log(1.0 + n * eigs_F + eps))
            effective_dimension = (2.0 / np.log(n)) * z
        else:
            # For n <= 1 the effective dimension is defined as sum_i p_i / (1 + p_i).
            effective_dimension = np.sum(eigs_F / (1.0 + eigs_F))
    else:
        effective_dimension = 0.0

    # --- 5) Spread-of-log metrics ---
    # The external helper works on a 2D (draws x eigenvalues) array; pass one row.
    arr_2d = arr.reshape(1, -1)
    spread_metrics = {}
    for method in spread_methods:
        per_draw = spread_per_sample_vectorized(arr_2d, method=method, threshold=threshold, ddof=ddof, scale=scale)
        spread_metrics[f"spread_metric_{method}"] = per_draw[0] if per_draw.size > 0 else 0.0

    # --- 6) Build final dictionary ---
    stats_dict = {
        # Basic stats
        "draw_rank": draw_rank,
        "var_all_eigenvalues": var_all_eigenvalues,
        "var_nonzero_eigenvalues": var_nonzero_eigenvalues,
        "trace_eigenvalues": trace_eigenvalues,
        "var_all_normalized_by_param_count": var_normalized_by_param_count,
        "var_all_normalized_by_rank": var_normalized_by_rank,
        "var_nonzero_normalized_by_rank": var_nonzero_normalized_by_rank,
        "trace_normalized_by_rank": trace_normalized_by_rank,
        "trace_normalized_by_param_count": trace_normalized_by_param_count,
        "var_nonzero_log": var_nonzero_log,

        # IPR-based dimensions
        "ipr_deff_raw": ipr_deff_raw,
        "ipr_deff_norm": ipr_deff_norm,

        # Abbas-based dimensions
        "abbas_deff_raw": abbas_deff_raw,
        "abbas_deff_norm": abbas_deff_norm,

        # Effective dimension computed from the full QFIM (trace-normalized)
        "d_eff": effective_dimension,
        # Minimum kept eigenvalue (NaN when the whole spectrum was thresholded away)
        "min_nonzero_eigenvalue": min_nonzero_eigenvalue,
    }
    stats_dict.update(spread_metrics)

    return stats_dict


## <b>QFIM</b>, T = 10

In [7]:
# Load one stored results pickle and select a single fixed-parameter entry.
# T, sample_range_label, Nc and Nr are only used to build the file path below.
T = 10
sample_range_label ='normal_.5pi'
Nc = 2
Nr = 2
# Alternative (QFIM) results location, currently disabled:
# file_path = f'/Users/sophieblock/QRCCapstone/parameter_analysis_directory/QFIM_results/analog/Nc_{Nc}/sample_{sample_range_label}/1xK/Nr_{Nr}/trotter_step_{T}/data.pickle'
# NOTE(review): absolute, machine-specific path; the recorded run of this cell
# failed with FileNotFoundError for exactly this location.
file_path = f'/Users/sophieblock/QRCCapstone/parameter_analysis_directory/QFIM_global_results/analog_model_DQFIM/Nc_{Nc}/sample_{sample_range_label}/1xK/Nr_{Nr}/trotter_step_{T}/L_10/data.pickle'

# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(file_path, 'rb') as f:
    all_tests_data = pickle.load(f)
# Name of the top-level key to pull out of the loaded object.
fixed_params_dict = 'fixed_params0'
test_data = all_tests_data[fixed_params_dict]

FileNotFoundError: [Errno 2] No such file or directory: '/Users/sophieblock/QRCCapstone/parameter_analysis_directory/QFIM_global_results/analog_model_DQFIM/Nc_2/sample_normal_.5pi/1xK/Nr_2/trotter_step_10/L_10/data.pickle'

In [75]:
# NOTE(review): `number_of_tests` is not assigned in any cell visible here, so this
# cell relies on leftover kernel state and will fail on a fresh Restart & Run All.
number_of_tests

200

In [3]:
import math
import numpy as np
import pandas as pd

# ----------------------------------------------------------------------
# 1. Compute single-draw QFIM/DQFIM statistics (unchanged)
# ----------------------------------------------------------------------
def compute_single_draw_stats(
    eigvals,
    full_qfim_mat,
    threshold=1e-10,
    spread_methods=("variance", "mad"),
    ddof=1,
    scale="normal",
    gamma=1.0,
    n=1,
    V_theta=1.0,
    n_ctrl=None,
    n_reserv=None,
    trotter_step=None,
):
    """
    Compute QFIM (or DQFIM) statistics for a SINGLE set of eigenvalues.

    NOTE: this redefines ``compute_single_draw_stats`` from the earlier cell of
    this notebook and shadows it once this cell has run.

    Parameters
    ----------
    eigvals : array-like
        Eigenvalues of one draw; flattened to 1D. Values strictly below
        ``threshold`` are replaced by 0.0.
    full_qfim_mat : array-like (2D)
        Full QFIM matrix of the same draw; used only for ``d_eff``.
    threshold : float
        Cut-off applied to ``eigvals`` and forwarded to the spread helper.
    spread_methods : tuple of str
        Methods forwarded to ``spread_per_sample_vectorized``.
    ddof : int
        Delta degrees of freedom for ``var_all_eigenvalues`` and
        ``var_nonzero_eigenvalues`` (the ``*_doff_0`` keys always use 0).
    scale : str
        Forwarded unchanged to ``spread_per_sample_vectorized``.
    gamma : float
        Scaling parameter in alpha = gamma * n / (2 * log(n)).
    n : int or float
        Number of data samples; for n <= 1 alpha is 0 and ``d_eff`` uses
        sum(p / (1 + p)).
    V_theta, n_ctrl, n_reserv, trotter_step : optional
        Accepted for interface compatibility; not used in the computation.

    Returns
    -------
    dict
        Keys: draw_rank, var_all_eigenvalues, var_all_eigenvalues_doff_0,
        var_nonzero_eigenvalues, var_nonzero_eigenvalues_doff_0,
        trace_eigenvalues, var_all_normalized_by_rank,
        var_nonzero_normalized_by_rank, trace_normalized_by_rank,
        var_nonzero_log, ipr_deff_norm, abbas_deff_raw, abbas_deff_norm, d_eff,
        min_nonzero_eigenvalue (NaN when no eigenvalue survives the threshold),
        plus one ``spread_metric_{method}`` entry per requested method.
    """
    arr = np.array(eigvals)
    if arr.ndim != 1:
        arr = arr.flatten()
    arr = np.where(arr < threshold, 0.0, arr)

    # --- Basic stats ---
    draw_rank = np.count_nonzero(arr)
    # Same "kept" criterion as draw_rank, so rank and nonzero.size always agree
    # (a value exactly equal to the threshold survives the np.where above).
    nonzero = arr[arr != 0]
    has_spread = nonzero.size > 1
    # Honour the caller's ddof; it used to be hard-coded to 1 here.
    var_all_eigenvalues = np.var(arr, ddof=ddof)
    var_all_eigenvalues_doff_0 = np.var(arr, ddof=0)
    var_nonzero_eigenvalues = np.var(nonzero, ddof=ddof) if has_spread else 0.0
    var_nonzero_eigenvalues_doff_0 = np.var(nonzero, ddof=0) if has_spread else 0.0
    var_nonzero_log = np.log(var_nonzero_eigenvalues) if var_nonzero_eigenvalues > 0 else -np.inf
    trace_eigenvalues = np.sum(arr)
    # np.min raises on an empty array, so an all-zero spectrum reports NaN instead.
    min_nonzero_eigenvalue = np.min(nonzero) if nonzero.size > 0 else np.nan

    if draw_rank > 0:
        var_nonzero_normalized_by_rank = var_nonzero_eigenvalues / draw_rank
        var_normalized_by_rank = var_all_eigenvalues / draw_rank
        trace_normalized_by_rank = trace_eigenvalues / draw_rank
    else:
        var_nonzero_normalized_by_rank = 0.0
        var_normalized_by_rank = 0.0
        trace_normalized_by_rank = 0.0

    # --- IPR on trace-normalized eigenvalues: 1 / sum(p^2), p = lambda / trace ---
    if trace_eigenvalues > 0:
        arr_norm = arr / trace_eigenvalues
        sum_norm_sq = np.sum(arr_norm**2)
        ipr_deff_norm = 1.0 / sum_norm_sq if sum_norm_sq > 0 else 0.0
    else:
        arr_norm = None
        ipr_deff_norm = 0.0

    # --- Abbas-style sums: sum(log(1 + alpha * lambda)); alpha is 0 for n <= 1 ---
    log_n = math.log(n) if n > 1 else 0.0
    alpha = (gamma * n) / (2.0 * log_n) if log_n != 0.0 else 0.0
    abbas_deff_raw = np.sum(np.log(np.maximum(1.0 + alpha * arr, 1e-15)))
    if arr_norm is not None:
        abbas_deff_norm = np.sum(np.log(np.maximum(1.0 + alpha * arr_norm, 1e-15)))
    else:
        abbas_deff_norm = 0.0

    # --- Effective dimension from the trace-normalized full matrix ---
    F = np.array(full_qfim_mat, dtype=complex)
    # Compare the real part only: ordering a complex scalar against 0 is not a
    # meaningful positivity test.
    trF = float(np.real(np.trace(F)))
    if trF > 0:
        eigs_F = np.linalg.eigvalsh(F / trF)
        eps = 1e-12
        if log_n != 0.0:
            z = 0.5 * np.sum(np.log(1.0 + n * eigs_F + eps))
            effective_dimension = (2.0 / np.log(n)) * z
        else:
            effective_dimension = np.sum(eigs_F / (1.0 + eigs_F))
    else:
        effective_dimension = 0.0

    # --- Spread metrics (helper imported from ANALYSIS_SPECIFIC_CONFIG) ---
    arr_2d = arr.reshape(1, -1)
    spread_metrics = {}
    for method in spread_methods:
        per_draw = spread_per_sample_vectorized(arr_2d, method=method, threshold=threshold, ddof=ddof, scale=scale)
        spread_metrics[f"spread_metric_{method}"] = per_draw[0] if per_draw.size > 0 else 0.0

    # The param-count-normalized variance/trace and the raw IPR are intentionally
    # not reported by this version of the function.
    stats_dict = {
        "draw_rank": draw_rank,
        "var_all_eigenvalues": var_all_eigenvalues,
        "var_all_eigenvalues_doff_0": var_all_eigenvalues_doff_0,
        "var_nonzero_eigenvalues": var_nonzero_eigenvalues,
        "var_nonzero_eigenvalues_doff_0": var_nonzero_eigenvalues_doff_0,
        "trace_eigenvalues": trace_eigenvalues,
        "var_all_normalized_by_rank": var_normalized_by_rank,
        "var_nonzero_normalized_by_rank": var_nonzero_normalized_by_rank,
        "trace_normalized_by_rank": trace_normalized_by_rank,
        "var_nonzero_log": var_nonzero_log,
        "ipr_deff_norm": ipr_deff_norm,
        "abbas_deff_raw": abbas_deff_raw,
        "abbas_deff_norm": abbas_deff_norm,
        "d_eff": effective_dimension,
        # Minimum kept eigenvalue (NaN when the whole spectrum was thresholded away)
        "min_nonzero_eigenvalue": min_nonzero_eigenvalue,
    }
    stats_dict.update(spread_metrics)

    return stats_dict

# ----------------------------------------------------------------------
# 2. clean_array (unchanged)
# ----------------------------------------------------------------------
def clean_array(data):
    """
    Walk a nested structure and return a rebuilt version of it.

    dicts and lists are rebuilt recursively, every ``np.ndarray`` found is
    replaced by ``np.array(data)`` (a fresh array), and any other object
    (tuples, scalars, strings, ...) is handed back untouched.
    """
    if isinstance(data, dict):
        return {key: clean_array(value) for key, value in data.items()}
    if isinstance(data, list):
        return [clean_array(item) for item in data]
    if isinstance(data, np.ndarray):
        return np.array(data)
    return data

# ----------------------------------------------------------------------
# 3. Updated read_jax_file
# ----------------------------------------------------------------------
def read_jax_file(file_path, gate_name, test_key,num_L):
    """
    Read one results pickle and extract the fields used by the analysis.

    Extracted groups:
      - scalar/run data: costs, N_train, avg_fidelity, testing_results, num_params
      - QFIM Results (GHZ)                     -> key 'QFIM Results'
      - QFIM basis state                       -> key 'QFIM_basis_state'
      - computed training DQFIM                -> key 'DQFIM_stats_local'
      - file-stored DQFIM for num_L states     -> key f'DQFIM_stats_{num_L}_L_states'
      - computed target DQFIM                  -> key 'target DQFIM stats'

    Parameters
    ----------
    file_path : str
        Path of the pickle file to read.
    gate_name : str
        Stored unchanged in the result under "gate".
    test_key : str
        Expected value of the "test_key" entry stored in the file.
    num_L : int
        Number of sampled states; selects the 'DQFIM_stats_{num_L}_L_states' key
        and is stored under "num_sampled_states".

    Returns
    -------
    dict
        Flat dictionary of the extracted fields. Run-level fields that cannot be
        read are reported with a printed message and set to None.

    Raises
    ------
    KeyError
        If any of the QFIM/DQFIM entries is missing from the file.
    AssertionError
        If the test key stored in the file differs from ``test_key``.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code; only read trusted files.
    with open(file_path, 'rb') as f:
        df = pickle.load(f)
    df = clean_array(df)
    print(df.keys())

    try:
        costs = np.asarray([float(i) for i in df['costs'][0]], dtype=np.float64)
        N_train = float(df['N_train'][0])
    except Exception as e:
        print(f"Error reading costs/N_train from {file_path}: {e}")
        costs = None
        N_train= None

    # 'grads_per_epoch' used to be parsed here as well, but the value was never
    # returned, so that conversion has been dropped.

    try:
        fidelity = float(df['avg_fidelity'][0])
    except Exception as e:
        print(f"Error reading avg_fidelity from {file_path}: {e}")
        fidelity = None

    try:
        num_params = 3 + int(df['controls'][0]) * int(df['reservoirs'][0]) * int(df['trotter_step'][0]) + int(df['trotter_step'][0])
    except Exception as e:
        print(f"Error computing num_params from {file_path}: {e}")
        num_params = None

    try:
        test_results = np.asarray(df['testing_results'][0], dtype=np.float64)
    except Exception as e:
        print(f"Error reading testing_results from {file_path}: {e}")
        test_results = None

    # QFIM Results (GHZ).
    # The former "warn and fall back to None" lookup was immediately overwritten by
    # an unguarded duplicate, so a missing key always ended in a KeyError. Keep that
    # strict behaviour (all other QFIM/DQFIM entries are required too) but raise
    # with a message that names the offending file.
    if 'QFIM Results' not in df:
        raise KeyError(f"'QFIM Results' not found in {file_path}")
    qfim_stats_dict_GHZ = df['QFIM Results'][0]
    qfim_eigvals_GHZ = qfim_stats_dict_GHZ['qfim_eigvals']
    qfim_full_GHZ = qfim_stats_dict_GHZ['qfim']
    entropy_GHZ = qfim_stats_dict_GHZ['entropy']

    # QFIM Basis State
    qfim_stats_dict_basis = df['QFIM_basis_state'][0]
    qfim_eigvals_basis = qfim_stats_dict_basis['qfim_eigvals']
    qfim_full_basis = qfim_stats_dict_basis['qfim']
    entropy_basis = qfim_stats_dict_basis['entropy']

    # Computed Training DQFIM stats (new key)
    dqfim_stats_local = df['DQFIM_stats_local'][0]
    dqfim_eigvals_train = dqfim_stats_local['eigvals_train']
    dqfim_train = dqfim_stats_local['DQFIM_train']
    dqfim_entropies = dqfim_stats_local['entropies_train']

    # Original (file-stored) DQFIM stats, stored under "DQFIM_stats_{NUM_L}_L_states"
    key_name = f'DQFIM_stats_{num_L}_L_states'
    dqfim_stats_dict_random = df[key_name][0]
    dqfim_eigvals_random = dqfim_stats_dict_random['dqfim_eigvals']
    dqfim_full_random = dqfim_stats_dict_random['dqfim']
    dqfim_entropies_random = dqfim_stats_dict_random.get('entropies', None)

    # Computed Target DQFIM stats: two key layouts are accepted; the second set of
    # names is tried only when the first raises KeyError.
    computed_target = df["target DQFIM stats"][0]
    try:
        target_dqfim_eigvals = computed_target['eigvals_target']
        target_dqfim_full = computed_target['DQFIM_target']
        target_dqfim_entropies = computed_target['entropies_target']
    except KeyError:
        target_dqfim_eigvals = computed_target['dqfim_eigvals']
        target_dqfim_full = computed_target['DQFIM']
        target_dqfim_entropies = computed_target['entropies']

    # Guard against reading a file that belongs to a different test folder.
    readin_test_key = df.get("test_key", [None])[0]
    assert readin_test_key == test_key, f'Got: {readin_test_key}. Expected: {test_key}'
    result = {
        "costs": costs,
        "fidelity": fidelity,
        "num_params": num_params,
        "test_results": test_results,
        "qfim_eigvals_GHZ": qfim_eigvals_GHZ,
        "qfim_full_GHZ": qfim_full_GHZ,
        "entropy_GHZ": entropy_GHZ,
        "qfim_eigvals_basis": qfim_eigvals_basis,
        "qfim_full_basis": qfim_full_basis,
        "entropy_basis": entropy_basis,
        "dqfim_eigvals_train": dqfim_eigvals_train,
        "dqfim_train": dqfim_train,
        "dqfim_entropies": dqfim_entropies,
        "dqfim_eigvals_random": dqfim_eigvals_random,
        "dqfim_full_random": dqfim_full_random,
        "dqfim_entropies_random":dqfim_entropies_random,
        "num_sampled_states": num_L,
        "target_dqfim_eigvals": target_dqfim_eigvals,
        "target_dqfim_full": target_dqfim_full,
        "target_dqfim_entropies": target_dqfim_entropies,
        "N_ctrl": df.get('controls', [None])[0],
        "Trotter_Step": df.get('trotter_step', [None])[0],
        "N_R": df.get('reservoirs', [None])[0],
        "gate": gate_name,
        'N_train':N_train,
        "test_key": readin_test_key
    }
    return result
import glob
import os

def listdir_nohidden(path):
    """
    Return the paths of the entries directly inside ``path`` that match ``*``.

    ``glob`` does not match names starting with a dot for a bare ``*`` pattern,
    so hidden files (e.g. ``.DS_Store``) are left out. Each returned item is
    ``path`` joined with the entry name.
    """
    visible_pattern = os.path.join(path, '*')
    return glob.glob(visible_pattern)
# ----------------------------------------------------------------------
# 4. Build and aggregate DataFrame results (updated)
# ----------------------------------------------------------------------
def build_df_results(fixed_param_folder, base_folder, N_C, N_R, T,num_L):
    """
    Scan ``base_folder/fixed_param_folder`` and collect one record per gate run.

    Layout walked: ``<root>/<test_key>/<gate_folder>/<pickle>``, where only gate
    folders whose name starts with ``U{N_C}_`` are considered. Every pickle that
    exists is parsed with ``read_jax_file`` and tagged with its test key, gate
    folder and file path.

    Parameters
    ----------
    fixed_param_folder, base_folder : str
        Joined to form the root directory that is scanned.
    N_C : int
        Selects gate folders by the ``U{N_C}_`` name prefix.
    N_R, T : any
        Not used inside this function.
    num_L : int
        Forwarded to ``read_jax_file``.

    Returns
    -------
    pd.DataFrame
        One row per successfully read pickle (empty when nothing matched).

    Raises
    ------
    Exception
        Any error raised while reading a pickle is printed and re-raised.
    """
    records = []
    root_dir = os.path.join(base_folder, fixed_param_folder)
    print(root_dir)
    gate_prefix = f"U{N_C}_"

    for test_key in os.listdir(root_dir):
        test_dir = os.path.join(root_dir, test_key)
        if os.path.isdir(test_dir):
            for gate_folder in os.listdir(test_dir):
                gate_dir = os.path.join(test_dir, gate_folder)
                # Skip entries that are not gate folders for this control count.
                if not (gate_folder.startswith(gate_prefix) and os.path.isdir(gate_dir)):
                    continue

                stored_runs = listdir_nohidden(gate_dir)
                if len(stored_runs) <= 1:
                    file_name = "data_run_0.pickle"
                else:
                    # NOTE(review): the message says "latest" but the name is fixed,
                    # and it carries a trailing underscore -- confirm this is the
                    # intended file for multi-run folders.
                    file_name = "data_run_0_.pickle"
                    print(f'{test_key} gate {gate_folder} has {len(stored_runs)} data runs stored. Picking latest one: {file_name}')

                pickle_file = os.path.join(gate_dir, file_name)
                if not os.path.isfile(pickle_file):
                    print(f"Pickle file does not exist: {pickle_file}")
                    continue

                try:
                    record = read_jax_file(pickle_file, gate_folder, test_key, num_L)
                    record.update(
                        test_key=test_key,
                        gate_folder=gate_folder,
                        file_path=pickle_file,
                    )
                    records.append(record)
                except Exception as ex:
                    print(f"Error processing {pickle_file}: {ex}")
                    raise  # Stop at the first unreadable file.

    return pd.DataFrame(records)

# Example usage in post-processing:
# df_results = build_df_results(fixed_param_folder, base_folder, N_C=2)
# df_agg = aggregate_results(df_results)
# df_final = update_with_all_qfim_metrics(df_agg)
# print(df_final.shape)

# Run configuration: these values select which results folder is scanned below.
trotter_Step = 10
N_ctrl = 2
num_L = 20
N_reserv = 1
fixed_param_folder = "fixed_params0"
num_epochs = 1500
train_size = 20
# Short aliases used when formatting the folder path.
Nc = N_ctrl
Nr = N_reserv
# NOTE(review): sample_range_label is only referenced by the commented-out path
# below; the active base_folder hard-codes "sample_.5pi".
sample_range_label = '.5pi'
# NOTE(review): absolute, machine-specific path -- the cell only runs where this
# directory exists.
base_folder = f"/Users/so714f/Documents/offline/qrc/param_initialization_final/analog_results/Nc_{Nc}/epochs_{num_epochs}/reservoirs_{Nr}/trotter_{trotter_Step}/trainsize_{train_size}/sample_.5pi"

# base_folder = f"/Users/sophieblock/QRCCapstone/parameter_analysis_directory/param_initialization_final/analog_results/Nc_{N_ctrl}/epochs_{num_epochs}/reservoirs_{N_reserv}/trotter_{trotter_Step}/trainsize_{train_size}/sample_{sample_range_label}"
# Scan <base_folder>/<fixed_param_folder> and build one row per gate run.
df_results = build_df_results(fixed_param_folder, base_folder, N_C=N_ctrl, N_R=N_reserv, T=trotter_Step,num_L=num_L)

# Quick sanity output: table size and number of distinct test keys found.
print("df_results shape:", df_results.shape)
print(len(df_results['test_key'].unique()))

# /Users/so714f/Documents/offline/qrc/param_initialization_final/analog_results/Nc_2/epochs_1500/reservoirs_1/trotter_10/trainsize_10/sample_.5pi/fixed_params0

/Users/so714f/Documents/offline/qrc/param_initialization_final/analog_results/Nc_2/epochs_1500/reservoirs_1/trotter_10/trainsize_20/sample_.5pi/fixed_params0
Index(['Gate', 'opt_description', 'specs', 'epochs', 'trotter_step',
       'time_steps', 'controls', 'reservoirs', 'N_train', 'fixed_params',
       'init_params', 'testing_results', 'infidelities', 'avg_fidelity',
       'costs', 'params_per_epoch', 'training_states', 'opt_params', 'opt_lr',
       'grads_per_epoch', 'init_grads', 'target DQFIM stats',
       'DQFIM_stats_local', 'test_key', 'QFIM Results',
       'DQFIM_stats_20_L_states', 'QFIM_basis_state'],
      dtype='object')
Index(['Gate', 'opt_description', 'specs', 'epochs', 'trotter_step',
       'time_steps', 'controls', 'reservoirs', 'N_train', 'fixed_params',
       'init_params', 'testing_results', 'infidelities', 'avg_fidelity',
       'costs', 'params_per_epoch', 'training_states', 'opt_params', 'opt_lr',
       'grads_per_epoch', 'init_grads', 'target DQFIM sta

In [5]:
# Echo the results directory that the previous cell resolved.
base_folder
# /Users/so714f/Documents/offline/qrc/param_initialization_final/analog_results/Nc_2/epochs_1500/reservoirs_1/trotter_10/trainsize_10/sample_.5pi

'/Users/so714f/Documents/offline/qrc/param_initialization_final/analog_results/Nc_2/epochs_1500/reservoirs_1/trotter_10/trainsize_20/sample_.5pi'

In [4]:

def aggregate_results(df):
    """
    Collapse the per-run table to one row per (test_key, gate) pair.

    Fidelity is summarised four ways (list of values, mean, mean of
    log(1 - fidelity) as "error", mean of 1 - fidelity as "avg_infidelity");
    every other reported column takes the first value seen in the group.
    ``N_train`` is reported under the name ``num_train``.

    Returns a DataFrame with ``test_key`` and ``gate`` restored as columns.
    """
    # Columns copied through with "first", in the order they appear in the output.
    carried_over = [
        "qfim_eigvals_GHZ",
        "qfim_full_GHZ",
        "entropy_GHZ",
        "qfim_eigvals_basis",
        "qfim_full_basis",
        "entropy_basis",
        "dqfim_eigvals_train",
        "dqfim_train",
        "dqfim_eigvals_random",
        "dqfim_full_random",
        "target_dqfim_eigvals",
        "target_dqfim_full",
        "target_dqfim_entropies",
        "N_ctrl",
        "N_R",
        "Trotter_Step",
        "num_sampled_states",
    ]

    named_aggs = {
        "fidelities_list": ("fidelity", list),
        "avg_fidelity": ("fidelity", "mean"),
        "error": ("fidelity", lambda x: np.mean(np.log(1 - x))),
        "avg_infidelity": ("fidelity", lambda x: np.mean(1 - x)),
    }
    for column in carried_over:
        named_aggs[column] = (column, "first")
    named_aggs["num_train"] = ("N_train", "first")

    per_gate = df.groupby(["test_key", "gate"])
    return per_gate.agg(**named_aggs).reset_index()

def update_with_all_qfim_metrics(df, threshold=1e-10, spread_methods=("variance", "mad"),
                                 ddof=1, scale="normal", gamma=0.1, n=1, V_theta=1.0):
    """
    For each row, compute derived metrics for each QFIM variant and append them
    as prefixed columns:

      prefix "GHZ_"                 from "qfim_eigvals_GHZ" / "qfim_full_GHZ"
      prefix "basis_"               from "qfim_eigvals_basis" / "qfim_full_basis"
      prefix "dqfim_"               from "dqfim_eigvals_train" / "dqfim_train"
      prefix "random_sample_dqfim_" from "dqfim_eigvals_random" / "dqfim_full_random"
      prefix "tdqfim_"              from "target_dqfim_eigvals" / "target_dqfim_full"

    Parameters
    ----------
    df : pd.DataFrame
        Aggregated table holding the columns listed above plus "N_ctrl", "N_R"
        and "Trotter_Step".
    threshold, spread_methods, ddof, scale, gamma, V_theta :
        Forwarded unchanged to ``compute_single_draw_stats``.
    n : int
        Fallback sample count for the DQFIM variants when the row has no usable
        "num_train" / "num_sampled_states" value (column missing, None or NaN).
        The GHZ and basis variants always use a sample count of 1.

    Returns
    -------
    pd.DataFrame
        One row per input row: the original columns followed by the prefixed
        statistics of every variant.
    """
    def _sample_count(row, column):
        """Return the row's sample count from `column`, or the fallback `n`."""
        value = row[column] if column in row else None
        # A None count would make the `n > 1` test downstream raise TypeError.
        if value is None or pd.isna(value):
            return n
        return value

    # (output prefix, eigenvalue column, full-matrix column, sample-count column)
    # A sample-count column of None means the variant is computed with n = 1.
    variants = (
        ("GHZ", "qfim_eigvals_GHZ", "qfim_full_GHZ", None),
        ("basis", "qfim_eigvals_basis", "qfim_full_basis", None),
        ("dqfim", "dqfim_eigvals_train", "dqfim_train", "num_train"),
        ("random_sample_dqfim", "dqfim_eigvals_random", "dqfim_full_random", "num_sampled_states"),
        ("tdqfim", "target_dqfim_eigvals", "target_dqfim_full", "num_train"),
    )

    new_rows = []
    for _, row in df.iterrows():
        updated_row = row.to_dict()
        for prefix, eig_col, mat_col, count_col in variants:
            stats = compute_single_draw_stats(
                row[eig_col],
                row[mat_col],
                threshold=threshold,
                spread_methods=spread_methods,
                ddof=ddof,
                scale=scale,
                gamma=gamma,
                n=1 if count_col is None else _sample_count(row, count_col),
                V_theta=V_theta,
                n_ctrl=row["N_ctrl"],
                n_reserv=row["N_R"],
                trotter_step=row["Trotter_Step"],
            )
            updated_row.update({f"{prefix}_{k}": v for k, v in stats.items()})
        new_rows.append(updated_row)
    return pd.DataFrame(new_rows)


In [23]:
# Scratch check: count the entries in a hand-written list of test keys.
a =['test326', 'test43', 'test346', 'test64', 'test164', 'test169', 'test150', 'test58', 'test131', 'test286', 'test56']
print(len(a))

11


In [5]:
# Collapse the per-run table to one row per (test_key, gate).
df_agg = aggregate_results(df_results)
print("Aggregated DataFrame shape:", df_agg.shape)
# display(df_agg.keys())

# Append the prefixed QFIM/DQFIM statistics for every variant to each row.
df_final = update_with_all_qfim_metrics(df_agg)
print(len(df_final['test_key'].unique()),df_final['test_key'].unique())

# NOTE(review): bare expression that is not the last statement of the cell, so
# its value is not displayed.
df_final.shape
display(df_final.keys())


Aggregated DataFrame shape: (224, 24)
13 ['test0' 'test131' 'test150' 'test164' 'test169' 'test248' 'test286'
 'test326' 'test346' 'test43' 'test56' 'test58' 'test64']


Index(['test_key', 'gate', 'fidelities_list', 'avg_fidelity', 'error',
       'avg_infidelity', 'qfim_eigvals_GHZ', 'qfim_full_GHZ', 'entropy_GHZ',
       'qfim_eigvals_basis',
       ...
       'tdqfim_var_nonzero_normalized_by_rank',
       'tdqfim_trace_normalized_by_rank', 'tdqfim_var_nonzero_log',
       'tdqfim_ipr_deff_norm', 'tdqfim_abbas_deff_raw',
       'tdqfim_abbas_deff_norm', 'tdqfim_d_eff',
       'tdqfim_min_nonzero_eigenvalue', 'tdqfim_spread_metric_variance',
       'tdqfim_spread_metric_mad'],
      dtype='object', length=109)

In [13]:
# Check every test key present in df_final against a hand-written expected list.
# NOTE(review): the recorded output of this cell lists 11 keys while the cell that
# builds df_final printed 13, so the two outputs presumably come from different
# executions -- re-run in order to confirm.
expected = ['test326', 'test43', 'test346', 'test64', 'test164', 'test169', 'test150', 'test58', 'test131', 'test286', 'test56']

for key in df_final['test_key'].unique():
    print(key)
    print(key in expected)

test131
True
test150
True
test164
True
test169
True
test286
True
test326
True
test346
True
test43
True
test56
True
test58
True
test64
True


In [6]:
# 1) Keep only the columns of df_final whose dtype pandas reports as numeric.
numeric_cols = [
    col
    for col in df_final.columns
    if pd.api.types.is_numeric_dtype(df_final[col])
]

# 2) Average those numeric columns within each test_key; reset_index turns
#    test_key back into a regular column.
#    numeric_only=True makes pandas skip anything non-numeric (on older pandas
#    versions a plain .mean() does the same job).
df_agg_by_test_key = df_final.groupby("test_key")[numeric_cols].mean(numeric_only=True).reset_index()


In [7]:
import pingouin as pg
import warnings
from scipy.stats import ConstantInputWarning
def _correlate_columns(df_corr, x_metric, columns, method, coef_key):
    """Run ``pg.corr(method=method)`` of ``x_metric`` against each column; constant inputs give ``None`` entries."""
    label = method.capitalize()
    results = {}
    for col in columns:
        try:
            # record=True captures unrelated warnings instead of showing them, while the
            # "error" filter turns scipy's constant-input warning into an exception.
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("error", category=ConstantInputWarning)
                stats = pg.corr(x=df_corr[x_metric], y=df_corr[col], method=method)
            results[col] = {coef_key: stats["r"].iloc[0], "p_value": stats["p-val"].iloc[0]}
        except ConstantInputWarning as cie:
            print(f"ConstantInputWarning for {col} ({label}): {cie}")
            results[col] = {coef_key: None, "p_value": None}
    return results


def _print_correlations(results, x_metric, label, coef_key, symbol,
                        corr_threshold, p_threshold, print_all):
    """Print one header line plus every entry that passes both thresholds (or all entries if ``print_all``)."""
    print(f"\nPairwise correlations vs. {x_metric} ({label}):")
    for metric, vals in results.items():
        coef = vals[coef_key]
        if coef is None:
            # Undefined correlations are only shown when everything is requested.
            if print_all:
                print(f"{metric}: {symbol} = None, p = None")
            continue
        if print_all or (abs(coef) > corr_threshold and vals["p_value"] < p_threshold):
            print(f"{metric}: {symbol} = {coef:.3f}, p = {vals['p_value']:.3g}")


def analyze_correlations(df_merged, x_metric, metrics_of_interest, corr_threshold=0.2, p_threshold=0.05,
                         print_all_pearson=False, print_all_spearman=False):
    """
    Analyze pairwise correlations between a given x_metric and each metric in metrics_of_interest.

    Rows with a NaN in ``x_metric`` or in any listed metric are dropped before
    correlating, so every pair is computed on the same set of rows. Results are
    printed (Pearson first, then Spearman) and also returned.

    Parameters
    ----------
    df_merged : pd.DataFrame
        The merged DataFrame containing the columns of interest.
    x_metric : str
        The column name for the independent variable (e.g., "avg_fidelity").
    metrics_of_interest : iterable of str
        Column names whose correlations with x_metric will be computed.
        May include ``x_metric`` itself.
    corr_threshold : float, optional
        A result is printed only if |coefficient| exceeds this value (default 0.2).
    p_threshold : float, optional
        A result is printed only if its p-value is below this value (default 0.05).
    print_all_pearson : bool, optional
        If True, print Pearson correlation results for every metric.
    print_all_spearman : bool, optional
        If True, print Spearman correlation results for every metric.

    Returns
    -------
    pearson_results : dict
        Maps each metric to ``{"pearson_r": ..., "p_value": ...}``; both values are
        ``None`` when scipy reported a constant input.
    spearman_results : dict
        Maps each metric to ``{"spearman_rho": ..., "p_value": ...}``, with the same
        ``None`` convention.
    """
    metrics_of_interest = list(metrics_of_interest)
    # De-duplicate while keeping order: repeated labels would make df[col] return a
    # DataFrame instead of a Series and break the per-value conversion below.
    columns = list(dict.fromkeys([x_metric, *metrics_of_interest]))

    df_corr = df_merged[columns].dropna(subset=columns).copy()

    # Unwrap values exposing .item() (numpy scalars, one-element arrays) into plain
    # floats, for the x column as well as the metric columns.
    for col in columns:
        df_corr[col] = df_corr[col].apply(
            lambda val: float(val.item()) if hasattr(val, "item") else float(val)
        )

    pearson_results = _correlate_columns(df_corr, x_metric, metrics_of_interest, "pearson", "pearson_r")
    spearman_results = _correlate_columns(df_corr, x_metric, metrics_of_interest, "spearman", "spearman_rho")

    _print_correlations(pearson_results, x_metric, "Pearson", "pearson_r", "r",
                        corr_threshold, p_threshold, print_all_pearson)
    _print_correlations(spearman_results, x_metric, "Spearman", "spearman_rho", "rho",
                        corr_threshold, p_threshold, print_all_spearman)

    return pearson_results, spearman_results



# Reporting thresholds shared by the correlation calls in this notebook.
CORR_THRESHOLD = 0.1
P_THRESHOLD = 0.05

x_metric = "avg_fidelity"

# GHZ-prefixed metric columns to correlate against x_metric.
metrics_of_interest_ghz = [
    f"GHZ_{suffix}"
    for suffix in (
        "var_all_eigenvalues",
        "var_nonzero_eigenvalues",
        "var_all_eigenvalues_doff_0",
        "var_nonzero_eigenvalues_doff_0",
        "trace_eigenvalues",
        "var_nonzero_log",
        "ipr_deff_norm",
        "d_eff",
        # disabled: "spread_variance_doff_0", "spread_mad_normal", "spread_mad_1.0"
    )
]

pearson_corrs_ghz, spearman_corrs_ghz = analyze_correlations(
    df_final,
    x_metric,
    metrics_of_interest_ghz,
    corr_threshold=CORR_THRESHOLD,
    p_threshold=P_THRESHOLD,
)
    


Pairwise correlations vs. avg_fidelity (Pearson):
GHZ_var_all_eigenvalues: r = 0.233, p = 0.000432
GHZ_var_nonzero_eigenvalues: r = 0.232, p = 0.000468
GHZ_var_all_eigenvalues_doff_0: r = 0.233, p = 0.000432
GHZ_var_nonzero_eigenvalues_doff_0: r = 0.232, p = 0.000468
GHZ_trace_eigenvalues: r = 0.224, p = 0.000746
GHZ_var_nonzero_log: r = 0.231, p = 0.000483
GHZ_ipr_deff_norm: r = -0.196, p = 0.00322
GHZ_d_eff: r = -0.193, p = 0.00374

Pairwise correlations vs. avg_fidelity (Spearman):
GHZ_var_all_eigenvalues: rho = 0.364, p = 2.1e-08
GHZ_var_nonzero_eigenvalues: rho = 0.348, p = 8.86e-08
GHZ_var_all_eigenvalues_doff_0: rho = 0.364, p = 2.1e-08
GHZ_var_nonzero_eigenvalues_doff_0: rho = 0.348, p = 8.86e-08
GHZ_trace_eigenvalues: rho = 0.335, p = 2.87e-07
GHZ_var_nonzero_log: rho = 0.348, p = 8.86e-08
GHZ_ipr_deff_norm: rho = -0.297, p = 6.23e-06
GHZ_d_eff: rho = -0.319, p = 1.11e-06


In [8]:
# 1) Keep only the columns whose dtype pandas reports as numeric.
numeric_cols = [
    name for name in df_final.columns
    if pd.api.types.is_numeric_dtype(df_final[name])
]

# 2) One row per test_key, holding the mean of each numeric column.
df_agg_by_test_key = df_final.groupby("test_key")[numeric_cols].mean(numeric_only=True).reset_index()

In [9]:
import pingouin as pg
import warnings
from scipy.stats import ConstantInputWarning
def _correlate_columns(df_corr, x_metric, columns, method, coef_key):
    """Run ``pg.corr(method=method)`` of ``x_metric`` against each column; constant inputs give ``None`` entries."""
    label = method.capitalize()
    results = {}
    for col in columns:
        try:
            # record=True captures unrelated warnings instead of showing them, while the
            # "error" filter turns scipy's constant-input warning into an exception.
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("error", category=ConstantInputWarning)
                stats = pg.corr(x=df_corr[x_metric], y=df_corr[col], method=method)
            results[col] = {coef_key: stats["r"].iloc[0], "p_value": stats["p-val"].iloc[0]}
        except ConstantInputWarning as cie:
            print(f"ConstantInputWarning for {col} ({label}): {cie}")
            results[col] = {coef_key: None, "p_value": None}
    return results


def _print_correlations(results, x_metric, label, coef_key, symbol,
                        corr_threshold, p_threshold, print_all):
    """Print one header line plus every entry that passes both thresholds (or all entries if ``print_all``)."""
    print(f"\nPairwise correlations vs. {x_metric} ({label}):")
    for metric, vals in results.items():
        coef = vals[coef_key]
        if coef is None:
            # Undefined correlations are only shown when everything is requested.
            if print_all:
                print(f"{metric}: {symbol} = None, p = None")
            continue
        if print_all or (abs(coef) > corr_threshold and vals["p_value"] < p_threshold):
            print(f"{metric}: {symbol} = {coef:.3f}, p = {vals['p_value']:.3g}")


def analyze_correlations(df_merged, x_metric, metrics_of_interest, corr_threshold=0.2, p_threshold=0.05,
                         print_all_pearson=False, print_all_spearman=False):
    """
    Analyze pairwise correlations between a given x_metric and each metric in metrics_of_interest.

    Rows with a NaN in ``x_metric`` or in any listed metric are dropped before
    correlating, so every pair is computed on the same set of rows. Results are
    printed (Pearson first, then Spearman) and also returned.

    Parameters
    ----------
    df_merged : pd.DataFrame
        The merged DataFrame containing the columns of interest.
    x_metric : str
        The column name for the independent variable (e.g., "avg_fidelity").
    metrics_of_interest : iterable of str
        Column names whose correlations with x_metric will be computed.
        May include ``x_metric`` itself.
    corr_threshold : float, optional
        A result is printed only if |coefficient| exceeds this value (default 0.2).
    p_threshold : float, optional
        A result is printed only if its p-value is below this value (default 0.05).
    print_all_pearson : bool, optional
        If True, print Pearson correlation results for every metric.
    print_all_spearman : bool, optional
        If True, print Spearman correlation results for every metric.

    Returns
    -------
    pearson_results : dict
        Maps each metric to ``{"pearson_r": ..., "p_value": ...}``; both values are
        ``None`` when scipy reported a constant input.
    spearman_results : dict
        Maps each metric to ``{"spearman_rho": ..., "p_value": ...}``, with the same
        ``None`` convention.
    """
    metrics_of_interest = list(metrics_of_interest)
    # De-duplicate while keeping order: repeated labels would make df[col] return a
    # DataFrame instead of a Series and break the per-value conversion below.
    columns = list(dict.fromkeys([x_metric, *metrics_of_interest]))

    df_corr = df_merged[columns].dropna(subset=columns).copy()

    # Unwrap values exposing .item() (numpy scalars, one-element arrays) into plain
    # floats, for the x column as well as the metric columns.
    for col in columns:
        df_corr[col] = df_corr[col].apply(
            lambda val: float(val.item()) if hasattr(val, "item") else float(val)
        )

    pearson_results = _correlate_columns(df_corr, x_metric, metrics_of_interest, "pearson", "pearson_r")
    spearman_results = _correlate_columns(df_corr, x_metric, metrics_of_interest, "spearman", "spearman_rho")

    _print_correlations(pearson_results, x_metric, "Pearson", "pearson_r", "r",
                        corr_threshold, p_threshold, print_all_pearson)
    _print_correlations(spearman_results, x_metric, "Spearman", "spearman_rho", "rho",
                        corr_threshold, p_threshold, print_all_spearman)

    return pearson_results, spearman_results



# Reporting thresholds shared by the correlation calls in this notebook.
CORR_THRESHOLD = 0.1
P_THRESHOLD = 0.05

x_metric = "avg_fidelity"

# GHZ-prefixed metric columns to correlate against x_metric.
metrics_of_interest_ghz = [
    f"GHZ_{suffix}"
    for suffix in (
        "var_all_eigenvalues",
        "var_nonzero_eigenvalues",
        "trace_eigenvalues",
        "var_nonzero_log",
        "ipr_deff_norm",
        "d_eff",
        "spread_metric_variance",
        "spread_metric_mad",
    )
]

pearson_corrs_ghz, spearman_corrs_ghz = analyze_correlations(
    df_final,
    x_metric,
    metrics_of_interest_ghz,
    corr_threshold=CORR_THRESHOLD,
    p_threshold=P_THRESHOLD,
)
    


Pairwise correlations vs. avg_fidelity (Pearson):
GHZ_var_all_eigenvalues: r = 0.233, p = 0.000432
GHZ_var_nonzero_eigenvalues: r = 0.232, p = 0.000468
GHZ_trace_eigenvalues: r = 0.224, p = 0.000746
GHZ_var_nonzero_log: r = 0.231, p = 0.000483
GHZ_ipr_deff_norm: r = -0.196, p = 0.00322
GHZ_d_eff: r = -0.193, p = 0.00374

Pairwise correlations vs. avg_fidelity (Spearman):
GHZ_var_all_eigenvalues: rho = 0.364, p = 2.1e-08
GHZ_var_nonzero_eigenvalues: rho = 0.348, p = 8.86e-08
GHZ_trace_eigenvalues: rho = 0.335, p = 2.87e-07
GHZ_var_nonzero_log: rho = 0.348, p = 8.86e-08
GHZ_ipr_deff_norm: rho = -0.297, p = 6.23e-06
GHZ_d_eff: rho = -0.319, p = 1.11e-06
GHZ_spread_metric_variance: rho = -0.175, p = 0.00867
GHZ_spread_metric_mad: rho = -0.136, p = 0.0424


In [10]:
# Every df_final column whose name carries the random-sample DQFIM prefix.
dqfim_metrics = [
    column_name
    for column_name in df_final.columns
    if column_name.startswith('random_sample_dqfim_')
]
print("Target dQFIM metric keys:", dqfim_metrics)

Target dQFIM metric keys: ['random_sample_dqfim_draw_rank', 'random_sample_dqfim_var_all_eigenvalues', 'random_sample_dqfim_var_all_eigenvalues_doff_0', 'random_sample_dqfim_var_nonzero_eigenvalues', 'random_sample_dqfim_var_nonzero_eigenvalues_doff_0', 'random_sample_dqfim_trace_eigenvalues', 'random_sample_dqfim_var_all_normalized_by_rank', 'random_sample_dqfim_var_nonzero_normalized_by_rank', 'random_sample_dqfim_trace_normalized_by_rank', 'random_sample_dqfim_var_nonzero_log', 'random_sample_dqfim_ipr_deff_norm', 'random_sample_dqfim_abbas_deff_raw', 'random_sample_dqfim_abbas_deff_norm', 'random_sample_dqfim_d_eff', 'random_sample_dqfim_min_nonzero_eigenvalue', 'random_sample_dqfim_spread_metric_variance', 'random_sample_dqfim_spread_metric_mad']


In [11]:
x_metric = 'error'

# random_sample_dqfim_* columns to correlate against the error column.
metrics_of_interest_targ_dqfim = [
    f"random_sample_dqfim_{suffix}"
    for suffix in (
        "var_all_eigenvalues",
        "var_nonzero_eigenvalues",
        "trace_eigenvalues",
        "ipr_deff_norm",
        "abbas_deff_raw",
        "abbas_deff_norm",
        "d_eff",
        "min_nonzero_eigenvalue",
        "spread_metric_variance",
        "spread_metric_mad",
    )
]

pearson_corrs_targ, spearman_corrs_targ = analyze_correlations(
    df_final,
    x_metric,
    metrics_of_interest_targ_dqfim,
    corr_threshold=CORR_THRESHOLD,
    p_threshold=P_THRESHOLD,
    print_all_pearson=False,
)


Pairwise correlations vs. error (Pearson):
random_sample_dqfim_var_all_eigenvalues: r = -0.168, p = 0.0117
random_sample_dqfim_var_nonzero_eigenvalues: r = -0.168, p = 0.0117
random_sample_dqfim_trace_eigenvalues: r = -0.147, p = 0.028
random_sample_dqfim_abbas_deff_raw: r = -0.155, p = 0.0205
random_sample_dqfim_spread_metric_mad: r = 0.157, p = 0.0185

Pairwise correlations vs. error (Spearman):
random_sample_dqfim_abbas_deff_raw: rho = -0.136, p = 0.0416
random_sample_dqfim_spread_metric_variance: rho = 0.149, p = 0.0258
random_sample_dqfim_spread_metric_mad: rho = 0.163, p = 0.0143


In [12]:
n_colors = 100
import plotly.express as px
import seaborn as sns
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap, PowerNorm
def plot_metric_vs_error_plotly(
    df,
    qfim_type,
    x_metric,
    error_col="error",
    title=None,
    add_trendline=False,
    n_colors=100,
    width=800,
    height=600,
    log_x = False,
    log_y = False,
):
    """
    Scatter one QFIM metric (x) against the error column (y), colouring each
    point by that same error value so overlapping points stay distinguishable.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the x column, ``error_col`` and a "test_key" column
        (the latter is shown on hover).
    qfim_type : str or None
        Prefix such as "GHZ", "basis", "dqfim" or "target"; the x column is
        ``f"{qfim_type}_{x_metric}"``. With None, ``x_metric`` is used as-is.
    x_metric : str
        Metric name for the x-axis (unprefixed when ``qfim_type`` is given).
    error_col : str, default "error"
        Column used for both the y-axis and the colour.
    title : str, optional
        Plot title; a default naming the two columns is generated when omitted.
    add_trendline : bool, optional
        If True, draw a single OLS trendline across all points.
    n_colors : int, optional
        Number of steps in the generated colour scale.
    width, height : int
        Figure size in pixels.
    log_x, log_y : bool, optional
        Forwarded to ``px.scatter`` to request logarithmic axes.

    Returns
    -------
    fig : plotly.graph_objects.Figure
        Scatter plot with black-outlined markers, a colour bar for the error
        values and a rectangular frame around the plotting area.

    Raises
    ------
    ValueError
        If the x column or ``error_col`` is not present in ``df``.
    """
    import plotly.express as px
    import numpy as np
    import seaborn as sns
    import matplotlib.colors as mcolors
    from matplotlib.colors import LinearSegmentedColormap

    # Resolve the x column name and make sure both required columns exist.
    x_col = x_metric if qfim_type is None else f"{qfim_type}_{x_metric}"
    missing = next((name for name in (x_col, error_col) if name not in df.columns), None)
    if missing is not None:
        raise ValueError(f"Column '{missing}' not found in the DataFrame.")

    # Error range that drives the colour scale; widen a degenerate range slightly.
    err_lo = df[error_col].min()
    err_hi = df[error_col].max()
    if np.isclose(err_lo, err_hi):
        err_hi = err_lo + 1e-12

    # Sample viridis at fourth-power-warped positions, then rescale them to [0, 1].
    # NOTE(review): for negative or sign-crossing error ranges the even power reverses
    # or folds the ordering of the sampled positions - confirm that this is intended.
    warped = np.linspace(err_lo, err_hi, n_colors) ** 4
    unit_positions = (warped - warped.min()) / (warped.max() - warped.min())
    sampled_rgba = plt.cm.viridis(unit_positions)
    stepped_cmap = LinearSegmentedColormap.from_list('vibrant', sampled_rgba, N=n_colors)
    plotly_color_scale = [
        mcolors.to_hex(stepped_cmap(position)) for position in np.linspace(0, 1, n_colors)
    ]

    default_title = f"{x_col} vs {error_col} colored by {error_col}"

    fig = px.scatter(
        df,
        x=x_col,
        y=error_col,
        color=error_col,
        color_continuous_scale=plotly_color_scale,
        title=title if title else default_title,
        hover_data=["test_key"],
        template="plotly_white",
        trendline="ols" if add_trendline else None,
        trendline_scope="overall",
        trendline_color_override="red",
        width=width,
        height=height,
        log_x = log_x,
        log_y = log_y,
    )

    # Uniform circular markers with a thin black outline.
    marker_style = dict(size=12, line=dict(width=1, color="black"), symbol="circle")
    fig.update_traces(marker=marker_style)

    # Colour bar placement plus a black frame drawn below the data.
    colorbar_style = dict(
        title=error_col,
        len=0.9,
        thickness=30,
        y=0.5,
        yanchor="middle",
        x=1.03,
    )
    frame_shape = dict(
        type="rect",
        xref="paper", yref="paper",
        x0=0, y0=0, x1=1, y1=1,
        line=dict(color="black", width=2),
        layer="below",
    )
    fig.update_layout(coloraxis_colorbar=colorbar_style, shapes=[frame_shape])

    return fig

def plot_two_metrics_with_color(
    df,
    x_qfim_type,
    x_metric,
    y_qfim_type,
    y_metric,
    color_metric,
    log_x = False,
    log_y = False,
    title=None,
    add_trendline=True,
    n_colors=100,
    gamma=0.7,
    cmap_name="Spectral",
    width=1000,
    height=600,
    scale_factor=0.05
):
    """
    Create a scatter plot comparing two metrics (x and y axes), potentially from
    different QFIM types, with points colored by a third metric.

    If x_qfim_type or y_qfim_type is None, x_metric or y_metric is taken to be
    a column of df as-is (e.g. "error", "avg_fidelity").

    Besides returning the figure, the function prints the minimum and maximum of
    the color column, the test_key of the rows attaining them, and the color
    bounds that are actually used.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the x, y and color columns plus a "test_key" column
        (shown on hover). Rows with a NaN in any of these columns are not plotted.
    x_qfim_type : str or None
        QFIM type for the x-axis metric (e.g. "GHZ", "basis", "dqfim", "target"),
        or None if x_metric is already a column in df with no prefix.
    x_metric : str
        The metric name for the x-axis (no prefix if x_qfim_type is None).
    y_qfim_type : str or None
        QFIM type for the y-axis metric, or None if y_metric is already a column in df.
    y_metric : str
        The metric name for the y-axis (no prefix if y_qfim_type is None).
    color_metric : str
        Full column name used for coloring the points. It may be the same column
        as the x or y axis.
    log_x, log_y : bool, optional
        Forwarded to ``px.scatter`` to request logarithmic axes.
    title : str, optional
        Custom title for the plot. If None, a default is generated.
    add_trendline : bool, optional
        If True, add an Ordinary Least Squares (OLS) trendline across all points.
    n_colors : int, optional
        Number of steps in the custom colormap.
    gamma : float, optional
        Exponent of the ``PowerNorm`` used to space the colors.
    cmap_name : str, optional
        Name of the seaborn palette; it is always used in reversed order.
    width, height : int
        Plot width and height in pixels.
    scale_factor : float, optional
        The lower color bound is the data minimum lowered by
        ``scale_factor * abs(minimum)``. Forced to 0.0 when color_metric is "error".

    Returns
    -------
    fig : plotly.graph_objects.Figure
        Scatter plot with custom color scale, bounding box, black marker borders,
        and optional OLS trend line.

    Raises
    ------
    ValueError
        If the x, y or color column is not present in df.
    """
    # 1) Construct the full column names for x and y.
    x_col = x_metric if x_qfim_type is None else f"{x_qfim_type}_{x_metric}"
    y_col = y_metric if y_qfim_type is None else f"{y_qfim_type}_{y_metric}"
    color_col = color_metric  # e.g., "avg_fidelity", or "target_abbas_deff_norm", etc.

    # Ensure required columns exist.
    for col in [x_col, y_col, color_col]:
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in the DataFrame.")

    # 2) Keep only complete rows. Column names are de-duplicated (order preserved) so
    #    that coloring by an axis column does not produce repeated labels, which would
    #    turn df_plot[color_col] into a DataFrame and break the min/max logic below.
    needed_cols = list(dict.fromkeys([x_col, y_col, color_col, "test_key"]))
    df_plot = df[needed_cols].dropna()

    # 3) Color range taken from the plotted rows; widen a degenerate range slightly.
    data_min = df_plot[color_col].min()
    data_max = df_plot[color_col].max()
    if np.isclose(data_min, data_max):
        data_max = data_min + 1e-12

    # Report which tests sit at the two ends of the color range.
    min_rows = df_plot[df_plot[color_col] == data_min]
    max_rows = df_plot[df_plot[color_col] == data_max]
    print(f"Color column '{color_col}' minimum value: {data_min}")
    print("test corresponding to minimum value:")
    print(min_rows.get('test_key', None))
    print(f"Color column '{color_col}' maximum value: {data_max}")
    print("test corresponding to maximum value:")
    print(max_rows.get('test_key', None))

    # The error column is never padded at the low end.
    if color_col == "error":
        scale_factor = 0.0
    adj_min = data_min - scale_factor * abs(data_min)
    adj_max = data_max

    print(f"\nadj_min: {adj_min}, adj_max: {adj_max}")

    # 4) Build the color scale: power-normalize evenly spaced data values, sample the
    #    reversed palette at those positions and convert the result to hex for Plotly.
    norm = PowerNorm(gamma=gamma, vmin=adj_min, vmax=data_max)
    values = np.linspace(data_min, data_max, n_colors)
    normed_values = norm(values)
    base_colormap = sns.color_palette(cmap_name, as_cmap=True).reversed()
    colors_rgba = base_colormap(normed_values)
    custom_cmap = LinearSegmentedColormap.from_list('vibrant', colors_rgba, N=n_colors)
    plotly_color_scale = [mcolors.to_hex(custom_cmap(i)) for i in np.linspace(0, 1, n_colors)]

    # 5) Create the scatter plot from the same rows the color range was computed on.
    #    trendline_scope="overall" forces a single line across the entire dataset.
    fig = px.scatter(
        df_plot,
        x=x_col,
        y=y_col,
        color=color_col,
        log_x=log_x,
        log_y=log_y,
        color_continuous_scale=plotly_color_scale,
        range_color=[adj_min, data_max],
        title=title if title else f"{x_col} vs {y_col} colored by {color_col}",
        hover_data=["test_key"],
        template="plotly_white",
        trendline="ols" if add_trendline else None,
        trendline_scope="overall",
        trendline_color_override="red",
        width=width,
        height=height
    )

    # 6) Marker styling: bigger, black border.
    fig.update_traces(
        marker=dict(
            size=12,
            line=dict(width=0.75, color="black"),
            symbol="circle"
        )
    )

    # 7) Color bar placement and a black frame around the plotting area.
    fig.update_layout(
        coloraxis_colorbar=dict(
            title=color_col,
            len=1.1,            # slightly taller than the plot area
            thickness=35,       # width of the color bar
            yanchor="middle",   # center it vertically
            x=1.0,
            y=0.53
        ),
        shapes=[
            dict(
                type="rect",
                xref="paper", yref="paper",
                x0=0, y0=0, x1=1, y1=1,
                line=dict(color="black", width=2),
                layer="below"
            )
        ]
    )
    fig.update_xaxes(showgrid=True,gridwidth=1,gridcolor="lightgray", ticklabelstandoff=10)
    fig.update_yaxes(showgrid=True,gridwidth=1,gridcolor="lightgray", ticklabelstandoff=10)

    return fig



In [21]:

# One figure per entry: (frame, x column, y column, color column, extra keyword arguments).
_error_scatter_specs = [
    (df_final, "dqfim_trace_eigenvalues", "error", "dqfim_var_all_eigenvalues", {}),
    (df_final, "dqfim_trace_eigenvalues", "error", "dqfim_abbas_deff_raw", {}),
    (df_final, "dqfim_var_nonzero_eigenvalues", "error", "dqfim_abbas_deff_raw", {"log_x": True}),
    (df_agg_by_test_key, "dqfim_trace_eigenvalues", "dqfim_var_all_eigenvalues", "error", {}),
]

for _frame, _x_name, _y_name, _color_name, _extra in _error_scatter_specs:
    # Columns are passed unprefixed, hence both QFIM types are None.
    fig = plot_two_metrics_with_color(
        _frame,
        x_qfim_type=None,
        x_metric=_x_name,
        y_qfim_type=None,
        y_metric=_y_name,
        color_metric=_color_name,
        add_trendline=True,
        gamma=1.2,
        cmap_name="plasma",
        **_extra,
    )
    fig.show()


Color column 'dqfim_var_all_eigenvalues' minimum value: 5.359255313873291
test corresponding to minimum value:
184    test58
Name: test_key, dtype: object
Color column 'dqfim_var_all_eigenvalues' maximum value: 101.6332778930664
test corresponding to maximum value:
98    test286
Name: test_key, dtype: object

adj_min: 5.091292381286621, adj_max: 101.6332778930664


Color column 'dqfim_abbas_deff_raw' minimum value: 3.2057175636291504
test corresponding to minimum value:
63    test169
Name: test_key, dtype: object
Color column 'dqfim_abbas_deff_raw' maximum value: 8.829816818237305
test corresponding to maximum value:
143    test346
Name: test_key, dtype: object

adj_min: 3.045431613922119, adj_max: 8.829816818237305


Color column 'dqfim_abbas_deff_raw' minimum value: 3.2057175636291504
test corresponding to minimum value:
63    test169
Name: test_key, dtype: object
Color column 'dqfim_abbas_deff_raw' maximum value: 8.829816818237305
test corresponding to maximum value:
143    test346
Name: test_key, dtype: object

adj_min: 3.045431613922119, adj_max: 8.829816818237305


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


In [58]:


# Rank-normalized DQFIM trace against the eigenvalue variance, colored by error.
_trace_vs_variance_options = dict(
    x_qfim_type='dqfim',
    x_metric="trace_normalized_by_rank",
    y_qfim_type=None,
    y_metric="dqfim_var_all_eigenvalues",
    color_metric="error",
    add_trendline=False,
    gamma=1.2,
)
fig = plot_two_metrics_with_color(df_final, **_trace_vs_variance_options)
fig.show()

Color column 'error' minimum value: -6.781865475886515
test corresponding to minimum value:
37    test150
Name: test_key, dtype: object
Color column 'error' maximum value: -0.4844408324953334
test corresponding to maximum value:
19    test131
Name: test_key, dtype: object

adj_min: -6.781865475886515, adj_max: -0.4844408324953334


In [16]:


# One figure per entry: (QFIM prefix for both axes, y metric, palette, PowerNorm gamma).
# The final entry intentionally repeats the previous one, matching the five figures shown.
_per_test_key_specs = [
    ("dqfim", "var_all_normalized_by_rank", "plasma", 1.2),
    ("dqfim", "abbas_deff_raw", "plasma", 1.2),
    ("dqfim", "var_all_normalized_by_rank", "plasma", 1.5),
    ("GHZ", "var_all_normalized_by_rank", "viridis", 1.5),
    ("GHZ", "var_all_normalized_by_rank", "viridis", 1.5),
]

for _prefix, _y_name, _palette, _gamma in _per_test_key_specs:
    fig = plot_two_metrics_with_color(
        df_agg_by_test_key,
        x_qfim_type=_prefix,
        x_metric="trace_normalized_by_rank",
        y_qfim_type=_prefix,
        y_metric=_y_name,
        color_metric="error",
        add_trendline=False,
        cmap_name=_palette,
        gamma=_gamma,
    )
    fig.show()

Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


In [61]:
x_metric = "error"

# dqfim_* columns to correlate against the error column.
metrics_of_interest_dqfim = [
    f"dqfim_{suffix}"
    for suffix in (
        "var_all_normalized_by_rank",
        "var_nonzero_normalized_by_rank",
        "trace_normalized_by_rank",
        "var_nonzero_log",
        "ipr_deff_norm",
        "abbas_deff_raw",
        "abbas_deff_norm",
        "d_eff",
        "spread_metric_variance",
        "spread_metric_mad",
    )
]

# Pearson results are printed for every metric; Spearman results only when they pass the thresholds.
pearson_corrs_dqfim, spearman_corrs_dqfim = analyze_correlations(
    df_final,
    x_metric,
    metrics_of_interest_dqfim,
    corr_threshold=CORR_THRESHOLD,
    p_threshold=P_THRESHOLD,
    print_all_pearson=True,
)


Pairwise correlations vs. error (Pearson):
dqfim_var_all_normalized_by_rank: r = -0.361, p = 3.45e-08
dqfim_var_nonzero_normalized_by_rank: r = -0.360, p = 4.1e-08
dqfim_trace_normalized_by_rank: r = -0.350, p = 9.94e-08
dqfim_var_nonzero_log: r = -0.316, p = 1.78e-06
dqfim_ipr_deff_norm: r = 0.043, p = 0.524
dqfim_abbas_deff_raw: r = -0.328, p = 6.26e-07
dqfim_abbas_deff_norm: r = 0.030, p = 0.66
dqfim_d_eff: r = 0.015, p = 0.828
dqfim_spread_metric_variance: r = 0.088, p = 0.194
dqfim_spread_metric_mad: r = 0.033, p = 0.625

Pairwise correlations vs. error (Spearman):
dqfim_var_all_normalized_by_rank: rho = -0.286, p = 1.6e-05
dqfim_var_nonzero_normalized_by_rank: rho = -0.286, p = 1.63e-05
dqfim_trace_normalized_by_rank: rho = -0.298, p = 6.72e-06
dqfim_var_nonzero_log: rho = -0.286, p = 1.63e-05
dqfim_abbas_deff_raw: rho = -0.302, p = 5.24e-06
dqfim_spread_metric_variance: rho = 0.142, p = 0.0354


In [17]:
# One GHZ metric-vs-error figure per metric, shown in this order.
for ghz_x_metric in ("trace_eigenvalues", "var_nonzero_eigenvalues", "d_eff"):
    fig = plot_metric_vs_error_plotly(
        df_agg_by_test_key,
        qfim_type="GHZ",
        x_metric=ghz_x_metric,
    )
    fig.show()


# fig = plot_two_metrics_with_color(
#     df_final,
#     x_qfim_type=None,
   
#     x_metric="dqfim_trace_eigenvalues",
#     y_qfim_type=None,
#     y_metric="dqfim_var_all_eigenvalues",
#     color_metric="error",
#     # color_metric="dqfim_abbas_deff_raw",
#     add_trendline=True,
#     cmap_name="plasma",
#     # log_x=True,
#     # log_y=True,
#     gamma=1.2,
# )
# fig.show() 


In [40]:
# Inspect which columns are available on the per-test_key aggregated frame.
df_agg_by_test_key.columns

Index(['test_key', 'avg_fidelity', 'error', 'avg_infidelity', 'N_ctrl', 'N_R',
       'Trotter_Step', 'num_sampled_states', 'num_train', 'GHZ_draw_rank',
       'GHZ_var_all_eigenvalues', 'GHZ_var_all_eigenvalues_doff_0',
       'GHZ_var_nonzero_eigenvalues', 'GHZ_var_nonzero_eigenvalues_doff_0',
       'GHZ_trace_eigenvalues', 'GHZ_var_all_normalized_by_rank',
       'GHZ_var_nonzero_normalized_by_rank', 'GHZ_trace_normalized_by_rank',
       'GHZ_var_nonzero_log', 'GHZ_ipr_deff_norm', 'GHZ_abbas_deff_raw',
       'GHZ_abbas_deff_norm', 'GHZ_d_eff', 'GHZ_min_nonzero_eigenvalue',
       'GHZ_spread_metric_variance', 'GHZ_spread_metric_mad',
       'basis_draw_rank', 'basis_var_all_eigenvalues',
       'basis_var_all_eigenvalues_doff_0', 'basis_var_nonzero_eigenvalues',
       'basis_var_nonzero_eigenvalues_doff_0', 'basis_trace_eigenvalues',
       'basis_var_all_normalized_by_rank',
       'basis_var_nonzero_normalized_by_rank',
       'basis_trace_normalized_by_rank', 'basis_var_nonz

In [20]:
# Per-figure arguments for the four GHZ scatter plots, in display order.
# add_trendline is passed only where it was requested explicitly; the other
# two figures rely on the function's default.
ghz_scatter_specs = [
    dict(
        x_metric="GHZ_trace_eigenvalues",
        y_metric="error",
        color_metric="GHZ_var_nonzero_eigenvalues",
        add_trendline=True,
    ),
    dict(
        x_metric="GHZ_var_nonzero_eigenvalues",
        y_metric="error",
        color_metric="GHZ_trace_eigenvalues",
        add_trendline=True,
    ),
    dict(
        x_metric="GHZ_trace_eigenvalues",
        y_metric="GHZ_var_nonzero_eigenvalues",
        color_metric="error",
    ),
    dict(
        x_metric="GHZ_trace_normalized_by_rank",
        y_metric="GHZ_d_eff",
        color_metric="error",
    ),
]

for ghz_scatter_spec in ghz_scatter_specs:
    # Column names are already fully prefixed, so both qfim types are None.
    fig = plot_two_metrics_with_color(
        df_agg_by_test_key,
        x_qfim_type=None,
        y_qfim_type=None,
        gamma=1.2,
        cmap_name="plasma",
        **ghz_scatter_spec,
    )
    fig.show()

Color column 'GHZ_var_nonzero_eigenvalues' minimum value: 123.15338134765625
test corresponding to minimum value:
12    test64
Name: test_key, dtype: object
Color column 'GHZ_var_nonzero_eigenvalues' maximum value: 2292.374267578125
test corresponding to maximum value:
6    test286
Name: test_key, dtype: object

adj_min: 116.99571228027344, adj_max: 2292.374267578125


Color column 'GHZ_trace_eigenvalues' minimum value: 95.70286560058594
test corresponding to minimum value:
1    test131
Name: test_key, dtype: object
Color column 'GHZ_trace_eigenvalues' maximum value: 333.079833984375
test corresponding to maximum value:
8    test346
Name: test_key, dtype: object

adj_min: 90.917724609375, adj_max: 333.079833984375


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


Color column 'error' minimum value: -4.971550399060236
test corresponding to minimum value:
0    test0
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
12    test64
Name: test_key, dtype: object

adj_min: -4.971550399060236, adj_max: -3.092023919708461


In [67]:
# Check whether 'test169' is among the test_key values of the aggregated frame.
'test169' in df_agg_by_test_key['test_key'].unique()

True

updated_row.update({f"GHZ_{k}": v for k, v in ghz_stats.items()})
updated_row.update({f"basis_{k}": v for k, v in basis_stats.items()})
updated_row.update({f"dqfim_{k}": v for k, v in dqfim_stats_train.items()})
updated_row.update({f"random_dample_dqfim_{k}": v for k, v in file_dqfim_stats.items()})
updated_row.update({f"tdqfim_{k}": v for k, v in dqfim_stats_targ.items()})

In [None]:
# Correlate the tdqfim_* summary columns listed below against avg_fidelity.
x_metric = 'avg_fidelity'

metrics_of_interest_targ_dqfim = [
    # variance columns
    'tdqfim_var_all_eigenvalues',
    'tdqfim_var_nonzero_eigenvalues',
    'tdqfim_var_all_normalized_by_rank',
    'tdqfim_var_nonzero_log',
    # effective-dimension columns
    'tdqfim_ipr_deff_raw',
    'tdqfim_abbas_deff_raw',
    'tdqfim_abbas_deff_norm',
    'tdqfim_d_eff',
    'tdqfim_min_nonzero_eigenvalue',
    # spread columns
    'tdqfim_spread_metric_variance',
    'tdqfim_spread_metric_mad',
]

# analyze_correlations returns a (pearson, spearman) pair; thresholds come from
# the notebook-level CORR_THRESHOLD / P_THRESHOLD constants.
pearson_corrs_targ, spearman_corrs_targ = analyze_correlations(
    df_final,
    x_metric,
    metrics_of_interest_targ_dqfim,
    corr_threshold=CORR_THRESHOLD,
    p_threshold=P_THRESHOLD,
    print_all_pearson=True,
)


Pairwise correlations vs. avg_fidelity (Pearson):
tdqfim_var_all_eigenvalues: r = -0.125, p = 0.0536
tdqfim_var_nonzero_eigenvalues: r = -0.117, p = 0.0692
tdqfim_var_all_normalized_by_rank: r = -0.125, p = 0.0536
tdqfim_var_nonzero_log: r = -0.112, p = 0.0827
tdqfim_ipr_deff_raw: r = 0.068, p = 0.293
tdqfim_abbas_deff_raw: r = -0.111, p = 0.0855
tdqfim_abbas_deff_norm: r = 0.036, p = 0.579
tdqfim_d_eff: r = 0.011, p = 0.866
tdqfim_min_nonzero_eigenvalue: r = -0.114, p = 0.0771
tdqfim_spread_metric_variance: r = -0.003, p = 0.966
tdqfim_spread_metric_mad: r = 0.007, p = 0.911

Pairwise correlations vs. avg_fidelity (Spearman):


In [None]:

# Correlate the dqfim_* summary columns against avg_fidelity.
# x_metric is assigned here explicitly: other cells in this notebook set it to
# different values ("error" / 'avg_fidelity'), so reading it implicitly made the
# result depend on which cell happened to run last.
x_metric = 'avg_fidelity'

metrics_of_interest_dqfim = [
    'dqfim_var_all_normalized_by_rank',
    'dqfim_var_nonzero_normalized_by_rank',
    'dqfim_trace_normalized_by_rank',
    'dqfim_var_nonzero_log',
    'dqfim_ipr_deff_norm',
    'dqfim_abbas_deff_raw',
    'dqfim_abbas_deff_norm',
    'dqfim_d_eff',
    'dqfim_spread_metric_variance',
    'dqfim_spread_metric_mad',
]

# analyze_correlations returns a (pearson, spearman) pair; thresholds come from
# the notebook-level CORR_THRESHOLD / P_THRESHOLD constants.
pearson_corrs_dqfim, spearman_corrs_dqfim = analyze_correlations(
    df_final, x_metric, metrics_of_interest_dqfim,
    corr_threshold=CORR_THRESHOLD, p_threshold=P_THRESHOLD, print_all_pearson=True
)


Pairwise correlations vs. avg_fidelity (Pearson):
dqfim_var_all_normalized_by_rank: r = -0.028, p = 0.671
dqfim_var_nonzero_normalized_by_rank: r = -0.023, p = 0.722
dqfim_trace_normalized_by_rank: r = -0.064, p = 0.327
dqfim_var_nonzero_log: r = -0.095, p = 0.144
dqfim_ipr_deff_norm: r = -0.052, p = 0.426
dqfim_abbas_deff_raw: r = -0.074, p = 0.256
dqfim_abbas_deff_norm: r = -0.062, p = 0.341
dqfim_d_eff: r = -0.061, p = 0.346
dqfim_spread_metric_variance: r = 0.062, p = 0.34
dqfim_spread_metric_mad: r = 0.057, p = 0.38

Pairwise correlations vs. avg_fidelity (Spearman):


In [26]:
n_colors = 100
import plotly.express as px
import seaborn as sns
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap, PowerNorm
def plot_two_metrics_with_color(
    df,
    x_qfim_type,
    x_metric,
    y_qfim_type,
    y_metric,
    color_metric,
    log_x = False,
    log_y = False,
    title=None,
    add_trendline=True,
    n_colors=100,
    gamma=0.7,
    cmap_name="Spectral",
    width=1000,
    height=600,
    scale_factor=0.05
):
    """
    Scatter-plot one metric against another, colouring the points by a third column.

    The x / y column is ``f"{qfim_type}_{metric}"`` when the corresponding
    ``*_qfim_type`` is given, otherwise ``metric`` is used as the column name
    directly (e.g. "error", "avg_fidelity"). Rows with a missing value in any
    of the columns used are dropped; the remaining rows are what gets plotted,
    what the colour range is computed from, and what the trend line is fit to.

    Besides building the figure, the function prints the minimum / maximum of
    the colour column, the ``test_key`` of the rows attaining them (``None``
    if the frame has no ``test_key`` column), and the colour bounds used.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the x, y and colour columns. A ``test_key`` column, when
        present, is shown on hover and used in the printed summary.
    x_qfim_type : str or None
        Prefix for the x-axis column, or None if ``x_metric`` is already a
        full column name.
    x_metric : str
        Metric name for the x-axis (full column name if ``x_qfim_type`` is None).
    y_qfim_type : str or None
        Prefix for the y-axis column, or None if ``y_metric`` is already a
        full column name.
    y_metric : str
        Metric name for the y-axis (full column name if ``y_qfim_type`` is None).
    color_metric : str
        Full column name used for colouring (no prefix is ever added). It may
        coincide with the x or y column.
    log_x, log_y : bool, optional
        Forwarded to ``px.scatter`` as ``log_x`` / ``log_y``.
    title : str, optional
        Plot title. Defaults to ``"<x_col> vs <y_col> colored by <color_col>"``.
    add_trendline : bool, optional
        If True, request a single overall OLS trend line (drawn in red).
    n_colors : int, optional
        Number of colour stops in the generated colour scale.
    gamma : float, optional
        Exponent of the ``PowerNorm`` used to warp the colour scale.
    cmap_name : str, optional
        Palette name passed to ``sns.color_palette(..., as_cmap=True)``; the
        resulting colormap is used reversed.
    width, height : int, optional
        Figure size in pixels.
    scale_factor : float, optional
        The lower colour bound is ``min - scale_factor * abs(min)``. It is
        forced to 0 when ``color_metric == "error"``.

    Returns
    -------
    fig : plotly.graph_objects.Figure
        Scatter plot with the custom colour scale, a black frame around the
        plotting area, black marker borders and the optional trend line.

    Raises
    ------
    ValueError
        If the x, y or colour column is missing from ``df``, or if no row has
        non-null values in all the columns used.
    """
    # 1) Resolve the full column names for x, y and colour.
    x_col = x_metric if x_qfim_type is None else f"{x_qfim_type}_{x_metric}"
    y_col = y_metric if y_qfim_type is None else f"{y_qfim_type}_{y_metric}"
    color_col = color_metric

    for col in [x_col, y_col, color_col]:
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in the DataFrame.")

    # 2) Keep only the rows that are complete in every column we use.
    #    dict.fromkeys de-duplicates while preserving order: selecting the same
    #    column twice (e.g. colouring by the y column) would otherwise yield
    #    duplicate labels and turn df_plot[color_col] into a DataFrame.
    has_test_key = "test_key" in df.columns
    wanted_cols = [x_col, y_col, color_col] + (["test_key"] if has_test_key else [])
    plot_cols = list(dict.fromkeys(wanted_cols))
    df_plot = df[plot_cols].dropna()
    if df_plot.empty:
        raise ValueError(
            f"No rows with non-null values in columns {plot_cols}; nothing to plot."
        )

    # 3) Colour range, computed from the plotted rows only.
    color_values = df_plot[color_col]
    data_min = color_values.min()
    data_max = color_values.max()

    # Report the extreme rows using the raw extrema (before any widening below,
    # otherwise the maximum lookup would never match in the constant case).
    min_rows = df_plot[color_values == data_min]
    max_rows = df_plot[color_values == data_max]
    print(f"Color column '{color_col}' minimum value: {data_min}")
    print("test corresponding to minimum value:")
    print(min_rows.get('test_key', None))
    print(f"Color column '{color_col}' maximum value: {data_max}")
    print("test corresponding to maximum value:")
    print(max_rows.get('test_key', None))

    # Avoid a zero-width colour range when the colour column is constant.
    if np.isclose(data_min, data_max):
        data_max = data_min + 1e-12

    # Pad the lower colour bound, except when colouring by "error".
    if color_col == "error":
        scale_factor = 0.0
    adj_min = data_min - scale_factor * abs(data_min)
    adj_max = data_max

    print(f"\nadj_min: {adj_min}, adj_max: {adj_max}")

    # 4) Build the Plotly colour scale: sample the reversed palette through a
    #    PowerNorm defined on [adj_min, adj_max].
    norm = PowerNorm(gamma=gamma, vmin=adj_min, vmax=adj_max)
    # NOTE(review): the samples start at data_min while the norm and
    # range_color start at adj_min, so for scale_factor != 0 the colour bar is
    # not an exact PowerNorm of the data. Kept as-is to preserve the current
    # look of existing figures -- confirm whether adj_min was intended here.
    values = np.linspace(data_min, data_max, n_colors)
    normed_values = norm(values)
    base_colormap = sns.color_palette(cmap_name, as_cmap=True).reversed()
    colors_rgba = base_colormap(normed_values)
    # Re-sample through a Matplotlib colormap and convert to hex strings for Plotly.
    custom_cmap = LinearSegmentedColormap.from_list('vibrant', colors_rgba, N=n_colors)
    plotly_color_scale = [mcolors.to_hex(custom_cmap(i)) for i in np.linspace(0, 1, n_colors)]

    # 5) Scatter plot of the cleaned rows. trendline_scope="overall" forces a
    #    single trend line across the whole dataset.
    trendline_option = "ols" if add_trendline else None
    fig = px.scatter(
        df_plot,
        x=x_col,
        y=y_col,
        color=color_col,
        log_x=log_x,
        log_y=log_y,
        color_continuous_scale=plotly_color_scale,
        range_color=[adj_min, adj_max],
        title=title if title else f"{x_col} vs {y_col} colored by {color_col}",
        hover_data=["test_key"] if has_test_key else None,
        template="plotly_white",
        trendline=trendline_option,
        trendline_scope="overall",
        trendline_color_override="red",
        width=width,
        height=height
    )

    # 6) Marker styling: bigger markers with a thin black border.
    fig.update_traces(
        marker=dict(
            size=12,
            line=dict(width=0.75, color="black"),
            symbol="circle"
        )
    )

    # 7) Colour bar placement and a black frame around the plotting area.
    fig.update_layout(
        coloraxis_colorbar=dict(
            title=color_col,
            len=1.1,            # slightly longer than the plot height
            thickness=35,       # width of the colour bar
            yanchor="middle",   # anchor the bar at its vertical centre
            x=1.0,              # horizontal position of the bar
            y=0.53
        ),
        shapes=[
            dict(
                type="rect",
                xref="paper", yref="paper",
                x0=0, y0=0, x1=1, y1=1,
                line=dict(color="black", width=2),
                layer="below"
            )
        ]
    )
    fig.update_xaxes(showgrid=True,gridwidth=1,gridcolor="lightgray", ticklabelstandoff=10)
    fig.update_yaxes(showgrid=True,gridwidth=1,gridcolor="lightgray", ticklabelstandoff=10)

    return fig



In [27]:
# Show the distinct test_key values present in df_final.
df_final['test_key'].unique()

array(['test131', 'test150', 'test164', 'test169', 'test286', 'test326',
       'test346', 'test43', 'test56', 'test58', 'test64'], dtype=object)

In [28]:

# tdqfim_trace_eigenvalues vs. error, coloured by tdqfim_abbas_deff_raw,
# with the overall OLS trend line. Column names are already fully prefixed,
# so both qfim types are None.
fig = plot_two_metrics_with_color(
    df_final,
    x_qfim_type=None,
    y_qfim_type=None,
    x_metric="tdqfim_trace_eigenvalues",
    y_metric="error",
    color_metric="tdqfim_abbas_deff_raw",
    cmap_name="plasma",
    add_trendline=True,
)
fig.show()

Color column 'tdqfim_abbas_deff_raw' minimum value: 3.4690494537353516
test corresponding to minimum value:
61    test169
Name: test_key, dtype: object
Color column 'tdqfim_abbas_deff_raw' maximum value: 8.90238094329834
test corresponding to maximum value:
131    test346
Name: test_key, dtype: object

adj_min: 3.2955970764160156, adj_max: 8.90238094329834


In [None]:
# dqfim d_eff (x, prefixed via x_qfim_type) vs. dqfim_var_nonzero_log (y, full
# column name), coloured by error; no trend line.
fig = plot_two_metrics_with_color(
    df_agg_by_test_key,
    x_qfim_type='dqfim',
    x_metric="d_eff",
    y_qfim_type=None,
    y_metric="dqfim_var_nonzero_log",
    color_metric="error",
    gamma=1.5,
    cmap_name="viridis",
    add_trendline=False,
)
fig.show()

Color column 'error' minimum value: -4.680982703069576
test corresponding to minimum value:
4    test286
Name: test_key, dtype: object
Color column 'error' maximum value: -3.092023919708461
test corresponding to maximum value:
10    test64
Name: test_key, dtype: object

adj_min: -4.680982703069576, adj_max: -3.092023919708461
