In [1]:
import numpy as np
from pathlib import Path
from collections import Counter
import json # If you decide to save/load theoretical expectations

# --- Configuration (should match the data generation script) ---
NUM_QUBITS = 6 # Or whatever NUM_QUBITS you used for data generation
RNG_SEED = 42   # Same seed to regenerate the original W-state

# --- Define Data Directory (where measurements are stored) ---
DATA_DIR = Path("./w_aug_tomography_data") # Must match previous notebook
if not DATA_DIR.exists():
    raise FileNotFoundError(f"ERROR: Data directory {DATA_DIR.resolve()} not found!")
print(f"Reading data from: {DATA_DIR.resolve()}")

# --- Initialize RNG for regenerating the original W-state ---
rng_phases_original = np.random.default_rng(RNG_SEED)

print(f"Number of qubits for validation: {NUM_QUBITS}")

Reading data from: /Users/Tonni/Desktop/master-code/neural-quantum-tomo/case_studies/w_phase_augmented/w_aug_tomography_data
Number of qubits for validation: 6


In [2]:
print(f"Regenerating the original {NUM_QUBITS}-qubit phase-augmented W state...")
state_dim = 1 << NUM_QUBITS
w_aug_original_true_state = np.zeros(state_dim, dtype=complex)
thetas_original = rng_phases_original.uniform(0, 2 * np.pi, size=NUM_QUBITS)
for k in range(NUM_QUBITS):
    idx = 1 << (NUM_QUBITS - 1 - k)
    w_aug_original_true_state[idx] = np.exp(1j * thetas_original[k]) / np.sqrt(NUM_QUBITS)
print(f"Original W state norm: {np.linalg.norm(w_aug_original_true_state):.6f}")

Regenerating the original 6-qubit phase-augmented W state...
Original W state norm: 1.000000


In [4]:
# Define Pauli matrices (useful for theoretical calculations)
sigma_i = np.array([[1, 0], [0, 1]], dtype=complex)
sigma_x = np.array([[0, 1], [1, 0]], dtype=complex)
sigma_y = np.array([[0, -1j], [1j, 0]], dtype=complex)
sigma_z = np.array([[1, 0], [0, -1]], dtype=complex)
pauli_ops = {'I': sigma_i, 'X': sigma_x, 'Y': sigma_y, 'Z': sigma_z}

all_measurement_bases_strings = []
# ... (same code as before to populate all_measurement_bases_strings) ...
amplitude_basis_str = 'Z' * NUM_QUBITS
all_measurement_bases_strings.append(amplitude_basis_str)
for i in range(NUM_QUBITS - 1):
    basis_list = ['Z'] * NUM_QUBITS; basis_list[i] = 'X'; basis_list[i+1] = 'X'
    all_measurement_bases_strings.append("".join(basis_list))
for i in range(NUM_QUBITS - 1):
    basis_list = ['Z'] * NUM_QUBITS; basis_list[i] = 'X'; basis_list[i+1] = 'Y'
    all_measurement_bases_strings.append("".join(basis_list))


# Infer NUM_SAMPLES_PER_BASIS (same logic as before)
num_samples_inferred = None
# ... (same inference logic) ...
if num_samples_inferred is None: # Fallback if inference fails
    print("Warning: Could not infer num_samples. Using a default of 20.")
    num_samples_inferred = 20


# Store parsed data: {basis_string: list_of_numerical_eigenvalue_tuples}
# Each tuple will be (-1, 1, 1, -1, ...) corresponding to eigenvalues
parsed_datasets = {}

print("\n--- Ingesting and Parsing Measurement Data ---")
for basis_str in all_measurement_bases_strings:
    filename = DATA_DIR / f"w_aug_{basis_str}_{num_samples_inferred}.txt"
    dataset_for_basis_eigenvalues = []
    if filename.exists():
        # print(f"Reading data for basis: {basis_str} from {filename.name}")
        with open(filename, 'r') as f_in:
            for line_num, line in enumerate(f_in):
                measurement_char_str = line.strip()
                if len(measurement_char_str) != NUM_QUBITS:
                    print(f"Warning: Line {line_num+1} in {filename.name} has wrong length: '{measurement_char_str}'")
                    continue

                eigenvalues_for_sample = []
                valid_sample = True
                for char_idx, measured_char in enumerate(measurement_char_str):
                    # pauli_measured_on_qubit = basis_str[char_idx] # This is the *intended* measurement
                    # The character itself tells us the Pauli and outcome
                    if measured_char.islower(): # -1 eigenvalue
                        eigenvalues_for_sample.append(-1)
                    elif measured_char.isupper(): # +1 eigenvalue
                        eigenvalues_for_sample.append(1)
                    else:
                        print(f"Warning: Invalid char '{measured_char}' in {filename.name}, line {line_num+1}")
                        valid_sample = False
                        break
                if valid_sample:
                    dataset_for_basis_eigenvalues.append(tuple(eigenvalues_for_sample))

        parsed_datasets[basis_str] = dataset_for_basis_eigenvalues
        # print(f"  Read and parsed {len(dataset_for_basis_eigenvalues)} samples.")
    else:
        print(f"Warning: Measurement file not found for basis {basis_str}: {filename}")

if not parsed_datasets:
    raise RuntimeError("ERROR: No measurement data was loaded. Cannot proceed.")

print("\n--- Data Ingestion and Parsing Complete ---")
print(f"Loaded data for {len(parsed_datasets)} bases.")
# Example:
# if amplitude_basis_str in parsed_datasets:
#     print(f"First 5 parsed samples for ZZZ basis: {parsed_datasets[amplitude_basis_str][:5]}")


--- Ingesting and Parsing Measurement Data ---

--- Data Ingestion and Parsing Complete ---
Loaded data for 11 bases.


In [5]:
print("\n--- Validation 1: Z-Basis Probability Distribution ---")
z_basis_key = 'Z' * NUM_QUBITS
if z_basis_key in parsed_datasets:
    z_eigenvalue_samples = parsed_datasets[z_basis_key] # List of tuples like (1,-1,1,...)

    # Convert eigenvalue tuples to binary strings (0 for +1, 1 for -1)
    # This mapping is crucial for comparing with |<binary_string | Psi>|^2
    # If +1 eigenvalue of sigma_Z is state |0> and -1 is state |1>
    z_binary_outcomes = []
    for eigen_tuple in z_eigenvalue_samples:
        binary_string = "".join(['0' if e == 1 else '1' for e in eigen_tuple])
        z_binary_outcomes.append(binary_string)

    empirical_counts = Counter(z_binary_outcomes)
    total_z_samples = len(z_binary_outcomes)

    print(f"Empirical probabilities from {total_z_samples} Z-basis samples:")
    empirical_probs = {outcome: count / total_z_samples for outcome, count in empirical_counts.items()}

    # Calculate theoretical probabilities from w_aug_original_true_state
    theoretical_probs_z = {}
    for i in range(state_dim):
        binary_string = format(i, f'0{NUM_QUBITS}b') # int to binary string, e.g., 0 -> "000", 1 -> "001"
        # This assumes computational basis |000>, |001>, ...
        # matches our binary string interpretation.
        prob = np.abs(w_aug_original_true_state[i])**2
        if prob > 1e-9: # Only store non-negligible probabilities
            theoretical_probs_z[binary_string] = prob

    print("\nComparison (Empirical vs Theoretical):")
    all_outcomes = sorted(list(set(empirical_probs.keys()) | set(theoretical_probs_z.keys())))

    max_empirical_dev = 0
    total_squared_error = 0

    for outcome_str in all_outcomes:
        emp_p = empirical_probs.get(outcome_str, 0.0)
        the_p = theoretical_probs_z.get(outcome_str, 0.0)

        # W-state components are those with a single '1' (if 0 means +1 Z-eigenstate |0>)
        # or single '0' (if 1 means +1 Z-eigenstate |0>)
        # Our W-state has |100...>, |010...>, etc.
        # binary_string: '0' means sigma_Z = +1 (state |0>), '1' means sigma_Z = -1 (state |1>)
        # So, W-state "single excitation" |...1...> (where |1> is sigma_Z = -1)
        # corresponds to binary strings with a single '1'.
        is_w_like = (outcome_str.count('1') == 1)

        if emp_p > 0 or the_p > 1e-7: # Print if either is significant
            print(f"  Outcome {outcome_str} (W-like: {is_w_like}): Emp_Prob={emp_p:.4f}, Theory_Prob={the_p:.4f}, Diff={emp_p-the_p:+.4f}")
            if abs(emp_p - the_p) > max_empirical_dev:
                max_empirical_dev = abs(emp_p - the_p)
            total_squared_error += (emp_p - the_p)**2

    print(f"\nMax absolute deviation in Z-probabilities: {max_empirical_dev:.4f}")
    print(f"Mean Squared Error in Z-probabilities: {total_squared_error/len(all_outcomes):.6f}")

    # Chi-squared test could also be used if counts are high enough
else:
    print(f"Data for Z-basis '{z_basis_key}' not found.")


--- Validation 1: Z-Basis Probability Distribution ---
Empirical probabilities from 20 Z-basis samples:

Comparison (Empirical vs Theoretical):
  Outcome 000001 (W-like: True): Emp_Prob=0.0500, Theory_Prob=0.1667, Diff=-0.1167
  Outcome 000010 (W-like: True): Emp_Prob=0.1500, Theory_Prob=0.1667, Diff=-0.0167
  Outcome 000100 (W-like: True): Emp_Prob=0.1500, Theory_Prob=0.1667, Diff=-0.0167
  Outcome 001000 (W-like: True): Emp_Prob=0.1000, Theory_Prob=0.1667, Diff=-0.0667
  Outcome 010000 (W-like: True): Emp_Prob=0.2500, Theory_Prob=0.1667, Diff=+0.0833
  Outcome 100000 (W-like: True): Emp_Prob=0.3000, Theory_Prob=0.1667, Diff=+0.1333

Max absolute deviation in Z-probabilities: 0.1333
Mean Squared Error in Z-probabilities: 0.007222


In [6]:
def get_operator_for_qubit(pauli_char, qubit_idx, total_qubits):
    """Helper to construct N-qubit operator for a single Pauli on one qubit."""
    op_list = [sigma_i] * total_qubits
    if pauli_char == 'X': op_list[qubit_idx] = sigma_x
    elif pauli_char == 'Y': op_list[qubit_idx] = sigma_y
    elif pauli_char == 'Z': op_list[qubit_idx] = sigma_z
    else: raise ValueError(f"Unknown Pauli char: {pauli_char}")

    full_op = op_list[0]
    for i in range(1, total_qubits):
        full_op = np.kron(full_op, op_list[i])
    return full_op

def get_two_qubit_operator(p1_char, q1_idx, p2_char, q2_idx, total_qubits):
    """Helper for N-qubit operator for two Paulis on two qubits."""
    op_list = [sigma_i] * total_qubits
    # Set first Pauli
    if p1_char == 'X': op_list[q1_idx] = sigma_x
    elif p1_char == 'Y': op_list[q1_idx] = sigma_y
    elif p1_char == 'Z': op_list[q1_idx] = sigma_z
    # Set second Pauli
    if p2_char == 'X': op_list[q2_idx] = sigma_x
    elif p2_char == 'Y': op_list[q2_idx] = sigma_y
    elif p2_char == 'Z': op_list[q2_idx] = sigma_z

    full_op = op_list[0]
    for i in range(1, total_qubits):
        full_op = np.kron(full_op, op_list[i])
    return full_op

print("\n--- Validation 2: Two-Qubit Correlators ---")

correlator_bases_to_check = []
if NUM_QUBITS >= 2:
    # XX on (0,1), rest Z
    b_xx = ['Z'] * NUM_QUBITS; b_xx[0] = 'X'; b_xx[1] = 'X'; correlator_bases_to_check.append("".join(b_xx))
    # XY on (0,1), rest Z
    b_xy = ['Z'] * NUM_QUBITS; b_xy[0] = 'X'; b_xy[1] = 'Y'; correlator_bases_to_check.append("".join(b_xy))
    # Add more if desired, e.g., YX, YY, XZ, ZX, etc. on different qubit pairs

for basis_str in correlator_bases_to_check:
    if basis_str in parsed_datasets:
        eigenvalue_samples = parsed_datasets[basis_str] # List of tuples like (1,-1,1,...)
        if not eigenvalue_samples:
            print(f"No samples for basis {basis_str}, skipping correlator.")
            continue

        # Identify the two non-Z qubits and their Pauli ops
        # This is simplified for our specific basis generation.
        # A more general approach would parse the basis_str fully.
        q_indices_measured = []
        paulis_measured = []
        for i, p_char in enumerate(basis_str):
            if p_char != 'Z':
                q_indices_measured.append(i)
                paulis_measured.append(p_char)

        if len(q_indices_measured) != 2: # Should be 2 for our XX, XY correlator bases
            print(f"Warning: Basis {basis_str} does not seem to be a two-qubit correlator basis as expected.")
            continue

        q0_idx, q1_idx = q_indices_measured[0], q_indices_measured[1]
        p0_char, p1_char = paulis_measured[0], paulis_measured[1]

        # Empirical correlator: product of eigenvalues for q0_idx, q1_idx, averaged
        empirical_correlator_values = []
        for sample_eigenvalues in eigenvalue_samples:
            # Eigenvalues in the sample correspond to the order of qubits
            val_q0 = sample_eigenvalues[q0_idx]
            val_q1 = sample_eigenvalues[q1_idx]
            empirical_correlator_values.append(val_q0 * val_q1)

        empirical_mean_corr = np.mean(empirical_correlator_values)
        empirical_std_err_corr = np.std(empirical_correlator_values) / np.sqrt(len(empirical_correlator_values))

        # Theoretical correlator
        # This is <Psi_W | Op_q0 * Op_q1 * (Product_k Z_k for Z-measured qubits) | Psi_W>
        # For W-state, only terms where Z-measured qubits are in |0> (eigenvalue +1) contribute significantly
        # if the W-state components are |...1...>.
        # For the W state |Psi_W> = sum_k c_k |phi_k> where |phi_k> are comp. basis states.
        # <P0_idx0 P1_idx1> can be tricky because the measurement basis also has Zs.
        # The paper implies that their basis choice simplifies this for phase extraction.
        # For a pure state |Psi>, <A> = <Psi| A |Psi>.
        # The correlator we are estimating from this basis is <P_q0 P_q1>
        # where P_q0 is Pauli on q0, P_q1 on q1, AND other qubits k are implicitly
        # projected onto the Z-eigenstates that are consistent with the W-state structure.
        #
        # A simpler theoretical value to compare against is the direct expectation value
        # of the two-qubit Pauli product operator on the *full* W-state.
        op_to_measure = get_two_qubit_operator(p0_char, q0_idx, p1_char, q1_idx, NUM_QUBITS)
        theoretical_corr = np.vdot(w_aug_original_true_state, op_to_measure @ w_aug_original_true_state).real
        # .real because expectation of Hermitian op is real. Small imag part due to numerics.

        print(f"\nCorrelator for basis {basis_str} (measuring {p0_char}{q0_idx} {p1_char}{q1_idx}):")
        print(f"  Empirical: {empirical_mean_corr:.4f} +/- {empirical_std_err_corr:.4f} (from {len(eigenvalue_samples)} samples)")
        print(f"  Theoretical <{p0_char}{q0_idx} {p1_char}{q1_idx}>: {theoretical_corr:.4f}")
        print(f"  Difference: {empirical_mean_corr - theoretical_corr:+.4f}")

    else:
        print(f"Data for correlator basis {basis_str} not found.")


--- Validation 2: Two-Qubit Correlators ---

Correlator for basis XXZZZZ (measuring X0 X1):
  Empirical: 0.0000 +/- 0.2236 (from 20 samples)
  Theoretical <X0 X1>: 0.3293
  Difference: -0.3293

Correlator for basis XYZZZZ (measuring X0 Y1):
  Empirical: 0.3000 +/- 0.2133 (from 20 samples)
  Theoretical <X0 Y1>: 0.0520
  Difference: +0.2480
