In [None]:
import subprocess
import neutralb1.utils

WORKSPACE_DIR = neutralb1.utils.get_workspace_dir()

# Commit currently checked out in the workspace, printed so it can be recorded
# alongside the results of this notebook.
git_hash = (
    subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=WORKSPACE_DIR)
    .decode("utf-8")
    .strip()
)
print(git_hash)

**Repository Version** 
This notebook was run at commit:
`FILL WHEN DONE`

# Verifying the Projection of Moments with Signal MC
As found previously, the projected moments have some missing factors causing them not to match expectations. Since then, two major updates have occurred:
1. I've developed a direct fit to data using moments as AmpTools parameters, which should now provide a set of "true" values we can compare the projected moments to.
   <br>a. Has issues in extracting moments with $>1\%$ contribution, but this should be enough to track down factors
2. The old Python projection script has now been replaced by a C++ version that also includes the necessary normalization integrals
   <br>a. The script will likely be updated over time, so check the commit hash for what version to use.

This study will proceed as follows:
1. Generate Signal Monte Carlo (MC) according to a pseudo-realistic set of waves (no acceptance effects i.e. *thrown*)
   <br>a. 35% polarization and in the PARA_0 orientation
2. Fit MC with same waveset, and obtain a fit result that should match the generated wave values
3. Project moments from the fit result to obtain a projected moment-set $H_{\text{proj}}$
4. Fit MC with the same number of moments, and obtain a fitted moment-set $H_{\text{fit}}$
5. Compare the fit and projected sets to investigate the missing factors.

## Setup

In [None]:
# load common libraries
import pandas as pd
import pathlib
import os, sys
import numpy as np
import matplotlib.pyplot as plt

import neutralb1.utils as utils
from neutralb1.analysis.result import ResultManager

utils.load_environment()

# Directory constants shared by the rest of the notebook
CWD = pathlib.Path.cwd()
STUDY_DIR = f"{WORKSPACE_DIR}/studies/input-output-tests/verify-moment"
AMP_DIR = f"{STUDY_DIR}/data/amplitude_results"
MOMENT_DIR = f"{STUDY_DIR}/data/moment_results"

# Export the same locations to the process environment so the shell cells can use them
os.environ.update(
    {
        "WORKSPACE_DIR": WORKSPACE_DIR,
        "STUDY_DIR": STUDY_DIR,
        "AMP_DIR": AMP_DIR,
        "MOMENT_DIR": MOMENT_DIR,
    }
)


## Data Generation and Fits

### Generate
We'll use the same cfg file to both generate and fit the amplitude-based Monte Carlo. This will be done in a single bin of mass at:
* $0.1 < -t < 0.2$
* $8.2 < E_\gamma < 8.8$
* $1.20 < M_{\omega\pi^0} < 1.22$

In [None]:
# Show the first 18 lines of the amplitude cfg file
with open(f"{STUDY_DIR}/cfg_files/amplitudes.cfg", "r") as cfg_file:
    cfg_head = [cfg_file.readline() for _ in range(18)]
print("".join(cfg_head), end="")

In [None]:
%%bash 
# Generate the signal MC once; an existing data.root is reused.
# Fail early if the Setup cell has not exported STUDY_DIR, instead of silently
# resolving the paths below to /data/... and /cfg_files/...
: "${STUDY_DIR:?not set; run the Setup cell first}"
data_file="${STUDY_DIR}/data/root_files/data.root"

if [ -e "${data_file}" ]; then
    echo "data exists, skipping generation."
else
    echo "Generating data..."
    # Path arguments are quoted so a workspace path containing spaces stays one argument.
    # The -l/-u, -a/-b and -tmin/-tmax values match the mass, beam-energy and -t
    # limits of the bin quoted in the text (presumably that is what they select).
    gen_vec_ps -c "${STUDY_DIR}/cfg_files/amplitudes.cfg" \
        -o "${data_file}" \
        -l 1.20 -u 1.22 \
        -n 50000 \
        -a 8.2 -b 8.8 \
        -tmin 0.1 -tmax 0.2
    # Judge success by the output file rather than by the generator's exit status
    if [ -e "${data_file}" ]; then
        echo "Data generation successful."
    else
        echo "Data generation failed."
        exit 1
    fi
fi

In [None]:
%%bash

# Convert the generated events to CSV once; an existing data.csv is reused.
# Fail early if the Setup cell has not exported the directories used below.
: "${WORKSPACE_DIR:?not set; run the Setup cell first}"
: "${STUDY_DIR:?not set; run the Setup cell first}"
root_dir="${STUDY_DIR}/data/root_files"

if [ -e "${root_dir}/data.csv" ]; then
    echo "Data CSV exists, skipping conversion."
else
    echo "Converting data to CSV..."
    # NOTE(review): the input is gen_vec_ps_diagnostic.root, not data.root -- confirm
    # that the generation step leaves that file in data/root_files.
    # Paths are quoted so a workspace path containing spaces stays one argument.
    python "${WORKSPACE_DIR}/src/neutralb1/batch/convert_to_csv.py" \
        -i "${root_dir}/gen_vec_ps_diagnostic.root" -o "${root_dir}/data.csv"
fi

### Fitting

#### Amplitudes
Amplitude fits will require a GPU session due to their performance requirements

In [None]:
%%bash
# The fit itself is run by hand; this cell only reports whether it still has to be done.
if [ ! -e "${AMP_DIR}/omegapi.fit" ]; then
    echo "Run 'fit -c ${STUDY_DIR}/cfg_files/amplitudes.cfg -m 10000000 -r 50 > amplitude_fit.log' on an interactive GPU node to fit the data."
else
    echo "Amplitude results exist, skipping fitting."
fi

Once the fits are complete, run the vecps_plotter to generate the files used to view the angular distributions

In [None]:
%%bash

# Abort rather than work in the wrong directory: with an empty AMP_DIR an unquoted
# `cd` goes to $HOME, and after a failed `cd` the symlinks below would be created
# in whatever directory the cell started in.
: "${STUDY_DIR:?not set; run the Setup cell first}"
cd "${AMP_DIR:?not set; run the Setup cell first}" || exit 1

# create symlinks so the vecps_plotter can find the data/phasespace files
root_dir="${STUDY_DIR}/data/root_files"
ln -sf "${root_dir}/data.root" ./data.root
ln -sf "${root_dir}/anglesOmegaPiPhaseSpace.root" ./anglesOmegaPiPhaseSpace.root
ln -sf "${root_dir}/anglesOmegaPiPhaseSpaceAcc.root" ./anglesOmegaPiPhaseSpaceAcc.root

# Plotting is skipped when its output file is already present
if [ -e ./vecps_plot.root ]; then
    echo "Plotter output already exists, skipping plotting."
else
    echo "Plotting results..."
    vecps_plotter ./omegapi.fit
    angle_plotter ./vecps_plot.root "Thrown MC" "" "${AMP_DIR}" --gluex-style
fi

Convert the fit output to csv files

In [None]:
%%bash

# Abort if the Setup cell has not exported the directories or the result directory is missing
: "${WORKSPACE_DIR:?not set; run the Setup cell first}"
cd "${AMP_DIR:?not set; run the Setup cell first}" || exit 1

converter="${WORKSPACE_DIR}/src/neutralb1/batch/convert_to_csv.py"

# Stop if the first conversion fails: only the last command's status reaches the
# notebook, so a failure here would otherwise be hidden by a successful second run.
python "${converter}" -i omegapi.fit -o result.csv || exit 1
# Second pass over the same fit file with --moments, written to projected_moments.csv
python "${converter}" -i omegapi.fit -o projected_moments.csv --moments

#### Moments

Same process as the amplitude fits

In [None]:
%%bash
# Print the moment-fit cfg file; the path is quoted and the cell fails with a clear
# message if the Setup cell has not exported STUDY_DIR.
cat "${STUDY_DIR:?not set; run the Setup cell first}/cfg_files/moments.cfg"

In [None]:
%%bash
# Abort rather than work in the wrong directory: with an empty MOMENT_DIR an unquoted
# `cd` goes to $HOME, and after a failed `cd` the symlinks below would be created
# in whatever directory the cell started in.
: "${STUDY_DIR:?not set; run the Setup cell first}"
cd "${MOMENT_DIR:?not set; run the Setup cell first}" || exit 1

# Link the data/phasespace files into the moment-result directory. The link names are
# spelled out for all three files, matching the amplitude plotting cell.
root_dir="${STUDY_DIR}/data/root_files"
ln -sf "${root_dir}/data.root" ./data.root
ln -sf "${root_dir}/anglesOmegaPiPhaseSpace.root" ./anglesOmegaPiPhaseSpace.root
ln -sf "${root_dir}/anglesOmegaPiPhaseSpaceAcc.root" ./anglesOmegaPiPhaseSpaceAcc.root

# The fit itself is run by hand; only report whether it still has to be done
if [ -e "./omegapi.fit" ]; then
    echo "Moment results exist, skipping fitting."
else
    echo "Run 'fit -c ${STUDY_DIR}/cfg_files/moments.cfg -m 10000000 -r 50 > moment_fit.log' on an interactive GPU node to fit the data."
fi

In [None]:
%%bash

# Abort if MOMENT_DIR is unset or missing, so the plotters never run in another directory
cd "${MOMENT_DIR:?not set; run the Setup cell first}" || exit 1

# Plotting is skipped when its output file is already present
if [ -e ./vecps_plot.root ]; then
    echo "Plotter output already exists, skipping plotting."
else
    echo "Plotting results..."
    vecps_plotter ./omegapi.fit
    angle_plotter ./vecps_plot.root "Thrown MC" "" "${MOMENT_DIR}" --gluex-style
fi

In [None]:
%%bash

# Abort if the Setup cell has not exported the directories or the result directory is missing
: "${WORKSPACE_DIR:?not set; run the Setup cell first}"
cd "${MOMENT_DIR:?not set; run the Setup cell first}" || exit 1

# Convert the moment fit output to result.csv (script path quoted against spaces)
python "${WORKSPACE_DIR}/src/neutralb1/batch/convert_to_csv.py" \
    -i omegapi.fit -o result.csv

## Analysis

In [None]:
# Read the four CSV files produced by the cells above, in the order of the names on the left
csv_sources = (
    f"{STUDY_DIR}/data/root_files/data.csv",
    f"{AMP_DIR}/result.csv",
    f"{AMP_DIR}/projected_moments.csv",
    f"{MOMENT_DIR}/result.csv",
)
data_df, fit_results_df, projected_moments_df, fitted_moments_df = map(pd.read_csv, csv_sources)


### Checking Amplitude Results
We'll first want to make sure that our amplitude-based fits actually resolved to the values we generated with before we project them out

In [None]:
# Moment value columns (names starting with "H"), leaving out the "_err" columns
proj_cols = [col for col in projected_moments_df.columns if col.startswith("H") and not col.endswith("_err")]
fit_cols = [col for col in fitted_moments_df.columns if col.startswith("H") and not col.endswith("_err")]

# Keep a single component per projected moment: drop "_imag" for H0/H1 and "_real" for H2.
# The error columns of the dropped component ("<name>_imag_err" / "<name>_real_err") are
# dropped as well, if the CSV has them -- otherwise stripping the suffixes below would
# leave two different columns both named "<name>_err".
# NOTE(review): assumes error columns are named "<name>_<part>_err"; if none exist this
# selects exactly the same columns as before.
imag_cols_to_drop = [
    col
    for col in projected_moments_df.columns
    if col.startswith(("H0", "H1")) and col.endswith(("_imag", "_imag_err"))
]
real_cols_to_drop = [
    col
    for col in projected_moments_df.columns
    if col.startswith("H2") and col.endswith(("_real", "_real_err"))
]
filtered_proj_moments_df = projected_moments_df.drop(columns=imag_cols_to_drop + real_cols_to_drop)

# remove the real or imag suffix from the remaining column names
filtered_proj_moments_df = filtered_proj_moments_df.rename(
    columns={col: col.replace("_real", "").replace("_imag", "") for col in filtered_proj_moments_df.columns}
)


In [None]:
# Moment names present in both the projected and the fitted result sets, in sorted order
common_moments = sorted(set(fit_cols).intersection(filtered_proj_moments_df.columns))
print(f"Common moments: {common_moments}")

In [None]:
# For every shared moment, print projected / fitted using the first row of each table.
# A fitted value of exactly zero gives a NaN ratio instead of a division by zero.
for moment in common_moments:
    proj_val = filtered_proj_moments_df[moment].iloc[0]
    fit_val = fitted_moments_df[moment].iloc[0]
    if fit_val != 0:
        ratio = proj_val / fit_val
    else:
        ratio = float('nan')
    print(f"{moment}: projected / fitted = {proj_val:.1g} / {fit_val:.1g} \t= {ratio:.4g}")