% standard_pdf_report.tex
% pyGSTi standard single-estimate PDF report template.
\documentclass[11pt]{article}
\usepackage{longtable}
\usepackage{graphicx}
\usepackage{fix-cm}
\usepackage[margin=1in,paperwidth=8.5in,paperheight=11in]{geometry}
\usepackage[section]{placeins}
\usepackage{flafter}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{etoolbox}
\usepackage{units}
\usepackage{multirow}
\usepackage{adjustbox}
\usepackage{array}
\usepackage{hyperref}
\usepackage{pdfcomment}
\usepackage{color}
\usepackage{spverbatim}
\definecolor{darkgreen}{RGB}{0, 128, 0}
\setcounter{topnumber}{3}
\setcounter{bottomnumber}{3}
\setcounter{totalnumber}{4}
\renewcommand{\topfraction}{0.9}
\renewcommand{\bottomfraction}{0.9}
\renewcommand{\textfraction}{0.1}
\renewcommand{\floatpagefraction}{0.7}
% --- Dirac bra-ket notation macros ---
% Double-angle bracket pieces used by the superoperator ("s"-prefixed) macros below.
\newcommand{\rrangle}{\rangle\!\rangle} \newcommand{\llangle}{\langle\!\langle}
% \ket{psi} -> |psi>,  \bra{psi} -> <psi|,  \braket{a}{b} -> <a|b>
\newcommand{\ket}[1]{\ensuremath{\left|#1\right\rangle}}
\newcommand{\bra}[1]{\ensuremath{\left\langle#1\right|}}
\newcommand{\braket}[2]{\ensuremath{\left\langle#1|#2\right\rangle}}
% Expectation value: \expec{X} -> <X>
\newcommand{\expec}[1]{\ensuremath{\left\langle#1\right\rangle}}
% Outer product |a><b|, matrix element <a|O|b>, and projector |a><a|
\newcommand{\ketbra}[2]{\ket{#1}\!\!\bra{#2}}
\newcommand{\braopket}[3]{\ensuremath{\bra{#1}#2\ket{#3}}}
\newcommand{\proj}[1]{\ketbra{#1}{#1}}
% Superoperator-space ("double-ket") analogues of the macros above, typeset
% with double angle brackets: \sket{A} -> |A>>, \sbraket{A}{B} -> <<A|B>>, etc.
\newcommand{\sket}[1]{\ensuremath{\left|#1\right\rrangle}}
\newcommand{\sbra}[1]{\ensuremath{\left\llangle#1\right|}}
\newcommand{\sbraket}[2]{\ensuremath{\left\llangle#1|#2\right\rrangle}}
\newcommand{\sketbra}[2]{\sket{#1}\!\!\sbra{#2}}
\newcommand{\sbraopket}[3]{\ensuremath{\sbra{#1}#2\sket{#3}}}
\newcommand{\sproj}[1]{\sketbra{#1}{#1}}
% Vector/operator norm: \norm{x} -> ||x||
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
% Identity-operator glyph (a "1" with an overlaid "l")
\def\Id{1\!\mathrm{l}}
% Upright trace symbol
\newcommand{\Tr}[0]{\mathrm{Tr}}
% Scientific-notation suffix: \e{4} -> x 10^4 (defined only if not already defined)
\providecommand{\e}[1]{\ensuremath{\times 10^{#1}}}
%Command allowing iftoggle to work with verbatims inside clause
% \iftoggleverb{toggle} expands to a bare \iftrue/\iffalse primitive
% conditional (unlike etoolbox's \iftoggle, which reads its branches as macro
% arguments), so verbatim material may appear inside the conditional text.
% It reaches into etoolbox internals (etb@tgl@...), hence the \makeatletter
% guard; on an undefined toggle it raises etoolbox's "not a toggle" error.
\makeatletter
\newcommand{\iftoggleverb}[1]{%
\ifcsdef{etb@tgl@#1}
{\csname etb@tgl@#1\endcsname\iftrue\iffalse}
{\etb@noglobal\etb@err@notoggle{#1}\iffalse}%
}
\makeatother
%Command used for python automatic substitution
% \putfield{key}{default}: placeholder targeted by the Python report
% generator's textual substitution; if no substitution is made, the default
% text (#2) is rendered and the key (#1) is discarded.
\newcommand{\putfield}[2]{#2}
% Vertically center #1 on the current text line (box measured into \box0,
% then re-set inside a \parbox of the same width).
\newcommand*{\vcenteredhbox}[1]{\begingroup
\setbox0=\hbox{#1}\parbox{\wd0}{\box0}\endgroup}
% Report-content toggles; their values are injected by the report generator
% through the \putfield{settoggles} hook below.
\newtoggle{confidences}
\newtoggle{LsAndGermsSet}
\newtoggle{showAppendix}
\newtoggle{ShowScaling}     % gates the "Robust data scaling" subsection
\newtoggle{CompareDatasets}
% BrevityLT<n>: presumably "brevity setting < n" -- TODO confirm against the
% generator.  In this file BrevityLT4 gates the Model Violation section and
% BrevityLT1 gates the per-circuit model-violation detail.
\newtoggle{BrevityLT1}
\newtoggle{BrevityLT2}
\newtoggle{BrevityLT3}
\newtoggle{BrevityLT4}
\newtoggle{CombineRobust}   % include post-scaling copies of the model-violation figures
\putfield{settoggles}{}
\hypersetup{
pdfinfo={ \putfield{pdfinfo}{} }
}
\begin{document}
\title{\putfield{title}{Report Title Goes Here}}
\date{\vspace{-1cm}\today}
%\author{}
\begingroup
\let\center\flushleft
\let\endcenter\endflushleft
\maketitle
\endgroup
\tableofcontents
\section{Summary}
GST characterizes logic operations on a quantum device (e.g., a qubit). It treats the device as a black box equipped with a set of ``buttons'' that perform operations to the quantum system inside. One button initializes it, a second button triggers an $n$-outcome measurement, and the remaining buttons perform transformations called \emph{gates}. %add reference(s) to GST paper/ pygsti document?
GST's primary output is an estimated \emph{gate set} that predictively models the device's observed behavior. Gate sets consist of state preparation(s), POVM effects, and gates. This report presents a gate-set tomography (GST) analysis of a single GST estimate that has been gauge-optimized in a particular way.
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_progress_bar_plot_sum}{}
\end{adjustbox}
\caption{\textbf{Model violation summary.} This figure is about goodness-of-fit. This plot shows how well this estimate fits the data. PyGSTi finds the maximum value of the loglikelihood ($-2\log\mathrm{Pr(data|gateset)}$), and compares it to what we expect to see \emph{if} the data were generated by a Markovian gateset. In this plot, each bar shows by how many standard deviations the \emph{actual} log-likelihood exceeds its expected value. Expected values and standard deviations are derived from $\chi^2$ theory. On the horizontal axis, $L$ indexes different ML estimates based on datasets including only circuits of length up to $L$. Low values indicate better fits (less model violation). Each bar is colored according to the ``star'' rating shown in the Model Violation tab.\label{final_model_fit_progress_bar_plot_sum}}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_histogram}{}
\end{adjustbox}
\caption{\textbf{Histogram of per-circuit model violation} This figure is about goodness-of-fit. When the estimate doesn't fit the data perfectly, we can quantify how badly it fails to predict each individual circuit in the dataset, using the excess loglikelihood ($-2\log\mathrm{Pr}(\mathrm{data}|\mathrm{gateset})$) above and beyond the minimum value ($-2 \log \mathrm{Pr}(\mathrm{data}|\mathrm{observed\ frequencies})$). This plot shows a histogram of those values for all the circuits in the dataset. Ideally, they should have the $\chi^2$ distribution shown by the solid line. Red indicates data that are inconsistent with the model at the 0.95 confidence level, as shown in more detail in the Model Violation tab.}
\end{center}
\end{figure}
\begin{table}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_vs_target_table_insummary}{}
\end{adjustbox}
\caption{\textbf{Comparison of estimated gates to targets} This table is about gate error metrics (fidelity). The metrics in this table compare the estimated gates to their ideal counterparts, and can generally be interpreted as some kind of error rate (per gate use). Entanglement (process) fidelity and 1/2-diamond norm are the best known of these; they are the same for purely stochastic errors, but coherent errors contribute much more to diamond norm. 1/2-trace-distance is a proxy for diamond norm that doesn't require cvxPy to be installed. The Eigenvalue metrics are gauge-invariant versions of fidelity and diamond-norm that only depend on the gate itself (not its relationship to other gates). Hovering the pointer over a heading will pop up a description.}
\end{center}
\end{table}
\iftoggle{BrevityLT4}{
\section{Model Violation}
The plots and tables in this section summarize how well the GST estimate actually fits the data. Although they may be less familiar to you than, say, process fidelity, these analyses are essential to understanding the GST estimate, and how much to trust it! Real qubit systems often display behavior that isn't consistent with modeling each gate as a stationary CPTP map. When pyGSTi tries to fit such data to its model (a single gateset), this ``non-Markovianity'' manifests as \emph{model violation}. This section provides several views of the model violation observed for this fit, ranging from the coarse-grained (total violation) to hyper-detailed (per-circuit violation). More observed model violation implies that the error metrics in other sections should be trusted \emph{less}.
This section asks the question ``How well was GST able to fit all of the data -- and did it fit well enough to suggest that its model is valid?''. A central tool used to do this is the \emph{likelihood function}, which we denote $\mathcal{L}$, which formally is the probability of the observed data given a set of model parameters. The basic idea is that we maximize the likelihood function to obtain the best set of model parameters (i.e.~gate set), and by looking at the value of this maximum we can determine the model's goodness-of-fit. We will actually deal primarily with the logarithm of the likelihood function, $\log(\mathcal{L})$, which is similarly maximized.
The log-likelihood for an $n$-outcome system with predicted probabilities $p_i$ and observed frequencies $f_i$ ($i=1\ldots n$) is given by:
\begin{equation}
\log(\mathcal{L}) = \sum_i N f_i \log(p_i),
\end{equation}
where $N$ is the total number of counts. In \emph{this} analysis, $\log(\mathcal{L})$ is used to compare the set of probabilities predicted by a gate set ($p_s$) and the frequencies obtained from a dataset ($f_s$). Each experiment (or gate sequence) $s$ is associated to two probabilities: ``plus'' has probability $p_s$ and ``minus'' has probability $1-p_s$. The $\log(\mathcal{L})$ contribution of a single gate string $s$ is
\begin{equation}
\log(\mathcal{L})_s = N f_s \log(p_s) + N (1-f_s) \log(1-p_s),\label{eqGateStringLogL}
\end{equation}
where $N$ is the number of times the experiment $s$ was performed, $p_s$ is the probability of a ``plus'' outcome as predicted by the gate set, and $f_s$ is the observed frequency of ``plus''. The total log-likelihood for an entire dataset is just the sum
\begin{equation}
\log(\mathcal{L}) = \sum_{s\in\mathcal{S}}{ \log(\mathcal{L})_s}.\label{eqDatasetLogL}
\end{equation}
A theoretical upper bound on the log-likelihood can be found by replacing $p_s$ with $f_s$ in Eq.~\ref{eqGateStringLogL} and evaluating Eq.~\ref{eqDatasetLogL}. We will refer to this quantity as $\log(\mathcal{L})_{ub}$.
Statistical theory has quite a lot to say about the likelihood function (see any of the major textbooks). Using some of these results, we can predict that if there are $N_p$ free parameters in the gate set that GST is fitting, and GST fits a dataset containing $N_s > N_p$ distinct experiments (gate sequences), then \emph{if the gate set model is correct}, two times the difference between $\log(\mathcal{L})_{ub}$ and the maximum $\log(\mathcal{L})$ obtained is a random variable with a $\chi^2_{k}$ distribution, where
$$k \equiv N_s - N_p.$$
Its expected value is $\expec{\chi^2}=k$, and its standard deviation is $\sqrt{2k}$. Thus, if the fit is ``good'', then twice $\Delta\log(\mathcal{L}) \equiv \log(\mathcal{L})_{ub} - \max(\log(\mathcal{L}))$ should lie roughly within the interval $[k-\sqrt{2k},k+\sqrt{2k}]$.
Thus, by comparing the difference $2\Delta\log(\mathcal{L}) - k$ to $\sqrt{2k}$, one can determine how well the GST estimate was able to fit the data.
The ML-GST algorithm used to generate this estimate is iterative. It starts by fitting only data from the shortest gate sequences (which are easy to fit \emph{and} insensitive to most non-Markovian noise), then successively adds longer and longer sequences (with base sequence length $L\leq 1,2,4,8,\ldots$) to the mix. Since we get an estimate at each intermediate $L$, it is possible to quantify not just the goodness of the \emph{best} fit (presented in the previous section), but how the goodness-of-fit behaves as longer and longer sequences are added in.
This data is presented in Table \ref{final_model_fit_progress_table}. What you should be looking for here is whether -- at each value of $L$ -- the $2\Delta\log(\mathcal{L})$ quantity is roughly the same as $k$. More precisely, is $|2\Delta\log(\mathcal{L})-k|$ less than or equal to $\sqrt{2k}$? If not, then the model is not fitting as well as it should, which usually indicates non-Markovian noise (or, rarely, that the GST algorithm has simply failed to find a good fit even though one exists).
As a rough rule of thumb, for GST experiments involving relatively long sequences (e.g. $L\geq100$):
\begin{itemize}
\item ``Incredibly good'' ($\bigstar\bigstar\bigstar\bigstar\bigstar$) experiments have $2\Delta\log(\mathcal{L}) \approx k$, as predicted by theory (and seen in simulations).
\item ``Great'' ($\bigstar\bigstar\bigstar\bigstar$) experiments have $2\Delta\log(\mathcal{L}) \leq 2k$ or so.
\item ``Good'' ($\bigstar\bigstar\bigstar$) experiments have $2\Delta\log(\mathcal{L}) \leq 5k$ or so.
\item ``Okay'' ($\bigstar\bigstar$) experiments have $2\Delta\log(\mathcal{L}) \leq 10k$.
\item Experiments in which $2\Delta\log(\mathcal{L}) > 10k$ ($\bigstar$) have very significant non-Markovian noise, and the results in the previous section should be viewed very cautiously.
\end{itemize}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_progress_table}{logL progress table will be placed here}
\end{adjustbox}
\caption{\textbf{Detailed overall model violation.} This table provides a detailed look at how the observed model violation -- defined by how badly the GST model fits the data -- evolves as more and more of the data are incorporated into the fit. PyGSTi fits the data iteratively, starting by just fitting data from the shortest circuits ($L=1$), and then adding longer and longer sequences. Each subset of the data, defined by its maximum sequence length $L$, yields an independent fit that is analyzed here. The key quantity is the difference between the observed and expected maximum loglikelihood ($\log(\mathcal{L})$). If the model fits, then $2\Delta\log(\mathcal{L})$ should be a $\chi^2_k$ random variable, where $k$ (the degrees of freedom) is the difference between $N_S$ (the number of independent data points) and $N_p$ (the number of model parameters). So $2\Delta\log(\mathcal{L})$ should lie in $[k-\sqrt{2k},k+\sqrt{2k}]$, and $N_\sigma = (2\Delta\log(\mathcal{L})-k)/\sqrt{2k}$ quantifies how many standard deviations it falls above the mean (a $p$-value can be straightforwardly derived from $N_\sigma$). The rating from 1 to 5 stars gives a very crude indication of goodness of fit.\label{final_model_fit_progress_table}}
\end{center}
\end{table}
\iftoggle{BrevityLT1}{
\subsection{Model violation for each individual circuit} %per-sequence detail
This section presents the second of two statistical tests available to detect that a dataset violates pyGSTi's Markovian gateset model. The first is based on the \emph{aggregate} loglikelihood score presented elsewhere. This section shows the \emph{individual} loglikelihood scores for each circuit in the dataset. Each circuit is represented by a colored box, whose color indicates how badly this estimate (model) failed to predict the observed outcome frequencies of that circuit. Light gray indicates consistency with the model, dark gray indicates possible -- but statistically insignificant -- inconsistency, and red squares indicate circuits whose outcomes are inconsistent with the model at the \emph{family-wise} \putfield{linlg_pcntle_inv}{} percent confidence level. In other words, if data are generated by a Markovian gateset, then with \putfield{linlg_pcntle_inv}{}\% probability \emph{every} box will be gray. Even a single red square thus represents a clear detection of model violation. When many squares are red, their pattern can provide useful diagnostic clues to what kind of non-Markovian noise is present.
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_colorscatter_plot}{}
\end{adjustbox}
\caption{\textbf{Per-circuit model violation vs. circuit length} The fit's total $2\Delta\log(\mathcal{L})$ is a sum over all $N_s$ circuits used for GST. This plot shows $2\Delta\log(\mathcal{L})$ for each individual circuit, plotted against that circuit's length (on the X axis). Certain forms of non-Markovian noise, like slow drift, produce a characteristic linear relationship. Note that the length plotted here is the \emph{actual} length of the circuit, not its nominal $L$.}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{color_boxplot_key_plot}{Key for color box plot sub grids}
\end{adjustbox}
\caption{\textbf{Sub-block key for subsequent plots.} Shows how elements of the sub-blocks in Figure \ref{final_model_fit_colorbox_plot} correspond to preparation and measurement fiducial sequences. Note that the column indicates the fiducial adjacent to state preparation, while the row indicates the fiducial adjacent to measurement.\label{color_boxplot_key_plot}}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_colorbox_plot}{Box plot of best gate set logL}
\end{adjustbox}
\caption{\textbf{Per-sequence model violation box plot.} This plot shows the $2\Delta\log(\mathcal{L})$ contribution for each individual circuit in the dataset. Each box represents a single gate sequence, and its color indicates whether GST was able to fit the corresponding frequency well. Shades of white/gray indicate typical (within the expected) values. Red squares represent statistically significant evidence for model violation (non-Markovianity), and the probability that \emph{any} red squares appear is \putfield{linlg_pcntle}{}\% when the data really are Markovian. Each square block of pixels (``plaquette'') corresponds to a particular germ-power ``base sequence'', and each pixel within a block corresponds to a specific ``fiducial pair'' -- i.e., choice of pre- and post-fiducial sequences. The base sequences are arranged by germ (varying from row to row), and by power/length (varying from column to column). Hovering over a colored box will pop up the exact circuit to which it corresponds, the observed frequencies, and the corresponding probabilities predicted by the GST estimate of the gateset.\label{final_model_fit_colorbox_plot}}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_tvd_colorbox_plot}{Box plot of best gate set TVD}
\end{adjustbox}
\caption{\textbf{Per-sequence total variational distance box plot.} This plot shows the total variational distance ($\frac{1}{2}\sum_i|p_i-f_i|$) contribution for each individual circuit in the dataset. Each box represents a single gate sequence, and its color indicates how well the observed frequency matches the probability generated by GST's best-estimate gate set. Each square block of pixels (``plaquette'') corresponds to a particular germ-power "base sequence", and each pixel within a block corresponds to a specific "fiducial pair" -- i.e., choice of pre- and post-fiducial sequences. The base sequences are arranged by germ (varying from row to row), and by power/length (varying from column to column). Hovering over a box will pop up the exact circuit to which it corresponds, the observed frequencies, and the corresponding probabilities predicted by the GST estimate of the gateset.\label{final_model_tvd_colorbox_plot}}
\end{center}
\end{figure}
}{}
%Sequences whose observed frequencies are consistent with a Markovian gate set are shown in gray, with darker shades indicating greater inconsistency with the estimated gate set. Data shown in red are \emph{not} consistent with a Markovian gate set. It may appear contradictory to say that (a) gray is ``consistent" with Markovian, but (b) darker shades indicate ``greater inconsistency". The resolution is that the $\chi^2$ values quantify inconsistency with the model, \emph{but} they themselves are also subject to random fluctuations. Therefore, even if the data are perfectly consistent with the model, we expect to see (for example) a single $\chi^2_s \geq 10$ once per each 638 experiments. Observing $\chi^2_s \geq 10$ for any given sequence does suggest that the data from $s$ were relatively surprising, but we also expect to see one such fluctuation if there are more than about 600 experiments. The gray/red threshold is chosen based on the total number of sequences so that \emph{if} the data are perfectly Markovian, then the probability of one or more experiments being colored red is only \putfield{linlg_pcntle}{X}\%.
% Identifying patterns and trends within such ``pixel plots'' can aid in identifying specific sources and types of non-Markovian noise which may be to blame if the GST algorithms are unable to produce a ``good'' estimate. For example, it is often the case that all the short sequences [$L = O(1)$] can be fit reasonably well, but the right-hand side of Figure \ref{final_model_fit_colorbox_plot} becomes a sea of red. This indicates that non-Markovian behavior (potentially due to slow drift of gate set parameters) is becoming more significant for longer experiments. In other cases, a single row may be particularly bad, indicating that a particular gate or germ is especially problematic (e.g., was not stabilized using dynamical decoupling techniques). Be cautious in debugging, however -- sometimes bad $\log(\mathcal{L})$ values for a particular gate or germ can result \emph{not} from faults in that operation, but because another operation failed so badly that it distorted the entire fit (e.g., in trying to fit catastrophically non-Markovian data at Point A, GST ended up failing to fit perfectly good data at Point B).
\iftoggle{ShowScaling}{
\subsection{Model Violation Analysis: Robust data scaling}
GST datasets are often \emph{very} inconsistent with the Markovian gateset model. This is relatively unsurprising, and means only that real qubits often drift or experience other forms of noise that aren't stationary and Markovian. But this "voids the warranty" on GST's results, at least in principle. The properties of the estimated gates usually appear to be meaningful anyway, but when the model is violated, normal methods for generating \emph{error bars} become radically overoptimistic. As a partial remedy for this, pyGSTi can be configured to generate "robust" analyses of model-violating data, by artificially deprecating data that are inconsistent with the fit (a variant of some robust statistics methods).
% Toggle descriptive text
\iftoggle{CombineRobust}{
This section shows several important quantities. Before describing these, however, it is important to note that \textbf{all of the other model violation figures show you the model violation \emph{before} any data deprecation was performed. \emph{This section} shows the model violation \emph{after} the data deprecation}, and so, by construction, the fit metrics shown here should always look pretty good. The first several figures replicate those of the other model violation tabs (except for the \emph{post-scaled} data!), and the final plot shows how much each individual experiment (circuit) was deprecated (essentially, by throwing out many of the counts for that circuit while keeping the overall observed frequencies constant). When a figure shows up as ``N/A'' then it means that the currently-selected estimate has not been deprecated at all, and so there's nothing to show.
}{
This section shows how much each individual experiment (circuit) was deprecated (essentially, by throwing out many of the counts for that circuit while keeping the overall observed frequencies constant). When a figure shows up as ``N/A'' then it means that the currently-selected estimate has not been altered at all.
}
% Toggle showing of post-scaling plots
\iftoggle{CombineRobust}{
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_progress_bar_plot_scl}{}
\end{adjustbox}
\caption{\textbf{SCALED Model violation summary.} This figure is about goodness-of-fit. This plot shows how well this estimate fits the data. PyGSTi finds the maximum value of the loglikelihood ($-2\log\mathrm{Pr(data|gateset)}$), and compares it to what we expect to see \emph{if} the data were generated by a Markovian gateset. In this plot, each bar shows by how many standard deviations the \emph{actual} log-likelihood exceeds its expected value. Expected values and standard deviations are derived from $\chi^2$ theory. On the horizontal axis, $L$ indexes different ML estimates based on datasets including only circuits of length up to $L$. Low values indicate better fits (less model violation). Each bar is colored according to the ``star'' rating shown in the Model Violation tab.\label{final_model_fit_progress_bar_plot_scl}}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_histogram_scl}{}
\end{adjustbox}
\caption{\textbf{SCALED Histogram of per-circuit model violation} This figure is about goodness-of-fit. When the estimate doesn't fit the data perfectly, we can quantify how badly it fails to predict each individual circuit in the dataset, using the excess loglikelihood ($-2\log\mathrm{Pr}(\mathrm{data}|\mathrm{gateset})$) above and beyond the minimum value ($-2 \log \mathrm{Pr}(\mathrm{data}|\mathrm{observed\ frequencies})$). This plot shows a histogram of those values for all the circuits in the dataset. Ideally, they should have the $\chi^2$ distribution shown by the solid line. Red indicates data that are inconsistent with the model at the 0.95 confidence level, as shown in more detail in the Model Violation tab.}
\end{center}
\end{figure}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_progress_table_scl}{logL progress table will be placed here}
\end{adjustbox}
\caption{\textbf{SCALED Detailed overall model violation.} This table provides a detailed look at how the observed model violation -- defined by how badly the GST model fits the data -- evolves as more and more of the data are incorporated into the fit. PyGSTi fits the data iteratively, starting by just fitting data from the shortest circuits ($L=1$), and then adding longer and longer sequences. Each subset of the data, defined by its maximum sequence length $L$, yields an independent fit that is analyzed here. The key quantity is the difference between the observed and expected maximum loglikelihood ($\log(\mathcal{L})$). If the model fits, then $2\Delta\log(\mathcal{L})$ should be a $\chi^2_k$ random variable, where $k$ (the degrees of freedom) is the difference between $N_S$ (the number of independent data points) and $N_p$ (the number of model parameters). So $2\Delta\log(\mathcal{L})$ should lie in $[k-\sqrt{2k},k+\sqrt{2k}]$, and $N_\sigma = (2\Delta\log(\mathcal{L})-k)/\sqrt{2k}$ quantifies how many standard deviations it falls above the mean (a $p$-value can be straightforwardly derived from $N_\sigma$). The rating from 1 to 5 stars gives a very crude indication of goodness of fit.\label{final_model_fit_progress_table_scl}}
\end{center}
\end{table}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_colorscatter_plot_scl}{}
\end{adjustbox}
\caption{\textbf{SCALED Per-circuit model violation vs. circuit length} The fit's total $2\Delta\log(\mathcal{L})$ is a sum over all $N_s$ circuits used for GST. This plot shows $2\Delta\log(\mathcal{L})$ for each individual circuit, plotted against that circuit's length (on the X axis). Certain forms of non-Markovian noise, like slow drift, produce a characteristic linear relationship. Note that the length plotted here is the \emph{actual} length of the circuit, not its nominal $L$.}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_fit_colorbox_plot_scl}{Box plot of best gate set logL}
\end{adjustbox}
\caption{\textbf{SCALED Per-sequence model violation box plot.} This plot shows the $2\Delta\log(\mathcal{L})$ contribution for each individual circuit in the dataset. Each box represents a single gate sequence, and its color indicates whether GST was able to fit the corresponding frequency well. Shades of white/gray indicate typical (within the expected) values. Red squares represent statistically significant evidence for model violation (non-Markovianity), and the probability that \emph{any} red squares appear is \putfield{linlg_pcntle}{}\% when the data really are Markovian. Each square block of pixels (``plaquette'') corresponds to a particular germ-power ``base sequence'', and each pixel within a block corresponds to a specific ``fiducial pair'' -- i.e., choice of pre- and post-fiducial sequences. The base sequences are arranged by germ (varying from row to row), and by power/length (varying from column to column). Hovering over a colored box will pop up the exact circuit to which it corresponds, the observed frequencies, and the corresponding probabilities predicted by the GST estimate of the gateset.\label{final_model_fit_colorbox_plot_scl}}
\end{center}
\end{figure}
}{}
\begin{figure}
\begin{center}
\putfield{data_scaling_colorbox_plot}{}
\caption{\textbf{Data scaling factor for each circuit in the dataset.} Each colored box represents a single experiment (circuit), arranged in the same way as in other related tabs. A circuit's color indicates how much the original data counts were scaled down when they were used to compute the log-likelihood or $\chi^2$ for this estimate (and its error bars). A white box (value 1.0) indicates that all of the original data was used, because that circuit was not originally seen to be inconsistent with the fit. On the other hand, gray or black boxes (numbers between 0 and 1) indicate that the total number of counts for that circuit was scaled down (multiplied by the given factor) to reduce its significance, and therefore that circuit's inconsistency with the fit. Generally, the only circuits scaled down are those deemed significantly inconsistent in the original (unscaled) fit.
}
\end{center}
\end{figure}
}{}
\section{Gauge-invariant Error Metrics}
GST can estimate gates \emph{up to an overall gauge}. PyGSTi tries to find a good gauge in which to report process matrices and gauge-variant metrics like fidelity -- but sometimes this goes wrong. The most reliable error metrics and gate properties are \emph{gauge-invariant} ones, and these are listed in this section.
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_vs_target_table}{}
\end{adjustbox}
\caption{\textbf{RB error metrics} This table shows estimates for the error rate that would be obtained using two different Randomized Benchmarking (RB) protocols. The Clifford RB number corresponds to the most standard form of RB, Clifford RB (CRB), where random Clifford gate sequences are performed. This number is dependent on how the Clifford operations are compiled into the primitive gates, and so if you didn't specify a Clifford compilation and pygsti couldn't deduce one, this quantity will be absent. Note that this is the error rate per-Clifford; it has not been rescaled to a per-primitive error rate. The primitive RB number corresponds to performing RB on random sequences of the primitive gates, rather than the Cliffords, which is known as ``Direct RB'' (DRB). DRB allows for sampling layers of primitives according to a general probability distribution over the primitive gates; the number reported here corresponds to uniformly sampling the primitive gates. This number does not require any compilation table and is always computed by pyGSTi. Two caveats regarding these RB numbers: 1) The primitive RB number is not meaningful for arbitrary gate sets; if the gate set generates the Clifford group or it is a universal gate set then it is definitely meaningful, modulo the second caveat. 2) These predicted RB numbers rely on a perturbative technique, and if the estimated gates are far from their ideal counterparts the predicted numbers may be very inaccurate (and the empirical RB error rate itself may even be ill-defined: the RB decay could be non-exponential). For both of these RB protocols there is also more than one definition of the RB number, as a function of the $p$ obtained from fitting RB data to $A + Bp^m$. Here we use the definition $r = (4^n - 1)(1-p)/4^n$ for an $n$-qubit gate set, which means that $r$ = entanglement infidelity = 1/2 diamond distance if there are uniform depolarizing errors on all the gates (where these two quantities are w.r.t. 
the gate set benchmarked, so the Clifford gates for CRB and the primitive gates for DRB). For more general errors, these first two quantities will often be roughly equal, although that is not guaranteed. Note that these numbers should not be directly compared to RB numbers derived using the commonly-used alternative formula $r = (2^n - 1)(1-p)/2^n$ (which is related to average gate infidelity, rather than entanglement infidelity).}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_spam_parameters_table}{}
\end{adjustbox}
\caption{\textbf{SPAM probabilities.} This table shows estimated SPAM probabilities for each measurement outcome. These are computed as $\mathrm{Tr}[\rho E_i]$, where $\rho$ is an estimated initial state (often labelled $\rho_0$), and $\{E_i\}$ is the estimated \emph{n}-outcome POVM. The symbol $E_C$ denotes the \emph{n}th POVM effect, which is not allowed to vary freely but is defined by subtracting the sum of the other effects (which \emph{are} freely varied) from the identity.}
\end{center}
\end{table}
\begin{figure}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{gram_bar_plot}{}
\end{adjustbox}
\caption{\textbf{Gram matrix spectrum} The GST Gram matrix is not a standard error metric, but it is gauge-invariant and critical to the GST process. It provides some insight into generalized SPAM. It is the (estimated) matrix of inner products between all the input states prepared by the various preparation fiducials, and all the measured effects prepared by the various measurement fiducials. LGST involves inverting the Gram matrix, so it needs to be full rank. In the plot, each pair of bars shows the \emph{n}th eigenvalues of the estimated Gram matrix and the Gram matrix predicted by the ideal targets (respectively). Larger eigenvalues indicate better sensitivity, and the number of non-zero values indicates the dimension of the state (density matrix) space being probed (e.g., for a single qubit, the Gram matrix should have 4 $O(1)$ eigenvalues).}
\end{center}
\end{figure}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_vs_target_table_gauge_inv}{}
\end{adjustbox}
\caption{\textbf{Spectral error metrics between estimated gates and ideal targets} This table presents a variety of gauge-invariant quantities that quantify the distance or discrepancy between (1) an estimated gate, and (2) the ideal corresponding target operation. Each of these error metrics depends \emph{only} on a specific gate's spectrum (eigenvalues), which are gauge-invariant and non-relational (i.e., they pertain to a single gate). Hovering over a column header will pop up a mathematical description of the corresponding metric.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_eigenvalue_table}{}
\end{adjustbox}
\caption{\textbf{Eigenvalues of estimated gates.} This table lists the spectrum of each estimated gate. It also breaks out the real and imaginary parts of each eigenvalue, \emph{and} it compares the estimated eigenvalues to those of the ideal target gates in several useful ways. To do these comparisons, each estimated eigenvalue needs to be matched up with a target eigenvalue, and pyGSTi does this independently for each metric by computing a minimum-weight matching based on that metric. Hovering over a column header will pop up a mathematical description of the corresponding metric.}
\end{center}
\end{table}
\iftoggle{BrevityLT3}{
\subsection{Gauge Invariant Error Metrics applied to germ sequences} % Germs detail
All of the per-gate gauge-invariant metrics of the prior section are functions of each gate's spectrum and do not account for how the gate relates to other gates. In an attempt to extract some of that information in a gauge-invariant way, this section looks at the spectra of the germ-sequences. Each germ amplifies (i.e. has eigenvalues which correspond to) certain directions in ``gate-set space''. Some of these directions describe how the single gates relate to one another, and, if an amplificationally complete set of germs was used, \emph{every direction is amplified by at least one germ}. This implies that the (gauge-invariant) spectra of the germs should constitute a full description of the gate set. The tables of this section compare each germ-spectrum to the spectrum of that germ if it were generated using the set of ``eigenspace-projected'' gates obtained by placing each gate's GST-estimated eigenvalues within the eigenbasis of the ideal target gate.
\begin{table}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_vs_target_table_gauge_invgerms}{}
\end{adjustbox}
\caption{\textbf{Discrepancy between germs and spectral gates.} This table requires some explaining. It tries to answer the following question: ``Is it plausible that each gate has \emph{only} spectral errors, so that its eigenvectors are exactly correct?''. GST can more or less directly estimate each gate's spectrum, because that's gauge-invariant. But gate eigen\emph{bases} are relational to other gates and gauge-variant. To infer them, GST basically does precise spectrum estimation on \emph{germs} that incorporate multiple gates. Each germ's estimated spectrum is shown elsewhere. \emph{This} table compares each germ's estimated spectrum to the spectrum it \emph{would} have if each individual gate had the eigenvalues that GST estimated for it, but exactly the right (target) eigenbasis. This is the ``eigenspace-projected'' estimate of that gate. Comparing the estimated germ spectra with those predicted by the eigenspace-projected gates yields gauge-invariant metrics of how much of the overall error can be attributed to purely spectral errors in each gate. If the estimated-gate eigenvalues account for everything, all of the discrepancy values in this table would equal zero.}
\end{center}
\end{table}
\begin{table}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{germs_eigenvalue_table}{}
\end{adjustbox}
\caption{\textbf{Eigenvalues of estimated germs} GST directly estimates each gate's spectrum (it's gauge-invariant), but the gates' eigen\emph{bases} are relational to other gates and gauge-variant. GST infers them from the spectra of \emph{germs} that incorporate multiple gates. Each germ's spectrum is gauge-invariant and directly estimable, and this table lists them. It also lists metrics that compare these spectra to the ones predicted by the ``eigenspace-projected'' gates (see elsewhere on this tab). If the individual gates' eigenvalues account for all imperfections, then the estimated and predicted germ spectra should be equal. Since spectra aren't ordered, the eigenvalues need to be matched up or aligned somehow. PyGSTi does this by identifying a minimum-weight matching based on the metric being computed. Mathematical descriptions of the metrics appear when hovering over the column headers.}
\end{center}
\end{table}
}{}
\section{Gauge-dependent Error Metrics}
This section provides a variety of common (and uncommon) error metrics derived from the estimated gate set. All of these quantities are ``gauge-dependent'', which means two things. First, they aren't directly physically measurable, so they can't map directly to observable error rates. Second, they are only as reliable (as diagnostics) inasmuch as pyGSTi is able to pick a sensible gauge (reference frame) in which to report gates. PyGSTi does this by first finding an estimate based on the data (and ignoring gauge entirely), then varying over all possible representations of those gates (gauges) to minimize a measure of the gates' implausibility (distance from the targets, combined with violation of positivity). This measure has parameters -- e.g. the weights placed on different gates -- which are shown in Table \ref{bestGatesetGaugeParamsTable}.
%Generally, the first thing that you want to know is ``How far from ideal are the gates?''. To answer this, the report tabulates several well-known definitions of distance. Table \ref{final_model_vs_target_table} lists the discrepancy from each estimated gate to its corresponding target, as measured by:
%\begin{enumerate}
%\item \textbf{Process infidelity}. Infidelity is simply $1-F$, where $F$ is the \emph{fidelity}. The process fidelity between quantum processes $G_a$ and $G_b$ is given by $F = \Tr\left( \sqrt{ \sqrt{\chi_a} \chi_b \sqrt{\chi_a} } \right)^2$, where $\chi_a$ and $\chi_b$ are the Jamiolkowski states (normalized Choi process matrices) corresponding to gate matrices $G_a$ and $G_b$ respectively. If the target is unitary (as is often the case), $F = \Tr\left( \chi_a \chi_b \right)$. Process infidelity is roughly what is measured in randomized benchmarking protocols; it quantifies the \emph{incoherent} error rate if coherent errors (e.g. over-rotations) are not allowed to accumulate.
%\item \textbf{Trace distance}. This is the \emph{Jamiolkowski trace distance} between the Jamiolkowski states corresponding to the two processes: $d_{tr} = \vert\chi_a - \chi_b\vert_1 = \Tr\left(\sqrt{(\chi_a-\chi_b)^2}\right)$. This distance is useful primarily as a proxy for the \emph{diamond norm distance}, because $d_{tr} \leq d_{\diamond} \leq \mathrm{dim}(\mathcal{H}) d_{tr}$.
%\item \textbf{Diamond Norm}. The diamond norm between two quantum processes $G_a$ and $G_b$ is given by $\norm{G_a - G_b}_\Diamond = \sup_\rho \norm{(G_a \otimes I_k)(\rho) - (G_b \otimes I_k)(\rho)}_1$, where $I_k$ is the $k$-dimensional identity operation, $\norm{\cdot}_1$ denotes the trace norm, and the supremum is taken over all $k \ge 1$ and density matrices $\rho$ of dimension $nk$, with $n$ the dimension of $G_a$ and $G_b$. The diamond norm is also called the \emph{completely bounded trace norm}, and plays the analogous role for quantum process distinguishability that the trace norm plays for density matrices. Specifically, the optimal probability of distinguishing $G_a$ from $G_b$ after a \emph{single evaluation} is given by $\frac{1}{2} + \frac{1}{4}\norm{G_a - G_b}_\Diamond$. The diamond norm distance is an upper bound on the rate of error under any possible circumstance (including coherent accumulation of errors) and is often used in proofs of fault tolerance. For gates dominated by coherent/unitary error, it is common to see $d_{\diamond} \approx \sqrt{1-F}$. For gates dominated by incoherent error, $d_{\diamond} \approx 1-F$.
%%\item \textbf{Frobenius-norm distance}. The Frobenius norm distance between two gates $G_a$ and $G_b$ is simply $d_F = \sqrt{\Tr\left[\left(G_a-G_b\right)^2\right]}$. It has no known \emph{operational} interpretation, but is very convenient as a rough measure of inaccuracy. It is also equal to the sum of the RMS errors in the individual matrix elements of the gates.
%\end{enumerate}
%Table \ref{final_model_spam_vs_target_table} shows a similar comparison using the standard state infidelity and trace distance between the estimated and ideal state preparations and POVM effects.
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_gaugeopt_params_table}{Table of gauge optimization parameters}
\end{adjustbox}
\caption{\textbf{Gauge Optimization Details}. A list of the parameters used when performing the gauge optimization that produced the final GST results found in subsequent tables and figures.\label{bestGatesetGaugeParamsTable}}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_spam_vs_target_table}{}
\end{adjustbox}
\caption{\textbf{SPAM error metrics} This table presents (gauge-variant) metrics that quantify errors in the \emph{SPAM operations} -- the estimated initial state preparation[s] and POVM measurement -- with respect to the ideal target operations. A description of each metric can be found by hovering the pointer over the column header.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_vs_target_table_gauge_var}{}
\end{adjustbox}
\caption{\textbf{Individual gate error metrics} This table presents various (gauge-variant) metrics that quantify errors in each individual estimated logic gate, with respect to the ideal target gates. Note that ``Entanglement infidelity'' and ``Average gate infidelity'' are two common definitions of process fidelity, and related by a constant dimensional factor. A description of each metric can be found by hovering the pointer over the column header.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_vs_target_table_gauge_vargerms}{}
\end{adjustbox}
\caption{\textbf{Per-germ error metrics} This table presents various (gauge-variant) metrics that quantify errors in the estimated \emph{germs}, with respect to their ideal target counterparts (as computed from the ideal target gates). A description of each metric can be found by hovering the pointer over the column header.}
\end{center}
\end{table}
\subsection{Raw Estimates\label{sec_raw_estimates}}
This section shows the raw GST estimates \textendash\ the density matrices, POVM effects, and process matrices that comprise the estimated gateset. These are explicitly gauge-dependent, and most reports include multiple ``gauge optimizations'' that correspond to slightly different (but physically equivalent and indistinguishable) representations of the operations. Usually, these raw estimates are less useful than the derived properties and decompositions shown elsewhere, but sometimes it's useful to see them. Furthermore, it's possible (at least in some versions of the report) to download the raw gateset in machine-readable form, in order to do calculations and simulations with it.
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_brief_spam_table}{}
\end{adjustbox}
\caption{\textbf{Estimated SPAM operations.} This table presents the GST-estimated SPAM operations \textendash\ the initial state, as a density matrix, and the terminating measurement as a POVM \textendash\ and compares them to the corresponding ideal target operations. All of these matrices should be positive semidefinite, so their eigenvalues are shown to provide a quick diagnostic.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_gates_box_table}{}
\end{adjustbox}
\caption{\textbf{Estimated logic gate operations.} This table presents the GST-estimated logic gates \textendash\ represented as process matrices (aka Liouville superoperators or Pauli transfer matrices) \textendash\ and compares them to the \emph{ideal} (generally unitary) ``target'' logic gates. Each gate is represented as a $d^2\times d^2$ \emph{superoperator} that acts by matrix multiplication on vectors in the vector space $\mathcal{B}(\mathcal{H})$ of operators. Matrices are displayed using a heat map that ranges between 1.0 (\textcolor{red}{red}) and -1.0 (\textcolor{blue}{blue}), and hovering the pointer over a matrix element will pop up its precise numerical value. Note that it is impossible to discern even order-1\% deviations from the ideal in this view; that's what other analyses (especially the Gate Error Generators tab) are for.}
\end{center}
\end{table}
\subsection{Gate Decompositions}
This section presents several ``decompositions'' of the process matrices shown in section \ref{sec_raw_estimates}. These are derived properties of the process matrices that aren't directly interpretable as error metrics, but help understand both the overall observed/estimated behavior of the gates \emph{and} how they differ from the targets. Note that the tables here do not compare the estimated gates' properties directly to those of the ideal targets and that these properties are all at least mildly gauge-dependent.
\begin{table}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_decomposition_table}{}
\end{adjustbox}
\caption{\textbf{Decomposition of estimated gates.} This table attempts to describe each gate as a rotation operator (this interpretation is more reliable for single qubits than for other systems). From each gate, a rotation axis and angle are extracted by considering the projection of its logarithm onto the Pauli Hamiltonian projectors. The rotation axis and angle are (respectively) given by the direction and the magnitude (up to a conventional constant) of this projected logarithm. In other words, the ``rotation axis'' is basically the Hamiltonian that generated the gate. The angles between the various gates' rotation axes are computed from the dot products between them.}
\end{center}
\end{table}
%Finally, Table \ref{bestGatesetChoiTable} presents the spectrum of each estimated gate's \emph{Choi matrix}. The Choi matrix (sometimes ambiguously referred to as the ``process matrix'') is an alternative way to describe a process. We usually prefer the ``superoperator representation'', which has the very useful property that the process matrix corresponding to applying $G_a$ and then $G_b$ is simply $G_bG_a$. This is completely false for the Choi representation. Nonetheless, the Choi representation is often useful, so we present it here -- but without a detailed discussion of its properties (see, e.g. the textbook by Nielsen and Chuang).
%The Choi matrix $\chi(G)$ for a gate $G$ can be simply understood in either of two ways. First, it is equivalent (up to choice of basis) to the \emph{Jamiolkowski state} defined by applying $G$ to one half of a maximally entangled bipartite state. Second, it is the general (non-diagonal) form of the well-known Kraus representation, $G[\rho] = \sum_i{K_i\rho K_i^\dagger}$. The Choi matrix behaves in many ways like a quantum state, and appears naturally in expressions for the process fidelity and Jamiolkowsi trace distance just as density matrices would enter these expressions when computing differences between states.
%Additionally, the condition of \emph{complete positivity} or CP (which all real quantum processes must satisfy) is simply the positivity of the Choi matrix. Thus, negative eigenvalues in Table \ref{bestGatesetChoiTable} indicate that the estimate violates complete positivity. If they are very small, they may simply indicate statistical fluctuations (unitary gates have $\chi$ matrices with zero eigenvalues, so any small fluctuation is likely to violate CP). If they are large, they serve as a warning that (1) the model of CPTP maps is probably violated (usually because of non-Markovian behavior), and (2) this estimate may produce negative or greater-than-unity probabilities. GST does \emph{not} generally impose complete positivity, precisely because violation of CP is a warning flag for non-Markovian behavior (which is very common in experimental qubits).
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_choi_eigenvalue_table}{Best gate set's choi matrix table will be placed here}
\end{adjustbox}
\caption{\textbf{Spectra of estimated gates' Choi representation}. These plots show, for each gate, the eigenvalues of that gate's Choi-Jamiolkowski representation. Every completely positive (CP) map has a non-negative Choi spectrum, so any negative eigenvalues (shown in \textcolor{red}{red}) indicate that the estimate violates positivity. If a gate is perfectly unitary, its Choi spectrum will be rank-1, and real-world gates often have many Choi eigenvalues that are very close to zero. Therefore, although negative eigenvalues indicate that the estimate is non-physical, this can easily stem from statistical fluctuations. If statistically significant, though, it usually indicates either non-Markovianity or a failed gauge optimization.\label{bestGatesetChoiTable}}
\end{center}
\end{table}
\def \logTiG {logTiG}
\def \logGTi {logGTi}
\def \logdiff {logG-logT}
\def \errorgentype {\putfield{errorgenType}{error-gen-type}}
\ifx \errorgentype \logTiG
\def \errorgenformula {$G = G_0 e^{\mathbb{L}}$}
\def \errorgendescription {\emph{pre-gate} generator, so it answers the question ``If all the noise occurred \emph{before} the ideal gate, what Lindbladian would generate it?''}
\else \ifx \errorgentype \logGTi
\def \errorgenformula {$G = e^{\mathbb{L}} G_0$}
\def \errorgendescription {\emph{post-gate} generator, so it answers the question ``If all the noise occurred \emph{after} the ideal gate, what Lindbladian would generate it?''}
\else \ifx \errorgentype \logdiff
\def \errorgenformula {$G = e^{\mathbb{L} + \log G_0}$}
\def \errorgendescription {\emph{during-gate} generator, so it answers the question ``What Lindblad-type generator would produce this noise if it acted continuously \emph{during} the gate?'' Note that this does \emph{not necessarily} give insight into physics producing the noise.}
\else
\def \errorgenformula {???}
\def \errorgendescription {???}
\fi
\fi
\fi
\subsection{Gate Error Generators}
This section presents the \emph{error generators} for each of the estimated gates. Although these are not especially well-known in the literature, they are (in the pyGSTi authors' opinion) the most useful detailed diagnostic for gate errors. The error generator $\mathbb{L}$ for a noisy gate $G$ with ideal target $G_0$ is defined by writing \errorgenformula. It can be thought of, more or less, as a Lindbladian superoperator that generates the error in the gate \textendash\ with two caveats. First, it is not necessarily of strict Lindblad form, because the GST-estimated gates may not be CP, and because even if they are, not every CP map is ``divisible'' (and nondivisible maps are not generated by Lindblad evolution). Second, the generator reported here is a \errorgendescription Finally: the error generators are very definitely gauge-dependent, so \emph{caveat emptor} (cross-validating any inferences drawn from these generators with some sort of gauge-invariant diagnostic is highly recommended).
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{final_model_errorgen_box_table}{Best gate set error generator table will be placed here}
\end{adjustbox}
\caption{\textbf{Logic gate error generators.} The \textbf{first column} displays a heat map of the estimated ``error generator'' for each gate. This is (more or less) the Lindbladian $\mathbb{L}$ that describes \emph{how} the gate is failing to match the target. This error generator is defined by the equation \errorgenformula. If it is zero, the estimated gate matches the corresponding ideal target gate. Note that the range of the \textcolor{red}{color} \textcolor{blue}{scale} is dynamically adjusted. \textbf{Subsequent columns} show the result of projecting each generator onto some subspaces of the error generator space. Each corresponds to a different class of well-known errors: Hamiltonian (coherent) errors, Pauli-stochastic errors, and affine (aka non-unital) errors. The Hamiltonian generators act by commutation with each Pauli basis element $B_i$, that is $\rho \rightarrow -i[B_i, \rho]$. Stochastic generators act by conjugation with each basis element, $\rho \rightarrow B_i \rho B_i^\dagger$. Affine generators act by projecting everything onto a particular basis element, $\rho \rightarrow \mathrm{Tr}(\rho) B_i$. Roughly speaking, the Hamiltonian projection corresponds precisely to the Hamiltonian that would produce the coherent part of the error, while the Pauli-stochastic generators correspond to the rates of all the Pauli errors (e.g., X errors, Z errors, their 2-qubit counterparts, or whatever is appropriate for the system being analyzed).\label{bestGatesetErrGenTable}}
\end{center}
\end{table}
}{} %end iftoggle{BrevityLT4}
\iftoggleverb{BrevityLT2} % usual iftoggle won't work b/c of enclosed verbatims
% ends at \fi below
\section{Input Reference}
This section presents a grab bag of potentially-useful information about the target gate set and the data set(s), and is primarily useful to verify that the inputs were correct.
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{target_spam_brief_table}{}
\end{adjustbox}
\caption{\textbf{Ideal SPAM operations.} The \emph{ideal} ``target'' state preparations ($\rho_i$) and POVM effects $E_i$ for the device analyzed in this report. SPAM (state preparation and measurement) operations are given as $d\times d$ matrices in the standard (matrix unit) basis of Hilbert space.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{fiducial_list_table}{}
\end{adjustbox}
\caption{\textbf{Fiducial circuits.} A list of the preparation and measurement ``fiducial'' circuits. These circuits precede and follow, respectively, the potentially long germ-to-some-power portion of GST gate sequences, and generate informationally complete input/output ensembles from the native state preparation[s] and measurement.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{germ_list_2col_table}{}
\end{adjustbox}
\caption{\textbf{Germs.} A list of the ``germ'' circuits used in this experiment. Germs are relatively short circuits that get repeated (perhaps many times) in order to amplify various types of qubit errors. The list of germs is often chosen so that \emph{all} possible types of in-model errors are amplified by \emph{at least one} germ. Note: it's generally impossible to find a single germ that amplifies all possible errors, except when the gate being studied is the identity operation.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{dataset_overview_table}{}
\end{adjustbox}
\caption{\textbf{Dataset properties.} This table lists various properties of the data set used in the analysis. It is useful for sanity checks.}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{target_gates_box_table}{}
\end{adjustbox}
\caption{\textbf{Ideal logic gates.} The \emph{ideal} ``target'' (generally unitary) logic gates. Each has a name starting with ``G'', and is represented as a $d^2\times d^2$ \emph{superoperator} that acts by matrix multiplication on vectors in $\mathcal{B}(\mathcal{H})$. Matrices are displayed using a heat map that ranges between 1.0 (red) and -1.0 (blue).}
\end{center}
\end{table}
\section{System and pyGSTi parameters\label{metadata}}
This section contains a raw dump of system information and various pyGSTi parameters. Its purpose is to stamp this report with parameters that indicate how exactly GST was run to create it, as well as to record the software environment within which the report creation was run. However, if the core GST computation was done on a different computer, then the software information contained here will be less useful (it describes the environment in which the report was generated, not the one in which the estimate was generated).
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{metadata_table}{meta data table will be placed here}
\end{adjustbox}
\caption{\textbf{Listing of GST parameters and meta-data.} These parameters and related metadata describe how the GST computation that produced this report was performed. \label{metadata_table}}
\end{center}
\end{table}
\begin{table}[h]
\begin{center}
\begin{adjustbox}{max width=\textwidth}
\putfield{software_environment_table}{software environment table will be placed here}
\end{adjustbox}
\caption{\textbf{Listing of the software environment.} This table describes the software environment of the machine used to generate this report. It may not describe the machine used to perform the core GST gate set estimation.\label{software_environment_table}}
\end{center}
\end{table}
\fi
\end{document}