# Read summary file of simulations and plot results   

After all simulations (`SIXTE` + `xifusim`) have finished, a summary file is saved with all the information.  
This notebook reads that file and plots:
1. **Histogram** of the fraction of lost photons: is it 'normally' distributed? Can a mean value and a standard deviation be assigned for that flux?  
   It is important to establish a simulation time that produces a large enough number of close pairs that could give rise to pile-up. Here we check that the fractions obtained in the different simulations are 'normally' distributed, so that a mean value can finally be obtained.

In [None]:
# import modules for plotting and data manipulation
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.visualization import hist
from scipy.stats import anderson, shapiro

In [None]:
# Version tag of the simulation configuration. It is used as the directory prefix
# both for the input CSV summaries and for the figures written by this notebook.
config_version = "v5_20250621"  # previously used: "v3_20240917"

## Read CSV files for all fluxes and store full information

In [None]:
# Collect every summary table (info*.csv) stored under the configuration directory
files = glob.glob(f'{config_version}/info*.csv')
nfiles = len(files)
# The flux of each table is taken from the first row of its 'flux[mcrab]' column
# (`fluxes` keeps the order in which glob returned the files)
fluxes = [pd.read_csv(csv_path)['flux[mcrab]'].values[0] for csv_path in files]
# Reorder the file list by ascending flux (equal fluxes are ordered by file name)
files = [csv_path for _flux, csv_path in sorted(zip(fluxes, files))]


# Mosaic of histograms of the fraction of lost photons: one panel per CSV file.
# The panels are laid out on 3 columns with as many rows as needed, so any number
# of input files is supported (9 files give the 3x3 mosaic of 20x20 inches).
ncols = 3
nrows = max(1, int(np.ceil(nfiles / ncols)))
fig, axs = plt.subplots(nrows, ncols, figsize=(20, 20 * nrows / 3), squeeze=False)
# squeeze=False guarantees a 2-D array of axes, so flatten() always works
ax = axs.flatten()
pm_str = u"\u00B1"  # plus-minus sign used in the text box

# loop over files (one per flux)
for i in range(nfiles):
    file = files[i]
    # read data from csv file
    df = pd.read_csv(file)
    # one row per simulation: simulation identifier and fraction of lost photons
    sim = df["simulation"]
    frac = df["fraction_lost[%]"]
    # per-file settings are read from the first row of the table
    exposure = df['exposure[s]'].values[0]
    # named `filter_name` so that the builtin `filter` is not shadowed
    filter_name = df['filter'].values[0]
    focus = df['focus'].values[0]
    flux_mcrab = df['flux[mcrab]'].values[0]
    # averages over all the simulations of this file (truncated to integers)
    Nimpacts = int(np.mean(df['Nimpacts'].values))
    Nmissing = int(np.mean(df['Missing'].values))
    nsims = len(sim)

    # histogram (in the corresponding panel) of the fraction of lost photons
    hist(frac, ax=ax[i], bins='scott', color="blue", alpha=0.7)
    ax[i].set_xlabel("Fraction of lost photons [%]")
    ax[i].set_ylabel("Number of simulations")
    mean = np.mean(frac)
    std = np.std(frac)

    # Shapiro-Wilk normality test: normality is rejected unless p > 0.05
    # (written as `not p > 0.05` so that a NaN p-value is also reported)
    stat, p = shapiro(frac)
    if not p > 0.05:
        print(f"For flux {flux_mcrab}: The fraction of lost photons is NOT normally distributed (shapiro test)")

    # Anderson-Darling normality test: the statistic is compared with the third
    # critical value (5% significance level for the default 'norm' distribution)
    result = anderson(frac)
    normality = True
    if not result.statistic < result.critical_values[2]:
        print(f"For {flux_mcrab}: The fraction of lost photons is NOT normally distributed (anderson test)")
        normality = False

    # text box summarising the simulation settings and the statistics of the panel
    boxtext = (f"Flux: {flux_mcrab}mCrab\nExposure time: {exposure} s\n"
               f"sims: {nsims}\n<Nimpacts>: {Nimpacts}\n"
               f"<Nmissing>: {Nmissing}\nFilter: {filter_name}\n"
               f"Focus: {focus}\nMean: {mean:.3f} {pm_str} {std:.3f}%\n"
               f"Anderson-Darling normality: {normality}\n")
    ax[i].text(0.45, 0.55, boxtext, fontsize=10, transform=ax[i].transAxes)

# hide the panels of the grid that did not receive any histogram
for unused_ax in ax[nfiles:]:
    unused_ax.set_visible(False)
plt.show()

In [None]:
# Save the mosaic of histograms as PNG and PDF.
# The output directory is created first: savefig does not create missing
# directories and would otherwise fail with FileNotFoundError.
normality_dir = f"{config_version}/Figures/normality"
os.makedirs(normality_dir, exist_ok=True)
# save the figure to PNG file
fig.savefig(f"{normality_dir}/histograms_normality.png", dpi=300)
# save the figure to PDF file
fig.savefig(f"{normality_dir}/histograms_normality.pdf", dpi=300)

### Plot distribution of fake pulses

In [None]:
# Count, for every flux, the total number of "extra" events reported in the
# simulation log files (<fluxDir>/sim_*.log).
#
# The logs are parsed in Python instead of piping `grep 'extra' | grep -v '0 (extra'`:
# that filter also discarded every count ending in 0 (10, 20, 100, ...), and the
# position of the number in the grep output depended on whether grep prefixed the
# file name (it does not when a single log file matches).
# NOTE(review): assumes the logs report the count as "<N> (extra ...", as implied
# by the original grep filter - confirm against a real log file.
extra_pattern = re.compile(r"(\d+)\s*\(extra")

extra_events = []
# sort fluxes in ascending order
fluxes = sorted(fluxes)
for flux in fluxes:
    fluxDir = f"flux{flux:.2f}mcrab"
    print(f"Analyzing dir {fluxDir}")
    # log files of all the simulations run for this flux
    logFiles = sorted(glob.glob(f"{fluxDir}/sim_*.log"))
    extra = 0
    for logFile in logFiles:
        # errors="replace": a stray non-decodable byte must not abort the count
        with open(logFile, errors="replace") as log:
            for line in log:
                match = extra_pattern.search(line)
                # lines reporting 0 extra events simply add nothing to the total
                if match:
                    extra += int(match.group(1))
    # append the number of extra events to the list (same order as `fluxes`)
    extra_events.append(extra)
    # print the number of extra events
    print(f"                         Flux {flux:.2f} mCrab: {extra} extra events")


In [None]:
# Bar chart with the total number of extra events found for each flux
fig, ax = plt.subplots(figsize=(10, 5))
# bars are placed at consecutive integer positions, one per flux
bar_positions = range(len(fluxes))
ax.bar(bar_positions, extra_events, color='blue', alpha=0.7)
ax.set_title("Number of extra events for each flux")
ax.set_ylabel("Number of extra events")
ax.set_xlabel("Flux [mCrab]")
# one tick per bar, labelled with the corresponding flux value
tick_labels = [f"{flux}" for flux in fluxes]
ax.set_xticks(bar_positions)
ax.set_xticklabels(tick_labels)

In [None]:
# Save the bar chart of extra events as PNG and PDF.
# The output directory is created first: savefig does not create missing
# directories and would otherwise fail with FileNotFoundError.
fake_detections_dir = f"{config_version}/Figures/normality"
os.makedirs(fake_detections_dir, exist_ok=True)
# save the figure to PNG file
fig.savefig(f"{fake_detections_dir}/fake_detections.png", dpi=300)
# save the figure to PDF file
fig.savefig(f"{fake_detections_dir}/fake_detections.pdf", dpi=300)