Notebook to read and process the MCMC outputs and write CSV files with the parameters of all bursts

In [None]:
%load_ext nb_black

In [None]:
import emcee
import numpy as np
from emcee.autocorr import AutocorrError
from tqdm.auto import tqdm

In [None]:
import pprint

import json

import pandas as pd

import corner
from chainconsumer import ChainConsumer

import glob

In [None]:
def radiometer(tsys, gain, bandwidth, time, npol=2):
    """Evaluate ``tsys / gain / sqrt(npol * bandwidth * time)``.

    The arguments are combined with plain arithmetic and ``np.sqrt``, so
    scalars and NumPy arrays are both accepted (arrays are handled
    elementwise).

    NOTE(review): units are not fixed by the code; presumably K, K/Jy, Hz
    and s giving Jy -- confirm against the callers.
    """
    sensitivity = tsys / gain
    independent_samples = npol * bandwidth * time
    return sensitivity / np.sqrt(independent_samples)

In [None]:
# Directory that holds the per-candidate MCMC outputs.
PATH = "121102_paper/mcmc_final/"

# A candidate ID is the chain file's base name with its last 11 characters
# (the length of "_samples.h5") removed.
h5_paths = glob.glob(PATH + "*.h5")
cids = [h5_path.rsplit("/", 1)[-1][:-11] for h5_path in h5_paths]

In [None]:
# Number of candidate IDs collected from the chain files.
len(cids)

In [None]:
# Figure out if tau has to be used or not; remove the samples that disagree
# with that choice (and the tau column itself when tau is not used).
def samples2params(samples, meta_info):
    """Convert one component's raw MCMC samples into labelled parameters.

    The ratio of column 4 to column 5 decides whether the tau column (5) is
    kept: if more than half of the rows have a ratio below 6, tau is used and
    only those rows are kept; otherwise column 5 is deleted and only rows
    with a ratio above 6 are kept.  The surviving columns are then rescaled
    with values read from ``meta_info``.

    Args:
        samples: 2-D array of flat samples for a single component.
            NOTE(review): the label list implies 7 columns (six parameters
            plus DM) -- confirm against the fitting code.
        meta_info: dict providing ``fileheader`` (``fch1``, ``native_foff``,
            ``native_tsamp``, ``tstart``), ``mask`` and ``nstart``.

    Returns:
        ``(samples, param_list, mask)``: the converted copy of the selected
        rows, the plot/column labels, and the boolean row mask relative to
        the rows of the input.
    """
    width_over_tau = samples[:, 4] / samples[:, 5]
    fraction = np.sum(width_over_tau < 6) / samples.shape[0]
    print(f"tau fraction {fraction:.3f}")

    use_tau = bool(fraction > 0.5)
    if use_tau:
        print("Using tau")
        mask = width_over_tau < 6
    else:
        mask = width_over_tau > 6
        samples = np.delete(samples, 5, 1)
    # Boolean indexing returns a copy, so the in-place updates below never
    # touch the caller's array.
    samples = samples[mask, :]

    header = meta_info["fileheader"]
    fch1 = header["fch1"]
    foff = header["native_foff"]
    tsamp = header["native_tsamp"]

    # Columns 0 and 1: channel units -> frequency via fch1 / native_foff.
    samples[:, 0] = fch1 + samples[:, 0] * foff
    samples[:, 1] *= np.abs(foff)

    # Column 2: scale by the radiometer noise over 2.355 * column 1
    # (presumably the FWHM of the spectral Gaussian -- confirm).
    noise = radiometer(27, 10, 2.355 * samples[:, 1] * 1e6, tsamp)
    n_unmasked = 64 - sum(meta_info["mask"])
    samples[:, 2] *= noise * 81.92e-3 / np.sqrt(n_unmasked)

    # Column 3: sample index -> (index + nstart) * tsamp / 3600 / 24 + tstart.
    elapsed = (samples[:, 3] + meta_info["nstart"]) * tsamp / 3600 / 24
    samples[:, 3] = elapsed + header["tstart"]

    # Column 4 (and tau, when kept): sample units scaled by tsamp * 1e3.
    tsamp_scaled = tsamp * 1e3
    samples[:, 4] *= tsamp_scaled
    if use_tau:
        samples[:, 5] *= tsamp_scaled * 81.92e-3
        samples[:, 5] *= (1000 / fch1) ** (-4)

    param_list = [
        r"$\mu_f$ (MHz)",
        r"$\sigma_f$ (MHz)",
        r"$S$ (Jy ms)",
        r"$\mu_t$ (ms)",
        r"$\sigma_t$ (ms)",
    ]
    if use_tau:
        param_list.append(r"$\tau$ (ms)")
    param_list.append(r"DM (pc cm$^{-3}$)")

    return samples, param_list, mask


# Read the MCMC output h5 file, remove burn-in, return samples for all components.
def get_chains_and_parameters(h5_filename, json_filename):
    """Load a chain, drop burn-in and convert every burst component.

    Burn-in is twice the largest autocorrelation time reported by the
    backend.  If that estimate fails (``AutocorrError`` or ``ValueError``),
    the first 75% of the flattened chain is discarded instead.

    Each component occupies 7 consecutive columns of the flat chain and is
    converted with ``samples2params``.  Every component may reject rows, so
    the row masks are applied cumulatively: all components in the returned
    array refer to the same surviving samples.

    Args:
        h5_filename: path of the emcee HDF5 backend file.
        json_filename: path of the JSON file with the candidate metadata.

    Returns:
        ``(samples, param_list)``.  For a single component the labels are
        returned as-is; for several components each label gets the 1-based
        component number appended.

    Raises:
        ValueError: if the number of chain columns is not a positive
            multiple of 7.
    """
    reader = emcee.backends.HDFBackend(h5_filename)

    try:
        tau = reader.get_autocorr_time()
        burnin = int(2 * np.max(tau))
        print(f"burnin using tau is: {burnin}")
        samples = reader.get_chain(discard=burnin, flat=True)

    except (AutocorrError, ValueError):
        samples = reader.get_chain(discard=0, flat=True)
        burnin = int(samples.shape[0] * 0.75)
        samples = samples[burnin:, :]

    print("burn-in: {0}".format(burnin))
    print("flat chain shape: {0}".format(samples.shape))

    with open(json_filename, "r") as f:
        meta_info = json.load(f)

    n_per_component = 7
    n_columns = samples.shape[-1]

    if n_columns == n_per_component:
        samples, param_list, _ = samples2params(samples, meta_info)
        return samples, param_list

    if n_columns == 0 or n_columns % n_per_component != 0:
        raise ValueError(
            f"expected a multiple of {n_per_component} chain columns, "
            f"got {n_columns}"
        )

    param_list = []
    converted = []  # converted samples per component, kept row-aligned
    remaining = samples  # raw chain restricted to the rows still accepted
    for component in range(n_columns // n_per_component):
        start = component * n_per_component
        comp_samples, comp_params, mask = samples2params(
            remaining[:, start : start + n_per_component], meta_info
        )
        # ``mask`` is relative to ``remaining``; apply it to the components
        # converted so far and to the raw chain so that later components are
        # selected from the same rows.
        converted = [previous[mask] for previous in converted]
        converted.append(comp_samples)
        remaining = remaining[mask]
        param_list.extend(param + str(component + 1) for param in comp_params)

    return (np.hstack(converted), param_list)

In [None]:
# NOTE(review): not referenced by any other cell visible here -- possibly a
# leftover from an earlier, serial version of the loop.
all_params = []

In [None]:
# Lists meant to collect the IDs of problematic candidates, one per error type.
# NOTE(review): no cell visible here appends to them; try_or_move_ahead
# returns its failures instead of recording them in these lists.
AutocorrError_issue_list = []
FileNotFoundError_issue_list = []

In [None]:
def try_or_move_ahead(cand_id):
    """Summarise one candidate's posterior and save its corner plot.

    Reads ``PATH + cand_id + "_samples.h5"`` and ``PATH + cand_id + ".json"``,
    takes the 16th/50th/84th percentiles of every parameter and writes a
    ChainConsumer plot to
    ``121102_paper/mcmc_final/final_corner_plots/<cand_id>.png``.

    Args:
        cand_id: candidate identifier (chain file name without the
            ``_samples.h5`` suffix).

    Returns:
        A dict with, for each parameter label, the median under the label
        itself and the 84th-50th / 50th-16th percentile differences under
        ``"upper error" + label`` / ``"lower error" + label``, plus
        ``"cand_id"``.  ``(cand_id, "FileNotFoundError")`` is returned when a
        ``FileNotFoundError`` is raised, and ``None`` when no samples are
        returned.
    """
    import os  # local import: only needed to create the plot directory

    corner_plot_path = "121102_paper/"
    corner_plot_path += "mcmc_final/final_corner_plots/"

    try:
        h5_filename = PATH + cand_id + "_samples.h5"
        json_filename = PATH + cand_id + ".json"
        samples, param_list = get_chains_and_parameters(h5_filename, json_filename)
        if samples is None:
            return None

        lower, median, upper = np.quantile(samples, [0.16, 0.5, 0.84], axis=0)
        upper_errors = upper - median
        lower_errors = median - lower

        value_dict = {}
        for index, key in enumerate(param_list):
            value_dict[key] = median[index]
            value_dict["upper error" + key] = upper_errors[index]
            value_dict["lower error" + key] = lower_errors[index]
        value_dict["cand_id"] = cand_id

        # Without the output directory the figure save raises
        # FileNotFoundError, which would be reported below as a missing input
        # file and would discard the values computed above.
        os.makedirs(corner_plot_path, exist_ok=True)

        c = ChainConsumer()
        c.add_chain(samples, parameters=param_list)
        c.plotter.plot(
            figsize="grow",
            filename=corner_plot_path + cand_id + ".png",
            display=False,
        )
        return value_dict
    except FileNotFoundError:
        return cand_id, "FileNotFoundError"

In [None]:
from joblib import Parallel, delayed

# Process every candidate with 10 parallel jobs; ``ans`` holds one result per
# candidate, in the order of ``cids``.
run_parallel = Parallel(n_jobs=10)
ans = run_parallel(
    delayed(try_or_move_ahead)(cand_id) for cand_id in tqdm(cids)
)

In [None]:
# Display the candidates recorded as having an autocorrelation problem.
AutocorrError_issue_list

In [None]:
# Display the candidates recorded as having a missing file.
FileNotFoundError_issue_list

In [None]:
# Split the per-candidate results into single- and multi-component bursts.
# Multi-component results carry a component number after every label, so the
# presence of the "...1" key identifies them.
single_comp = []
multi_comp = []
# Results that are not dicts: try_or_move_ahead returns None or a
# (cand_id, "FileNotFoundError") tuple when it could not produce values.
failed_cands = []

for result in ans:
    if not isinstance(result, dict):
        failed_cands.append(result)
        continue
    if "$\\mu_f$ (MHz)1" in result:
        multi_comp.append(result)
    else:
        single_comp.append(result)

if failed_cands:
    print(f"skipped {len(failed_cands)} candidates without results")

In [None]:
# Table of single-component bursts, ordered by the burst-time column.
# The label is a raw string because "\m" is not a valid escape sequence.
# NOTE(review): samples2params divides this column by 3600 and 24 and adds
# tstart, so the "(ms)" in the label looks stale -- confirm the unit.
df = pd.DataFrame(single_comp)
df = df.sort_values(by=r"$\mu_t$ (ms)")

In [None]:
# Bad candidates to remove!  Each entry is matched as a literal substring of
# cand_id (regex=False, so "." is not treated as a wildcard; na=False, so
# missing IDs never match).  Only the first matching row is dropped.
for bad_cand in ("snr_6.38018", "snr_7.0830"):
    matches = df.index[df["cand_id"].str.contains(bad_cand, regex=False, na=False)]
    if len(matches) > 0:
        df = df.drop(matches[0], axis=0)
    else:
        print("candidate not there")

In [None]:
# handle the reruns candidates?
# PATH = "/121102_paper/mcmc_final/reruns/"
# cids = [x.split("/")[-1][:-11] for x in glob.glob(PATH + "*.h5")]

In [None]:
# Save the single-component table.
df.to_csv("single_comp_all_topo.csv")

In [None]:
# Table of multi-component bursts, ordered by the first component's
# burst-time column, then saved.  The label is a raw string because "\m" is
# not a valid escape sequence.
df_mc = pd.DataFrame(multi_comp).sort_values(by=r"$\mu_t$ (ms)1")
df_mc.to_csv("multi_comp_all_topo.csv")