--- 
---
# SPOCK
### Spectrum Parser for Organic Chemical Kinetics
--- 
---
v. 0.0.1 
#### Written and updated by Carlos E. Muñoz-Romero (2020)

#### Import dependencies

In [13]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal as sig
import corner as corner
import dynesty

from lmfit import minimize, Parameters, fit_report, Model, Parameter
%matplotlib qt5

------------------------------------------------
## PART I
### Emission Line Finder
------------------------------------------------

#### User inputs and constants

In [2]:
# Molecule catalog
# Species name; it is also used to build the catalog file name on the next line.
molecule_name = "CH3CN"
molecule_file = "{}.txt".format(molecule_name)
# Unit label used when printing and labelling frequencies.
spectrum_units = "MHz"
# Catalog name (not referenced by the cells shown here).
database = "JPL"
# Molecular dipole moment in cgs units
mu = 3.9037 * 1e-18
# Molecular partition function values to interpolate from
# NOTE(review): Qrot is not used by the cells shown here, and the name Trot is
# later rebound to a dict holding the fitted rotational temperature.
Qrot = [13.8355, 28.4924, 64.0955, 164.3168, 449.0811, 1267.6705, 2628.0493]
Trot = [2.725, 5.0, 9.375, 18.75, 37.5, 75.0, 120.0]
# Assumed FWHM of emission lines in spectrum_units (0.5 km/s = 0.156355 MHz)
# NOTE(review): that equivalence is frequency dependent (dv = c*dnu/nu); confirm
# the reference frequency it was computed at.
line_width = 0.156355
# Maximum distance from catalog frequency to consider a detection:
line_error = line_width
# Velocity of source in m/s
v_lsr = 0#(-6.8+7.1)*1e3
# Telescope data file
spectrum_file = "./181_WSW_FTS200_3mm_average_data_Tmb.dat"
# Root-mean-square error of intensity data.
# Peaks are kept when >= 5*rms; baseline points are those with |intensity| < 2*rms.
rms = 5e-3
# 1/2 size of fitting window around emission lines in spectrum_units
window_size = 8
# Constants
c =  2.998*1e8 # speed of light in m/s
k = 1.3807 * 1e-16  # Boltzmann constant in cgs units

#### Load data files and extract parameters

In [3]:
# Read the observed spectrum and the tab-separated catalog of the chosen molecule.
spectrum = np.loadtxt(spectrum_file)
molecule_catalog = pd.read_csv(molecule_file, delimiter="\t", header=0, index_col=False)

# First spectrum column is used as frequency, second as intensity.
frequencies, intensities = spectrum[:,0], spectrum[:,1]

# Short internal key -> catalog column header.
# NOTE(review): the header says "Frequency(Ghz)" but these values are compared
# directly against the spectrum, which is labelled in MHz — confirm the catalog units.
catalog_columns = {"frequencies": "Frequency(Ghz)",
                   "intensities": "Intensity(K)",
                   "eup": "Eup(K)",
                   "aij": "Aij",
                   "transition": "Transition"}
molecule = {key: molecule_catalog[column] for key, column in catalog_columns.items()}

#### Apply redshift correction 

In [4]:
def redshift(spectrum, velocity):
    """Scale `spectrum` by the relativistic Doppler factor for `velocity`.

    `velocity` is divided by the module-level speed of light `c`, so it must
    use the same units as `c`. The input is divided by (1 + z), where
    1 + z = sqrt((1 + v/c) / (1 - v/c)).
    """
    beta = velocity/c
    z = np.sqrt( (1 + beta) / (1 - beta) ) - 1
    scale = 1+z
    return spectrum/scale

# Shift the frequency axis using the source velocity from the configuration cell.
frequencies = redshift(frequencies, v_lsr)

#### Find all peaks in the spectrum

In [5]:
# Indices of every local maximum in the spectrum.
peaks, _ = sig.find_peaks(intensities)

# Keep only the maxima that reach at least five times the rms noise.
is_significant = intensities[peaks] >= 5*rms
peak_frequencies = frequencies[peaks][is_significant]
peak_intensities = intensities[peaks][is_significant]

#### Identify which peaks correspond to the molecular emission lines

In [6]:
# One list per property; entry n of every list describes the n-th matched line.
detection = {"frequencies": [],
             "catalog_frequencies": [],
             "intensities": [],
             "eup": [],
             "aij": [],
             "transition": []}

# Walk the catalog rows positionally so the match does not depend on the
# DataFrame index labels happening to be 0..N-1.
catalog_rows = list(zip(molecule["frequencies"],
                        molecule["eup"],
                        molecule["aij"],
                        molecule["transition"]))

# A spectral peak is a detection when it lies within line_error of a catalog line.
# A peak that is close to several catalog lines is recorded once per match.
for peak_frequency, peak_intensity in zip(peak_frequencies, peak_intensities):
    for catalog_frequency, eup, aij, transition in catalog_rows:
        if abs(peak_frequency-catalog_frequency)<=line_error:
            detection["frequencies"].append(peak_frequency)
            detection["catalog_frequencies"].append(catalog_frequency)
            detection["intensities"].append(peak_intensity)
            detection["eup"].append(eup)
            # Previously never filled, leaving "aij" empty next to the other lists.
            detection["aij"].append(aij)
            detection["transition"].append(transition)

n_detections = len(detection["frequencies"])

print("Found {} transitions for {} at \n {} {} \n".format(
    n_detections, molecule_name, [round(x,5) for x in detection["frequencies"]], spectrum_units))
print("The corresponding catalog values for {} are \n {} {} \n".format(
    molecule_name, [round(x,5) for x in detection["catalog_frequencies"]], spectrum_units))
print("Errors: \n {} {} \n".format(
    np.array(detection["catalog_frequencies"])-np.array(detection["frequencies"]), spectrum_units))

Found 6 transitions for CH3CN at 
 [73588.7747, 73590.3372, 91985.2624, 91987.0202, 110381.359, 110383.508] MHz 

The corresponding catalog values for CH3CN are 
 [73588.799, 73590.218, 91985.314, 91987.088, 110381.372, 110383.5] MHz 

Errors: 
 [ 0.0243 -0.1192  0.0516  0.0678  0.013  -0.008 ] MHz 



------------------------------------------------
## PART II
### Line Profile Fitter
------------------------------------------------

#### Define a window around each line and identify the points that make up the line itself. Center the windows on zero to facilitate the fits.

In [7]:
# One window per detection: every channel closer than window_size to the
# detected frequency, with the frequency axis re-centred on the detection.
windows = []

for det_frequency in detection["frequencies"]:
    in_window = abs(frequencies-det_frequency)<window_size
    windows.append({"frequencies": frequencies[in_window]-det_frequency,
                    "intensities": intensities[in_window]})
    
"LINE ID"
# Pick all monotonically decreasing neighbors of each detected line to fit.
def _walk_line_wing(window, line_idx, direction, first_step):
    """Collect the channel indices of one wing of an emission line.

    Starting `first_step` channels away from `line_idx` and moving one channel
    at a time in `direction` (-1 = left, +1 = right), a channel is accepted
    while all of the following hold:
      * the previously accepted channel was not lower than the one now tested
        (the profile is still non-increasing away from the peak),
      * its intensity is above -0.01,
      * it lies within 6 * line_width of the window centre,
      * it is inside the window.

    The conditions are evaluated on the very channel that is about to be
    appended. The earlier version indexed with `current - i` / `current + i`
    after `current` had already been moved, so from the second step onward it
    tested a channel further out than the one it appended; it also let negative
    indices wrap around and could run past the end of the window.
    """
    offsets = window["frequencies"]
    values = window["intensities"]
    n_channels = len(values)

    picked = []
    step = first_step
    diff = 0
    while diff <= 0:
        candidate = line_idx + direction*step
        if not 0 <= candidate < n_channels:
            break
        if not (values[candidate] > -0.01 and abs(offsets[candidate]) < line_width*6):
            break
        picked.append(candidate)
        beyond = candidate + direction
        if not 0 <= beyond < n_channels:
            break
        # <= 0 means the next channel outward is not higher than this one.
        diff = values[beyond] - values[candidate]
        step += 1
    return picked


lines = []
for window in windows:
    # Windows are centred on the detected frequency, so the peak channel is the
    # one whose offset is zero; argmin avoids an exact float round-trip comparison.
    line_idx = int(np.argmin(np.abs(window["frequencies"])))

    # Left wing starts one channel below the peak; the right wing starts at the
    # peak itself so the peak channel is part of the fitted points.
    indices = (_walk_line_wing(window, line_idx, -1, 1)
               + _walk_line_wing(window, line_idx, +1, 0))

    lines.append({"frequencies": [window["frequencies"][j] for j in indices],
                  "intensities": [window["intensities"][j] for j in indices],
                  "indices": indices,
                  "line_idx": line_idx})
    

#### For each detected emission line, fit a baseline polynomial via least squares and a Gaussian via dynamic nested sampling.

In [37]:
"1-D GAUSSIAN"
def gaussian(x, amp, wid):
    """Area-normalised Gaussian centred on zero.

    `amp` is the integral of the profile and `wid` its standard deviation,
    so the peak height is amp / (sqrt(2*pi) * wid).
    """
    peak_height = amp / (np.sqrt(2*np.pi) * wid)
    exponent = -(x)**2 / (2*wid**2)
    return peak_height * np.exp(exponent)
"LINE MODEL"
def line_model(x, amp, wid):
    """Model of a single emission line: a zero-centred Gaussian (see `gaussian`)."""
    profile = gaussian(x, amp, wid)
    return profile

"LOG LIKELIHOOD"
def loglike(theta):
    """Gaussian log-likelihood of the line model for theta = (amp, wid, lnf).

    Reads the data from the module-level `x`, `y` and `yerr`. The variance of
    each point is yerr**2 plus a model-proportional term scaled by exp(2*lnf).
    A later cell defines another function with this same name.
    """
    amp, wid, lnf = theta
    predicted = line_model(x, amp, wid)
    variance = yerr**2 + predicted**2 * np.exp(2 * lnf)
    inv_sigma2 = 1.0 / variance
    chi2_terms = (y-predicted)**2 * inv_sigma2

    return -0.5 * (np.sum(chi2_terms - np.log(inv_sigma2)))

"PRIORS"
def prior_transform(utheta):
    """Map a sample `utheta` = (uamp, uwid, ulf) onto (amp, wid, lnf).

    Each output is a linear function of its input; for inputs in [0, 1] the
    ranges are amp in [0, 3*guess], wid in [0.05, 3*line_width + 0.05] and
    lnf in [-10, 1]. `guess` and `line_width` are module-level values.
    A later cell defines another function with this same name.
    """
    uamp, uwid, ulf = utheta
    return (
        uamp*guess*3,
        3*line_width*uwid + 0.05,
        11. * ulf - 10.,
    )

In [None]:
# Per-line outputs, all in detection order.
results = []      # dynesty results object of each fit
baselines = []    # fitted quadratic baseline (np.poly1d) of each window
fitlinesy = []    # baseline-subtracted, clipped intensities that were fitted
fitlinesx = []    # frequency offsets of the fitted channels

for i,window in enumerate(windows):
    print("Fitting line {} of {} \n".format(i+1, n_detections))

    # Mask of the channels that were identified as part of the emission line.
    on_line = np.zeros(len(window["frequencies"]), dtype=bool)
    on_line[lines[i]["indices"]] = True

    x = window["frequencies"]
    y = window["intensities"]

    # Baseline: quadratic least-squares fit to the off-line channels whose
    # absolute intensity stays below twice the rms.
    offline_frequencies = x[~on_line]
    offline_intensities = y[~on_line]
    is_quiet = abs(offline_intensities)<rms*2
    baseline_fit = np.polyfit(offline_frequencies[is_quiet], offline_intensities[is_quiet], 2)
    baseline = np.poly1d(baseline_fit)
    y = y-baseline(x)

    # From here on x, y, yerr and guess are the module-level values that
    # loglike() and prior_transform() read while the sampler runs.
    x = x[on_line]
    y = y[on_line]

    # Area of a Gaussian with the observed peak height and the assumed width.
    guess = max(y)*np.sqrt(2*np.pi)*line_width

    # Negative baseline-subtracted values are set to zero; a fixed error is used.
    y = np.array([value if value>0 else 0 for value in y])
    yerr = np.full(len(y), 1e-3)

    fitlinesy.append(y)
    fitlinesx.append(x)

    dsampler = dynesty.DynamicNestedSampler(loglike, prior_transform, ndim=3,
                                            bound='multi', sample='auto')
    dsampler.run_nested(dlogz_init=0.0001, maxiter=10000)

    results.append(dsampler.results)
    baselines.append(baseline)

113it [00:00, 380.67it/s, batch: 0 | bound: 0 | nc: 2 | ncall: 131 | eff(%): 17.908 | loglstar:   -inf < -894.295 <    inf | logz: -901.425 +/-  0.169 | dlogz: 918.385 >  0.000] 

Fitting line 1 of 6 



10279it [03:18,  3.52it/s, batch: 5 | bound: 0 | nc: 1 | ncall: 70076 | eff(%): 14.668 | loglstar:  7.120 < 11.372 <  7.631 | logz:  7.769 +/-  0.109 | stop:    nan]             
56it [00:00, 558.74it/s, batch: 0 | bound: 0 | nc: 1 | ncall: 62 | eff(%):  9.964 | loglstar:   -inf < -2123.026 <    inf | logz: -2130.045 +/-  0.167 | dlogz: 2169.069 >  0.000]

Fitting line 2 of 6 



9517it [01:16, 29.56it/s, batch: 2 | bound: 28 | nc: 2 | ncall: 38415 | eff(%): 24.759 | loglstar: 19.997 < 22.411 < 20.441 | logz: 17.224 +/-  0.123 | stop:  2.049]               

#### Visualize fits and convergence

In [35]:
from dynesty import plotting as dyplot
labels = ["FLUX", "LINE WIDTH", "ERROR MULTIPLIER"]

# One figure per fitted line: raw window, baseline-subtracted window, a thinned
# set of sampled Gaussians, the median Gaussian, the baseline and the fitted points.
# NOTE(review): res.samples come from nested sampling and carry importance
# weights (res.logwt); the plain medians below ignore them — confirm against
# the dynesty documentation whether weighted statistics are wanted.
for i,res in enumerate(results):
    window_frequencies = windows[i]["frequencies"]
    window_intensities = windows[i]["intensities"]

    plt.figure()
    window_linspace = np.linspace(min(window_frequencies), max(window_frequencies),10000)

    best_amp = np.median(res.samples[:,0])
    best_wid = np.median(res.samples[:,1])
    lnf = np.median(res.samples[:,2])

    plt.step(window_frequencies, window_intensities)
    # Evaluate the baseline on THIS window's frequencies. The previous code used
    # the stale global `window` left over from an earlier loop, i.e. always the
    # last window, which gives a wrong curve or a shape mismatch.
    plt.step(window_frequencies, window_intensities-baselines[i](window_frequencies))

    # Every 20th sample drawn as a faint curve to visualise the spread.
    for sample in res.samples[::20]:
        gauss = gaussian(window_linspace, sample[0], sample[1])
        plt.plot(window_linspace, gauss, linestyle='-', color="lime", label='best fit', linewidth=0.05, alpha=0.2)

    gauss = gaussian(window_linspace, best_amp, best_wid)
    plt.plot(window_linspace, gauss, linestyle='--', color="green", label='best fit', linewidth=3)
    plt.plot(window_linspace, baselines[i](window_linspace), linestyle='--', color="black", label='best fit', linewidth=3)
    plt.scatter(fitlinesx[i], fitlinesy[i], color="red", linewidth=5, alpha=0.4)
    plt.title("TRANSITION AT {} {}".format(detection["frequencies"][i], spectrum_units))

    # Upper limit: 2.1 times the peak of the median Gaussian.
    peak_height = gaussian(0, best_amp, best_wid)
    plt.ylim(min(window_intensities),peak_height+peak_height*1.1)

    plt.xlabel("[{}]".format(spectrum_units), fontsize=15)
    plt.ylabel("Intensity [K]", fontsize=15)

    plt.show()
    

In [36]:
# Trace and marginal-posterior figure for each fitted line.
for res in results:
    fig, axes = dyplot.traceplot(res, labels=labels)

In [None]:
# Report the median and standard deviation of the fitted area (column 0) and
# Gaussian width (column 1) of every line, converted from MHz to km/s.
#
# The conversion uses the Doppler relation dv = c * dnu / nu evaluated at each
# line's own frequency. The previous factor 0.5/line_width is only valid where
# 0.5 km/s equals line_width MHz (about 93.7 GHz for 0.156355 MHz), and it
# silently changed whenever line_width was retuned.
# NOTE(review): res.samples carry nested-sampling importance weights
# (res.logwt); these unweighted statistics ignore them — confirm intent.
reports = (("\n BEST FIT FLUXES \n", 0, "{} +- {} K.km/s"),
           ("\n BEST FIT LINE WIDTHS \n", 1, "{} +- {} km/s"))

for header, column, template in reports:
    print(header)
    for res, line_frequency in zip(results, detection["frequencies"]):
        # c is in m/s and the frequency in MHz, so this is km/s per MHz.
        kms_per_mhz = (c/1e3)/line_frequency
        samples = res.samples[:,column]
        center = round(np.median(samples)*kms_per_mhz,5)
        spread = round(np.std(samples)*kms_per_mhz,5)
        print(template.format(center, spread))

------------------------------------------------
## PART III
### Rotation Diagrams
------------------------------------------------

In [None]:
# Use the transition numbers to look up the line strengths (sorry, gotta do it manually)
# The tabulated values are divided by the squared dipole moment (mu*1e18)**2.
strength = np.array([79.48062, 84.80078, 101.74150, 105.98313, 123.64362, 127.17758])/((mu*1e18)**2)

detection["frequencies"] = np.array(detection["frequencies"])
detection["eup"] = np.array(detection["eup"])

# The hand-entered strengths, the fits and the detections must line up one to one;
# fail early with a clear message instead of a broadcasting error further down.
if not (len(strength) == len(results) == len(detection["frequencies"])):
    raise ValueError(
        "Expected one line strength and one fit per detection, got {} strengths, "
        "{} fits and {} detections".format(
            len(strength), len(results), len(detection["frequencies"])))

# MHz -> km/s from the Doppler relation dv = c * dnu / nu, evaluated per line
# (c in m/s, frequencies in MHz). The previous constant factor 0.5/0.156355 is
# only valid near 93.7 GHz and was written two different ways in this cell.
kms_per_mhz = (c/1e3)/detection["frequencies"]

# NOTE(review): res.samples carry nested-sampling importance weights
# (res.logwt); the unweighted median/std below ignore them — confirm intent.
area_samples = [res.samples[:,0] for res in results]
width_samples = [res.samples[:,1] for res in results]

# Integrated intensities in K cm/s (K km/s times 1e5)
fluxes = dict({"values":np.array([np.median(s) for s in area_samples])*kms_per_mhz*100000,
               "errors":np.array([np.std(s) for s in area_samples])*kms_per_mhz*100000})
# Convert line widths to km/s
widths = dict({"values":np.array([np.median(s) for s in width_samples])*kms_per_mhz,
               "errors":np.array([np.std(s) for s in width_samples])*kms_per_mhz})
toHz = 1e6

# Upper-level populations Nu/gu
nugu_denominator = 8*(np.pi**3)*detection["frequencies"]*toHz*(mu**2)*strength
lnnugu = np.log((3*k*fluxes["values"])/nugu_denominator)

# Error on Nu/gu propagated to its logarithm: d(ln N) = dN / N
nugu_err = (3*k*fluxes["errors"])/nugu_denominator
lnnugu_err = nugu_err / np.exp(lnnugu)

#### Generate rotation diagram via dynamic nested sampling

In [None]:

# Module-level data read by the rotation-diagram likelihood:
# upper-level energies, ln(Nu/gu) and the error on ln(Nu/gu).
x, y, yerr = detection["eup"], lnnugu, lnnugu_err

# log-likelihood
def loglike(theta):
    """Gaussian log-likelihood of a straight line for theta = (m, intrcpt, lnf).

    Reads the data from the module-level `x`, `y` and `yerr`. The variance of
    each point is yerr**2 plus a model-proportional term scaled by exp(2*lnf).
    This definition replaces the earlier function of the same name.
    """
    m, intrcpt, lnf = theta
    predicted = intrcpt + m*x
    variance = yerr**2 + predicted**2 * np.exp(2 * lnf)
    inv_sigma2 = 1.0 / variance
    chi2_terms = (y-predicted)**2 * inv_sigma2

    return -0.5 * (np.sum(chi2_terms - np.log(inv_sigma2)))

# prior transform
def prior_transform(utheta):
    """Map a sample `utheta` = (um, uintrcpt, ulnf) onto (m, intrcpt, lnf).

    Each output is a linear function of its input; for inputs in [0, 1] the
    ranges are m in [-0.53, -0.03], intrcpt in [20, 23] and lnf in [-10, 1].
    This definition replaces the earlier function of the same name.
    """
    um, uintrcpt, ulnf = utheta
    return (
        -0.5*um - 0.03,
        3 * uintrcpt + 20,
        11. * ulnf - 10.,
    )

# Sample the three-parameter straight-line model with dynamic nested sampling.
sampler_options = dict(ndim=3, bound='multi', sample='auto')
dsampler = dynesty.DynamicNestedSampler(loglike, prior_transform, **sampler_options)
dsampler.run_nested(dlogz_init=0.00001, maxiter=10000)
dres = dsampler.results

# Trace and marginal-posterior figure; the parameter order matches loglike's theta.
labels = [r'$-1/T$', r'$\ln(N_u/g_u)[E_u=0]$', r'$\ln f$']
fig, axes = dyplot.traceplot(dres, labels=labels)

In [None]:
# Column 0 of the samples is the slope m = -1/T, column 1 the intercept.
# NOTE(review): dres.samples carry nested-sampling importance weights
# (dres.logwt); the unweighted median/std below ignore them — confirm intent.
slope_samples = dres.samples[:,0]
intercept_samples = dres.samples[:,1]

# Note: this rebinds the name Trot, which the configuration cell set to a list.
Trot = dict({"value":-1/np.median(slope_samples),
             "error": np.std(1/slope_samples)})

lnNugu_0 = dict({"value":np.median(intercept_samples),
                 "error":np.std(intercept_samples)})

print("BEST FIT ROTATIONAL TEMPERATURE: {} +- {} K\n".format(round(Trot["value"],4), round(Trot["error"],4)))
print("BEST FIT INTERCEPT: {} +- {} \n".format(round(lnNugu_0["value"],4), round(lnNugu_0["error"],4)))

Eu_linspace = np.linspace(0, max(detection["eup"])+5, 1000)

def rotdiag_model(Eu, m, lnNugu):
    """Straight line of the rotation diagram: intercept `lnNugu` plus slope `m` times `Eu`."""
    return lnNugu + m*Eu

# Data points with their error bars (yerr is the module-level error on ln(Nu/gu)).
plt.scatter(detection["eup"] , lnnugu, c="purple")
plt.errorbar(detection["eup"] , lnnugu, yerr=yerr, capsize=5, c="purple", fmt="none")

# Every 10th sample drawn as a faint line to visualise the spread.
for sample in dres.samples[::10]:
    plt.plot(Eu_linspace, rotdiag_model(Eu_linspace, sample[0], sample[1]), color="lime", linewidth=0.05, alpha=0.2)

# Line built from the median slope and median intercept.
plt.plot(Eu_linspace, rotdiag_model(Eu_linspace, np.median(slope_samples), lnNugu_0["value"]), color="red", linewidth=1, alpha=1)
# Raw strings: "\l" is not a valid escape sequence and triggers a warning in a
# normal string literal. The rendered label text is unchanged.
plt.ylabel(r"$\ln(N_u/g_u)$", fontsize=15)
plt.xlabel(r"$E_u/k$", fontsize=15)
plt.show()
    
