# Spectrum Analysis

#### This piece of code does the following so far:

1. Reads spectrum data in ASCII format.

2. Finds all "peaks" in the spectrum, consistent with a given flux threshold and line width. The desired prominence of the peaks can also be adjusted, if necessary. 

3. Identifies N most significant peaks

4. Reads CASSIS line catalog.txt file for one or multiple molecule(s).

5. Compares detected peaks in the spectrum with CASSIS data to identify the lines.

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal as sig

%matplotlib qt5
plt.style.use('default')

### Obtain telescope spectrum data

In [3]:
# LOAD ASCII FILE
data = np.loadtxt("181_WSW_FTS200_3mm_average_data_Tmb.dat")

# RID OF GAPS IN THE COVERAGE
gap = -1000
freqs = data[:,0][data[:,1]!=gap] # FREQUENCY ARRAY 
temps = data[:,1][data[:,1]!=gap] # TEMPERATURE ARRAY 


# UNITS
freq_units = "MHz"
flux_units = "K"

threshold = 5*5e-3 

# FIND DATA ABOVE THRESHOLD
passfreqs = freqs[temps>threshold]
passtemps = temps[temps>threshold]

# ASSUMED LINE WIDTH
width = 0.5 #(MHz)

### Find peaks

In [6]:
peaks = sig.find_peaks(temps, height=threshold)[0]
peaktemps = temps[peaks]
peakfreqs = freqs[peaks]

# Save N strongest lines separately
N = 100
maxtemps = sorted(peaktemps, reverse=True)[0:N]
maxfreqs = [peakfreqs[list(peaktemps).index(i)] for i in maxtemps]

### Visualize Probable Emission Lines. This only compares the peak temperature to the S/N . Many lines will be discriminated later based on their integrated intensities once a Gaussian is fit.

In [8]:
plt.scatter(passfreqs, passtemps, s=5, c="grey")
plt.plot(freqs[temps>0], temps[temps>0], c="black", linewidth=0.5)
plt.vlines(peakfreqs, ymin=-0.2, ymax=0, colors="lime" , linewidth=0.5, label="Probable Emission Lines")
plt.vlines(maxfreqs, ymin=-0.2, ymax=0, colors="red" , linewidth=1, label="Strongest {} Emission Lines".format(N))
plt.scatter(peakfreqs, peaktemps, s=50, c="green", marker="*", label="Peaks")

plt.xlabel("Frequency ({})".format(freq_units), fontsize=15)
plt.ylabel("Temperature ({})".format(flux_units), fontsize=15)

plt.ylim([-0.2,2])
plt.legend()

plt.show()

## Hunt for Molecules

### First, find EVERYTHING that might be produced by molecules on the catalog. 

In [9]:
def hunt(catalog, catalog_file):

    # LOAD CASSIS CATALOGUE
    catalog_freqs = pd.read_csv(catalog_file, sep="\t", usecols=[2])
    catalog_freqs = catalog_freqs["Frequency(Ghz)"]
    
    catalog_Aij = pd.read_csv(catalog_file, sep="\t", usecols=[4])
    catalog_Aij = catalog_Aij["Aij"]

    catalog_names = catalog_freqs.keys()
    catalog_names = [m.split()[0] for m in catalog_names]

    detections = []
    
    # COMPARE PEAKS WITH CASSIS CATALOGUE
    for i,f1 in enumerate(catalog_freqs):
        for j,f2 in enumerate(peakfreqs):
            if abs(f1-f2) < 0.5*width:
                detections.append([catalog_names[i], f1, catalog_Aij[i]])
                
    return detections

In [10]:
catalog = "Organics"
catalog_file = "COMsCatalog.txt"



detections = hunt(catalog, catalog_file)
            
print("Found {} probable emission lines corresponding to {}".format(len(detections), catalog))

Found 337 probable emission lines corresponding to Organics


In [11]:
fig, ax = plt.subplots(constrained_layout=True)

ax.step(freqs[temps>0], temps[temps>0], c="black", linewidth=0.5)

ax.set_ylabel("Temperature ({})".format(flux_units), fontsize=15)

ax.set_ylim([-0.2,2])

ax.vlines([d[1] for d in detections], ymin=-0.2, ymax=0, colors="blue" , linewidth=1, label="Probable Detections")



secax = ax.secondary_xaxis('top')
secax.set_xlabel("Frequency ({})".format(freq_units), fontsize=15)

ax.set_xticks([d[1] for d in detections])
ax.set_xticklabels([d[0] for d in detections], rotation=75, fontsize=10)

plt.legend()
plt.show()

## Now, we count the individual molecules detected and determine how many transitions were found in the frequency range, compared to the expected lines.

### To start, determine the frequency range in which we will look for transitions (i.e. identify gaps in the coverage as intervals

In [12]:
def get_coverage(frequencies, cutoff = 0.5):
    intervals = []
    
    left = min(frequencies)
    for i,f in enumerate(frequencies):
        if frequencies[i]-frequencies[i-1] > cutoff and i>0:
            right = frequencies[i-1]
            intervals.append([left, right])
            left = frequencies[i]
    
    intervals.append([left, max(frequencies)])
    return intervals

In [13]:
intervals = get_coverage(freqs)

plt.step(freqs[temps>0], temps[temps>0], c="black", linewidth=1)

plt.xlabel("Frequency ({})".format(freq_units), fontsize=15)
plt.ylabel("Temperature ({})".format(flux_units), fontsize=15)

plt.ylim([-0.2,2])

for interval in intervals:
    plt.fill_betweenx(np.linspace(0,2,100), interval[0], interval[1], color="lime", alpha=0.5)
             
plt.show()

### Determine individual molecules and count transitions

In [14]:
def find_individual(detections):
    
    molecules = []
    
    for mol in detections:
        if mol[0] not in molecules:
            molecules.append(mol[0])
    
    return(molecules)
            

def count_transitions(catalog_file, intervals, detections, molecules, eup_limit, aij_limit=[0,1]):
    
    # LOAD CATALOGUE
    catalog_freqs = pd.read_csv(catalog_file, sep="\t", usecols=[2])
    catalog_freqs = catalog_freqs["Frequency(Ghz)"]
    
    catalog_eup = pd.read_csv(catalog_file, sep="\t", usecols=[3])
    catalog_eup = catalog_eup["Eup(K)"]
    
    catalog_Aij = pd.read_csv(catalog_file, sep="\t", usecols=[4])
    catalog_Aij = catalog_Aij["Aij"]

    catalog_names = catalog_freqs.keys()
    catalog_names = [m.split()[0] for m in catalog_names]
    
    catalog = []
    for i,f1 in enumerate(catalog_freqs):
        catalog.append([catalog_names[i], f1, catalog_eup[i], catalog_Aij[i]])
        
    transitions = []
    
    found_lines = []
    expected_lines = []
        
        
    for mol in molecules:
        found = 0
        expected = 0
        found_lines_mol = [] 
        expected_lines_mol = []
        found_Aij_mol = [] 
        expected_Aij_mol = []
        
        # COUNT TRANSITIONS FOUND IN SPECTRA
        for line in detections:
            if mol == line[0]:
                found+=1
                found_lines.append(line[1])
                found_lines_mol.append(line[1])
                found_Aij_mol.append(line[2])
                
        # COUNT TOTAL EXPECTED TRANSITIONS IN FREQUENCY RANGE
        for interval in intervals:
            for cat_line in catalog:
                if mol == cat_line[0] and interval[0] < cat_line[1] < interval[1] and eup_limit[0]<cat_line[2]<eup_limit[1] and aij_limit[0]<cat_line[3]<aij_limit[1]:
                    expected+=1
                    expected_lines.append(cat_line[1])
                    expected_lines_mol.append(cat_line[1])
                    expected_Aij_mol.append(cat_line[3])
        transitions.append([mol, found, expected, found_lines_mol, expected_lines_mol, found_Aij_mol, expected_Aij_mol])  
    
    return transitions, found_lines, expected_lines
    

In [19]:
# SET A THRESHOLD FOR THE UPPER LEVEL ENERGY (K)
eup_limit = [0,150] #K
aij_limit = [1e-8, 1e-4]

molecules = find_individual(detections)
Nmols = len(molecules)
print("There are at most {} different molecules in the spectrum".format(Nmols))

There are at most 84 different molecules in the spectrum


In [20]:
transitions, found_lines, expected_lines = count_transitions(catalog_file, intervals, detections, molecules, eup_limit)
for i,mol in enumerate(transitions):
    print("{}\n\t\tEXPECTED {}\tFOUND {} ".format(mol[0], mol[2], mol[1]))


CH3CH(NH2)CN
		EXPECTED 737	FOUND 7 
DCO+
		EXPECTED 1	FOUND 1 
Phenol
		EXPECTED 2392	FOUND 19 
C3H6O2
		EXPECTED 1702	FOUND 10 
HC7N
		EXPECTED 35	FOUND 1 
C2H3CN
		EXPECTED 763	FOUND 17 
c-C6H5CN
		EXPECTED 659	FOUND 5 
CH3COOH,
		EXPECTED 980	FOUND 11 
C3H7CN
		EXPECTED 511	FOUND 8 
HCCCH2OH
		EXPECTED 395	FOUND 1 
CCS
		EXPECTED 11	FOUND 8 
CH3COCH3
		EXPECTED 882	FOUND 9 
DCN
		EXPECTED 3	FOUND 3 
DCCN
		EXPECTED 11	FOUND 1 
HCCCN
		EXPECTED 4	FOUND 4 
HC3N,
		EXPECTED 4	FOUND 4 
H2CO
		EXPECTED 3	FOUND 1 
C6O
		EXPECTED 81	FOUND 1 
AlCl
		EXPECTED 596	FOUND 6 
OCS
		EXPECTED 4	FOUND 3 
C2H5OH,v=0
		EXPECTED 326	FOUND 2 
CH3CN
		EXPECTED 14	FOUND 6 
HC5N,
		EXPECTED 14	FOUND 2 
CCCS
		EXPECTED 6	FOUND 5 
C3S,
		EXPECTED 6	FOUND 5 
C4H
		EXPECTED 40	FOUND 20 
C3H
		EXPECTED 49	FOUND 4 
DNC
		EXPECTED 1	FOUND 1 
CH2(OH)CHO
		EXPECTED 188	FOUND 2 
CH3CHO
		EXPECTED 128	FOUND 4 
C3N
		EXPECTED 72	FOUND 19 
HCCNC
		EXPECTED 24	FOUND 12 
HSC
		EXPECTED 9	FOUND 1 
CH3OH
		EXPECTED 39	FO

In [21]:
rows = int(np.floor(np.sqrt(Nmols)))
cols = int(rows+1)

i = 0
j = 0
n = 0
while i < rows:
    while j < cols:
        if n < Nmols:
            plt.subplot2grid((rows, cols),(i,j))
            plt.step(freqs[temps>0], temps[temps>0], c="black", linewidth=1)
            plt.vlines(transitions[n][4], ymin=-0.2, ymax=0, colors="blue" , linewidth=0.6, alpha=1)
            plt.vlines(transitions[n][3], ymin=-0.2, ymax=0, colors="red" , linewidth=1)
            plt.title(transitions[n][0])
            plt.xlim(min(freqs)-1000, max(freqs)+1000)
            if transitions[n][2]!=0:
                detect_pct = transitions[n][1]/transitions[n][2]
            else:
                detect_pct = 1
            if detect_pct > 0.5:
                plt.fill_betweenx(np.linspace(-0.2,2,100), min(freqs)-1000, max(freqs)+1000, color="yellow", alpha=0.2)
            if detect_pct > 0.75:
                plt.fill_betweenx(np.linspace(-0.2,2,100), min(freqs)-1000, max(freqs)+1000, color="lime", alpha=0.2)

            plt.yticks([])

        j+=1
        n+=1
    j=0
    i+=1

plt.xlabel("MHz")
plt.show()



## Discriminate based on Einstein coefficients

In [23]:
molecule = "H2CO"

fig, ax = plt.subplots(nrows=1, ncols=1)

for mol in transitions:
    if mol[0] == molecule:
        plt.vlines(sorted(mol[4]), ymin=0, ymax=1, linewidth=1,color=([(0.2,m/max(mol[6]),m/max(mol[6])) for m in mol[6]]))
        plt.vlines(sorted(mol[3]), ymin=1, ymax=2, colors="white")
       
        print(sorted(mol[3]))
        print(sorted(mol[6]))
        print(sorted(mol[5]))
        
plt.yticks([0.5,1.5],labels=["Expected", "Detected"])
plt.title("H2CO")
ax.set_facecolor("black")
plt.show()


[72837.948]
[8.02e-07, 1.57e-06, 8.15e-06]
[8.15e-06]
