# JWST Data Analysis Use Case: MOS Spectroscopy of Typical Extragalactic Fields

### This should work with the JWST pipeline final products (1D and 2D spectra). These are not available yet, so I will use LEGA-C data (van der Wel et al. 2016, Straatman et al. 2018) for now.

First, set the environment with astroconda and specutils.

`conda create -n astroutils stsci`

`source activate astroutils`

## Imports

In [None]:
#general os
import os
import zipfile
import urllib.request

#general plotting
from matplotlib import pyplot as plt

# Notebook-wide matplotlib style: every text element uses size '18', lines
# and axes frames are drawn with width 2, and animations render as HTML5.
params = dict.fromkeys(
    ('legend.fontsize', 'axes.labelsize', 'axes.titlesize',
     'xtick.labelsize', 'ytick.labelsize'),
    '18',
)
params.update({'lines.linewidth': 2,
               'axes.linewidth': 2,
               'animation.html': 'html5'})
plt.rcParams.update(params)
# NOTE(review): 0 presumably disables matplotlib's "too many open figures"
# warning — confirm against the rcParams documentation.
plt.rcParams['figure.max_open_warning'] = 0

#table/math handling
import pandas as pd
import numpy as np
np.seterr(all='ignore')  # hides irrelevant warnings about divide-by-zero, etc

#astropy
import astropy
import astropy.units as u
from astropy.table import QTable
from astropy.io import fits,ascii
from astropy.nddata import StdDevUncertainty
from astropy.modeling import models
from astropy.visualization import quantity_support
from astropy import constants as const

#specutils
import specutils
from specutils import Spectrum1D, SpectralRegion
from specutils.manipulation import gaussian_smooth
from specutils.fitting import fit_generic_continuum
from specutils.fitting import find_lines_derivative
from specutils.fitting import find_lines_threshold
from specutils.fitting import fit_lines
from specutils.manipulation import noise_region_uncertainty
from specutils.analysis import centroid
from specutils.analysis import line_flux
from specutils.analysis import equivalent_width
from specutils.analysis import template_comparison

quantity_support();  # auto-recognizes units on matplotlib plots

## Check versions. Should be:

Pandas:  0.25.2

Numpy:  1.17.2

Astropy:  3.2.2

Specutils:  0.6

In [None]:
# Report the installed version of each package, in the same order as the
# expected-version list above.
for label, package in (("Pandas: ", pd),
                       ("Numpy: ", np),
                       ("Astropy: ", astropy),
                       ("Specutils: ", specutils)):
    print(label, package.__version__)

## Set path to data and download from box link

In [None]:
# Download the example data bundle into the working directory.
boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/mos_spectroscopy/mos_spectroscopy.zip'
boxfile = './mos_spectroscopy.zip'
urllib.request.urlretrieve(boxlink, boxfile)

# Unpack the archive next to the notebook. The context manager closes the
# archive's file handle as soon as extraction finishes instead of leaving it
# open for the rest of the session.
with zipfile.ZipFile(boxfile, 'r') as zf:
    zf.extractall()

# Directory (inside the extracted bundle) that holds the observed spectra.
observedfiles = './mos_spectroscopy/observed/'

## Choose one galaxy

In [None]:
# Paths of the 1D spectrum, its 1D weight array, and the 2D spectrum of the
# chosen galaxy, all located in the observed-data directory.
file1d, file1dwht, file2d = (
    observedfiles + filename
    for filename in ('legac_M1_v3.7_spec1d_130902.fits',
                     'legac_M1_v3.7_wht1d_130902.fits',
                     'legac_M1_v3.7_spec2d_130902.fits')
)

## Inspect its 2D spectrum

In [None]:
# Open the 2D spectrum file; the handle is kept open because later cells
# read hdu2d[0].header and hdu2d[0].data.
hdu2d = fits.open(file2d)
# Print a summary of the HDUs contained in the file.
hdu2d.info()

In [None]:
# Display the header of the first HDU (index 0) of the 2D spectrum file.
hdu2d[0].header

In [None]:
# Show the 2D spectrum image, restricted to columns 2000-3000 because the
# full frame is very wide.
plt.figure(figsize=(20, 20))
image2d = hdu2d[0].data
plt.imshow(image2d)
plt.xlim(2000, 3000)

## Now work with 1D spectrum
## Calibrate (in wavelength), inspect, and write in Spectrum1D object

In [None]:
# Open the 1D spectrum and its companion 1D weight file; both handles stay
# open because later cells read their data and headers.
hdu1d = fits.open(file1d)
hdu1dwht = fits.open(file1dwht)
# Print a summary of the HDUs in the 1D spectrum file.
hdu1d.info()

In [None]:
# Display the header of the first HDU (index 0) of the 1D spectrum file;
# its CD1_1 and CRVAL1 keywords are used below to build the wavelength axis.
hdu1d[0].header

If I want to do it in Pandas.

In [None]:
# Swap the bytes of the FITS arrays and relabel the dtype with the opposite
# byte order, so the values are unchanged but stored in the other endianness
# (presumably because pandas needs native-endian arrays — confirm).
# `.byteswap().view(dtype.newbyteorder())` is equivalent to the older
# `.byteswap().newbyteorder()` and also works on NumPy >= 2.0, where
# ndarray.newbyteorder() was removed.
flux = hdu1d[0].data.byteswap().view(hdu1d[0].data.dtype.newbyteorder())
wht = hdu1dwht[0].data.byteswap().view(hdu1dwht[0].data.dtype.newbyteorder())
# Uncertainty from the weights as 1/sqrt(weight); pixels with zero weight
# become inf here and are removed by the weight > 0 cut below.
unc = 1./ np.sqrt(wht)
# Linear wavelength solution: pixel index * CD1_1 + CRVAL1.
wave = np.arange(flux.shape[0])*hdu1d[0].header['CD1_1'] + hdu1d[0].header['CRVAL1']

##for certain functions, I need to cut the spectrum where the weight is 0.
d = {'wavelength':wave, 'flux':flux, 'weight':wht, 'uncertainty':unc}
dataspec = pd.DataFrame(data=d)

##define subset where wht>0
dataspec_sub = dataspec[dataspec['weight'] > 0.].reset_index(drop=True)
dataspec_sub.head()

If I want to do it in astropy Tables.

In [None]:
# Raw flux and weight arrays straight from the FITS files.
flux, wht = hdu1d[0].data, hdu1dwht[0].data
# Uncertainty from the weights as 1/sqrt(weight).
unc = 1./ np.sqrt(wht)

# Linear wavelength solution: pixel index * CD1_1 + CRVAL1.
header1d = hdu1d[0].header
pixel_index = np.arange(flux.shape[0])
wave = pixel_index*header1d['CD1_1'] + header1d['CRVAL1']

# Flux unit attached to both the flux and uncertainty columns.
spec_unit = u.Unit('10^-19 erg s^-1 cm^-2 angstrom^-1')
table_columns = [wave*u.angstrom, flux*spec_unit, wht, unc*spec_unit]
column_names = ('wavelength','flux','weight','uncertainty')
dataspec = QTable(table_columns, names=column_names)

# Keep only the rows with positive weight.
has_weight = dataspec['weight']>0.
dataspec_sub = dataspec[has_weight]
dataspec_sub

In [None]:
# Observed spectrum with a gray band spanning flux + uncertainty to
# flux - uncertainty.
wl_col = dataspec_sub['wavelength']
flux_col = dataspec_sub['flux']
err_col = dataspec_sub['uncertainty']

plt.figure(figsize=(10, 6))
plt.fill_between(wl_col, flux_col+err_col, flux_col-err_col, color='gray')
plt.plot(wl_col, flux_col)
plt.xlabel("wavelength ({:latex})".format(wl_col.unit))
plt.ylabel("flux ({:latex})".format(flux_col.unit))
plt.show()

## Go with specutils

In [None]:
#write Spectrum1D object
# Wrap the weight-filtered table columns in a Spectrum1D, attaching the
# uncertainty column as a standard-deviation uncertainty.
flux_stddev = StdDevUncertainty(dataspec_sub['uncertainty'])
spec1d = Spectrum1D(flux=dataspec_sub['flux'],
                    spectral_axis=dataspec_sub['wavelength'],
                    uncertainty=flux_stddev)

For supported datasets (like final JWST data products), this will be as simple as:
```
spec1d = Spectrum1D.read('datafile.fits')
```

Implemented but not yet released: `snr_threshold`, which will allow cutting the spectrum using that function.

## Smooth to better inspect the features
### The uncertainty is not carried over

In [None]:
# Gaussian-smooth the spectrum (kernel stddev=5) and plot the smoothed flux.
spec1d_gsmooth = gaussian_smooth(spec1d, stddev=5)
smooth_wl = spec1d_gsmooth.spectral_axis
smooth_flux = spec1d_gsmooth.flux

plt.figure(figsize=(10, 6))
plt.plot(smooth_wl, smooth_flux)
plt.xlabel("wavelength ({:latex})".format(smooth_wl.unit))
plt.ylabel("flux ({:latex})".format(smooth_flux.unit))
plt.show()

## Back to the non-smoothed spectrum to find lines

Documentation says I need a continuum subtracted spectrum.

## So fit continuum first

In [None]:
# Fit a generic continuum model to the (unsmoothed) spectrum.
cont_spec1d = fit_generic_continuum(spec1d)
# Evaluate the fitted continuum on the spectrum's own spectral axis so it
# can be plotted against, subtracted from, or divided into spec1d.
cont_fit = cont_spec1d(spec1d.spectral_axis)

In [None]:
wl_axis = spec1d.spectral_axis
wl_label = "wavelength ({:latex})".format(wl_axis.unit)

# Figure 1: spectrum with the fitted continuum overplotted.
plt.figure(figsize=(10, 6))
plt.plot(wl_axis, spec1d.flux)
plt.plot(wl_axis, cont_fit)
plt.xlabel(wl_label)
plt.ylabel("flux ({:latex})".format(spec1d.flux.unit))
plt.show()

# Figure 2: uncertainty array of the same spectrum.
spec_unc = spec1d.uncertainty
plt.figure(figsize=(10, 6))
plt.plot(wl_axis, spec_unc.array)
plt.xlabel(wl_label)
plt.ylabel("uncertainty ({:latex})".format(spec_unc.unit))
plt.show()

### Creating the new continuum-subtracted spectrum
It should figure out what to do with the uncertainty

In [None]:
# Continuum-subtracted spectrum: observed spectrum minus the continuum
# evaluated on the same spectral axis.
spec1d_sub = spec1d - cont_fit
# Display the resulting Spectrum1D object.
spec1d_sub

In [None]:
sub_wl = spec1d_sub.spectral_axis
sub_wl_label = "wavelength ({:latex})".format(sub_wl.unit)

# Figure 1: continuum-subtracted flux.
plt.figure(figsize=(10, 6))
plt.plot(sub_wl, spec1d_sub.flux)
plt.xlabel(sub_wl_label)
plt.ylabel("flux ({:latex})".format(spec1d_sub.flux.unit))
plt.show()

# Figure 2: uncertainty array carried by the continuum-subtracted spectrum.
sub_unc = spec1d_sub.uncertainty
plt.figure(figsize=(10, 6))
plt.plot(sub_wl, sub_unc.array)
plt.xlabel(sub_wl_label)
plt.ylabel("uncertainty ({:latex})".format(sub_unc.unit))
plt.show()

## Now look for the lines

In [None]:
# Search the continuum-subtracted spectrum for lines with the derivative
# method, using a flux threshold of 50.
lines = find_lines_derivative(spec1d_sub, flux_threshold=50)
# Display the table of detected lines.
lines

In [None]:
# Continuum-subtracted spectrum with a red vertical marker at the center of
# every line found by find_lines_derivative.
plt.figure(figsize=[10,6])
plt.plot(spec1d_sub.spectral_axis,spec1d_sub.flux)
for line in lines:
    plt.axvline(line['line_center'].value, color='red')
plt.xlabel("wavelength ({:latex})".format(spec1d_sub.spectral_axis.unit))
# Take the y-axis unit from the spectrum that is actually plotted
# (spec1d_sub), matching the other line plots in this notebook.
plt.ylabel("flux ({:latex})".format(spec1d_sub.flux.unit))
plt.show()

This works for cases where you understand the thresholds well, but doesn't automate as well with noisy spectra.

### Works better with find_lines_threshold

In [None]:
# Search for lines again with the threshold method (noise_factor=6); this
# replaces the `lines` table produced by find_lines_derivative.
lines = find_lines_threshold(spec1d_sub, noise_factor=6)
# Display the table of detected lines.
lines

Plot lines on the spectrum.

In [None]:
# Continuum-subtracted spectrum with a red marker at each detected line.
sub_wl = spec1d_sub.spectral_axis
sub_flux = spec1d_sub.flux

plt.figure(figsize=(10, 6))
plt.plot(sub_wl, sub_flux)
for center in lines['line_center']:
    plt.axvline(center.value, color='red')
plt.xlabel("wavelength ({:latex})".format(sub_wl.unit))
plt.ylabel("flux ({:latex})".format(sub_flux.unit))
plt.show()

Zoom in to see how well it does on [OII]

In [None]:
# Same plot zoomed to 6500-6600 on the x-axis, with the individual samples
# drawn as points on top of the connecting line.
sub_wl = spec1d_sub.spectral_axis
sub_flux = spec1d_sub.flux

plt.figure(figsize=(10, 6))
plt.plot(sub_wl, sub_flux)
plt.scatter(sub_wl, sub_flux)
for center in lines['line_center']:
    plt.axvline(center.value, color='red')
plt.xlim(6500, 6600)
plt.xlabel("wavelength ({:latex})".format(sub_wl.unit))
plt.ylabel("flux ({:latex})".format(sub_flux.unit))
plt.show()

## Measure line centroids and fluxes
These also need continuum-subtracted spectra.

In [None]:
#example with just one line
# Centroid of the continuum-subtracted spectrum within the 6540-6580
# Angstrom region.
centroid(spec1d_sub, SpectralRegion(6540*u.AA, 6580*u.AA))

In [None]:
# Centroid of the line inside the 6540-6580 Angstrom window.
line_window = SpectralRegion(6540*u.AA, 6580*u.AA)
sline = centroid(spec1d_sub, line_window)

# Zoomed view: samples, a red marker at the centroid, and a zero-flux
# reference line in black.
sub_wl = spec1d_sub.spectral_axis
sub_flux = spec1d_sub.flux

plt.figure(figsize=(10, 6))
plt.plot(sub_wl, sub_flux)
plt.scatter(sub_wl, sub_flux)
plt.axvline(sline.value, color='red')
plt.axhline(0, color='black')
plt.xlim(6500, 6600)
plt.xlabel("wavelength ({:latex})".format(sub_wl.unit))
plt.ylabel("flux ({:latex})".format(sub_flux.unit))
plt.show()

In [None]:
# Line flux of the continuum-subtracted spectrum over 6540-6570 Angstrom.
# NOTE(review): this window is narrower than the 6540-6580 window used for
# the centroid above — confirm that the difference is intended.
line_flux(spec1d_sub, SpectralRegion(6540*u.AA, 6570*u.AA))  

## Fit the line with a Gaussian

In [None]:
# Display the continuum-subtracted flux array (values and unit) as a guide
# for choosing the initial Gaussian parameters below.
spec1d_sub.flux

In [None]:
# Initial guess for a single Gaussian: amplitude 3e-19 erg/s/cm^2/Angstrom,
# centered at 6554 Angstrom with a stddev of 2 Angstrom.
amplitude_guess = 3 * 1e-19 * u.erg / u.s / u.cm**2 / u.AA
g_init = models.Gaussian1D(amplitude=amplitude_guess,
                           mean=6554*u.AA,
                           stddev=2.*u.AA)

# Fit the model to the continuum-subtracted spectrum, then evaluate the
# fitted model on the spectrum's wavelength grid for plotting.
g_fit = fit_lines(spec1d_sub, g_init)
spec1d_fit = g_fit(spec1d_sub.wavelength)
g_fit

In [None]:
# Velocity width of the fitted line: (stddev / mean) * c, in km/s.
# The width and center are read from the fitted model rather than typed in
# by hand, so the value stays correct if the fit above is re-run or changed.
# NOTE(review): assumes fit_lines returned a model whose parameters carry
# units (so `.quantity` is not None) — confirm with the installed specutils.
vel = (g_fit.stddev.quantity / g_fit.mean.quantity * const.c).to(u.km / u.s)
print(vel)

In [None]:
# Continuum-subtracted spectrum (blue) with the fitted Gaussian (orange),
# zoomed to 6500-6600 on the x-axis.
plt.figure(figsize=[10,6])
plt.plot(spec1d_sub.wavelength,spec1d_sub.flux,color='b')
plt.plot(spec1d_sub.wavelength,spec1d_fit,color='darkorange')
plt.xlim(6500,6600)
plt.xlabel("wavelength ({:latex})".format(spec1d_sub.spectral_axis.unit))
# The flux label belongs on the y-axis; setting it with xlabel would
# overwrite the wavelength label and leave the y-axis unlabeled.
plt.ylabel("flux ({:latex})".format(spec1d_sub.flux.unit))
plt.show()

## Measure the equivalent width of the lines
This needs a continuum-normalized spectrum.

In [None]:
# Continuum-normalized spectrum: observed spectrum divided by the continuum
# evaluated on the same spectral axis.
spec1d_norm = spec1d / cont_fit

In [None]:
# Figure 1: continuum-normalized flux around the line, with a black
# reference line at 1 (the continuum level after normalization).
plt.figure(figsize=[10,6])
plt.plot(spec1d_norm.spectral_axis, spec1d_norm.flux)
plt.axhline(1,color='black')
plt.xlabel("wavelength ({:latex})".format(spec1d_norm.spectral_axis.unit))
plt.ylabel("flux (normalized)")
plt.xlim(6500,6600)
plt.show()

# Figure 2: uncertainty array of the normalized spectrum over the same
# range. The y-axis is labeled as uncertainty because that is what is
# plotted here, not flux.
plt.figure(figsize=[10,6])
plt.plot(spec1d_norm.spectral_axis, spec1d_norm.uncertainty.array)
plt.xlabel("wavelength ({:latex})".format(spec1d_norm.spectral_axis.unit))
plt.ylabel("uncertainty (normalized)")
plt.xlim(6500,6600)
plt.show()

In [None]:
# Equivalent width of the continuum-normalized spectrum over the
# 6540-6570 Angstrom region.
equivalent_width(spec1d_norm, regions=SpectralRegion(6540*u.AA, 6570*u.AA))

## Find the best-fitting template
It needs a list of templates.

In [None]:
# Directory (inside the extracted data bundle) holding the template files.
templatedir = './mos_spectroscopy/templates/'

In [None]:
# Scaling factor applied to the templates; presumably (1 + z) for the
# galaxy's redshift — confirm the source of 0.758.
zz = (1. + 0.758)

# Load templates 00001.dat ... 00100.dat, multiplying wavelengths by zz and
# dividing fluxes by zz, and wrap each one in a Spectrum1D.
templatelist = []
template_ids = range(1, 101)
for i in template_ids:
    template_file = "{0}{1:05d}.dat".format(templatedir, i)
    template = ascii.read(template_file)
    shifted_wave = template['col1']*zz*u.AA
    scaled_flux = template['col2']/zz*u.erg/u.s/u.AA
    temp1d = Spectrum1D(spectral_axis=shifted_wave, flux=scaled_flux)
    templatelist += [temp1d]

# Compare the observed spectrum against every template and unpack the
# three values returned by template_match.
match_result = template_comparison.template_match(spec1d, templatelist)
tempnorm, chi2, index = match_result
print(chi2, index)

In [None]:
# Display the first value returned by template_match (plotted below
# against the observed spectrum).
tempnorm

In [None]:
# Observed spectrum (blue) against the spectrum returned by template_match
# (red), limited to 6000-9000 on the x-axis.
wl_unit = spec1d_norm.spectral_axis.unit

plt.figure(figsize=(10, 6))
for spectrum, colour in ((spec1d, 'b'), (tempnorm, 'r')):
    plt.plot(spectrum.wavelength, spectrum.flux, color=colour)
plt.xlim(6000, 9000)
plt.xlabel("wavelength ({:latex})".format(wl_unit))
plt.ylabel("flux (normalized)")
plt.show()

Implemented but not yet released: automatic template *fitting* to get the redshift.

## Potential next steps:
    - measure if emission-line profile is consistent with PSF
    - measure line intensities in 2D
    - convert distances from pixels to kpc