# Bayesian AGN Decomposition Analysis for SDSS Spectra (BADASS2) 

### Remington O. Sexton 

In [None]:
# % matplotlib notebook
# % matplotlib inline

import glob
from time import clock
from os import path
import os
import shutil
from astropy.io import fits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from numpy.polynomial import legendre, hermite
from scipy import linalg, special, fftpack
import sys
import emcee
# import scipy.optimize as op
import matplotlib.gridspec as gridspec
from numpy import linspace, meshgrid
from scipy.interpolate import griddata, interp1d
from matplotlib import cm
import psutil
import time
import matplotlib.gridspec as gridspec
from scipy.integrate import simps
import datetime
from astropy.stats import mad_std
import natsort
import badass_sdss_type_1_v4_1 as badass
import matplotlib
matplotlib.rcParams['agg.path.chunksize'] = 100000

In [None]:
# =============================================================================
# User configuration for the fitting run
# =============================================================================

# ----------------------------- Input locations -------------------------------
# Directory that holds the per-object spectrum folders.
spec_dir = '/Users/rem/BADASS_SDSS_Type_1/S18_SDSS/'
# Alternative sample directory:
# spec_dir = '/Users/rem/BADASS_SDSS_Type_1/Jenna_SDSS/'

# Directory of auxiliary data files, and the template library inside it.
ppxf_dir = '/Users/rem/BADASS_SDSS_Type_1/badass_data_files/'
temp_dir = ppxf_dir + 'final_library'

# ----------------------------- Fitting parameters ----------------------------
# Fitting region (lower, upper).
# MILES Stellar Library = (3540,7409); Indo-US Library = (3460,9464)
fit_reg = (4400, 5800)
# Alternative: fit_reg = 'auto'

# Good-pixel threshold: if the fraction of good pixels is below this value,
# do NOT fit the spectrum.
good_thresh = 0.60

# Number of Monte Carlo bootstrap simulations for outflows.
mcbs_niter = 25

# ----------------------------- MCMC algorithm parameters ---------------------
# Automatic stop when the threshold below is reached.
auto_stop = False
# Percentage between checking iterations below which MCMC can be stopped
# automatically.
auto_stop_thresh = 1.0
# Burn-in: take last N fraction of iterations, in [0.0, 1.0].
burn_in = 0.1
# Check every N iterations.
write_iter = 100
# Iteration at which to start writing parameters.
write_thresh = 200
# Minimum number of iterations before checking (disabled; default 2*burn_in):
# min_iter = 1500
# Maximum number of MCMC iterations before stopping.
max_iter = 2500
# Write out the complete chain so histograms can be re-calculated.
write_chain = True

# ----------------------------- Final component fitting options ---------------
# Note: by default, all options are set to 'True' for the initial fit, so as
# to make no assumptions on the presence of outflows.
fit_feii = True      # fit FeII (option for Type 2 AGN)
fit_losvd = True     # fit LOSVD (stellar population) in final model only
fit_power = True     # fit AGN power-law continuum (option for Type 2 AGN)
fit_broad = True     # fit broad lines (option for Type 2 AGN)
fit_narrow = True    # fit narrow lines
fit_outflows = True  # fit outflows; DO NOT SET TO FALSE
tie_narrow = True    # tie all narrow components to [OIII]5007 (also ties outflow components)
# =============================================================================

# Driver loop: for each object folder, prepare the SDSS spectrum, run the
# maximum-likelihood fits to seed the sampler, run emcee, then write plots and
# result files into the object's run directory.
#
# The global options (fit_reg, fit_outflows, ...) are never overwritten here;
# per-object values live in obj_* variables so that one object's outcome
# cannot leak into the next object's fit.

# Get full list of spectrum folders; these will be the working directories
spec_loc = natsort.natsorted( glob.glob(spec_dir+'J*') )

# Only the first object is fit; use `spec_loc` itself to fit every object.
objects_to_fit = spec_loc[:1]
if not objects_to_fit:
    print('\n No spectrum folders found in %s \n' % spec_dir)

for obj_dir in objects_to_fit:
    work_dir = obj_dir+'/'
    run_dir,prev_dir = badass.setup_dirs(work_dir)
    spec_file = glob.glob(work_dir+'*.fits')[0]
    # Uncomment below line to use previously-chosen templates
    # temp_dir = obj_dir+'/'+'templates_final'

    print('\n Starting object %s...' % (obj_dir[-19:]) )

    # Determine fitting region (always starting from the configured fit_reg)
    obj_fit_reg,good_frac = badass.determine_fit_reg(spec_file,good_thresh,run_dir,fit_reg=fit_reg)
    if obj_fit_reg is None:
        print('\n Fit region too small! Moving to next object... \n')
        continue
    if good_frac < good_thresh:
        # fraction of good pixels is less than good_thresh; skip this object
        print('\n Not enough good channels above threshold! Moving onto next object... \n')
        continue
    print('          Fitting region: (%d,%d)' % (obj_fit_reg[0],obj_fit_reg[1]))
    print('          Fraction of good channels = %0.2f' % (good_frac))

    # Prepare SDSS spectrum for fitting
    (lam_gal,galaxy,templates,noise,velscale,vsyst,temp_list,
     z,ebv,npix,ntemp,temp_fft,npad) = badass.sdss_prepare(spec_file,obj_fit_reg,temp_dir,run_dir)
    print('          z = %0.4f' % z)
    print('          E(B-V) =  %0.4f' % ebv)
    print('          Velocity Scale = %0.4f (km/s/pixel)' % velscale)

    ###########################################################################################################
    # Maximum-likelihood stage.
    # NOTE: the MCMC stage below needs param_dict, gal_temp, na_feii_temp and
    # br_feii_temp from this stage, so it cannot run if this is switched off.
    obj_fit_outflows = fit_outflows
    run_maxlike = 1
    if run_maxlike==1:

        # Only do this for Hb Region
        covers_hb_region = (obj_fit_reg[0] <= 4400.) and (obj_fit_reg[1] >= 5800.)
        if (not obj_fit_outflows) and covers_hb_region:
            print('\n Running outflow tests...')
            obj_fit_outflows = badass.outflow_test(lam_gal,galaxy,noise,run_dir,velscale,mcbs_niter)
            if obj_fit_outflows:
                print('  Outflows detected: including outflow components in fit...')
            else:
                print('  Outflows not detected: disabling outflow components from fit...')

        param_dict = badass.initialize_mcmc(lam_gal,galaxy,fit_reg=obj_fit_reg,fit_type='init',
                                            fit_feii=fit_feii,fit_losvd=fit_losvd,
                                            fit_power=fit_power,fit_broad=fit_broad,
                                            fit_narrow=fit_narrow,fit_outflows=obj_fit_outflows,
                                            tie_narrow=tie_narrow)

        # By generating the galaxy and FeII templates before, instead of generating them with each iteration, we save a lot of time
        gal_temp = badass.galaxy_template(lam_gal,age=10)
        if fit_feii:
            na_feii_temp,br_feii_temp = badass.initialize_feii(lam_gal,velscale,obj_fit_reg)
        else:
            na_feii_temp,br_feii_temp = None,None

        # Perform maximum likelihood with the 'init' parameter set
        result_dict = badass.max_likelihood(param_dict,lam_gal,galaxy,noise,gal_temp,na_feii_temp,br_feii_temp,
                                       temp_list,temp_fft,npad,velscale,npix,vsyst,run_dir,monte_carlo=False,niter=0)

        # Build the 'final' parameter set
        param_dict = badass.initialize_mcmc(lam_gal,galaxy,fit_reg=obj_fit_reg,fit_type='final',
                                            fit_feii=fit_feii,fit_losvd=fit_losvd,
                                            fit_power=fit_power,fit_broad=fit_broad,
                                            fit_narrow=fit_narrow,fit_outflows=obj_fit_outflows)
        # Replace initial conditions with best fit max. likelihood parameters
        for key in result_dict:
            if key in param_dict:
                param_dict[key]['init']=result_dict[key]['res']
        print('\n     Final parameters:')
        for key in param_dict:
            print('          %s = %0.2f' % (key,param_dict[key]['init']) )

    #######################################################################################################
    print(' Performing MCMC iterations...')
    # Extract relevant stuff from dicts
    param_names  = [param_dict[key]['name'] for key in param_dict ]
    init_params  = [param_dict[key]['init'] for key in param_dict ]
    bounds       = [param_dict[key]['plim'] for key in param_dict ]

    # '%s %s' reproduces the output of the old `print a, b` statement while
    # being valid on both Python 2 and Python 3.
    for name,init in zip(param_names,init_params):
        print('%s %s' % (name,init))

    ndim, nwalkers = len(init_params), 2*len(init_params) # minimum walkers = 2*len(params)
    # Start each walker at the maximum-likelihood best fit plus a small
    # Gaussian perturbation (list + ndarray broadcasts to an ndarray).
    pos = [init_params + 1e-2*np.random.randn(ndim) for _ in range(nwalkers)]
    sampler = emcee.EnsembleSampler(nwalkers, ndim, badass.lnprob,
                                    args=(param_names,bounds,lam_gal,galaxy,noise,gal_temp,na_feii_temp,br_feii_temp,
                                          temp_list,temp_fft,npad,velscale,npix,vsyst,run_dir),
                                    threads=4)

    # Run emcee; writes progress to MCMC_chain.csv
    sampler_chain = badass.run_emcee(sampler,pos,ndim,max_iter,init_params,auto_stop,auto_stop_thresh,write_thresh,write_iter,param_names,run_dir)

    print(np.shape(sampler_chain))
    # Add chains to each parameter in param dictionary
    # (assumes sampler_chain is indexed by parameter along its first axis -- TODO confirm)
    for idx,key in enumerate(param_names):
        if key in param_dict:
            param_dict[key]['chain']=sampler_chain[idx]

    print('\n Making plots... \n')
    param_dict = badass.param_plots(param_dict,burn_in,run_dir)
    flux_dict  = badass.emline_flux_plots(burn_in,run_dir)
    lum_dict   = badass.flux2lum(flux_dict,burn_in,z,run_dir,H0=71.0,Om0=0.27)

    print('\n Saving Data... \n')
    badass.write_param(param_dict,flux_dict,lum_dict,run_dir)
    badass.write_chain(param_dict,flux_dict,lum_dict,run_dir)
    badass.plot_best_model(param_dict,lam_gal,galaxy,noise,gal_temp,na_feii_temp,br_feii_temp,
                           temp_list,temp_fft,npad,velscale,npix,vsyst,run_dir)

    print('\n Done! \n')