# SED Processing of full CIGALE SED Decomposition fits
This script is used to explore and process some of the outputs from the FITS files generated by the CIGALE SED decomposition. We intend to use it to explore the AGN contribution with a more robust set of tools, to see how AGN contamination affects UVJ and other diagnostics.

This script extends the previous script with a full breakdown of the entire ZFOURGE catalogue decomposed SEDS so that a full analysis can be conducted.

In [1]:
# Import all required packages
import matplotlib.pyplot as plt
import astropy.units as u
import numpy as np
import pandas as pd
import os
from astLib import astSED
import astropy.io.fits as fits
from carf import * # custom module for functions relating to the project
import matplotlib.path as mpath
import seaborn as sns

# refresh

# So that we can change the helper functions without reloading the kernel
%load_ext autoreload
%autoreload 2

This script is intended to make use of extra FITS files provided by Ollie to investigate the UVJ diagram.

In particular this is also intended to be used as a stepping stone for the next half of the project.
We will look at quantifying the UVJ points throughout to see how they change with varying parameters.
Additionally, we may also explore the intermediate-type AGN and see how that works.

We will try to find a way of quantifying the movement with respect to a propagated error, and see if we can
make a tool that can be used.


In [2]:
# Build the paths to the three rest-frame filter curves (U, V, J)
pb_U_path, pb_V_path, pb_J_path = (
    os.path.join('datasets', 'Filters', filter_file)
    for filter_file in (
        'Generic_Johnson.U.dat',
        'Generic_Johnson.V.dat',
        '2MASS_2MASS.J.dat',
    )
)

# Load each curve as an astSED passband, leaving the transmission un-normalised
pb_U, pb_V, pb_J = (
    astSED.Passband(filter_path, normalise=False)
    for filter_path in (pb_U_path, pb_V_path, pb_J_path)
)

In [9]:
# cosmos_decomposed_df
# uds_decomposed_df
# cdfs_decomposed_df

# # Get all SEDs
# def get_all_seds(path):
#     all_seds = []
#     for file in os.listdir(path):

# What i'll need to do for this is to go through my decomposed dataframes and get the SEDs for each galaxy
# I'll also need to cross check this against the full ZFOURGE files and get the redshift for each
# to ensure they are correctly redshifted to the restframe

In [3]:
# The per-field id tables (full_CDFS_ids.csv, full_COSMOS_ids.csv,
# full_UDS_ids.csv) carry the redshift of every galaxy, so load all three
# up front: CDFS first, then COSMOS, then UDS.
full_cdfs_ids, full_cosmos_ids, full_uds_ids = (
    pd.read_csv(csv_path)
    for csv_path in (
        'datasets/zfourge/full_CDFS_ids.csv',
        'datasets/zfourge/full_COSMOS_ids.csv',
        'datasets/zfourge/full_UDS_ids.csv',
    )
)


In [11]:

# Using dataframes is wildly inefficient.
# A more robust approach would be to use only the 2 columns we are interested in, specified in the function call,
# and then read these in as numpy arrays


def get_n_seds(df, n, field, restframe=False, all=False):
    """Load CIGALE best-model SEDs as DataFrames and plot them.

    For every selected galaxy the file
    ``datasets/full_zfourge_decomposed/<field>_best_models_fits/<id>_best_model.fits``
    is read, its ``wavelength`` column is scaled by 10 (to Angstroms) and
    ``Snu`` / ``Flambda`` columns are derived from ``Fnu``.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue with an ``id`` column (used to build each FITS filename)
        and a ``zpk`` column (redshift).
    n : int
        Number of rows drawn at random with ``df.sample``. Ignored when
        ``all`` is truthy.
    field : str
        Field name such as ``'CDFS'``. It is lower-cased to select the data
        directory and used as the prefix of the returned names.
    restframe : bool, optional
        If True, divide wavelengths by ``1 + zpk`` and rescale ``Snu`` so
        that ``nu * Snu`` is unchanged; a ``nuSnu`` column is added too.
    all : bool, optional
        If truthy, use every row of ``df`` instead of a random sample.
        (The name shadows the builtin; it is kept so callers keep working.)

    Returns
    -------
    df_list : list of pandas.DataFrame
        One SED table per galaxy, in catalogue order.
    names : pandas.Series
        ``'<field>_<id>'`` strings, one per galaxy.
    redshifts : list of float
        The ``zpk`` value of each galaxy.

    Raises
    ------
    FileNotFoundError
        If a galaxy's best-model FITS file does not exist.
    """
    # Conversion factors, written exactly as before so results are unchanged.
    speed_of_light = 3 * 10**18   # Angstrom / s
    jy_to_flambda = 3 * 10**-5    # Jy * Angstrom^2 -> erg/s/cm^2/Angstrom

    selected_galaxies = df if all else df.sample(n)
    selected_galaxies = selected_galaxies.reset_index(drop=True)

    gal_name = selected_galaxies['id'].astype(str)
    names = field + '_' + gal_name
    gal_redshift = selected_galaxies['zpk'].astype(float)

    # Build the directory from components so it resolves on every OS; the
    # previous hard-coded backslashes only worked on Windows.
    model_dir = os.path.join(
        'datasets',
        'full_zfourge_decomposed',
        str(field).lower() + '_best_models_fits',
    )

    df_list = []
    redshifts = []
    for galaxy_id, redshift in zip(gal_name, gal_redshift.to_numpy()):
        galaxy_path = os.path.join(model_dir, galaxy_id + '_best_model.fits')
        with fits.open(galaxy_path) as hdul:
            # np.array copies the table, so it outlives the open file.
            table = np.array(hdul[1].data)

        # Swap the byte order before handing the table to pandas (presumably
        # the FITS data is big-endian on a little-endian host, as the
        # original unconditional swap assumed). ndarray.newbyteorder() was
        # removed in NumPy 2.0, so re-view with the swapped dtype instead.
        sed = pd.DataFrame(table.byteswap().view(table.dtype.newbyteorder()))

        # Convert to angstroms
        sed['wavelength'] = sed['wavelength'] * 10
        # milliJanskys to Janskys
        sed['Snu'] = sed['Fnu'] * 10**-3

        if restframe:
            # Form nu*Snu at the observed wavelength, shift the wavelength
            # grid to the rest frame, then divide by the new frequency.
            freq = speed_of_light / sed['wavelength']  # in Hz
            sed['nuSnu'] = sed['Snu'] * freq
            sed['wavelength'] = sed['wavelength'] / (1 + redshift)
            freq = speed_of_light / sed['wavelength']  # in Hz
            sed['Snu'] = sed['nuSnu'] / freq

        # S_nu to F_lambda (wavelength in angstroms)
        sed['Flambda'] = sed['Snu'] * jy_to_flambda / (sed['wavelength']**2)

        # Duplicate the key columns under descriptive names
        sed['lambda (Angstroms)'] = sed['wavelength']
        sed['Total Flux (erg/s/cm^2/Angstrom)'] = sed['Flambda']

        redshifts.append(redshift)
        df_list.append(sed)
        plt.loglog(sed['wavelength'], sed['Flambda'])

    # Only decorate and show the figure when something was drawn. No legend
    # is requested: the curves carry no labels, so it only raised a warning.
    if df_list:
        plt.xlabel('Wavelength (Angstroms)')
        plt.ylabel('Flux (Flambda)')  # the curves are Flambda, not Fnu
        plt.ylim(1e-30, 1e-2)
        plt.title('SED of galaxies')
        plt.show()

    print(len(df_list))

    return df_list, names, redshifts


In [4]:
# Display the CDFS id/redshift table as a quick check that it loaded
full_cdfs_ids

Unnamed: 0,id,zpk,uv,vj
0,5880,1.8316,0.843975,0.801469
1,5886,1.1001,0.580949,0.407415
2,5928,0.7154,1.482554,1.256439
3,5972,0.9668,0.788785,1.086186
4,6034,0.3502,0.898603,0.438185
...,...,...,...,...
7701,30807,1.1353,0.488649,0.291412
7702,30810,1.4991,0.710090,0.390829
7703,30861,0.8414,0.612418,0.004617
7704,30873,1.7169,0.484812,0.598343


In [50]:

# Read the best-model table of a single galaxy (CDFS 5886) for inspection
gal_field = 'CDFS'
gal_name = '5886'
# Join the path from components so it resolves on every OS; hard-coded
# backslashes only work on Windows.
path = os.path.join(
    'datasets',
    'full_zfourge_decomposed',
    str(gal_field).lower() + '_best_models_fits',
)
name = str(gal_name) + '_best_model.fits'

galaxy_path = os.path.join(path, name)
with fits.open(galaxy_path) as data:
    # Convert the table once; np.array copies it so it outlives the file.
    np_arr = np.array(data[1].data)
    # byteswap() returns a swapped copy, so np_arr itself is left untouched.
    # ndarray.newbyteorder() was removed in NumPy 2.0, so re-view the copy
    # with the swapped dtype instead.
    df = pd.DataFrame(np_arr.byteswap().view(np_arr.dtype.newbyteorder()))

In [37]:
# Extend the structured array with three extra float64 fields
# ('Snu', 'nuSnu', 'Flambda') that will hold the derived flux quantities.
new_dtype_descr = [
    *np_arr.dtype.descr,
    ('Snu', np.float64),
    ('nuSnu', np.float64),
    ('Flambda', np.float64),
]
new_dtype = np.dtype(new_dtype_descr)

# Allocate a zero-filled array of the same shape with the wider dtype
new_np_arr = np.zeros(shape=np_arr.shape, dtype=new_dtype)

# Carry the original columns across field by field; the three new fields
# stay at zero until they are filled in.
for name in np_arr.dtype.names:
    new_np_arr[name] = np_arr[name]

# Drop the reference to the narrower array so its memory can be reclaimed
del np_arr

# NOTE: the original cell ended with an unfinished remark:
# "Importantly we only want to read in information from the ..."

In [38]:
# Display the extended structured array to inspect its contents
new_np_arr

array([(2.10100000e+00, 0.        , 0.00000000e+00, 0., 0., 0., 0., 0., 0., 0.00000000e+00, 0.00000000e+00, 0., 0.,  0.,  0.,         0.        ,  0.00000000e+00, 0., 0., 0., 0., -0., 0., 0., 0.),
       (2.41227076e+00, 0.        , 0.00000000e+00, 0., 0., 0., 0., 0., 0., 0.00000000e+00, 0.00000000e+00, 0., 0.,  0.,  0.,         0.        ,  0.00000000e+00, 0., 0., 0., 0., -0., 0., 0., 0.),
       (2.76965741e+00, 0.        , 0.00000000e+00, 0., 0., 0., 0., 0., 0., 0.00000000e+00, 0.00000000e+00, 0., 0.,  0.,  0.,         0.        ,  0.00000000e+00, 0., 0., 0., 0., -0., 0., 0., 0.),
       ...,
       (2.06265243e+09, 0.00024857, 1.22529350e+20, 0., 0., 0., 0., 0., 0., 6.88335735e+17, 1.21841015e+20, 0., 0., -0., -0., -17893658.48723127, -3.16732286e+09, 0., 0., 0., 0.,  0., 0., 0., 0.),
       (2.08173792e+09, 0.00024877, 1.20387262e+20, 0., 0., 0., 0., 0., 0., 6.76302082e+17, 1.19710960e+20, 0., 0., -0., -0., -17322020.51206618, -3.06613829e+09, 0., 0., 0., 0.,  0., 0., 0., 0.),
   

In [43]:
# A More robust implementation


def get_n_seds_np(df, n, field, restframe=False, all=False):
    """Load CIGALE best-model SEDs as NumPy structured arrays.

    For every selected galaxy the file
    ``datasets/full_zfourge_decomposed/<field>_best_models_fits/<id>_best_model.fits``
    is read into a structured array that keeps all original table fields and
    adds three float64 fields: ``Snu``, ``nuSnu`` and ``Flambda``. The
    ``wavelength`` field is scaled by 10 (to Angstroms).

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue with an ``id`` column (used to build each FITS filename)
        and a ``zpk`` column (redshift).
    n : int
        Number of rows drawn at random with ``df.sample``. Ignored when
        ``all`` is truthy.
    field : str
        Field name such as ``'CDFS'``. It is lower-cased to select the data
        directory and used as the prefix of the returned names.
    restframe : bool, optional
        If True, divide wavelengths by ``1 + zpk`` and rescale ``Snu`` so
        that ``nu * Snu`` is unchanged. If False, ``nuSnu`` stays zero.
    all : bool, optional
        If truthy, use every row of ``df`` instead of a random sample.
        (The name shadows the builtin; it is kept so callers keep working.)

    Returns
    -------
    df_list : list of numpy.ndarray
        One structured SED array per galaxy, in catalogue order.
    names : pandas.Series
        ``'<field>_<id>'`` strings, one per galaxy.
    redshifts : list of float
        The ``zpk`` value of each galaxy.

    Raises
    ------
    FileNotFoundError
        If a galaxy's best-model FITS file does not exist.
    """
    # Conversion factors, written exactly as before so results are unchanged.
    speed_of_light = 3 * 10**18   # Angstrom / s
    jy_to_flambda = 3 * 10**-5    # Jy * Angstrom^2 -> erg/s/cm^2/Angstrom
    extra_fields = [
        ('Snu', np.float64),
        ('nuSnu', np.float64),
        ('Flambda', np.float64),
    ]

    selected_galaxies = df if all else df.sample(n)
    selected_galaxies = selected_galaxies.reset_index(drop=True)

    gal_name = selected_galaxies['id'].astype(str)
    names = field + '_' + gal_name
    gal_redshift = selected_galaxies['zpk'].astype(float)

    # Build the directory from components so it resolves on every OS; the
    # previous hard-coded backslashes only worked on Windows.
    model_dir = os.path.join(
        'datasets',
        'full_zfourge_decomposed',
        str(field).lower() + '_best_models_fits',
    )

    df_list = []
    redshifts = []
    for galaxy_id, redshift in zip(gal_name, gal_redshift.to_numpy()):
        galaxy_path = os.path.join(model_dir, galaxy_id + '_best_model.fits')
        with fits.open(galaxy_path) as hdul:
            # np.array copies the table, so it outlives the open file.
            raw = np.array(hdul[1].data)

        # Widen the dtype with the derived-flux fields and copy the original
        # columns across; the new fields start at zero.
        sed = np.zeros(raw.shape, dtype=np.dtype(raw.dtype.descr + extra_fields))
        for field_name in raw.dtype.names:
            sed[field_name] = raw[field_name]
        del raw  # release the narrower copy before loading the next file

        # Convert to angstroms
        sed['wavelength'] = sed['wavelength'] * 10
        # milliJanskys to Janskys
        sed['Snu'] = sed['Fnu'] * 10**-3

        if restframe:
            # Form nu*Snu at the observed wavelength, shift the wavelength
            # grid to the rest frame, then divide by the new frequency.
            freq = speed_of_light / sed['wavelength']  # in Hz
            sed['nuSnu'] = sed['Snu'] * freq
            sed['wavelength'] = sed['wavelength'] / (1 + redshift)
            freq = speed_of_light / sed['wavelength']  # in Hz
            sed['Snu'] = sed['nuSnu'] / freq

        # S_nu to F_lambda (wavelength in angstroms)
        sed['Flambda'] = sed['Snu'] * jy_to_flambda / (sed['wavelength']**2)

        redshifts.append(redshift)
        df_list.append(sed)

    print(len(df_list))

    return df_list, names, redshifts


In [51]:
# Run the NumPy-based loader over every galaxy of each field, rest-framed.
# With all=True the function uses the whole table, so n=500 has no effect.

# CDFS
cdfs_seds, cdfs_names, cdfs_redshifts = get_n_seds_np(
    full_cdfs_ids, n=500, field='CDFS', restframe=True, all=True
)

# COSMOS
cosmos_seds, cosmos_names, cosmos_redshifts = get_n_seds_np(
    full_cosmos_ids, n=500, field='COSMOS', restframe=True, all=True
)

# UDS
uds_seds, uds_names, uds_redshifts = get_n_seds_np(
    full_uds_ids, n=500, field='UDS', restframe=True, all=True
)


7706
7790
6788


In [53]:
# One flat list holding every SED, ordered CDFS -> COSMOS -> UDS
master_seds = [*cdfs_seds, *cosmos_seds, *uds_seds]

In [55]:
# Total number of SEDs gathered across the three fields
len(master_seds)

22284

In [64]:
# Start the combined name list from the CDFS names. Plain assignment binds
# the same Series object; no copy is made.
all_names = cdfs_names
all_names

0        CDFS_5880
1        CDFS_5886
2        CDFS_5928
3        CDFS_5972
4        CDFS_6034
           ...    
7701    CDFS_30807
7702    CDFS_30810
7703    CDFS_30861
7704    CDFS_30873
7705    CDFS_30906
Name: id, Length: 7706, dtype: object

In [66]:
# Combine the names of all three fields, ordered CDFS -> COSMOS -> UDS.
# Series.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and likewise keeps each Series' original index labels.
# Building from cdfs_names (not all_names) keeps the cell safe to re-run.
all_names = pd.concat([cdfs_names, cosmos_names, uds_names])

len(all_names)

22284

In [69]:
# Do the same thing for redshifts, ordered CDFS -> COSMOS -> UDS.
# Build a new list instead of aliasing cdfs_redshifts: extending the alias
# silently appended the COSMOS and UDS values to cdfs_redshifts as well,
# and re-running the cell kept duplicating entries.
all_redshifts = [*cdfs_redshifts, *cosmos_redshifts, *uds_redshifts]

len(all_redshifts)


22284

In [74]:
# Now we can do some more analysis.
# Start from an empty table with one column each for ID, UV and VJ.
export_df = pd.DataFrame(columns=['ID', 'UV', 'VJ'])

