In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mattpy.utils import smooth

In [2]:
def save_shift_figure(plot_dir, classifier, iso_filename, wave, flux,
                      smooth_flux, smooth_flux_shift, verbose=True):
    """Saves a .PDF figure of the raw spectrum and its smoothed version.

    The figure is written to ``plot_dir + iso_filename + '_A.pdf'``. The
    pieces are concatenated as-is, so `plot_dir` must already end with a
    path separator.

    Parameters
    ----------
    plot_dir : str
        Output directory prefix.
    classifier : str
        Label appended to the figure title.
    iso_filename : str
        Base name used in the title and in the output file name.
    wave, flux : array-like
        x and y values of the raw spectrum.
    smooth_flux : array-like
        Smoothed flux, drawn on top of the raw spectrum.
    smooth_flux_shift : array-like
        Smoothed and shifted flux. Only its NaN-ignoring maximum is used,
        to position a dashed horizontal reference line.
    verbose : bool, optional
        If True, print the path of the saved figure.

    Returns
    -------
    None
    """
    # Use a dedicated figure instead of the implicit "current" pyplot figure,
    # so an already-open figure is neither drawn on nor closed by this call.
    fig, ax = plt.subplots()
    try:
        # Plot the raw spectrum and smoothed spectrum.
        ax.plot(wave, flux)
        ax.plot(wave, smooth_flux)
        ax.axhline(y=0, color='k', ls='--', lw=1)
        ax.axhline(y=np.nanmax(smooth_flux_shift), ls='--', lw=1,
                   zorder=-10, color='k')

        # Save to disk.
        ax.set_title(iso_filename + ' - classifier: ' + classifier)
        savepath = plot_dir + iso_filename + '_A.pdf'
        fig.savefig(savepath, format='pdf', bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails; otherwise
        # every failure in a long batch leaves a figure open.
        plt.close(fig)

    if verbose:
        print('Saved: ', savepath)


def save_renorm_figure(plot_dir, classifier, iso_filename, renorm_wave,
                       renorm_flux_shift, smooth_flux_shift,
                       verbose=True):
    """Saves a .PDF figure of the renormalized spectrum.

    The figure is written to ``plot_dir + iso_filename + '_B.pdf'``. The
    pieces are concatenated as-is, so `plot_dir` must already end with a
    path separator.

    Parameters
    ----------
    plot_dir : str
        Output directory prefix.
    classifier : str
        Label appended to the figure title.
    iso_filename : str
        Base name used in the title and in the output file name.
    renorm_wave : array-like
        x values shared by both plotted curves.
    renorm_flux_shift : array-like
        First curve drawn against `renorm_wave`.
    smooth_flux_shift : array-like
        Second curve drawn against `renorm_wave`.
    verbose : bool, optional
        If True, print the path of the saved figure.

    Returns
    -------
    None
    """
    # Use a dedicated figure instead of the implicit "current" pyplot figure,
    # so an already-open figure is neither drawn on nor closed by this call.
    fig, ax = plt.subplots()
    try:
        # Plot the renormalized/shifted spectrum.
        ax.plot(renorm_wave, renorm_flux_shift)
        ax.plot(renorm_wave, smooth_flux_shift)
        # Dashed red guides at y=0 and y=1.
        ax.axhline(y=0, color='red', ls='--', lw=1)
        ax.axhline(y=1, ls='--', lw=1, zorder=-10, color='red')

        # Save to disk.
        ax.set_title(iso_filename + ' - classifier: ' + classifier)
        savepath = plot_dir + iso_filename + '_B.pdf'
        fig.savefig(savepath, format='pdf', bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails; otherwise
        # every failure in a long batch leaves a figure open.
        plt.close(fig)

    if verbose:
        print('Saved: ', savepath)

In [3]:
def read_spectrum(file_path):
    """Loads a pickled spectrum table and returns three of its columns.

    Parameters
    ----------
    file_path : str
        Path handed directly to ``pd.read_pickle``.

    Returns
    -------
    tuple
        ``(wave, flux, fluxerr)``, taken from the ``'wavelength'``,
        ``'flux'`` and ``'uncertainty'`` entries of the loaded object.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    table = pd.read_pickle(file_path)

    # The flux error comes straight from the 'uncertainty' entry.
    column_names = ('wavelength', 'flux', 'uncertainty')
    return tuple(table[name] for name in column_names)


def smooth_spectrum(flux, **kwargs):
    """Smooths a flux series and derives its shift/normalization numbers.

    Parameters
    ----------
    flux : object exposing ``.values``
        Flux samples (the caller in this notebook passes a pandas Series).
    **kwargs
        Forwarded unchanged to both ``smooth`` calls.

    Returns
    -------
    tuple
        ``(spec_min, spec_max, smooth_flux, smooth_flux_shift, norm_factor)``:
        the NaN-ignoring minimum and maximum of `flux`, the smoothed raw
        flux, the smoothed flux after subtracting the minimum, and the
        NaN-ignoring maximum of that shifted, smoothed curve.
    """
    # Extremes of the unsmoothed data, ignoring NaNs.
    lowest, highest = np.nanmin(flux), np.nanmax(flux)

    samples = flux.values

    # Smoothed curve at the original flux level.
    smoothed = smooth(samples, **kwargs)

    # Smoothed curve after moving the raw minimum down to zero.
    smoothed_zeroed = smooth(samples - lowest, **kwargs)

    # Dividing by this value puts the top of the smoothed curve at 1.0.
    scale = np.nanmax(smoothed_zeroed)

    return lowest, highest, smoothed, smoothed_zeroed, scale


def normalize_spectrum(file_path, classifier, plot=True, verbose=True,
                       data_root='../../', plot_dir='plots/', window_len=40):
    """Computes the shift/normalization parameters of one ISO spectrum.

    The spectrum is read from ``data_root + file_path`` and smoothed. The
    renormalized flux, ``(flux - spec_min) / norm_factor``, is only built
    for the diagnostic figures; it is not returned.

    Parameters
    ----------
    file_path : str
        Path of the pickled spectrum, relative to `data_root`. The part
        after the last ``'/'`` and before ``'.pkl'`` names the figures.
    classifier : str
        Label shown in the figure titles.
    plot : bool, optional
        If True, save the two diagnostic PDF figures into `plot_dir`.
    verbose : bool, optional
        Forwarded to the figure-saving helpers, which print each saved path
        when it is True.
    data_root : str, optional
        Prefix prepended to `file_path` before reading.
    plot_dir : str, optional
        Output directory for the figures. It must end with a path separator
        because the helpers concatenate it with the file name. It is created
        if it does not exist.
    window_len : int, optional
        Passed to ``smooth_spectrum`` as the ``window_len`` keyword.

    Returns
    -------
    tuple
        ``(spec_min, spec_max, norm_factor)`` as returned by
        ``smooth_spectrum``.
    """
    # Local import: the notebook's import cell does not provide `os`.
    import os

    # The uncertainty column is not needed for the normalization.
    wave, flux, _ = read_spectrum(data_root + file_path)

    # Shift spectrum and get normalization factors.
    spec_min, spec_max, smooth_flux, smooth_flux_shift, norm_factor = \
        smooth_spectrum(flux, window_len=window_len)

    if plot:
        # Without this, savefig fails when the output directory is missing.
        if plot_dir:
            os.makedirs(plot_dir, exist_ok=True)

        # Save file name.
        iso_filename = file_path.split('/')[-1].split('.pkl')[0]

        # Final renormalized flux: shift first, then scale.
        renorm_flux_shift = (flux - spec_min) / norm_factor

        # Save a figure showing the initial smooth/shift.
        save_shift_figure(plot_dir, classifier, iso_filename,
                          wave, flux, smooth_flux, smooth_flux_shift,
                          verbose=verbose)

        # Save a figure of the final renormalized spectrum.
        save_renorm_figure(plot_dir, classifier, iso_filename,
                           wave, renorm_flux_shift,
                           smooth_flux_shift / norm_factor,
                           verbose=verbose)

    return spec_min, spec_max, norm_factor

In [4]:
# Load the metadata table; its 'file_path' and 'full_classifier' columns are
# used by the cells below. Presumably written by step 0 of this pipeline.
meta = pd.read_pickle(
    '../metadata_step0.pkl',
)

In [5]:
# Number of rows in the metadata table (one loop iteration per row below).
nrows = len(meta)

# Preview the first rows only, rather than rendering the whole table as
# cell output.
meta.head(10)

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400714_irs.pkl,PN
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400807_irs.pkl,PN
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra/02400910_irs.pkl,PN
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra/02401183_irs.pkl,PN
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra/02401579_irs.pkl,
5,{gamma} Dra,2402105,269.151667,51.488972,1.NO,1,NO,,,,spectra/02402105_irs.pkl,
6,AFGL 2591,2800433,307.352750,40.188583,5.SA,5,SA,,,,spectra/02800433_irs.pkl,YSO
7,NGC 6543,2800908,269.639167,66.633194,4.PN,4,PN,,,,spectra/02800908_irs.pkl,PN
8,P Cyg,3201129,304.446667,38.032861,2.E,2,E,,,,spectra/03201129_irs.pkl,BlueSG*
9,NGC 6543,3201202,269.639167,66.633194,4.PN,4,PN,,,,spectra/03201202_irs.pkl,PN


In [6]:
def norm_and_plot(meta):
    """Normalizes every spectrum listed in `meta` and collects the results.

    Parameters
    ----------
    meta : pandas.DataFrame
        Table with a ``'file_path'`` and a ``'full_classifier'`` column; the
        two are read row by row, in order.

    Returns
    -------
    list of list
        One ``[file_path, spec_min, spec_max, norm_factor]`` entry per row,
        in the row order of `meta`.
    """
    # Use the size of the table actually passed in, not a notebook-level
    # global that may be stale or undefined.
    total = len(meta)
    param_list = []

    # Pair the columns positionally: looking the classifier up by label with
    # a positional counter breaks when the index is not 0..n-1.
    rows = zip(meta['file_path'], meta['full_classifier'])

    for position, (filename, classifier) in enumerate(rows):
        # Progress report every 200 spectra.
        if position % 200 == 0:
            print(position, ' / ', total)

        # Perform shift/renormalization
        parameters = normalize_spectrum(filename, classifier,
                                        plot=True, verbose=False)

        # Save parameters to a list
        param_list.append([filename, *parameters])

    return param_list

In [7]:
# Run the normalization over every metadata row; each entry of the result is
# [file_path, spec_min, spec_max, norm_factor].
par_list = norm_and_plot(meta=meta)

0  /  1235
200  /  1235
400  /  1235
600  /  1235
800  /  1235
1000  /  1235
1200  /  1235


In [8]:
# Write one comma-separated line per entry of par_list, preceded by a header
# line; every value is formatted with '%s'.
np.savetxt(
    'step1_norm_params.txt',
    par_list,
    fmt='%s',
    delimiter=',',
    header='iso_filename, spec_min, spec_max, norm_factor (shift first, then norm!!)',
)