In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mattpy.utils import smooth
from swsnet.norm_utils import normalize_spectrum, renormalize_spectrum

# Read metadata in

In [2]:
# Load the metadata DataFrame from its pickle on disk.
# (File name suggests this is the output of a "step 0" stage -- TODO confirm.)
# NOTE: unpickling can execute arbitrary code; only load files you trust.
meta = pd.read_pickle('../metadata_step0.pkl')

In [3]:
# Number of metadata rows; the progress messages further down print it.
nrows = len(meta)
meta

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type,data_ok
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400714_irs.pkl,PN,True
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400807_irs.pkl,PN,True
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra/02400910_irs.pkl,PN,True
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra/02401183_irs.pkl,PN,True
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra/02401579_irs.pkl,,True
5,{gamma} Dra,2402105,269.151667,51.488972,1.NO,1,NO,,,,spectra/02402105_irs.pkl,,True
6,AFGL 2591,2800433,307.352750,40.188583,5.SA,5,SA,,,,spectra/02800433_irs.pkl,YSO,True
7,NGC 6543,2800908,269.639167,66.633194,4.PN,4,PN,,,,spectra/02800908_irs.pkl,PN,True
8,P Cyg,3201129,304.446667,38.032861,2.E,2,E,,,,spectra/03201129_irs.pkl,BlueSG*,True
9,NGC 6543,3201202,269.639167,66.633194,4.PN,4,PN,,,,spectra/03201202_irs.pkl,PN,True


# Determine normalization parameters and plot each spectrum (optional step)

In [4]:
# Set to True to recompute the normalization parameters for every spectrum;
# when False the cell that calls norm_and_plot is skipped.
determine_parameters = False

def norm_and_plot(meta, plot=True):
    """Compute the normalization parameters of every spectrum listed in `meta`.

    Parameters
    ----------
    meta : pandas.DataFrame
        Must provide the columns 'file_path' and 'full_classifier'.
    plot : bool, optional
        Forwarded to `normalize_spectrum` as its `plot` argument.
        Defaults to True, which is what this function always passed before.

    Returns
    -------
    list of list
        One `[filename, spec_min, spec_max, norm_factor]` entry per row of
        `meta`, in row order.
    """
    param_list = []

    # Use the frame's own length rather than a notebook-level counter, which
    # a later cell rebinds to a different value.
    total = len(meta)

    # Walk both columns in lockstep so rows are paired by position. Looking up
    # meta['full_classifier'][i] with a running counter is a *label* lookup and
    # picks the wrong row (or raises KeyError) unless the index is 0..n-1.
    pairs = zip(meta['file_path'], meta['full_classifier'])

    for index, (filename, classifier) in enumerate(pairs):
        if index % 200 == 0:
            print(index, ' / ', total)

        # Perform shift/renormalization.
        parameters = normalize_spectrum(filename, classifier,
                                        plot=plot, verbose=False)

        # Unpacking enforces that exactly three values came back.
        spec_min, spec_max, norm_factor = parameters
        param_list.append([filename, spec_min, spec_max, norm_factor])

    return param_list

In [5]:
# Recomputing the parameters is opt-in: nothing happens unless the
# `determine_parameters` flag is switched on.
if determine_parameters:
    header = 'iso_filename, spec_min, spec_max, norm_factor (shift first, then norm!!)'
    par_list = norm_and_plot(meta)

    # One comma-separated row per spectrum; fmt='%s' writes each field as text.
    np.savetxt('step1_norm_params.txt', par_list, fmt='%s', delimiter=',',
               header=header)

### Confirm that the saved parameters can be read back in.

In [6]:
# Read the saved parameter table back as strings: one row per spectrum with
# the columns iso_filename, spec_min, spec_max, norm_factor.
# ndmin=2 keeps the array two-dimensional even when the file holds a single
# row; otherwise loadtxt returns a 1-D array, so shape[0] would be the column
# count and norm_params.T[0] a single string rather than a column of paths.
norm_params = np.loadtxt('../step1_norm/step1_norm_params.txt', delimiter=',',
                         dtype='str', ndmin=2)

# NOTE: this rebinds `nrows` (previously the metadata row count) to the number
# of parameter rows read from disk.
nrows = norm_params.shape[0]

In [7]:
# First column of the parameter table: the path of each spectrum file.
spectra_paths = norm_params.transpose()[0]
spectra_paths

array(['spectra/02400714_irs.pkl', 'spectra/02400807_irs.pkl',
       'spectra/02400910_irs.pkl', ..., 'spectra/87700401_irs.pkl',
       'spectra/87700716_irs.pkl', 'spectra/87901801_irs.pkl'],
      dtype='<U24')

In [8]:
# Inspect the first row: file path followed by spec_min, spec_max, norm_factor
# (all held as strings because the table was loaded with dtype='str').
norm_params[0]

array(['spectra/02400714_irs.pkl', '-3.9378679', '285.0948',
       '164.7205999051585'], dtype='<U24')

# Perform normalization

In [10]:
# Paths of the renormalized pickles, collected in the same order as
# `spectra_paths`.
file_path_list = []

for index, file_path in enumerate(spectra_paths):
    # Report progress once every 100 spectra.
    if not index % 100:
        print(index, ' / ', nrows)

    # Row `index` of the parameter table belongs to this spectrum.
    norm_factors = norm_params[index]

    # Renormalize; the helper returns the path it saved the result to.
    save_path = renormalize_spectrum(file_path, norm_factors, verbose=False)
    file_path_list.append(save_path)

0  /  1235
100  /  1235
200  /  1235
300  /  1235
400  /  1235
500  /  1235
600  /  1235
700  /  1235
800  /  1235
900  /  1235
1000  /  1235
1100  /  1235
1200  /  1235


In [11]:
def update_dataframe(meta, file_path_list, save_path='../metadata_normalized.pkl'):
    """Return a copy of `meta` whose 'file_path' column holds the new paths.

    The copy is also pickled to `save_path`. `meta` itself is not modified.

    Parameters
    ----------
    meta : pandas.DataFrame
        Metadata table with a 'file_path' column.
    file_path_list : iterable of str
        New file paths, one per row of `meta`, in row order.
    save_path : str, optional
        Where the updated table is pickled. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The updated copy.

    Raises
    ------
    SystemExit
        If the TDT prefixes of the old and new paths differ at any position,
        or if the two sequences have different lengths.
    """

    def extract_tdt(path):
        # 'spectra/02400714_irs.pkl' -> '02400714': the file name up to the
        # first underscore.
        return path.split('/')[-1].split('_')[0]

    def check_tdts(old_file_paths, new_file_paths):
        old_list = [extract_tdt(x) for x in old_file_paths]
        new_list = [extract_tdt(x) for x in new_file_paths]

        if old_list != new_list:
            raise SystemExit("TDTs don't match.")

    # Make a copy of the dataframe.
    new_meta = meta.copy()

    # Isolate file_path from meta dataframe.
    old_file_paths = meta['file_path']

    # Materialize once as a plain list: an iterator would be exhausted by the
    # check below, and a Series would be realigned by label on assignment even
    # though the check compares by position.
    new_file_paths = list(file_path_list)

    # Compare them by TDT as a sanity check.
    check_tdts(old_file_paths, new_file_paths)

    # Update paths.
    new_meta['file_path'] = new_file_paths

    # Save to disk.
    new_meta.to_pickle(save_path)
    print('Saved: ', save_path)

    return new_meta

In [12]:
# Build the updated metadata table (a copy of `meta` with the new file paths)
# and pickle it to disk.
new_meta = update_dataframe(meta, file_path_list)

Saved:  ../metadata_normalized.pkl


In [13]:
# The original table still holds the original file paths.
meta.head()

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type,data_ok
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400714_irs.pkl,PN,True
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400807_irs.pkl,PN,True
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra/02400910_irs.pkl,PN,True
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra/02401183_irs.pkl,PN,True
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra/02401579_irs.pkl,,True


In [14]:
# The updated copy: only the file_path column differs from `meta`.
new_meta.head()

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type,data_ok
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra_normalized/02400714_irs_renorm.pkl,PN,True
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra_normalized/02400807_irs_renorm.pkl,PN,True
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra_normalized/02400910_irs_renorm.pkl,PN,True
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra_normalized/02401183_irs_renorm.pkl,PN,True
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra_normalized/02401579_irs_renorm.pkl,,True
