In [1]:
%matplotlib inline

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [24]:
# Load the step-1 metadata DataFrame; its 'file_path' column is what gets
# replaced (on a copy) once the spectra have been renormalized.
meta = pd.read_pickle('step1_sort/metadata_step1_sorted.pkl')

In [82]:
# Read the normalization table as strings (dtype='str'): the first column holds
# file paths, and the numeric columns are converted with float() where used.
norm_params = np.loadtxt('step2_norm/step2_norm_params.txt', delimiter=',', dtype='str')
# Number of rows in the table; used for the progress printout below.
nrows = norm_params.shape[0]

In [83]:
# First column of the table (row 0 of the transpose): one file path per spectrum.
spectra_paths = norm_params.T[0]
spectra_paths

array(['spectra/02400714_sws.pkl', 'spectra/02400807_sws.pkl',
       'spectra/02400910_sws.pkl', ..., 'spectra/87700401_sws.pkl',
       'spectra/87700716_sws.pkl', 'spectra/87901801_sws.pkl'],
      dtype='<U24')

In [84]:
# Inspect one row: a file path followed by three numeric parameters stored as strings.
norm_params[0]

array(['spectra/02400714_sws.pkl', '-3.938295760531123',
       '356.8436809729308', '164.63236323735268'], dtype='<U24')

***

In [43]:
def read_spectrum(filename):
    """Load a pickled ISO spectrum and return its main columns.

    Parameters
    ----------
    filename : str
        Path to the pickle, passed straight to ``pd.read_pickle``.

    Returns
    -------
    tuple
        ``(wave, flux, fluxerr, spectrum)`` where ``wave`` and ``flux`` are
        the 'wavelength' and 'flux' entries, ``fluxerr`` is the sum of the
        'spec_error' and 'norm_error' entries, and ``spectrum`` is the full
        unpickled object.
    """
    table = pd.read_pickle(filename)

    wave, flux = table['wavelength'], table['flux']

    # Total uncertainty: plain (linear) sum of the two error columns.
    fluxerr = table['spec_error'] + table['norm_error']

    return wave, flux, fluxerr, table

In [72]:
def renormalize_spectrum(file_path, norm_factors, output_dir='../spectra_normalized/',
                         verbose=True):
    """Rescale one spectrum's flux and save the result as a new pickle.

    The 'flux' entry is replaced by ``(flux - spec_min) / norm_fac``; every
    other entry of the spectrum is written back unchanged. The new pickle is
    stored in ``output_dir`` under the original file name with ``_renorm``
    inserted before the extension.

    Parameters
    ----------
    file_path : str
        Path of the source pickle relative to the parent directory ('../'),
        e.g. 'spectra/02400714_sws.pkl'.
    norm_factors : sequence of 4 str
        ``(file_path, spec_min, spec_max, norm_fac)`` for this spectrum.
        ``spec_max`` is accepted but not used in the scaling.
    output_dir : str, optional
        Directory that receives the renormalized pickle. It is created if it
        does not exist yet.
    verbose : bool, optional
        If True, print the path of the file that was written.

    Returns
    -------
    str
        Path of the saved pickle with a leading '../' removed (so it is
        relative to the same root as ``file_path``); if ``output_dir`` is not
        under '../' the full saved path is returned.

    Raises
    ------
    SystemExit
        If ``file_path`` differs from ``norm_factors[0]``.
    """
    data_root = '../'

    # Sanity check that the parameters are for this particular file.
    if file_path != norm_factors[0]:
        raise SystemExit('File paths do not match!')

    # Read the original pickled spectrum; only the full object is needed here.
    _, _, _, spectrum = read_spectrum(filename=data_root + file_path)

    # Identify the scaling factors (unpacking also enforces the 4-field layout).
    _, spec_min, _spec_max, norm_fac = norm_factors
    spec_min = float(spec_min)
    norm_fac = float(norm_fac)

    # Scale the flux; the rest of the structure stays as it was read.
    # NOTE(review): 'spec_error' / 'norm_error' are left in the original flux
    # units -- confirm whether they should also be divided by norm_fac.
    spectrum['flux'] = (spectrum['flux'] - spec_min) / norm_fac

    # Build the output name from the base name only, so that neither the
    # directory nor a file name containing 'spectra' or '.pkl' is mangled.
    stem, ext = os.path.splitext(os.path.basename(file_path))
    full_save_path = os.path.join(output_dir, stem + '_renorm' + ext)

    # Make sure the target directory exists before writing.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    spectrum.to_pickle(full_save_path)

    # Print 'Saved!' statement if verbose.
    if verbose:
        print('Saved: ', full_save_path)

    # Report the path relative to the data root, matching how file_path is given.
    if full_save_path.startswith(data_root):
        return full_save_path[len(data_root):]
    return full_save_path

***

In [89]:
# Renormalize every spectrum listed in the parameter table and collect the
# paths of the newly written pickles (used afterwards to update the metadata).
file_path_list = []

for index, (file_path, norm_factors) in enumerate(zip(spectra_paths, norm_params)):
    # Progress report every 100 spectra.
    if not index % 100:
        print(index, ' / ', nrows)

    # Renormalize, save to a new pickle, and remember where it went.
    file_path_list.append(
        renormalize_spectrum(file_path, norm_factors, verbose=False)
    )

0  /  1235
100  /  1235
200  /  1235
300  /  1235
400  /  1235
500  /  1235
600  /  1235
700  /  1235
800  /  1235
900  /  1235
1000  /  1235
1100  /  1235
1200  /  1235


In [123]:
def check_tdts(old_file_paths, new_file_paths):
    """Verify that two sequences of file paths refer to the same TDTs, in order.

    The TDT of a path is taken to be the part of its file name (the text after
    the last '/') that precedes the first underscore.

    Parameters
    ----------
    old_file_paths, new_file_paths : iterable of str
        Paths to compare element by element.

    Raises
    ------
    SystemExit
        If the two TDT lists differ (including a difference in length).
    """
    def tdt_of(path):
        # 'some/dir/02400714_sws.pkl' -> '02400714'
        filename = path.split('/')[-1]
        return filename.split('_')[0]

    old_tdts = list(map(tdt_of, old_file_paths))
    new_tdts = list(map(tdt_of, new_file_paths))

    if old_tdts == new_tdts:
        return

    raise SystemExit("TDTs don't match.")

In [124]:
def update_dataframe(meta, file_path_list):
    """Return a copy of ``meta`` whose 'file_path' column is ``file_path_list``.

    Before anything is changed, the existing 'file_path' values are compared
    with the new ones via ``check_tdts``. The updated copy is also pickled to
    '../metadata_normalized.pkl'. The input ``meta`` is left untouched.

    Parameters
    ----------
    meta : pandas.DataFrame
        Metadata table containing a 'file_path' column.
    file_path_list : list of str
        Replacement paths, one per row of ``meta`` and in the same order.

    Returns
    -------
    pandas.DataFrame
        The updated copy of ``meta``.
    """
    output_path = '../metadata_normalized.pkl'

    # Sanity check: old and new paths must describe the same TDTs, row by row.
    check_tdts(meta['file_path'], file_path_list)

    # Work on a copy so the caller's dataframe keeps its original paths.
    updated = meta.copy()
    updated['file_path'] = file_path_list

    # Persist the updated table and report where it went.
    updated.to_pickle(output_path)
    print('Saved: ', output_path)

    return updated

In [125]:
# Build (and save) the metadata copy whose file_path entries are the paths
# returned by the renormalization loop.
new_meta = update_dataframe(meta, file_path_list)

Saved:  ../metadata_normalized.pkl


***

In [20]:
# Preview the original metadata; meta itself is not modified by update_dataframe.
meta.head()

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type,data_ok
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400714_sws.pkl,PN,True
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra/02400807_sws.pkl,PN,True
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra/02400910_sws.pkl,PN,True
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra/02401183_sws.pkl,PN,True
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra/02401579_sws.pkl,,True


In [122]:
# Preview the updated copy to confirm the file_path column was replaced.
new_meta.head()

Unnamed: 0,object_name,tdt,ra,dec,full_classifier,group,subgroup,uncertainty_flag,note,Unnamed: 10,file_path,object_type,data_ok
0,NGC 6543,2400714,269.639167,66.633194,4.PN,4,PN,,,,spectra_normalized/02400714_sws_renorm.pkl,PN,True
1,NGC 6543,2400807,269.639167,66.633194,4.PN,4,PN,,,,spectra_normalized/02400807_sws_renorm.pkl,PN,True
2,NGC 6543,2400910,269.639125,66.633194,4.PN,4,PN,,,,spectra_normalized/02400910_sws_renorm.pkl,PN,True
3,NGC 7027,2401183,316.757125,42.235861,4.PU,4,PU,,,,spectra_normalized/02401183_sws_renorm.pkl,PN,True
4,{gamma} Dra,2401579,269.151708,51.488972,1.NO,1,NO,,"(0,0)",,spectra_normalized/02401579_sws_renorm.pkl,,True
