# TF neural net with normalized ISO spectra

In [1]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
from IPython.core.debugger import set_trace as st
from sklearn.model_selection import train_test_split
from time import time

# My modules
from swsnet import helpers

print(tf.__version__)

1.11.0


## Dataset: ISO-SWS (normalized)

In [2]:
# Needed directories
base_dir = '../data/isosws_atlas/'

# Pickles containing our spectra in the form of pandas dataframes:
spec_dir = base_dir + 'spectra_normalized/'
spec_files = np.sort(glob.glob(spec_dir + '*.pkl'))

# Metadata pickle (pd.dataframe). Note each entry contains a pointer to the corresponding spectrum pickle.
metadata = base_dir + 'metadata_normalized.pkl'

#### Labels ('group'):

1. Naked stars
2. Stars with dust
3. Warm, dusty objects
4. Cool, dusty objects
5. Very red objects
6. Continuum-free objects but having emission lines
7. Flux-free and/or fatally flawed spectra

N.B., these are shifted down by 1 in the labels (to span 0-6) for the model.

### Subset 1: all data included

In [3]:
features, labels = helpers.load_data(base_dir=base_dir, metadata=metadata, clean=False, verbose=False)

In [4]:
print(features.shape)
print(labels.shape)

(1235, 359)
(1235,)


### Subset 2: exclude group=7 data

In [5]:
features_clean, labels_clean = helpers.load_data(base_dir=base_dir, metadata=metadata, clean=True, verbose=False)

In [6]:
print(features_clean.shape)
print(labels_clean.shape)

(1058, 359)
(1058,)


# The model itself

In [9]:
def neural(features, labels, l2norm=0.01):

    X_train, X_test, y_train, y_test = \
        train_test_split(features, labels, test_size=0.3, random_state = 42)

    # Sequential model, 7 classes of output.
    model = keras.Sequential()
    model.add(keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm), input_dim=359))
    model.add(keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm)))
    model.add(keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm)))
    model.add(keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm)))
    model.add(keras.layers.Dense(7, activation='softmax'))

    # Early stopping condition.
    callback = [tf.keras.callbacks.EarlyStopping(monitor='acc', patience=3, verbose=0)]

    # Recompile model and fit.
    model.compile(optimizer=keras.optimizers.Adam(0.0005),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    #     model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=False)
    model.fit(X_train, y_train, epochs=50, batch_size=32, callbacks=callback, verbose=False)

    # Check accuracy.
    score = model.evaluate(X_test, y_test, verbose=0)
    accuracy = score[1]
    print("L2 norm, accuracy: ", l2norm, accuracy)
    
    return model, accuracy

In [13]:
for l2norm in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    model, accuracy = neural(features, labels, l2norm=l2norm)

L2 norm, accuracy:  0.1 0.3854447448992665
L2 norm, accuracy:  0.01 0.7277628046804361
L2 norm, accuracy:  0.001 0.7708894893165548
L2 norm, accuracy:  0.0001 0.7412398936292232
L2 norm, accuracy:  1e-05 0.7304582224701935


In [14]:
for l2norm in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    model, accuracy = neural(features_clean, labels_clean, l2norm=l2norm)

L2 norm, accuracy:  0.1 0.4874213840226707
L2 norm, accuracy:  0.01 0.7547169826315634
L2 norm, accuracy:  0.001 0.8176100640176976
L2 norm, accuracy:  0.0001 0.7798742123369901
L2 norm, accuracy:  1e-05 0.789308174601141


***

In [38]:
def neural2(features, labels, l2norm=0.01):

    X_train, X_test, y_train, y_test = \
        train_test_split(features, labels, test_size=0.25, random_state = 22)

    # Sequential model, 7 classes of output.
    model = keras.Sequential()
    model.add(keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm), input_dim=359))
    model.add(keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(l2norm)))    
    model.add(keras.layers.Dense(6, activation='softmax'))

    # Early stopping condition.
    callback = [tf.keras.callbacks.EarlyStopping(monitor='acc', patience=3, verbose=0)]

    # Recompile model and fit.
    model.compile(optimizer=keras.optimizers.Adam(0.0005),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    #     model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=False)
    model.fit(X_train, y_train, epochs=50, batch_size=32, callbacks=callback, verbose=False)

    # Check accuracy.
    score = model.evaluate(X_test, y_test, verbose=0)
    accuracy = score[1]
    print("L2 norm, accuracy: ", l2norm, accuracy)
    
    return model, accuracy

In [30]:
for l2norm in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    model, accuracy = neural2(features, labels, l2norm=l2norm)

L2 norm, accuracy:  0.1 0.5789473660079091
L2 norm, accuracy:  0.01 0.6963562728905002
L2 norm, accuracy:  0.001 0.7530364394187927
L2 norm, accuracy:  0.0001 0.7246963548274176
L2 norm, accuracy:  1e-05 0.7449392698071746


In [39]:
# using... 128, 64, 6. test_size=0.20, epoch=50, patience=5.
for l2norm in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    model, accuracy = neural2(features_clean, labels_clean, l2norm=l2norm)

L2 norm, accuracy:  0.1 0.6415094348619569
L2 norm, accuracy:  0.01 0.724528302561562
L2 norm, accuracy:  0.001 0.784905661052128
L2 norm, accuracy:  0.0001 0.8113207556166739
L2 norm, accuracy:  1e-05 0.8113207556166739


In [37]:
# using... 128, 64, 6. test_size=0.15, epoch=50, patience=5.
for l2norm in (0.1, 0.01, 0.001, 0.0001, 0.00001):
    model, accuracy = neural2(features_clean, labels_clean, l2norm=l2norm)

L2 norm, accuracy:  0.1 0.6289308194844228
L2 norm, accuracy:  0.01 0.779874214586222
L2 norm, accuracy:  0.001 0.8113207498436454
L2 norm, accuracy:  0.0001 0.8113207494687734
L2 norm, accuracy:  1e-05 0.8301886743719473


***

Based on the above, probably need to do more data preprocessing:
- e.g., remove untrustworthy data

In [21]:
save_path = '../models/neural_net.h5'

In [22]:
# model.save(save_path)