## Artificial Neural Networks

### Import libraries 

In [1]:
# import keras
# from keras.models import Sequential
# from keras.layers import Dense, Activation, Dropout
# from keras.layers.normalization import BatchNormalization
# from keras import metrics
# from keras.callbacks import ModelCheckpoint

import pandas as pd
import numpy as np
from scipy import ndimage, fft
import matplotlib.pyplot as plt
import math
import time

from imblearn.over_sampling import SMOTE

from pathlib import Path

from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, MinMaxScaler, normalize
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    confusion_matrix,
)
from sklearn.metrics import classification_report

# from .preprocess_data import LightFluxProcessor
import tensorflow as tf

2024-11-21 13:24:59.878096: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-21 13:25:00.195537: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-21 13:25:00.490881: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732191900.695310   22457 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732191900.744654   22457 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-21 13:25:01.264180: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
class LightFluxProcessor:

    def __init__(self, fourier=True, normalize=True, gaussian=True, standardize=True):
        self.fourier = fourier
        self.normalize = normalize
        self.gaussian = gaussian
        self.standardize = standardize

    # Computing the FFT on X and returning the amplitude of the frequencies in the signal 
    def fourier_transform(self, X):
        return np.abs(fft.fft(X, n=X.size))

    # Applying fourier transform
    def process(self, df_train_x, df_dev_x):
        if self.fourier:
            print("Applying Fourier...")
            shape_train = df_train_x.shape
            shape_dev = df_dev_x.shape
            df_train_x = df_train_x.apply(self.fourier_transform, axis=1)
            df_dev_x = df_dev_x.apply(self.fourier_transform, axis=1)

            # builds new arrays to sore the transformed rows 
            df_train_x_build = np.zeros(shape_train)
            df_dev_x_build = np.zeros(shape_dev)

            for ii, x in enumerate(df_train_x): #?
                df_train_x_build[ii] = x

            for ii, x in enumerate(df_dev_x): #?
                df_dev_x_build[ii] = x

            df_train_x = pd.DataFrame(df_train_x_build)
            df_dev_x = pd.DataFrame(df_dev_x_build)

            # Keep first half of data as it is symmetrical after previous steps (FFT results are symmetric)
            df_train_x = df_train_x.iloc[:, : (df_train_x.shape[1] // 2)].values
            df_dev_x = df_dev_x.iloc[:, : (df_dev_x.shape[1] // 2)].values

        # Normalizing to unit norm
        if self.normalize:
            print("Normalizing...")
            df_train_x = pd.DataFrame(normalize(df_train_x)) 
            df_dev_x = pd.DataFrame(normalize(df_dev_x))

            # df_train_x = df_train_x.div(df_train_x.sum(axis=1), axis=0)
            # df_dev_x = df_dev_x.div(df_dev_x.sum(axis=1), axis=0)

        # Gaussian filter to smooth out data (to remove noise)
        if self.gaussian:
            print("Applying Gaussian Filter...")
            df_train_x = ndimage.filters.gaussian_filter(df_train_x, sigma=10)
            df_dev_x = ndimage.filters.gaussian_filter(df_dev_x, sigma=10)

        # Standardize X data
        if self.standardize:
            print("Standardizing...")
            std_scaler = StandardScaler()
            df_train_x = std_scaler.fit_transform(df_train_x)
            df_dev_x = std_scaler.transform(df_dev_x)

        print("Finished Processing!")
        return df_train_x, df_dev_x

### Build the model 

In [3]:
np.random.seed(1)

def build_network(shape, n_layers, n_neurons):

    model = tf.keras.models.Sequential(
        [
        tf.keras.layers.Input(shape), # input layer defined by 'shape' parameter  
        tf.keras.layers.Flatten(), # covert into 1d array
        ])
    
    for n in range(n_layers):
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))  # Dense layer with 1 unit and ReLU
        model.add(tf.keras.layers.Dropout(rate=0.25)) # Dropout layer to prevent overfitting 
    
    model.add(tf.keras.layers.Dense(1, activation="sigmoid")) # output layer with sigmoid

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
    return model

# output: X (numpy array of features) and Y (numpy array of lables - 1 or 2)
def np_X_Y_from_df(df):
    df = shuffle(df)
    df_X = df.drop(["LABEL"], axis=1)
    X = np.array(df_X)
    Y_raw = np.array(df["LABEL"]).reshape((len(df["LABEL"]), 1))
    Y = Y_raw == 2
    return X, Y


### Load and process the dataset

In [5]:
def load_data(train_dataset_path, dev_dataset_path):
    print("Loading datasets...")
    df_train = pd.read_csv(train_dataset_path, encoding="ISO-8859-1")
    df_dev = pd.read_csv(dev_dataset_path, encoding="ISO-8859-1")

    # Generate X and Y dataframe sets
    df_train_x = df_train.drop("LABEL", axis=1)
    df_dev_x = df_dev.drop("LABEL", axis=1)
    df_train_y = df_train.LABEL
    df_dev_y = df_dev.LABEL

    # Process dataset with LPF
    LFP = LightFluxProcessor(
        fourier=True, normalize=True, gaussian=True, standardize=True
    )
    df_train_x, df_dev_x = LFP.process(df_train_x, df_dev_x)

    # Rejoin X and Y to form dataframe 
    df_train_processed = pd.DataFrame(df_train_x).join(pd.DataFrame(df_train_y))
    df_dev_processed = pd.DataFrame(df_dev_x).join(pd.DataFrame(df_dev_y))

    # Load X and Y numpy arrays
    X_train, Y_train = np_X_Y_from_df(df_train_processed)
    X_dev, Y_dev = np_X_Y_from_df(df_dev_processed)

    # Print data set stats
    (num_examples, n_x) = (X_train.shape)  # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[1]  # n_y : output size
    print("X_train.shape: ", X_train.shape)
    print("Y_train.shape: ", Y_train.shape)
    print("X_dev.shape: ", X_dev.shape)
    print("Y_dev.shape: ", Y_dev.shape)
    
    return X_train, Y_train, X_dev, Y_dev

### NN on no injection data

In [7]:
path_to_data = '../data/kepler'
train_dataset_path = f"{path_to_data}/data_no_injection/exoTrain.csv"
dev_dataset_path = f"{path_to_data}/data_no_injection/exoTest.csv"

LOAD_MODEL = True  # continue training previous weights or start fresh
RENDER_PLOT = False  # render loss and accuracy plots

Below we run the model with deafult parameters (one hidden layer and one neuron per layer)

In [8]:
X_train, Y_train, X_dev, Y_dev = load_data(train_dataset_path, dev_dataset_path)
model = build_network(X_train.shape[1:], n_layers=1, n_neurons=1)

############## do we want to balance using SMOTE ??
sm = SMOTE()
X_train_sm, Y_train_sm = sm.fit_resample(X_train, Y_train)

# Training the NN 
print("Training...")

history = model.fit(X_train_sm, Y_train_sm, epochs=50, batch_size=32)

train_outputs = model.predict(X_train, batch_size=32) 
dev_outputs = model.predict(X_dev, batch_size=32) 
train_outputs = np.rint(train_outputs)
dev_outputs = np.rint(dev_outputs)

# Metrics
accuracy_train = accuracy_score(Y_train, train_outputs)
accuracy_dev = accuracy_score(Y_dev, dev_outputs)
precision_train = precision_score(Y_train, train_outputs)
precision_dev = precision_score(Y_dev, dev_outputs)
recall_train = recall_score(Y_train, train_outputs)
recall_dev = recall_score(Y_dev, dev_outputs)
confusion_matrix_train = confusion_matrix(Y_train, train_outputs)
confusion_matrix_dev = confusion_matrix(Y_dev, dev_outputs)

print("train set error", 1.0 - accuracy_train)
print("dev set error", 1.0 - accuracy_dev)
print("------------")
print("precision_train", precision_train)
print("precision_dev", precision_dev)
print("------------")
print("recall_train", recall_train)
print("recall_dev", recall_dev)
print("------------")
print("confusion_matrix_train")
print(confusion_matrix_train)
print("confusion_matrix_dev")
print(confusion_matrix_dev)
print("------------")
print("Train Set Positive Predictions", np.count_nonzero(train_outputs))
print("Dev Set Positive Predictions", np.count_nonzero(dev_outputs))
#  Predicting 0's will give you error:
print("------------")
print("All 0's error train set", 37 / 5087)
print("All 0's error dev set", 5 / 570)

print("------------")
print("------------")

if RENDER_PLOT:
    # list all data in history
    print(history.history.keys())
    # summarize history for accuracy
    plt.plot(history.history["accuracy"])
    # plt.plot(history.history['val_acc'])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")
    plt.show()

    # summarize history for loss
    plt.plot(history.history["loss"])
    # plt.plot(history.history['val_loss'])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")
    plt.show()

Loading datasets...
Applying Fourier...
Normalizing...
Applying Gaussian Filter...


  df_train_x = ndimage.filters.gaussian_filter(df_train_x, sigma=10)
  df_dev_x = ndimage.filters.gaussian_filter(df_dev_x, sigma=10)


Standardizing...
Finished Processing!
X_train.shape:  (5087, 1598)
Y_train.shape:  (5087, 1)
X_dev.shape:  (570, 1598)
Y_dev.shape:  (570, 1)


2024-11-21 13:26:37.519047: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Training...
Epoch 1/50


  output, from_logits = _get_logits(


[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7183 - loss: 0.5110
Epoch 2/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8486 - loss: 0.3909
Epoch 3/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8689 - loss: 0.3607
Epoch 4/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8670 - loss: 0.3460
Epoch 5/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8733 - loss: 0.3273
Epoch 6/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8632 - loss: 0.3377
Epoch 7/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8713 - loss: 0.3234
Epoch 8/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8762 - loss: 0.3134
Epoch 9/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━

We go on varying the number of layers and number of neurons per layer

In [9]:
import itertools

n_layers = [1,2,3]
n_neurons = [16,32,64]
combinations = list(itertools.product(n_layers, n_neurons))

for combo in combinations:
    model = build_network(X_train.shape[1:], n_layers=combo[0], n_neurons=combo[1])

    sm = SMOTE()
    X_train_sm, Y_train_sm = sm.fit_resample(X_train, Y_train)

    # Training the NN 
    print(f"Training with {combo[0]} hidden layers and {combo[1]} neurons per layer...")

    history = model.fit(X_train_sm, Y_train_sm, epochs=50, batch_size=32, verbose=0)

    train_outputs = model.predict(X_train, batch_size=32, verbose=0) 
    dev_outputs = model.predict(X_dev, batch_size=32, verbose=0) 
    train_outputs = np.rint(train_outputs)
    dev_outputs = np.rint(dev_outputs)

    confusion_matrix_train = confusion_matrix(Y_train, train_outputs)
    confusion_matrix_dev = confusion_matrix(Y_dev, dev_outputs)

    print("confusion_matrix_train")
    print(confusion_matrix_train)
    print("confusion_matrix_dev")
    print(confusion_matrix_dev)

    print("------------")
    print("------------")

Training with 1 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5048    2]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 1 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5048    2]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 1 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5047    3]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 2 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5048    2]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 2 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5045    5]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 2 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5047    3]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 3 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5045    5]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 3 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5046    4]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------
Training with 3 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[5047    3]
 [   0   37]]
confusion_matrix_dev
[[565   0]
 [  5   0]]
------------
------------


### NN on injection data

In [11]:
train_dataset_path = f"{path_to_data}/data_injected/exoTrain.csv"
dev_dataset_path = f"{path_to_data}/data_injected/exoTest.csv"

X_train, Y_train, X_dev, Y_dev = load_data(train_dataset_path, dev_dataset_path)
model = build_network(X_train.shape[1:], n_layers=1, n_neurons=1)

############## do we want to balance using SMOTE ??
sm = SMOTE()
X_train_sm, Y_train_sm = sm.fit_resample(X_train, Y_train)

# Training the NN 
print("Training...")

history = model.fit(X_train_sm, Y_train_sm, epochs=50, batch_size=32)

train_outputs = model.predict(X_train, batch_size=32) 
dev_outputs = model.predict(X_dev, batch_size=32) 
train_outputs = np.rint(train_outputs)
dev_outputs = np.rint(dev_outputs)

# Metrics
accuracy_train = accuracy_score(Y_train, train_outputs)
accuracy_dev = accuracy_score(Y_dev, dev_outputs)
precision_train = precision_score(Y_train, train_outputs)
precision_dev = precision_score(Y_dev, dev_outputs)
recall_train = recall_score(Y_train, train_outputs)
recall_dev = recall_score(Y_dev, dev_outputs)
confusion_matrix_train = confusion_matrix(Y_train, train_outputs)
confusion_matrix_dev = confusion_matrix(Y_dev, dev_outputs)

print("train set error", 1.0 - accuracy_train)
print("dev set error", 1.0 - accuracy_dev)
print("------------")
print("precision_train", precision_train)
print("precision_dev", precision_dev)
print("------------")
print("recall_train", recall_train)
print("recall_dev", recall_dev)
print("------------")
print("confusion_matrix_train")
print(confusion_matrix_train)
print("confusion_matrix_dev")
print(confusion_matrix_dev)
print("------------")
print("Train Set Positive Predictions", np.count_nonzero(train_outputs))
print("Dev Set Positive Predictions", np.count_nonzero(dev_outputs))
#  Predicting 0's will give you error:
print("------------")
print("All 0's error train set", 37 / 5087)
print("All 0's error dev set", 5 / 570)

print("------------")
print("------------")

if RENDER_PLOT:
    # list all data in history
    print(history.history.keys())
    # summarize history for accuracy
    plt.plot(history.history["accuracy"])
    # plt.plot(history.history['val_acc'])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")
    plt.show()

    # summarize history for loss
    plt.plot(history.history["loss"])
    # plt.plot(history.history['val_loss'])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")
    plt.show()

Loading datasets...
Applying Fourier...
Normalizing...
Applying Gaussian Filter...


  df_train_x = ndimage.filters.gaussian_filter(df_train_x, sigma=10)
  df_dev_x = ndimage.filters.gaussian_filter(df_dev_x, sigma=10)


Standardizing...
Finished Processing!
X_train.shape:  (5087, 1598)
Y_train.shape:  (5087, 1)
X_dev.shape:  (570, 1598)
Y_dev.shape:  (570, 1)
Training...
Epoch 1/50


  output, from_logits = _get_logits(


[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4920 - loss: 0.7534
Epoch 2/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4941 - loss: 0.7386
Epoch 3/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4841 - loss: 0.7089
Epoch 4/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5050 - loss: 0.6993
Epoch 5/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5005 - loss: 0.7068
Epoch 6/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5130 - loss: 0.7086
Epoch 7/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5149 - loss: 0.6961
Epoch 8/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5072 - loss: 0.7007
Epoch 9/50
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━

In [13]:
for combo in combinations:
    model = build_network(X_train.shape[1:], n_layers=combo[0], n_neurons=combo[1])

    sm = SMOTE()
    X_train_sm, Y_train_sm = sm.fit_resample(X_train, Y_train)

    # Training the NN 
    print(f"Training with {combo[0]} hidden layers and {combo[1]} neurons per layer...")

    history = model.fit(X_train_sm, Y_train_sm, epochs=50, batch_size=32, verbose=0)

    train_outputs = model.predict(X_train, batch_size=32, verbose=0) 
    dev_outputs = model.predict(X_dev, batch_size=32, verbose=0) 
    train_outputs = np.rint(train_outputs)
    dev_outputs = np.rint(dev_outputs)

    confusion_matrix_train = confusion_matrix(Y_train, train_outputs)
    confusion_matrix_dev = confusion_matrix(Y_dev, dev_outputs)

    print("confusion_matrix_train")
    print(confusion_matrix_train)
    print("confusion_matrix_dev")
    print(confusion_matrix_dev)

    print("------------")
    print("------------")

Training with 1 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[ 898 1328]
 [ 906 1955]]
confusion_matrix_dev
[[ 83 171]
 [ 97 219]]
------------
------------
Training with 1 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1076 1150]
 [1192 1669]]
confusion_matrix_dev
[[ 45 209]
 [ 49 267]]
------------
------------
Training with 1 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1217 1009]
 [1345 1516]]
confusion_matrix_dev
[[  4 250]
 [ 10 306]]
------------
------------
Training with 2 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1366  860]
 [1522 1339]]
confusion_matrix_dev
[[124 130]
 [157 159]]
------------
------------
Training with 2 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1228  998]
 [1314 1547]]
confusion_matrix_dev
[[ 33 221]
 [ 26 290]]
------------
------------
Training with 2 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[ 428 1798]
 [ 303 2558]]
confusion_matrix_dev
[[ 15 239]
 [ 14 302]]
------------
------------
Training with 3 hidden layers and 16 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[2030  196]
 [2492  369]]
confusion_matrix_dev
[[254   0]
 [316   0]]
------------
------------
Training with 3 hidden layers and 32 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1988  238]
 [2410  451]]
confusion_matrix_dev
[[223  31]
 [277  39]]
------------
------------
Training with 3 hidden layers and 64 neurons per layer...


  output, from_logits = _get_logits(


confusion_matrix_train
[[1990  236]
 [2478  383]]
confusion_matrix_dev
[[208  46]
 [262  54]]
------------
------------
