In [None]:
# Dependencies and setup
import numpy as np
import pandas as pd
import warnings
warnings.simplefilter("ignore")

# Read the CSV and Perform Basic Data Cleaning

In [None]:
# Load the raw exoplanet table and clean it in one chain:
#   1. drop the columns in which every value is null
#   2. drop any row that still contains at least one null
df = (
    pd.read_csv("../Data/exoplanet_data.csv")
      .dropna(axis = "columns", how = "all")
      .dropna()
)

# Preview the first rows of the cleaned DataFrame
df.head()

In [None]:
# List all columns left after cleaning (this still includes the `koi_disposition`
# target, which is separated from the features in a later cell)
df.columns

# Select Desired Features (Columns)

In [None]:
# Feature matrix: every cleaned column except the `koi_disposition` target.
# `selected` and `X` are two names for the same DataFrame.
X = selected = df.drop(labels = ["koi_disposition"], axis = "columns")
X.head()

# Create a Train-Test Split

Use `koi_disposition` for the y values.

In [None]:
# Set y values: the disposition label of every row, as a 1-D array of shape (n_samples,).
# scikit-learn expects class labels in this layout; reshaping to a column vector
# (n_samples, 1) makes LabelEncoder raise a DataConversionWarning and adds nothing here.
y = df["koi_disposition"].values

# Sanity check: X and y must have the same number of rows
print(X.shape, y.shape)

In [None]:
# Dependencies and setup
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Hold out 25% of the rows for testing (scikit-learn's default when no size is given);
# the fixed random_state makes the shuffle, and therefore the split, repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.25,
    random_state = 1,
)
X_train.head()

# Pre-Processing

Scale the features with `MinMaxScaler`, then label-encode and one-hot encode the target.

In [None]:
# Dependencies and setup
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# Scale features: the scaler is fitted on the training split only and that same fitted
# transform is then applied to both splits, so the test rows never influence the scaling
X_scale = MinMaxScaler()
X_scale.fit(X_train)
X_train_scaled, X_test_scaled = (X_scale.transform(split) for split in (X_train, X_test))

# Label-encode the target: learn the class -> integer mapping from the training labels
# and reuse it for the test labels.
# NOTE: y_train / y_test are overwritten with their integer codes here, so re-running
# this cell without re-running the split would refit the encoder on integers.
labelEncoder = LabelEncoder()
y_train = labelEncoder.fit_transform(y_train)
y_test = labelEncoder.transform(y_test)

In [None]:
# One-hot encoding
# Pin the number of columns to the classes the encoder was fitted on. Without
# `num_classes`, to_categorical infers the width from the largest code present in each
# array, so a split that happens to lack the highest class would come out narrower
# than the other and no longer match the network's output layer.
num_classes = len(labelEncoder.classes_)
y_train_categorical = to_categorical(y_train, num_classes = num_classes)
y_test_categorical = to_categorical(y_test, num_classes = num_classes)

# Train the Neural Network Model

In [None]:
# Dependencies and setup
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Create a normal neural network: one hidden layer of 50 ReLU nodes feeding a softmax
# output layer with one node per class.
# The input and output sizes are read from the prepared arrays instead of being
# hard-coded, so the network keeps matching the data if the feature columns or the
# set of classes change.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

model = Sequential()
model.add(Dense(units = 50, activation = "relu", input_dim = n_features))
model.add(Dense(units = n_classes, activation = "softmax"))

In [None]:
# Configure training for the single-hidden-layer network: Adam optimizer,
# categorical cross-entropy loss (the targets are one-hot encoded), accuracy as metric
model.compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [None]:
# Train the single-hidden-layer network for 100 epochs on the scaled training features
# and their one-hot targets; verbose = 2 selects the per-epoch progress output
model.fit(x = X_train_scaled, y = y_train_categorical, epochs = 100, shuffle = True, verbose = 2)

# Train the Deep Learning Model

In [None]:
# Deep network: same layout as the normal network plus a second hidden layer of 50
# ReLU nodes in front of the softmax output.
# The input and output sizes are read from the prepared arrays instead of being
# hard-coded, so the network keeps matching the data if the feature columns or the
# set of classes change.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

deep_model = Sequential()
deep_model.add(Dense(units = 50, activation = "relu", input_dim = n_features))
deep_model.add(Dense(units = 50, activation = "relu"))
deep_model.add(Dense(units = n_classes, activation = "softmax"))

In [None]:
# Configure training for the two-hidden-layer network with the same settings as the
# normal network: Adam optimizer, categorical cross-entropy loss, accuracy as metric
deep_model.compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [None]:
# Train the two-hidden-layer network for 100 epochs on the scaled training features
# and their one-hot targets; verbose = 2 selects the per-epoch progress output
deep_model.fit(x = X_train_scaled, y = y_train_categorical, epochs = 100, shuffle = True, verbose = 2)

In [None]:
# Score both networks on the held-out test split and print loss/accuracy side by side.
# model_loss / model_accuracy are reused for both networks, so once this cell has run
# they hold the deep network's metrics.
normal_scores = model.evaluate(X_test_scaled, y_test_categorical, verbose = 2)
model_loss, model_accuracy = normal_scores
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}\n")

deep_scores = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose = 2)
model_loss, model_accuracy = deep_scores
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Make predictions for the first 10 test rows with the single-hidden-layer network.
# `Sequential.predict_classes` was removed in TensorFlow 2.6; the predicted class is
# the index of the largest softmax probability, so take the argmax over the last axis.
probabilities = model.predict(X_test_scaled[:10])
predictions = np.argmax(probabilities, axis = -1)

# Decode both predicted and actual integer codes back to their original string labels
# so the two printed lines can be compared directly.
predictionLabels = labelEncoder.inverse_transform(predictions)
actualLabels = labelEncoder.inverse_transform(y_test[:10])
print(f"First 10 Predicted Classes:   {predictionLabels}\n")
print(f"First 10 Actual labels: {actualLabels}")

# Save the Model

In [None]:
# Write the trained network to an .h5 file under ../Models
# NOTE(review): the file is named "deepLearning" but the object saved is `model`
# (the single-hidden-layer network), not `deep_model` — confirm which one is intended.
save_path = "../Models/deepLearning.h5"
model.save(save_path)