In [1]:
import warnings
warnings.simplefilter('ignore')

In [2]:
# Set the seed values for the notebook so the results are reproducible.
# Seeding NumPy alone is not enough: Keras draws its initial layer weights from
# TensorFlow's own random generator, so that one is seeded as well.
from numpy.random import seed
seed(1)

import tensorflow
tensorflow.random.set_seed(1)


In [3]:
# Dependencies
import numpy as np
import pandas as pd


In [4]:
import tensorflow
# Display the Keras version bundled with TensorFlow as the cell output, so the
# environment this notebook was run in is recorded alongside the results.
tensorflow.keras.__version__

'2.2.4-tf'

In [5]:
# Load the exoplanet table, then clean it in one chain:
#   1. discard columns that contain nothing but nulls,
#   2. discard every row that still has a missing value.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like
# a saved row index; it is kept here and therefore ends up among the model
# features -- confirm whether that is intended.
df = (
    pd.read_csv("exoplanet_data.csv")
    .dropna(axis='columns', how='all')
    .dropna()
)
df.head(2)

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.000248,-0.000248,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.5e-05,-1.5e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436


# Create a Train Test Split
Use koi_disposition for the y values

In [6]:
# ## Define X, y
# y is the disposition label; X is every remaining column.
y = df.loc[:, "koi_disposition"]
X = df.drop(columns=["koi_disposition"])

# Sanity check: X and y must have the same number of rows.
print(X.shape, y.shape)

(6991, 40) (6991,)


In [7]:
# Data Pre-Processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [40]:

# Hold out a test split, stratified on the label so both splits keep the same
# class proportions; random_state pins the shuffle for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=115,
    stratify=y,
)

In [41]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
2700,0,0,0,0,31.80476,9.1e-05,-9.1e-05,190.25413,0.00224,-0.00224,...,-128,4.235,0.143,-0.117,1.356,0.212,-0.259,287.62082,43.141647,13.178
21,0,0,0,0,4.280964,6e-06,-6e-06,171.89659,0.00115,-0.00115,...,-169,4.564,0.032,-0.168,0.831,0.207,-0.069,294.26581,49.314091,15.356
5040,0,1,1,0,0.829019,2e-06,-2e-06,131.46667,0.00401,-0.00401,...,-255,4.271,0.124,-0.186,1.328,0.408,-0.22,296.03485,45.490822,14.661
2795,0,0,1,0,1.387838,6e-06,-6e-06,132.11351,0.00318,-0.00318,...,-219,4.442,0.054,-0.216,1.044,0.335,-0.112,295.64212,48.773071,15.167
1765,0,0,0,0,14.383227,8.6e-05,-8.6e-05,140.57648,0.0053,-0.0053,...,-135,4.249,0.137,-0.112,1.291,0.216,-0.216,290.08276,50.86348,13.312


# Pre-processing
Scale the data using the MinMaxScaler and perform some feature selection

In [42]:

# Learn the per-feature min/max from the training split only, then apply that
# same transformation to both splits so no test information leaks into scaling.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# Train Model - Neural Network
Use Neural Network model

In [43]:

# Step 1: Map the string labels to integer codes.
# The encoder learns its classes from the training labels and is then reused,
# unchanged, to encode the test labels.
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [44]:

# Step 2: Convert encoded labels to one-hot-encoding.
# num_classes is passed explicitly so both splits always get one column per
# class known to the encoder. When left to infer it, to_categorical sizes each
# array from the largest code present in that array, which would produce
# mismatched widths if a split happened to lack the highest-coded class.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

In [45]:
# Print the label classes learned by the encoder.
print(label_encoder.classes_)

['CANDIDATE' 'CONFIRMED' 'FALSE POSITIVE']


In [69]:
# Encode the full label column (only needed for the optional scatter matrix).
# The encoder is intentionally not refit here: it was already fitted on the
# training labels, and the stratified split should leave every class of y
# represented there. Refitting would mutate the shared encoder that the
# prediction cells rely on for inverse_transform.
encoded_y = label_encoder.transform(y)
#pd.plotting.scatter_matrix(X, c=encoded_y, figsize=(40, 40));

In [47]:
# # Create a Deep Learning Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [70]:
# Create model and add layers: one hidden ReLU layer and a softmax output.
# The input and output widths are read from the prepared training arrays
# instead of being hard-coded, so the model keeps matching the data if the
# feature set or the number of classes changes.
model = Sequential()
model.add(Dense(units=10, activation='relu',
                input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))


In [71]:

# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 10)                410       
_________________________________________________________________
dense_17 (Dense)             (None, 3)                 33        
Total params: 443
Trainable params: 443
Non-trainable params: 0
_________________________________________________________________


In [72]:
# Compile the model: Adam optimizer, categorical cross-entropy for the one-hot
# targets, and accuracy as the reported metric. Training happens in the next cell.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [73]:
# Train on the scaled training features for 100 epochs with shuffling enabled;
# verbose=2 logs one summary line per epoch.
model.fit(
    x=X_train_scaled, y=y_train_categorical,
    epochs=100, shuffle=True, verbose=2,
)

Train on 5243 samples
Epoch 1/100
5243/5243 - 2s - loss: 0.9721 - accuracy: 0.4808
Epoch 2/100
5243/5243 - 0s - loss: 0.7945 - accuracy: 0.5293
Epoch 3/100
5243/5243 - 0s - loss: 0.6216 - accuracy: 0.7162
Epoch 4/100
5243/5243 - 0s - loss: 0.4989 - accuracy: 0.7595
Epoch 5/100
5243/5243 - 0s - loss: 0.4343 - accuracy: 0.8167
Epoch 6/100
5243/5243 - 0s - loss: 0.4053 - accuracy: 0.8171
Epoch 7/100
5243/5243 - 0s - loss: 0.3893 - accuracy: 0.8207
Epoch 8/100
5243/5243 - 0s - loss: 0.3788 - accuracy: 0.8262
Epoch 9/100
5243/5243 - 0s - loss: 0.3708 - accuracy: 0.8240
Epoch 10/100
5243/5243 - 0s - loss: 0.3640 - accuracy: 0.8295
Epoch 11/100
5243/5243 - 0s - loss: 0.3583 - accuracy: 0.8314
Epoch 12/100
5243/5243 - 0s - loss: 0.3530 - accuracy: 0.8337
Epoch 13/100
5243/5243 - 0s - loss: 0.3495 - accuracy: 0.8316
Epoch 14/100
5243/5243 - 0s - loss: 0.3449 - accuracy: 0.8354
Epoch 15/100
5243/5243 - 0s - loss: 0.3418 - accuracy: 0.8383
Epoch 16/100
5243/5243 - 0s - loss: 0.3377 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1a9a3c91e48>

In [52]:
# ## Quantify our Trained Model
# Score the network on the held-out split; the two returned values are unpacked
# as the loss and the accuracy metric set at compile time.
# NOTE(review): the recorded output for this cell (loss: nan) carries an earlier
# execution count than the model-building cells -- re-run top to bottom.
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1748 - 0s - loss: nan - accuracy: 0.2414
Normal Neural Network - Loss: nan, Accuracy: 0.2414187639951706


In [53]:
# ## Make Predictions
# Sequential.predict_classes() was deprecated and later removed from
# TensorFlow; taking the argmax over the softmax probabilities returned by
# predict() is the documented equivalent for a multi-class model.
predicted_probabilities = model.predict(X_test_scaled[:5])
encoded_predictions = np.argmax(predicted_probabilities, axis=-1)
# Map the integer class codes back to their original string labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [54]:

# Show the decoded predictions next to the true labels of the same five rows.
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test.head(5))}")

Predicted classes: ['CANDIDATE' 'CANDIDATE' 'CANDIDATE' 'CANDIDATE' 'CANDIDATE']
Actual Labels: ['FALSE POSITIVE', 'FALSE POSITIVE', 'CANDIDATE', 'FALSE POSITIVE', 'CONFIRMED']


# Deep Learning

In [55]:
# Same classifier as the first network but with a second hidden ReLU layer.
# Input and output widths come from the prepared training arrays rather than
# hard-coded numbers, so the model follows any change in features or classes.
deep_model = Sequential()
deep_model.add(Dense(units=10, activation='relu',
                     input_dim=X_train_scaled.shape[1]))
deep_model.add(Dense(units=10, activation='relu'))
deep_model.add(Dense(units=y_train_categorical.shape[1], activation='softmax'))

In [56]:
# Print the layer-by-layer architecture and parameter counts of the deep model.
deep_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 10)                410       
_________________________________________________________________
dense_14 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_15 (Dense)             (None, 3)                 33        
Total params: 553
Trainable params: 553
Non-trainable params: 0
_________________________________________________________________


In [57]:
# Compile with the same optimizer, loss and metric as the single-hidden-layer
# model so the two networks can be compared like for like.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 100 epochs with shuffling enabled; verbose=2 logs one line per epoch.
deep_model.fit(
    x=X_train_scaled, y=y_train_categorical,
    epochs=100, shuffle=True, verbose=2,
)

Train on 5243 samples
Epoch 1/100
5243/5243 - 1s - loss: 0.9737 - accuracy: 0.4669
Epoch 2/100
5243/5243 - 0s - loss: 0.6226 - accuracy: 0.7599
Epoch 3/100
5243/5243 - 0s - loss: 0.4538 - accuracy: 0.7999
Epoch 4/100
5243/5243 - 0s - loss: 0.4042 - accuracy: 0.8123
Epoch 5/100
5243/5243 - 0s - loss: 0.3839 - accuracy: 0.8159
Epoch 6/100
5243/5243 - 0s - loss: 0.3707 - accuracy: 0.8175
Epoch 7/100
5243/5243 - 0s - loss: 0.3580 - accuracy: 0.8308
Epoch 8/100
5243/5243 - 0s - loss: 0.3526 - accuracy: 0.8367
Epoch 9/100
5243/5243 - 0s - loss: 0.3454 - accuracy: 0.8343
Epoch 10/100
5243/5243 - 0s - loss: 0.3402 - accuracy: 0.8409
Epoch 11/100
5243/5243 - 0s - loss: 0.3375 - accuracy: 0.8409
Epoch 12/100
5243/5243 - 0s - loss: 0.3309 - accuracy: 0.8470
Epoch 13/100
5243/5243 - 0s - loss: 0.3280 - accuracy: 0.8518
Epoch 14/100
5243/5243 - 0s - loss: 0.3248 - accuracy: 0.8535
Epoch 15/100
5243/5243 - 0s - loss: 0.3216 - accuracy: 0.8598
Epoch 16/100
5243/5243 - 0s - loss: 0.3186 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1a9a5354828>

# Compare the 2 models (Neural Network and Deep Learning)

In [74]:
# Neural Networks
# Loss and accuracy of the single-hidden-layer model on the held-out test split.
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1748 - 0s - loss: 0.2882 - accuracy: 0.8690
Normal Neural Network - Loss: 0.2882032309162262, Accuracy: 0.8689931631088257


In [59]:
# Deep Learning
# Loss and accuracy of the two-hidden-layer model on the same test split.
# The result names are shared with the previous cell and overwrite its values.
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled,
    y_test_categorical,
    verbose=2,
)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1748 - 0s - loss: 0.2890 - accuracy: 0.8816
Deep Neural Network - Loss: 0.2889800428389412, Accuracy: 0.8815789222717285


# Save the Model

In [61]:
import joblib

In [None]:
# Save Deep_model
# Use Keras' own serializer rather than joblib: pickling a Keras model is not a
# supported way to persist it, whereas model.save() writes the architecture,
# weights and optimizer state to one HDF5 file that
# tensorflow.keras.models.load_model() can restore.
filename2 = 'ST_deeplearning.h5'
deep_model.save(filename2)