In [1]:
#Dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
#Reading the data
# Load the cleaned Kepler table from the relative Resources folder and
# preview its first rows.
kepler_csv_path = "Resources/cleaned_Kepler.csv"
data_Kepler = pd.read_csv(kepler_csv_path)
data_Kepler.head()

Unnamed: 0.1,Unnamed: 0,koi_pdisposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_time0bk,koi_impact,koi_duration,...,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,0,CANDIDATE,0,0,0,0,9.488036,170.53875,0.146,2.9575,...,2.26,793.0,93.59,35.8,5455.0,4.467,0.927,291.93423,48.141651,15.347
1,1,CANDIDATE,0,0,0,0,54.418383,162.51384,0.586,4.507,...,2.83,443.0,9.11,25.8,5455.0,4.467,0.927,291.93423,48.141651,15.347
2,2,FALSE POSITIVE,0,1,0,0,19.89914,175.850252,0.969,1.7822,...,14.6,638.0,39.3,76.3,5853.0,4.544,0.868,297.00482,48.134129,15.436
3,3,FALSE POSITIVE,0,1,0,0,1.736952,170.307565,1.276,2.40641,...,33.46,1395.0,891.96,505.6,5805.0,4.564,0.791,285.53461,48.28521,15.597
4,4,CANDIDATE,0,0,0,0,2.525592,171.59555,0.701,1.6545,...,2.75,1406.0,926.16,40.9,6031.0,4.438,1.046,288.75488,48.2262,15.509


In [3]:
#Splitting data into labels and features, dropping the index column
# `columns=` already selects the column axis, so no separate `axis` argument
# is needed (pandas ignores `axis` when `columns` is given).
# NOTE(review): the preview of data_Kepler also shows an "Unnamed: 0.1" column
# that looks like a second saved row index. It is still kept as a feature here
# so the feature count stays at 19; consider dropping it (and updating anything
# that relies on 19 features) in one go - TODO confirm.
X = data_Kepler.drop(columns=["koi_pdisposition", "Unnamed: 0"])
# The disposition column is the classification target.
y = data_Kepler["koi_pdisposition"]
print(X.shape, y.shape)

(9200, 19) (9200,)


In [4]:
#Splitting the data into train and test sets
# random_state is fixed so the split is reproducible; no test_size is passed,
# so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
#Scaling the features
# Fit the min/max scaler on the training features only, then reuse the same
# fitted scaler for the test features.
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
#Encoding the labels
# Fit the encoder on the training labels only and reuse it for the test labels
# so both splits share one label -> integer mapping.
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Pin the one-hot width to the number of classes the encoder learned.
# Without num_classes, to_categorical infers the width from the largest label
# present, so a split missing the highest class would get a narrower matrix.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

# Only the last expression of a cell is displayed, so show the test matrix.
y_test_categorical

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [7]:
#Creating a model
# Derive the layer widths from the prepared arrays instead of hard-coding
# them, so the network stays in sync with the feature set and label encoding.
n_features = X_train_scaled.shape[1]      # one input per scaled feature column
n_outputs = y_train_categorical.shape[1]  # one output unit per one-hot class

# Two hidden ReLU layers of 30 units each, followed by a softmax output layer.
deep_model = Sequential()
deep_model.add(Dense(units=30, activation='relu', input_dim=n_features))
deep_model.add(Dense(units=30, activation='relu'))
deep_model.add(Dense(units=n_outputs, activation='softmax'))

In [8]:
#Compiling
# Adam optimizer with categorical cross-entropy loss; accuracy is tracked as
# the reported metric.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [9]:
#Summary
# Print the layer-by-layer architecture of deep_model with output shapes and
# parameter counts.
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                600       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                930       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 62        
Total params: 1,592
Trainable params: 1,592
Non-trainable params: 0
_________________________________________________________________


In [10]:
#Training
# Fit on the scaled training features against the one-hot labels for 25
# epochs, reshuffling each epoch; verbose=2 logs one line per epoch.
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    verbose=2,
    shuffle=True,
    epochs=25,
)

Epoch 1/25
216/216 - 0s - loss: 0.2864 - accuracy: 0.9151
Epoch 2/25
216/216 - 0s - loss: 0.0541 - accuracy: 0.9899
Epoch 3/25
216/216 - 0s - loss: 0.0510 - accuracy: 0.9899
Epoch 4/25
216/216 - 0s - loss: 0.0496 - accuracy: 0.9899
Epoch 5/25
216/216 - 0s - loss: 0.0495 - accuracy: 0.9899
Epoch 6/25
216/216 - 0s - loss: 0.0486 - accuracy: 0.9899
Epoch 7/25
216/216 - 0s - loss: 0.0474 - accuracy: 0.9899
Epoch 8/25
216/216 - 0s - loss: 0.0477 - accuracy: 0.9899
Epoch 9/25
216/216 - 0s - loss: 0.0478 - accuracy: 0.9899
Epoch 10/25
216/216 - 0s - loss: 0.0470 - accuracy: 0.9900
Epoch 11/25
216/216 - 0s - loss: 0.0474 - accuracy: 0.9899
Epoch 12/25
216/216 - 0s - loss: 0.0465 - accuracy: 0.9899
Epoch 13/25
216/216 - 0s - loss: 0.0467 - accuracy: 0.9900
Epoch 14/25
216/216 - 0s - loss: 0.0457 - accuracy: 0.9900
Epoch 15/25
216/216 - 0s - loss: 0.0461 - accuracy: 0.9900
Epoch 16/25
216/216 - 0s - loss: 0.0457 - accuracy: 0.9900
Epoch 17/25
216/216 - 0s - loss: 0.0455 - accuracy: 0.9900
Epoch 

<tensorflow.python.keras.callbacks.History at 0x2b334263780>

In [15]:
#Creating prediction labels
# Sequential.predict_classes was deprecated and later removed from TensorFlow,
# so take the argmax over the softmax probabilities returned by predict().
# verbose=0 keeps the cell silent, matching the previous behaviour.
predicted_probabilities = deep_model.predict(X_test_scaled[:20], verbose=0)
predictions = predicted_probabilities.argmax(axis=-1)
# Map the integer class indices back to the original string labels.
prediction_labels = label_encoder.inverse_transform(predictions)

In [16]:
# Compare the first 20 predicted labels with the first 20 true test labels
# (taken by position).
actual_labels = y_test.iloc[:20].tolist()
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {actual_labels}")

Predicted classes: ['FALSE POSITIVE' 'FALSE POSITIVE' 'CANDIDATE' 'FALSE POSITIVE'
 'FALSE POSITIVE' 'FALSE POSITIVE' 'FALSE POSITIVE' 'FALSE POSITIVE'
 'FALSE POSITIVE' 'FALSE POSITIVE' 'CANDIDATE' 'FALSE POSITIVE'
 'FALSE POSITIVE' 'CANDIDATE' 'CANDIDATE' 'CANDIDATE' 'CANDIDATE'
 'CANDIDATE' 'CANDIDATE' 'FALSE POSITIVE']
Actual Labels: ['FALSE POSITIVE', 'FALSE POSITIVE', 'CANDIDATE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CANDIDATE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'CANDIDATE', 'FALSE POSITIVE']


In [17]:
#Evaluating the model
# Score the trained network on the held-out test split; the result is
# unpacked as the loss followed by the accuracy metric.
deep_model_loss, deep_model_accuracy = deep_model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}")

72/72 - 0s - loss: 0.0536 - accuracy: 0.9891
Loss: 0.053639594465494156, Accuracy: 0.989130437374115
