In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [5]:
# Set features. This will also be used as your x values.
selected_features = df.drop("koi_disposition", axis=1)
selected_features.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,-0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,-0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,-0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,-0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,-0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Create a Train Test Split

Use `koi_disposition` for the y values

In [6]:
y = df["koi_disposition"]

In [7]:
y.unique()

array(['CONFIRMED', 'FALSE POSITIVE', 'CANDIDATE'], dtype=object)

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(selected_features, y, random_state=42)

In [9]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
6122,0,0,0,0,6.768901,7.38e-05,-7.38e-05,133.07724,0.00844,-0.00844,...,-171,4.327,0.153,-0.187,1.125,0.31,-0.207,294.40472,39.351681,14.725
6370,0,1,0,1,0.733726,6.06e-06,-6.06e-06,132.02005,0.00795,-0.00795,...,-175,4.578,0.033,-0.187,0.797,0.211,-0.056,284.50391,42.46386,15.77
2879,1,0,0,0,7.652707,6.54e-05,-6.54e-05,134.46038,0.00619,-0.00619,...,-189,4.481,0.05,-0.2,0.963,0.29,-0.097,295.50211,38.98354,13.099
107,0,0,0,0,7.953547,1.91e-05,-1.91e-05,174.66224,0.00182,-0.00182,...,-85,4.536,0.056,-0.016,0.779,0.023,-0.049,291.15878,40.750271,15.66
29,0,0,0,0,4.959319,5.15e-07,-5.15e-07,172.258529,8.3e-05,-8.3e-05,...,-77,4.359,0.11,-0.11,1.082,0.173,-0.13,292.16705,48.727589,15.263


In [10]:
print(len(X_train.columns))

40


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [11]:
# Scale your data
from sklearn.preprocessing import StandardScaler
    
X_scaler = StandardScaler().fit(X_train)

In [12]:
 # Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

<em> One-hot encode</em> the labels

In [13]:
from tensorflow.keras.utils import to_categorical

In [14]:
# One-hot encoding
y_train = y_train.replace(['CONFIRMED','FALSE POSITIVE', 'CANDIDATE'], [0, 1, 2])
y_test = y_test.replace(['CONFIRMED','FALSE POSITIVE', 'CANDIDATE'], [0, 1, 2])
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

# Train the Model



### Define the <em> Model Architecture </em>

In [15]:
# Create a sequential model
from tensorflow.keras.models import Sequential

keras = Sequential()

add the <em> first layer </em>

In [16]:
from tensorflow.keras.layers import Dense
number_inputs = 40
number_hidden_nodes = 100
keras.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

we want the model to predict 3 classes (<em>labels</em>): <b>'CONFIRMED'</b> , <b>'FALSE POSITIVE'</b>, or <b>'CANDIDATE'</b>

In [17]:
y.unique()

array(['CONFIRMED', 'FALSE POSITIVE', 'CANDIDATE'], dtype=object)

In [18]:
number_classes = 3
keras.add(Dense(units=number_classes, activation='softmax'))

display the model summary

In [19]:
keras.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               4100      
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 303       
Total params: 4,403
Trainable params: 4,403
Non-trainable params: 0
_________________________________________________________________


### Compile the Model

In [20]:
# Use 'categorical_crossentropy' for our categorical data
keras.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

### Fit the Model

In [21]:
keras.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=800,
    shuffle=True,
    verbose=2
)

Epoch 1/800
164/164 - 1s - loss: 0.5982 - accuracy: 0.7677
Epoch 2/800
164/164 - 0s - loss: 0.3433 - accuracy: 0.8678
Epoch 3/800
164/164 - 0s - loss: 0.2932 - accuracy: 0.8808
Epoch 4/800
164/164 - 0s - loss: 0.2744 - accuracy: 0.8856
Epoch 5/800
164/164 - 0s - loss: 0.2633 - accuracy: 0.8888
Epoch 6/800
164/164 - 0s - loss: 0.2555 - accuracy: 0.8919
Epoch 7/800
164/164 - 0s - loss: 0.2484 - accuracy: 0.8945
Epoch 8/800
164/164 - 0s - loss: 0.2433 - accuracy: 0.8970
Epoch 9/800
164/164 - 0s - loss: 0.2381 - accuracy: 0.9008
Epoch 10/800
164/164 - 0s - loss: 0.2344 - accuracy: 0.8981
Epoch 11/800
164/164 - 0s - loss: 0.2314 - accuracy: 0.9016
Epoch 12/800
164/164 - 0s - loss: 0.2267 - accuracy: 0.9039
Epoch 13/800
164/164 - 0s - loss: 0.2238 - accuracy: 0.9025
Epoch 14/800
164/164 - 0s - loss: 0.2204 - accuracy: 0.9056
Epoch 15/800
164/164 - 0s - loss: 0.2180 - accuracy: 0.9058
Epoch 16/800
164/164 - 0s - loss: 0.2142 - accuracy: 0.9094
Epoch 17/800
164/164 - 0s - loss: 0.2121 - accura

Epoch 138/800
164/164 - 0s - loss: 0.1158 - accuracy: 0.9548
Epoch 139/800
164/164 - 0s - loss: 0.1166 - accuracy: 0.9508
Epoch 140/800
164/164 - 0s - loss: 0.1131 - accuracy: 0.9558
Epoch 141/800
164/164 - 0s - loss: 0.1166 - accuracy: 0.9519
Epoch 142/800
164/164 - 0s - loss: 0.1136 - accuracy: 0.9544
Epoch 143/800
164/164 - 0s - loss: 0.1124 - accuracy: 0.9558
Epoch 144/800
164/164 - 0s - loss: 0.1123 - accuracy: 0.9540
Epoch 145/800
164/164 - 0s - loss: 0.1109 - accuracy: 0.9554
Epoch 146/800
164/164 - 0s - loss: 0.1137 - accuracy: 0.9554
Epoch 147/800
164/164 - 0s - loss: 0.1114 - accuracy: 0.9563
Epoch 148/800
164/164 - 0s - loss: 0.1106 - accuracy: 0.9542
Epoch 149/800
164/164 - 0s - loss: 0.1108 - accuracy: 0.9556
Epoch 150/800
164/164 - 0s - loss: 0.1087 - accuracy: 0.9554
Epoch 151/800
164/164 - 0s - loss: 0.1124 - accuracy: 0.9538
Epoch 152/800
164/164 - 0s - loss: 0.1148 - accuracy: 0.9559
Epoch 153/800
164/164 - 0s - loss: 0.1117 - accuracy: 0.9561
Epoch 154/800
164/164 - 

Epoch 273/800
164/164 - 0s - loss: 0.0778 - accuracy: 0.9710
Epoch 274/800
164/164 - 0s - loss: 0.0744 - accuracy: 0.9733
Epoch 275/800
164/164 - 0s - loss: 0.0735 - accuracy: 0.9735
Epoch 276/800
164/164 - 0s - loss: 0.0751 - accuracy: 0.9718
Epoch 277/800
164/164 - 0s - loss: 0.0725 - accuracy: 0.9731
Epoch 278/800
164/164 - 0s - loss: 0.0745 - accuracy: 0.9722
Epoch 279/800
164/164 - 0s - loss: 0.0713 - accuracy: 0.9743
Epoch 280/800
164/164 - 0s - loss: 0.0728 - accuracy: 0.9720
Epoch 281/800
164/164 - 0s - loss: 0.0753 - accuracy: 0.9702
Epoch 282/800
164/164 - 0s - loss: 0.0738 - accuracy: 0.9743
Epoch 283/800
164/164 - 0s - loss: 0.0736 - accuracy: 0.9739
Epoch 284/800
164/164 - 0s - loss: 0.0739 - accuracy: 0.9750
Epoch 285/800
164/164 - 0s - loss: 0.0715 - accuracy: 0.9758
Epoch 286/800
164/164 - 0s - loss: 0.0740 - accuracy: 0.9733
Epoch 287/800
164/164 - 0s - loss: 0.0731 - accuracy: 0.9741
Epoch 288/800
164/164 - 0s - loss: 0.0714 - accuracy: 0.9746
Epoch 289/800
164/164 - 

Epoch 408/800
164/164 - 0s - loss: 0.0482 - accuracy: 0.9847
Epoch 409/800
164/164 - 0s - loss: 0.0474 - accuracy: 0.9847
Epoch 410/800
164/164 - 0s - loss: 0.0466 - accuracy: 0.9861
Epoch 411/800
164/164 - 0s - loss: 0.0495 - accuracy: 0.9834
Epoch 412/800
164/164 - 0s - loss: 0.0505 - accuracy: 0.9804
Epoch 413/800
164/164 - 0s - loss: 0.0455 - accuracy: 0.9853
Epoch 414/800
164/164 - 0s - loss: 0.0464 - accuracy: 0.9855
Epoch 415/800
164/164 - 0s - loss: 0.0474 - accuracy: 0.9849
Epoch 416/800
164/164 - 0s - loss: 0.0464 - accuracy: 0.9834
Epoch 417/800
164/164 - 0s - loss: 0.0479 - accuracy: 0.9834
Epoch 418/800
164/164 - 0s - loss: 0.0464 - accuracy: 0.9855
Epoch 419/800
164/164 - 0s - loss: 0.0470 - accuracy: 0.9838
Epoch 420/800
164/164 - 0s - loss: 0.0507 - accuracy: 0.9842
Epoch 421/800
164/164 - 0s - loss: 0.0604 - accuracy: 0.9809
Epoch 422/800
164/164 - 0s - loss: 0.0460 - accuracy: 0.9846
Epoch 423/800
164/164 - 0s - loss: 0.0434 - accuracy: 0.9878
Epoch 424/800
164/164 - 

Epoch 543/800
164/164 - 0s - loss: 0.0312 - accuracy: 0.9907
Epoch 544/800
164/164 - 0s - loss: 0.0325 - accuracy: 0.9901
Epoch 545/800
164/164 - 0s - loss: 0.0325 - accuracy: 0.9916
Epoch 546/800
164/164 - 0s - loss: 0.0355 - accuracy: 0.9912
Epoch 547/800
164/164 - 0s - loss: 0.0375 - accuracy: 0.9889
Epoch 548/800
164/164 - 0s - loss: 0.0298 - accuracy: 0.9937
Epoch 549/800
164/164 - 0s - loss: 0.0314 - accuracy: 0.9907
Epoch 550/800
164/164 - 0s - loss: 0.0303 - accuracy: 0.9926
Epoch 551/800
164/164 - 0s - loss: 0.0289 - accuracy: 0.9920
Epoch 552/800
164/164 - 0s - loss: 0.0355 - accuracy: 0.9903
Epoch 553/800
164/164 - 0s - loss: 0.0281 - accuracy: 0.9929
Epoch 554/800
164/164 - 0s - loss: 0.0291 - accuracy: 0.9922
Epoch 555/800
164/164 - 0s - loss: 0.0303 - accuracy: 0.9912
Epoch 556/800
164/164 - 0s - loss: 0.0292 - accuracy: 0.9926
Epoch 557/800
164/164 - 0s - loss: 0.0306 - accuracy: 0.9916
Epoch 558/800
164/164 - 0s - loss: 0.0323 - accuracy: 0.9907
Epoch 559/800
164/164 - 

Epoch 678/800
164/164 - 0s - loss: 0.0178 - accuracy: 0.9968
Epoch 679/800
164/164 - 0s - loss: 0.0199 - accuracy: 0.9952
Epoch 680/800
164/164 - 0s - loss: 0.0183 - accuracy: 0.9975
Epoch 681/800
164/164 - 0s - loss: 0.0200 - accuracy: 0.9952
Epoch 682/800
164/164 - 0s - loss: 0.0203 - accuracy: 0.9950
Epoch 683/800
164/164 - 0s - loss: 0.0210 - accuracy: 0.9947
Epoch 684/800
164/164 - 0s - loss: 0.0189 - accuracy: 0.9966
Epoch 685/800
164/164 - 0s - loss: 0.0250 - accuracy: 0.9935
Epoch 686/800
164/164 - 0s - loss: 0.0213 - accuracy: 0.9937
Epoch 687/800
164/164 - 0s - loss: 0.0213 - accuracy: 0.9962
Epoch 688/800
164/164 - 0s - loss: 0.0250 - accuracy: 0.9937
Epoch 689/800
164/164 - 0s - loss: 0.0202 - accuracy: 0.9949
Epoch 690/800
164/164 - 0s - loss: 0.0189 - accuracy: 0.9962
Epoch 691/800
164/164 - 0s - loss: 0.0181 - accuracy: 0.9960
Epoch 692/800
164/164 - 0s - loss: 0.0177 - accuracy: 0.9968
Epoch 693/800
164/164 - 0s - loss: 0.0166 - accuracy: 0.9973
Epoch 694/800
164/164 - 

<tensorflow.python.keras.callbacks.History at 0x169a6109ac0>

### Test the Model

In [22]:
# Evaluate the model using the testing data
model_loss, model_accuracy = keras.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

55/55 - 0s - loss: 1.1182 - accuracy: 0.8833
Loss: 1.1182016134262085, Accuracy: 0.8832951784133911


# Save the Model

In [23]:
# Save the model
keras.save("Models/Keras_model.h5")