# Deep Learning 

In [1]:
# Update sklearn to prevent version mismatches
!pip install scikit-learn 
!pip install scikit-learn --upgrade
!conda update scikit-learn
!pip install joblib 
!pip install joblib --upgrade
!pip update joblib 



In [2]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [3]:
df = pd.read_csv("datasets/exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

FileNotFoundError: [Errno 2] File b'datasets/exoplanet_data.csv' does not exist: b'datasets/exoplanet_data.csv'

In [4]:
# Data Cleaning: Remove Space for `FALSE POSITIVE` category
mask = df["koi_disposition"] == "FALSE POSITIVE"
df.loc[mask, "koi_disposition"] = "False_Positive"
df["koi_disposition"]

0            CONFIRMED
1       False_Positive
2       False_Positive
3            CONFIRMED
4            CONFIRMED
             ...      
6986    False_Positive
6987    False_Positive
6988         CANDIDATE
6989    False_Positive
6990    False_Positive
Name: koi_disposition, Length: 6991, dtype: object

# Create a Train Test Split

Use `koi_disposition` for the y values

In [5]:
from sklearn.model_selection import train_test_split
y = df["koi_disposition"]
X = df.drop(columns=["koi_disposition"])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [6]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
4002,0,0,1,0,99.673478,0.0003463,-0.0003463,219.33483,0.0023,-0.0023,...,-148,4.777,0.04,-0.027,0.492,0.026,-0.027,293.05801,45.248821,15.801
4246,0,1,0,0,0.592244,9e-08,-9e-08,131.654831,0.000124,-0.000124,...,-146,4.664,0.056,-0.032,0.591,0.045,-0.045,290.28094,45.46426,15.653
548,0,1,1,0,9.991625,5.36e-06,-5.36e-06,137.447816,0.000445,-0.000445,...,-176,4.338,0.153,-0.187,1.096,0.309,-0.206,301.04239,45.022888,14.039
3953,0,1,0,0,178.41299,3.1e-05,-3.1e-05,218.225235,0.000127,-0.000127,...,-134,4.346,0.084,-0.126,1.148,0.202,-0.124,288.32785,38.627621,13.944
2362,0,0,0,0,45.294223,5.6e-05,-5.6e-05,138.678725,0.000987,-0.000987,...,-68,4.347,0.03,-0.03,1.044,0.057,-0.042,285.67938,50.241299,10.961


# Pre-processing

Scale the data using LabelEncoder and MinMaxScaler

In [7]:
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [9]:
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [10]:
# Print shape 
y_train_categorical.shape

(5243, 3)

# Train the Model

In [11]:
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=40))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))






In [12]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])





In [13]:
# Print model summary
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               4100      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 303       
Total params: 14,503
Trainable params: 14,503
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Set early stopping as callback
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
    X_train_scaled,
    y_train_categorical,
    callbacks=callbacks,
    epochs=60,
    shuffle=True,
    verbose=2
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/60





 - 0s - loss: 0.5587 - acc: 0.7276
Epoch 2/60
 - 0s - loss: 0.3709 - acc: 0.8034
Epoch 3/60




 - 0s - loss: 0.3539 - acc: 0.8205
Epoch 4/60
 - 0s - loss: 0.3484 - acc: 0.8156
Epoch 5/60
 - 0s - loss: 0.3343 - acc: 0.8343
Epoch 6/60
 - 0s - loss: 0.3281 - acc: 0.8432
Epoch 7/60
 - 0s - loss: 0.3217 - acc: 0.8505
Epoch 8/60
 - 0s - loss: 0.3167 - acc: 0.8549
Epoch 9/60
 - 0s - loss: 0.3157 - acc: 0.8545
Epoch 10/60
 - 0s - loss: 0.3065 - acc: 0.8589
Epoch 11/60
 - 0s - loss: 0.3078 - acc: 0.8594
Epoch 12/60
 - 0s - loss: 0.3036 - acc: 0.8604
Epoch 13/60
 - 0s - loss: 0.3045 - acc: 0.8611
Epoch 14/60
 - 0s - loss: 0.3006 - acc: 0.8591
Epoch 15/60
 - 0s - loss: 0.2949 - acc: 0.8663
Epoch 16/60
 - 0s - loss: 0.2944 - acc: 0.8676
Epoch 17/60
 - 0s - loss: 0.2885 - acc: 0.8695
Epoch 18/60
 - 0s - loss: 0.2861 - acc: 0.8732
Epoch 19/60
 - 0s - loss: 0.2828 - acc: 0.8753
Epoch 20/60
 - 0s - loss: 0.2815 - acc: 0.8743
Epoch 21/60
 - 0s - loss: 0.2760 - acc: 0.8798
Epoch 22/60
 - 0s - loss: 0.2719 - acc: 0.8791
Epoch 23/60
 - 0s - loss: 0.2715 - acc: 0.8812
Epoch 24/60
 - 0s - loss: 0.270

<keras.callbacks.History at 0x19d58d26848>

In [1]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

NameError: name 'model' is not defined

In [16]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [17]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['CANDIDATE' 'False_Positive' 'False_Positive' 'CANDIDATE'
 'False_Positive']
Actual Labels: ['CANDIDATE', 'False_Positive', 'False_Positive', 'CANDIDATE', 'False_Positive']


# Save the Model

In [18]:
# save fitted model to file
import joblib
filename = 'deep_learning.sav'
joblib.dump(model, filename)

['deep_learning.sav']