In [None]:
# Update scikit-learn (the PyPI package is `scikit-learn`; `sklearn` is a deprecated alias)
#%pip install --upgrade scikit-learn
# Install joblib
#%pip install joblib

In [None]:
# Import dependencies
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Load in exoplanet data.
# The CSV path is relative to the notebook's working directory; the frame is
# displayed right after loading as a quick sanity check.
planets_df = pd.read_csv("resources/exoplanet_data.csv")
planets_df

In [None]:
# Show all column names so the target and feature columns can be picked below
planets_df.columns

In [None]:
# # Drop unnecessary columns
# planets_df = planets_df[['koi_disposition', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
#         'koi_fpflag_ec', 'koi_period','koi_time0bk', 'koi_impact','koi_duration', 'koi_depth', 
#         'koi_prad','koi_teq', 'koi_insol','koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 
#         'koi_slogg', 'koi_srad', 'ra', 'dec', 'koi_kepmag']]


In [None]:
# Clean the table in one chained pass:
#   1. discard columns that hold no values at all,
#   2. discard every row that still has at least one missing value.
planets_df = (
    planets_df
    .dropna(axis='columns', how='all')
    .dropna(how="any")
)
planets_df

## Select Features

In [None]:
# Target: the disposition label. Features: every other column.
y = planets_df["koi_disposition"]
X = planets_df.drop(columns="koi_disposition")
# Alternative hand-picked feature subset, kept for reference:
# X = planets_df[['koi_fpflag_co', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_model_snr', 'koi_prad', 'koi_fpflag_ec']]
print(X.shape, y.shape)

In [None]:
# Feature selection: rebuild X from the cleaned frame without the target and
# without the features judged unnecessary (feature importance below 0).
X = planets_df.drop(
    columns=[
        'koi_disposition',
        'koi_srad',
        'koi_steff',
        'koi_slogg',
        'koi_slogg_err1',
        'koi_srad_err2',
        'koi_tce_plnt_num',
    ]
)
print(X.shape)


## Train, Test, Split

In [None]:
# Separate the data into training & testing data.
# random_state is fixed so the same rows land in each split on every run; no
# test_size is passed, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

In [None]:
# Show the feature rows that ended up in the training split
X_train

## Pre-Processing

### Min Max Scaler (x)

In [None]:
# MinMaxScaler: scale the numerical data.
# The scaler learns its ranges from the training rows only; the test rows are
# then transformed with those same ranges.
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### One Hot Encoding (y)

In [None]:
# Encode labels for y: "KOI disposition".
# The encoder is fitted on the training labels; the test labels are mapped
# with that same fitted encoder.
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [None]:
# One hot encoding.
# to_categorical infers the number of columns from the largest label it sees,
# so a split that happens to lack the highest-numbered class would produce a
# narrower matrix. Pinning num_classes to the classes the encoder learned keeps
# the train and test matrices the same width.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

In [None]:
# See the categories: display the one-hot encoded training labels
y_train_categorical

In [None]:
# Feature selection & remove unnecessary features


### Candidate: 0
### Confirmed: 1
### False Positive: 2

In [None]:
# # Loop through to show original class & label
# for label, original_class in zip(encoded_y, y):
#     print('Original Class: ' + str(original_class))
#     print('Encoded Label: ' + str(label))
#     print('-' * 12)

## Train the Model

### Deep Learning Model:

In [None]:
# Create model
def createModel():
    """Build and compile the feed-forward classifier used in this notebook.

    The network has two hidden ``Dense`` layers of 100 ReLU units each and a
    softmax output layer with one unit per column of ``y_train_categorical``.
    The input width is ``X_train.shape[1]``. Both values are read from the
    notebook's global variables when the function is called.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    n_features = X_train.shape[1]
    n_classes = y_train_categorical.shape[1]

    network = Sequential([
        # Input layer / first hidden layer
        Dense(100, activation='relu', input_dim=n_features),
        # Second hidden layer
        Dense(100, activation='relu'),
        # Output layer
        Dense(n_classes, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Find the shape of the y_train (categorical); its second dimension is what
# createModel() uses as the size of the output layer
y_train_categorical.shape

In [None]:
# # Output layer
# model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

In [None]:
# # Print out the model summary
# model.summary()

In [None]:
# # Fit/Train the model
# model.fit(
#     X_train_scaled,
#     y_train_categorical,
#     epochs=100,
#     shuffle=True,
#     verbose=2
# )

In [None]:
# Wrap createModel in the scikit-learn style KerasClassifier so the network can
# be handed to scikit-learn tools (it is passed to GridSearchCV further down).
model = KerasClassifier(build_fn=createModel, verbose=0)

In [None]:
# Baseline check before tuning.
# `model` is the KerasClassifier wrapper: it has not been fitted at this point
# and the wrapper does not expose evaluate(). Train a plain Keras network from
# createModel() instead and evaluate that on the held-out test data. The
# wrapper itself is left untouched for the grid search below.
baseline_model = createModel()
baseline_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=0
)
model_loss, model_accuracy = baseline_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

## Hyperparameter Tuning

In [None]:
# GridSearchCV: tune model parameters.
# Only the number of training epochs is searched; verbose=3 makes the search
# report progress while it runs.
from sklearn.model_selection import GridSearchCV
parameters = {'epochs': [50, 100, 150]}
grid = GridSearchCV(estimator=model, param_grid=parameters, verbose=3)

In [None]:
# Train the model with GridSearch: runs the search over `parameters` using the
# scaled training features and the one-hot encoded training labels
grid.fit(X_train_scaled, y_train_categorical)

In [None]:
# Parameter combination that scored best in the grid search
print(grid.best_params_)

In [None]:
# Score achieved by that best parameter combination during the search
print(grid.best_score_)

In [None]:
# Train the final network with the tuned number of epochs.
# The architecture is exactly the one defined in createModel(), so build it
# through that helper instead of repeating the layer stack here. The epoch
# count comes from the grid search result rather than a hard-coded value, so
# this cell stays in sync with whatever the search selected.
new_model = createModel()
new_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=grid.best_params_['epochs'],
    shuffle=True,
    verbose=2
)

In [None]:
# Predict on the *scaled* test features: the network was trained on
# X_train_scaled, so it must be given inputs put through the same scaler.
predictions = new_model.predict(X_test_scaled)

In [None]:
from sklearn.metrics import classification_report
# new_model.predict returns one row of class probabilities per sample, which
# classification_report cannot compare against labels directly. Take the
# most probable class index per row and compare it with the integer-encoded
# test labels; target_names supplies display names for classes 0, 1, 2 in the
# encoder's order.
predicted_classes = predictions.argmax(axis=1)
print(classification_report(encoded_y_test, predicted_classes,
                            target_names=["Candidate", "Confirmed", "False Positive"]))

## Save the Model

In [None]:
import joblib
# Persist `model` to disk with joblib.
# NOTE(review): `model` is the KerasClassifier wrapper, and no cell in this
# notebook calls fit() on it directly (the grid search is presumably working on
# copies — confirm). Verify this is the object that should be saved rather
# than a trained network, and that the models/ directory exists.
filename = 'models/julia_brunett.sav'
joblib.dump(model, filename)

In [None]:
# Save the trained Keras network in HDF5 format.
# `model` is the KerasClassifier wrapper, which does not provide save(); the
# trained Keras network in this notebook is `new_model`.
new_model.save("models/exoplanets.h5")