In [None]:
#>>> Near Earth Object Machine Learning Model by Christopher Madden.

#>>> Import dependencies.
import os
import time

import pandas as pd
import tensorflow as tf
from sklearn import tree
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

# PREPROCESS THE DATASET.

In [None]:
#>>> Read neo_v2.csv into a DataFrame (pandas is already imported as `pd`
#>>> in the dependencies cell, so it is not re-imported here).
neo_df = pd.read_csv('./Resources/neo_v2.csv')

#>>> Display the first 10 rows.
neo_df.head(10)

Unnamed: 0,id,name,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,orbiting_body,sentry_object,absolute_magnitude,hazardous
0,2162635,162635 (2000 SS164),1.198271,2.679415,13569.249224,54839740.0,Earth,False,16.73,False
1,2277475,277475 (2005 WK4),0.2658,0.594347,73588.726663,61438130.0,Earth,False,20.0,True
2,2512244,512244 (2015 YE18),0.72203,1.614507,114258.692129,49798720.0,Earth,False,17.83,False
3,3596030,(2012 BV13),0.096506,0.215794,24764.303138,25434970.0,Earth,False,22.2,False
4,3667127,(2014 GE35),0.255009,0.570217,42737.733765,46275570.0,Earth,False,20.09,True
5,54138696,(2021 GY23),0.036354,0.081291,34297.587778,40585690.0,Earth,False,24.32,False
6,54189957,(2021 PY40),0.171615,0.383743,27529.472307,29069120.0,Earth,False,20.95,False
7,54230078,(2021 XD6),0.005328,0.011914,57544.470083,55115020.0,Earth,False,28.49,False
8,2088213,88213 (2001 AF2),0.350393,0.783502,56625.210122,69035980.0,Earth,False,19.4,False
9,3766065,(2016 YM),0.105817,0.236614,48425.840329,38355260.0,Earth,False,22.0,False


In [None]:
#>>> Count the distinct non-null values held by each column.
neo_df.nunique(axis=0, dropna=True)

id                    27423
name                  27423
est_diameter_min       1638
est_diameter_max       1638
relative_velocity     90828
miss_distance         90536
orbiting_body             1
sentry_object             1
absolute_magnitude     1638
hazardous                 2
dtype: int64

In [None]:
#>>> Drop unnecessary columns: 'id' is only an identifier, while 'orbiting_body'
#>>> and 'sentry_object' each contain a single unique value.
#>>> `columns=` is used because passing the axis positionally (`drop(labels, 1)`)
#>>> was deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 onward.
neo_df = neo_df.drop(columns=['id', 'orbiting_body', 'sentry_object'])

#>>> Set index to 'name'.
neo_df = neo_df.set_index('name')

#>>> Display the first 10 rows.
neo_df.head(10)

  


Unnamed: 0_level_0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude,hazardous
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
162635 (2000 SS164),1.198271,2.679415,13569.249224,54839740.0,16.73,False
277475 (2005 WK4),0.2658,0.594347,73588.726663,61438130.0,20.0,True
512244 (2015 YE18),0.72203,1.614507,114258.692129,49798720.0,17.83,False
(2012 BV13),0.096506,0.215794,24764.303138,25434970.0,22.2,False
(2014 GE35),0.255009,0.570217,42737.733765,46275570.0,20.09,True
(2021 GY23),0.036354,0.081291,34297.587778,40585690.0,24.32,False
(2021 PY40),0.171615,0.383743,27529.472307,29069120.0,20.95,False
(2021 XD6),0.005328,0.011914,57544.470083,55115020.0,28.49,False
88213 (2001 AF2),0.350393,0.783502,56625.210122,69035980.0,19.4,False
(2016 YM),0.105817,0.236614,48425.840329,38355260.0,22.0,False


In [None]:
#>>> Build the feature matrix: every column except the 'hazardous' label.
#>>> `drop` returns a new DataFrame, so `neo_df` itself is left unchanged.
X = neo_df.drop(columns='hazardous')

#>>> Preview the first 10 feature rows.
X.head(10)

Unnamed: 0_level_0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
162635 (2000 SS164),1.198271,2.679415,13569.249224,54839740.0,16.73
277475 (2005 WK4),0.2658,0.594347,73588.726663,61438130.0,20.0
512244 (2015 YE18),0.72203,1.614507,114258.692129,49798720.0,17.83
(2012 BV13),0.096506,0.215794,24764.303138,25434970.0,22.2
(2014 GE35),0.255009,0.570217,42737.733765,46275570.0,20.09
(2021 GY23),0.036354,0.081291,34297.587778,40585690.0,24.32
(2021 PY40),0.171615,0.383743,27529.472307,29069120.0,20.95
(2021 XD6),0.005328,0.011914,57544.470083,55115020.0,28.49
88213 (2001 AF2),0.350393,0.783502,56625.210122,69035980.0,19.4
(2016 YM),0.105817,0.236614,48425.840329,38355260.0,22.0


In [None]:
#>>> Extract the 'hazardous' labels as a NumPy array to use as the target.
y = neo_df['hazardous'].to_numpy()

#>>> Preview the first ten labels.
y[:10]

array([False,  True, False, False,  True, False, False, False, False,
       False])

In [None]:
#>>> Partition features and labels: 80% for training, the remainder for testing.
#>>> The fixed random_state keeps the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=420,
)

#>>> Report the shape of each partition, in the order they were returned.
for partition in (X_train, X_test, y_train, y_test):
    print(partition.shape)

(72668, 5)
(18168, 5)
(72668,)
(18168,)


In [None]:
#>>> Instantiate the scaler and fit it on the training split only, so the
#>>> test split is never used when the scaling statistics are computed.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

#>>> Apply the fitted scaling to the training and testing features.
X_train_scaled, X_test_scaled = [
    X_scaler.transform(features) for features in (X_train, X_test)
]

# PREDICT HAZARDOUS CLASSIFICATION.

In [None]:
#>>> Create the decision tree classifier instance.
#>>> A fixed seed (the same value used for the train/test split) makes the
#>>> fitted tree, and therefore every metric derived from it, repeatable.
model = tree.DecisionTreeClassifier(random_state=420)

#>>> Fit the model.
model = model.fit(X_train_scaled, y_train)

#>>> Make predictions using the testing data.
predictions = model.predict(X_test_scaled)

#>>> Calculate the confusion matrix.
#>>> Listing the labels explicitly pins the row/column order (False, True) and
#>>> guarantees a 2x2 result that matches the hard-coded frame labels below.
cm = confusion_matrix(y_test, predictions, labels=[False, True])

#>>> Create a DataFrame from the confusion matrix (rows = actual, columns = predicted).
cm_df = pd.DataFrame(
    cm, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])

#>>> Display the Dataframe.
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,15404,1023
Actual 1,911,830


In [None]:
#>>> Compute the summary metrics for the test-set predictions.
acc_score = accuracy_score(y_test, predictions)
report_text = classification_report(y_test, predictions)

#>>> Show the confusion matrix, then the accuracy, then the per-class report.
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score : {acc_score}')
print('Classification Report')
print(report_text)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,15404,1023
Actual 1,911,830


Accuracy Score : 0.8935490973139586
Classification Report
              precision    recall  f1-score   support

       False       0.94      0.94      0.94     16427
        True       0.45      0.48      0.46      1741

    accuracy                           0.89     18168
   macro avg       0.70      0.71      0.70     18168
weighted avg       0.90      0.89      0.90     18168



# NEURAL NETWORK: COMPILE, TRAIN AND EVALUATE

In [None]:
#>>> Number of input features (columns of the scaled training matrix).
number_input_features = X_train_scaled.shape[1]

#>>> Build the network: four hidden layers followed by a single-unit output.
#>>> The output layer uses a sigmoid so the model emits a probability in [0, 1].
#>>> The model is compiled with the 'binary_crossentropy' loss, which by default
#>>> expects probabilities rather than raw logits; a linear output would feed
#>>> it unbounded values and make the reported loss meaningless.
nn = tf.keras.models.Sequential([
    #>>> Input specification.
    tf.keras.Input(shape=(number_input_features,)),
    #>>> First hidden layer.
    tf.keras.layers.Dense(units=110, activation="relu"),
    #>>> Second hidden layer.
    tf.keras.layers.Dense(units=80, activation="relu"),
    #>>> Third hidden layer.
    tf.keras.layers.Dense(units=40, activation="sigmoid"),
    #>>> Fourth hidden layer.
    tf.keras.layers.Dense(units=20, activation="sigmoid"),
    #>>> Output layer.
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

#>>> Check the structure of the model.
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 110)               660       
                                                                 
 dense_6 (Dense)             (None, 80)                8880      
                                                                 
 dense_7 (Dense)             (None, 40)                3240      
                                                                 
 dense_8 (Dense)             (None, 20)                820       
                                                                 
 dense_9 (Dense)             (None, 1)                 21        
                                                                 
Total params: 13,621
Trainable params: 13,621
Non-trainable params: 0
_________________________________________________________________


In [None]:
#>>> Set computation start time. perf_counter is a monotonic clock, so the
#>>> elapsed time cannot be distorted by system clock adjustments.
nn_start = time.perf_counter()

#>>> Compile the model.
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#>>> Train the model.
fit_model = nn.fit(X_train_scaled, y_train, epochs=5)

#>>> Evaluate the model using the test data.
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

#>>> Export our model to HDF5 file. The target folder is created first so the
#>>> save cannot fail on a missing directory after training has already run.
os.makedirs('./Model', exist_ok=True)
nn.save('./Model/NearEarthObjects.h5')

#>>> Display total computation time.
#>>> Rounding to whole seconds before splitting avoids output such as
#>>> "0 minutes 60 seconds"; divmod keeps runs longer than an hour intact
#>>> (reported as total minutes) instead of discarding the hours.
nn_end = time.perf_counter()
nn_time = nn_end - nn_start
nn_minutes, nn_seconds = divmod(round(nn_time), 60)
print(f"\nTime to compile, train, evaluate, and export model = {nn_minutes:.0f} minutes {nn_seconds:.0f} seconds")

#>>> Display model evaluation.
print(f'\nLoss: {model_loss}\nAccuracy: {model_accuracy}')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
568/568 - 1s - loss: 1.4781 - accuracy: 0.9042 - 764ms/epoch - 1ms/step

Time to compile, train, evaluate, and export model = 0 minutes 30 seconds

Loss: 1.4781397581100464
Accuracy: 0.9041721820831299
