In [None]:
#>>> Near Earth Object Machine Learning Models by Christopher Madden.

#_________________ ¶¶¶¶¶¶¶¶ 
#_____________¶¶¶¶¶ _______¶¶¶¶¶ 
#___________¶¶¶ ________________¶¶¶ 
#_________¶¶¶ ____________________¶¶¶ 
#________¶¶ ________________________¶¶ 
#_______¶ ______¶¶¶_____¶¶¶__________¶¶ 
#______¶ _________¶¶______¶¶__________¶¶ 
#_____¶¶ __________¶¶______¶¶_________¶¶ 
#_____¶ ____________¶¶______¶¶___¶¶¶___¶¶ 
#____¶¶ _____¶¶_____¶¶______¶¶_____¶¶__¶¶ 
#____¶¶ ___¶¶¶______¶¶______¶¶______¶¶_¶¶ 
#____¶¶ __¶¶¶¶¶__________________¶¶_¶¶_¶¶ 
#_____¶ __¶¶__¶¶_________________¶¶____¶¶ 
#_____¶¶ ______¶¶______________¶¶¶____¶¶ 
#______¶¶ ______¶¶____________¶¶¶_____¶¶ 
#_______¶¶ _______¶¶¶¶_____¶¶¶¶______¶¶ 
#________¶¶ _________¶¶¶¶¶¶¶________¶¶ 
#__________¶¶ ____________________¶¶ 
#___________¶¶¶ ______________¶¶¶ 
#_____________ ¶¶¶¶¶¶¶¶¶¶¶¶¶¶¶

# LIBRARIES AND RESOURCES

In [None]:
#>>> Import dependencies.
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from autoviz.AutoViz_Class import AutoViz_Class
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot as plt
from sklearn import tree
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

#>>> Path to the source CSV, relative to the notebook's working directory.
file_path = "./Resources/neo_v2a.csv"

# PREPROCESS THE DATASET.

In [None]:
#>>> Load the source CSV into a DataFrame.
neo_df = pd.read_csv(filepath_or_buffer=file_path)

#>>> Preview the top ten records.
neo_df.head(n=10)

In [None]:
#>>> Boolean mask of the dataset: True wherever a value is missing.
pd.isna(neo_df)

In [None]:
#>>> Count the missing values in each column.
neo_df.isnull().sum(axis=0)

In [None]:
#>>> Inspect the data type of each column.
neo_df.dtypes

In [None]:
#>>> Convert the 'year' column to a numeric dtype.
#    errors='coerce' replaces every value that cannot be parsed with NaN instead of
#    raising, so re-check neo_df.isna().sum() before feeding the data to a model.
#    Bracket indexing is used because pandas discourages attribute-style column
#    assignment: it can collide with DataFrame attributes/methods and does not
#    create a column when the name is not already one.
neo_df['year'] = pd.to_numeric(neo_df['year'], errors='coerce')

#>>> Confirm the resulting column dtypes.
neo_df.dtypes

In [None]:
#>>> Count the distinct values held by each column.
neo_df.nunique(axis=0)

In [None]:
#>>> Drop unnecessary columns: 'id', 'orbiting_body', and 'sentry_object'.
#    The columns= keyword replaces the positional axis argument (drop([...], 1)),
#    which was deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
neo_df = neo_df.drop(columns=['id', 'orbiting_body', 'sentry_object'])

#>>> Set index to 'name'.
#    NOTE: this cell overwrites neo_df, so it can only be run once per kernel session;
#    re-run the loading cell first if it needs to be executed again.
neo_df = neo_df.set_index('name')

#>>> Display the first 10 rows.
neo_df.head(10)

In [None]:
#>>> Build the feature set: every column except the 'hazardous' target.
X = neo_df.drop(columns='hazardous')

#>>> Preview the first ten feature rows.
X.head(n=10)

In [None]:
#>>> Pull the 'hazardous' column out as the target array.
y = neo_df.loc[:, 'hazardous'].values

#>>> Peek at the first ten target values.
y[0:10]

In [None]:
#>>> Partition features and target: 80% of the rows for training, the rest for testing.
#    The fixed random_state keeps the partition identical between runs.
splits = train_test_split(X, y, train_size=0.80, random_state=420)
X_train, X_test, y_train, y_test = splits

#>>> Report the shape of each piece, in the order X_train, X_test, y_train, y_test.
for subset in splits:
    print(subset.shape)

In [None]:
#>>> Instantiate the scaler and fit it on the training features only,
#    so nothing about the test split influences the scaling parameters.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

#>>> Apply the fitted scaling to both the training and the testing features.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

# AUTOVIZUALIZE DATA

In [None]:
#>>> Initialize the Autoviz class in an object.
AV = AutoViz_Class()

#>>> Must specify in order for AutoViz to display plots.
%matplotlib inline

#>>> Passing the source data and parameters.
graph = AV.AutoViz(
    file_path,
    save_plot_dir='./Images',
    sep=',',
    depVar='',
    dfte=None,
    header=0,
    verbose=2,
    lowess=False,
    chart_format='jpg',
    max_rows_analyzed=1500000,
    max_cols_analyzed=30,
)

# DECISION TREE CLASSIFICATION

In [None]:
#>>> Create the decision tree classifier instance.
#    A fixed random_state makes the fitted tree, and every metric derived from it,
#    reproducible across kernel restarts.
model = tree.DecisionTreeClassifier(random_state=420)

#>>> Fit the model.
model = model.fit(X_train_scaled, y_train)

#>>> Make predictions using the testing data.
predictions = model.predict(X_test_scaled)

#>>> Calculate the confusion matrix (rows = actual class, columns = predicted class).
cm = confusion_matrix(y_test, predictions)

#>>> Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])

#>>> Export the fitted decision tree.
#    scikit-learn estimators have no Keras-style .save() method (calling it raises
#    AttributeError), so the model is pickled instead. It gets its own file name so
#    the neural network export further down does not overwrite it.
#    Only unpickle this file from a trusted source.
with open('neo_decision_tree.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

#>>> Display the Dataframe.
cm_df

In [None]:
#>>> Display decision tree and save the output.
#    A separate, small tree (at most 10 leaves) is fitted just for the diagram.
#    It is trained on the unscaled features, so the split thresholds in the plot are
#    expressed in the original column units.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=10, random_state=0)
clf.fit(X_train, y_train)
plt.figure(figsize=(18,18))
#    Label nodes with the real column names and class labels instead of X[i] indices.
tree.plot_tree(
    clf,
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in clf.classes_],
    fontsize=14,
)
plt.savefig('./Images/decision_tree.jpg')

In [None]:
#>>> Score the decision tree predictions against the held-out labels.
acc_score = accuracy_score(y_test, predictions)
report_text = classification_report(y_test, predictions)

#>>> Show the confusion matrix, the accuracy and the per-class report.
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score : {acc_score}')
print('Classification Report')
print(report_text)

# LINEAR REGRESSION ANALYSIS

In [None]:
#>>> TO DO: ADD LINEAR REGRESSION MODEL

# NEURAL NETWORK: COMPILE, TRAIN AND EVALUATE

In [None]:
#>>> Number of input features = number of columns in the scaled training matrix.
number_input_features = X_train_scaled.shape[1]

#>>> Create the sequential model that the layers below are stacked onto.
nn = tf.keras.models.Sequential()

#>>> First hidden layer.
nn.add(tf.keras.layers.Dense(units=110, activation="relu", input_dim = number_input_features))

#>>> Second hidden layer.
nn.add(tf.keras.layers.Dense(units=80, activation="relu"))

#>>> Third hidden layer.
nn.add(tf.keras.layers.Dense(units=40, activation="sigmoid"))

#>>> Fourth hidden layer.
nn.add(tf.keras.layers.Dense(units=20, activation="sigmoid"))

#>>> Output layer.
#    A single sigmoid unit so the output is a probability in [0, 1]. The model is
#    trained with binary_crossentropy (which by default expects probabilities, not
#    logits) and scored with a thresholded accuracy metric, so a linear output here
#    would make both the loss and the accuracy meaningless.
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#>>> Check the structure of the model.
nn.summary()

In [None]:
#>>> Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#>>> Fit the network on the scaled training data for five epochs.
fit_model = nn.fit(x=X_train_scaled, y=y_train, epochs=5)

#>>> Measure loss and accuracy on the scaled test data.
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)

#>>> Write the trained network to an HDF5 file.
#    NOTE(review): the file name looks inherited from another project - confirm.
nn.save("AlphabetSoupCharity.h5")

#>>> Report the evaluation results.
print(f'\nLoss: {model_loss}\nAccuracy: {model_accuracy}')

In [None]:
#>>> TO DO: OUTPUT CONFUSION MATRIX VIA KERAS