## Machine Learning Model - Using Neural Network Model to Make a Prediction of Mag and Depth


In [1]:
import numpy as np
import pandas as pd
import datetime
import time
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor

import os
print(os.listdir("../machine_learning-project"))

['.git', '.gitignore', '.ipynb_checkpoints', 'data_base', 'ML_Earthquake_Neuron.ipynb', 'ml_kellog_db.ipynb', 'models', 'README.md']


In [2]:
# Loading the Data

data = pd.read_csv("data_base/database_smalldata.csv")
data.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,1/1/1970,17:11:00,-29.4,-177.169,Earthquake,35.0,,,5.6,MW,...,,,,,,ISCGEM799588,ISCGEM,ISCGEM,ISCGEM,Automatic
1,1/4/1970,17:00:41,24.185,102.543,Earthquake,11.3,,,7.1,MW,...,,,,,,ISCGEM799712,ISCGEM,ISCGEM,ISCGEM,Automatic
2,1/5/1970,11:49:10,23.984,102.732,Earthquake,15.0,,,5.9,MW,...,,,,,,ISCGEM799745,ISCGEM,ISCGEM,ISCGEM,Automatic
3,1/6/1970,5:35:54,-9.583,151.493,Earthquake,15.0,,,6.3,MW,...,,,,,,ISCGEM799772,ISCGEM,ISCGEM,ISCGEM,Automatic
4,1/7/1970,7:56:14,15.785,-59.808,Earthquake,36.7,,,6.0,MW,...,,,,,,ISCGEM799824,ISCGEM,ISCGEM,ISCGEM,Automatic


In [3]:
#Displaying the Columns

data.columns

Index(['Date', 'Time', 'Latitude', 'Longitude', 'Type', 'Depth', 'Depth Error',
       'Depth Seismic Stations', 'Magnitude', 'Magnitude Type',
       'Magnitude Error', 'Magnitude Seismic Stations', 'Azimuthal Gap',
       'Horizontal Distance', 'Horizontal Error', 'Root Mean Square', 'ID',
       'Source', 'Location Source', 'Magnitude Source', 'Status'],
      dtype='object')

In [4]:
data = data[['Date', 'Time', 'Latitude', 'Longitude', 'Depth', 'Magnitude']]
data.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Depth,Magnitude
0,1/1/1970,17:11:00,-29.4,-177.169,35.0,5.6
1,1/4/1970,17:00:41,24.185,102.543,11.3,7.1
2,1/5/1970,11:49:10,23.984,102.732,15.0,5.9
3,1/6/1970,5:35:54,-9.583,151.493,15.0,6.3
4,1/7/1970,7:56:14,15.785,-59.808,36.7,6.0


In [5]:
# import datetime
# import time

timestamp = []
for d, t in zip(data['Date'], data['Time']):
    try:
        ts = datetime.datetime.strptime(d+' '+t, '%m/%d/%Y %H:%M:%S')
        timestamp.append(time.mktime(ts.timetuple()))
    except ValueError:
        # print('ValueError')
        timestamp.append('ValueError')

In [6]:
timeStamp = pd.Series(timestamp)
data['Timestamp'] = timeStamp.values

In [7]:
#Dropping the columns not needed

final_data = data.drop(['Date', 'Time'], axis=1)
final_data = final_data[final_data.Timestamp != 'ValueError']
final_data.head()

Unnamed: 0,Latitude,Longitude,Depth,Magnitude,Timestamp
0,-29.4,-177.169,35.0,5.6,83460
1,24.185,102.543,11.3,7.1,342041
2,23.984,102.732,15.0,5.9,409750
3,-9.583,151.493,15.0,6.3,473754
4,15.785,-59.808,36.7,6.0,568574


Model Splitting 

In [8]:
#Splitting the data

X = final_data[['Timestamp','Latitude', 'Longitude']]
y = final_data[['Magnitude', 'Depth']]
X

Unnamed: 0,Timestamp,Latitude,Longitude
0,83460,-29.400,-177.169
1,342041,24.185,102.543
2,409750,23.984,102.732
3,473754,-9.583,151.493
4,568574,15.785,-59.808
...,...,...,...
6362,5.04654e+08,-13.192,166.507
6363,5.04681e+08,-5.801,104.288
6364,5.04811e+08,-5.546,150.686
6365,5.04816e+08,61.541,-150.340


In [9]:
#Splitting Xs and ys into traing and test datasets 

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, X_test.shape)

(5092, 3) (1273, 3) (5092, 2) (1273, 3)


In [10]:
# data.Timestamp = da
# pd.set_option('display.float_format', lambda x: x)
# X_train
# df[""]

In [11]:
#from sklearn.ensemble import RandomForestRegressor

reg = RandomForestRegressor(random_state=42)
reg.fit(X_train,y_train)
reg.predict(X_test)

array([[ 5.735  , 51.272  ],
       [ 5.889  , 60.951  ],
       [ 6.0365 ,  8.48988],
       ...,
       [ 5.939  , 33.809  ],
       [ 6.137  , 17.731  ],
       [ 5.945  , 50.694  ]])

In [12]:
#

reg.score(X_test, y_test)


0.3338404313359684

In [13]:
from sklearn.model_selection import GridSearchCV

parameters = {'n_estimators':[10, 20, 50, 100, 200, 500]}

grid_obj = GridSearchCV(reg, parameters)
grid_fit = grid_obj.fit(X_train, y_train)
best_fit = grid_fit.best_estimator_
best_fit.predict(X_test)

array([[ 5.73    , 51.173   ],
       [ 5.8868  , 54.3654  ],
       [ 6.00724 ,  8.317976],
       ...,
       [ 5.9956  , 32.4386  ],
       [ 6.1496  , 17.5436  ],
       [ 5.964   , 45.756   ]])

In [14]:
#Creating the Best Fit 

best_fit.score(X_test, y_test)


0.3425192830708252

##Neural Network Model

In [15]:
from keras.models import Sequential
from keras.layers import Dense

def create_model(neurons, activation, optimizer, loss):
    model = Sequential()
    model.add(Dense(neurons, activation=activation, input_shape=(3,)))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(2, activation='softmax'))
    
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    
    return model

In [16]:
from keras.wrappers.scikit_learn import KerasClassifier

model = KerasClassifier(build_fn=create_model, verbose=0)

# neurons = [16, 64, 128, 256]
neurons = [16]
# batch_size = [10, 20, 50, 100]
batch_size = [10]
epochs = [10]
# activation = ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', 'exponential']
activation = ['sigmoid', 'relu']
# optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
optimizer = ['SGD', 'Adadelta']
loss = ['squared_hinge']

param_grid = dict(neurons=neurons, batch_size=batch_size, epochs=epochs, activation=activation, optimizer=optimizer, loss=loss)

In [17]:
def show_shapes(): # can make yours to take inputs; this'll use local variable values
    print("Expected: (num_samples, timesteps, channels)")
    print("Sequences: {}".format(X_train.shape))
    print("Targets:   {}".format(y_train.shape)) 

In [18]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
show_shapes()


Expected: (num_samples, timesteps, channels)
Sequences: (5092, 3)
Targets:   (5092, 2)


In [19]:
# X_train = np.expand_dims(X_train, -1)
# y_train   = np.expand_dims(y_train, -1)
# show_shapes()

In [20]:
grid_result = grid.fit(X_train, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [None]:
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(3,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='SGD', loss='squared_hinge', metrics=['accuracy'])

In [None]:
model.fit(X_train, y_train, batch_size=10, epochs=20, verbose=1, validation_data=(X_test, y_test))

In [None]:
[test_loss, test_acc] = model.evaluate(X_test, y_test)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

In [None]:
model.save('models/ml_model_fulldata.h5')

# Loading a Model

In [None]:
# Load the model
from tensorflow.keras.models import load_model
eq_model = load_model("models/ml_model_fulldata.h5")

## Evaluating the loaded model


In [None]:
model_accuracy = eq_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network -  Accuracy: {model_accuracy}")