<a href="https://colab.research.google.com/github/riccardorampon/LANL-Earthquake-Prediction/blob/main/earthquake_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sklearn.cross_validation

In [None]:
!pip install basemap
from mpl_toolkits.basemap import Basemap

## **Import the necessary libraries required for building the model and data analysis of the earthquakes**

In [None]:
#importing all the libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the earthquake table from "database.csv" (path is relative to the
# notebook's working directory).
data = pd.read_csv(filepath_or_buffer="database.csv")

# Show the first rows of the loaded table.
data.head()

In [None]:
# List the column labels of the loaded table.
data.columns

In [None]:
# There are a lot of empty columns; keep only the filled ones that are needed
# for our purposes.
# .copy() makes the subset an independent frame, so adding a column to `data`
# afterwards does not raise pandas' SettingWithCopyWarning.
data = data[['Date', 'Time', 'Latitude', 'Longitude', 'Depth', 'Magnitude']].copy()
data.head()

## **Merge 'Date' and 'Time' columns into one 'Timestamp'**

In [None]:
#let's merge the date and time columns into a single field called timestamp
import datetime
import time

# Parse every row in one vectorized pass. Rows whose Date/Time text does not
# match the expected layout become NaT (instead of the string 'ValueError'),
# which keeps the resulting column numeric rather than object dtype.
parsed = pd.to_datetime(
    data['Date'] + ' ' + data['Time'],
    format='%m/%d/%Y %H:%M:%S',
    errors='coerce',
)

# Seconds elapsed since 1970-01-01 00:00:00, computed directly from the parsed
# values. Unlike time.mktime this does not consult the machine's local
# timezone, so the numbers are the same on every host. Unparseable rows
# propagate as NaN.
timeStamp = (parsed - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)

# assign() returns a new frame, so the cell can be re-run safely and no
# chained-assignment warning is raised.
data = data.assign(Timestamp=timeStamp.values)

# Drop the raw text columns and the rows whose timestamp could not be parsed.
final_data = data.drop(['Date', 'Time'], axis=1).dropna(subset=['Timestamp'])
final_data.head()

## **Visualization on Basemap**

In [None]:

#show every recorded event as a dot on a world map

from mpl_toolkits.basemap import Basemap

# Coordinates of all rows in `data`, as plain Python lists.
latitudes = data["Latitude"].tolist()
longitudes = data["Longitude"].tolist()

# World map in the 'mill' projection, limited to latitudes -80..80 and the
# full longitude range, drawn with resolution 'c'.
m = Basemap(
    projection='mill',
    llcrnrlat=-80, urcrnrlat=80,
    llcrnrlon=-180, urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)

# Convert longitude/latitude degrees into the map's plotting coordinates.
x, y = m(longitudes, latitudes)

fig = plt.figure(figsize=(12, 10))
plt.title("All affected areas")
m.plot(x, y, "o", markersize=2, color='blue')
m.drawcoastlines()
m.fillcontinents(color='coral', lake_color='aqua')
m.drawmapboundary()
m.drawcountries()
plt.show()

## **Splitting the Data**

In [None]:
#goal: predict magnitude and depth from timestamp, latitude and longitude

from sklearn.model_selection import train_test_split

# Feature columns and target columns, both taken from the cleaned frame.
X = final_data[['Timestamp', 'Latitude', 'Longitude']]
y = final_data[['Magnitude', 'Depth']]

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

## **Random Forest Regressor**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest with default hyper-parameters on the training split
# (fit() returns the estimator itself), then show its test-set predictions.
reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)
reg.predict(X_test)

In [None]:
# Score of the default forest on the held-out split.
reg.score(X=X_test, y=y_test)

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate forest sizes. More estimators could be tried, but the search then
# takes a lot of time.
parameters = {'n_estimators': [10, 20]}

# Cross-validated search over the candidates on the training split; keep the
# best estimator found and show its test-set predictions.
grid_obj = GridSearchCV(estimator=reg, param_grid=parameters)
grid_fit = grid_obj.fit(X_train, y_train)
best_fit = grid_fit.best_estimator_
best_fit.predict(X_test)

In [None]:
# Score of the grid-searched forest on the held-out split.
best_fit.score(X=X_test, y=y_test)

## **Neural Network model**


In [None]:
from keras.models import Sequential
from keras.layers import Dense

def create_model(neurons, activation, optimizer, loss):
    """Build and compile a three-layer dense network with 3 inputs and 2 outputs.

    Args:
        neurons: Number of units in each of the two hidden layers.
        activation: Activation used by both hidden layers.
        optimizer: Optimizer passed to ``model.compile``.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.

    NOTE(review): the output layer is a 2-unit softmax tracked with the
    'accuracy' metric, i.e. a classification head, while the targets selected
    earlier in the notebook (Magnitude, Depth) are continuous values. Confirm
    whether a regression head was intended.
    """
    layers = [
        Dense(neurons, activation=activation, input_dim=3),
        Dense(neurons, activation=activation),
        Dense(2, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return model

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyper-parameter candidates handed to the grid search. The trailing comments
# list the wider alternatives that are currently switched off.
# NOTE(review): KerasClassifier and the 'squared_hinge' loss are
# classification tools, while y holds continuous Magnitude/Depth values.
# Confirm whether a regressor wrapper and a regression loss were intended.
neurons = [16]                    # [16, 64, 128, 256]
batch_size = [10]                 # [10, 20, 50, 100]
epochs = [10]
activation = ['sigmoid', 'relu']  # ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', 'exponential']
optimizer = ['SGD', 'Adadelta']   # ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
loss = ['squared_hinge']

param_grid = {
    'neurons': neurons,
    'batch_size': batch_size,
    'epochs': epochs,
    'activation': activation,
    'optimizer': optimizer,
    'loss': loss,
}

# scikit-learn compatible wrapper that builds a fresh network through
# create_model for every parameter combination.
model = KerasClassifier(build_fn=create_model, verbose=0)

In [None]:
# Convert the split frames to plain float arrays for Keras.
# Iterating over a DataFrame yields its column labels, not its rows, so the
# previous list comprehension produced an array of column names. to_numpy()
# returns the actual (n_rows, n_columns) values, and dtype=float guarantees a
# numeric array even if a column is stored with object dtype.
new_X_train = X_train.to_numpy(dtype=float)
new_y_train = y_train.to_numpy(dtype=float)

new_X_test = X_test.to_numpy(dtype=float)
new_y_test = y_test.to_numpy(dtype=float)

In [None]:
# Cross-validated search over param_grid using all available cores.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(new_X_train, y_train)

# Headline: best mean score and the parameters that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# One line per parameter combination: mean score, its standard deviation
# across folds, and the combination itself.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for score_mean, score_std, candidate in zip(means, stds, params):
    print("%f (%f) with: %r" % (score_mean, score_std, candidate))

In [None]:
# Final network: two 16-unit ReLU hidden layers on 3 input features, 2 outputs.
# NOTE(review): the softmax output, 'squared_hinge' loss and 'accuracy' metric
# form a classification setup, while the targets (Magnitude, Depth) are
# continuous. Confirm whether a regression head and loss were intended.
model = Sequential([
    Dense(16, activation='relu', input_shape=(3,)),
    Dense(16, activation='relu'),
    Dense(2, activation='softmax'),
])
model.compile(optimizer='SGD', loss='squared_hinge', metrics=['accuracy'])

In [None]:
print(new_X_train.shape)
print(new_y_train.shape)

# Train the compiled network that the following cells evaluate and save.
# The earlier SVC fit is removed: SVC accepts only a one-dimensional label
# vector, so fitting it on the two-column y_train raises, and it left `model`
# untrained. The frames are converted to float arrays here so the call does
# not depend on the dtype pandas chose for each column. The test split is used
# as validation data, as in the author's original (commented-out) call.
history = model.fit(
    X_train.to_numpy(dtype=float),
    y_train.to_numpy(dtype=float),
    batch_size=10,
    epochs=20,
    verbose=1,
    validation_data=(
        X_test.to_numpy(dtype=float),
        y_test.to_numpy(dtype=float),
    ),
)

In [None]:
# Evaluate on the held-out split. The frames are converted to float arrays
# first: a DataFrame holding an object-dtype column cannot be turned into a
# tensor by Keras. evaluate() returns the loss followed by the single compiled
# metric.
test_loss, test_acc = model.evaluate(
    X_test.to_numpy(dtype=float),
    y_test.to_numpy(dtype=float),
)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

In [None]:
# Write the model to 'earthquake.h5' in the working directory.
model.save(filepath='earthquake.h5')