In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV


In [4]:
# Load the two CSV files into DataFrames: one for training, one for testing.
df_train = pd.read_csv(filepath_or_buffer='database.csv')
df_test = pd.read_csv(filepath_or_buffer='earthquakeTest.csv')


In [8]:
# Show the first five rows of the raw training frame.
df_train.head(n=5)

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [7]:
# Show the first five rows of the raw test frame.
df_test.head(n=5)

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,11.5,2.68,ml,41.0,77.0,0.06553,0.26,...,2017-02-08T21:33:00.874Z,"2km W of Brawley, CA",earthquake,0.24,0.46,0.196,64.0,reviewed,ci,ci
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,78.93,5.0,mb,,101.0,2.058,0.75,...,2017-03-27T23:53:16.040Z,"131km NNW of Tobelo, Indonesia",earthquake,6.8,7.1,0.065,75.0,reviewed,us,us
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,9.4,2.65,ml,42.0,75.0,0.07023,0.24,...,2017-02-08T21:36:24.950Z,"2km WSW of Brawley, CA",earthquake,0.23,0.61,0.198,76.0,reviewed,ci,ci
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,10.0,4.1,mb,,185.0,2.457,0.32,...,2017-03-27T23:53:16.040Z,"180km WNW of Panguna, Papua New Guinea",earthquake,7.5,1.9,0.184,8.0,reviewed,us,us
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,49.25,4.1,mb,,132.0,7.174,0.9,...,2017-03-27T23:53:16.040Z,"132km WSW of Abepura, Indonesia",earthquake,13.5,8.4,0.166,10.0,reviewed,us,us


In [12]:
# Trim both frames down to the columns that are used later on.

# Training frame: discard the listed columns and keep everything else.
df_train_loaded = df_train.drop(
    columns=[
        'Time',
        'Depth Error',
        'Depth Seismic Stations',
        'Magnitude Error',
        'Magnitude Seismic Stations',
        'Azimuthal Gap',
        'Horizontal Distance',
        'Horizontal Error',
        'Root Mean Square',
        'Source',
        'Location Source',
        'Magnitude Source',
        'Status',
    ]
)

# Test frame: keep only the listed columns, in this order.
df_test_loaded = df_test[
    ['time', 'latitude', 'longitude', 'mag', 'depth']
]



In [13]:
# Show the first five rows of the trimmed training frame.
df_train_loaded.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude Type,ID
0,01/02/1965,19.246,145.616,Earthquake,131.6,6.0,MW,ISCGEM860706
1,01/04/1965,1.863,127.352,Earthquake,80.0,5.8,MW,ISCGEM860737
2,01/05/1965,-20.579,-173.972,Earthquake,20.0,6.2,MW,ISCGEM860762
3,01/08/1965,-59.076,-23.557,Earthquake,15.0,5.8,MW,ISCGEM860856
4,01/09/1965,11.938,126.427,Earthquake,15.0,5.8,MW,ISCGEM860890


In [14]:
# Show the first five rows of the trimmed test frame.
df_test_loaded.head(n=5)

Unnamed: 0,time,latitude,longitude,mag,depth
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,2.68,11.5
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,5.0,78.93
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,2.65,9.4
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,4.1,10.0
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,4.1,49.25


In [18]:
# Rename columns so that both frames use the same capitalised names.
df_train_loaded = df_train_loaded.rename(
    {"Magnitude Type": "Magnitude_Type"},
    axis="columns",
)
df_test_loaded = df_test_loaded.rename(
    {
        "time": "Date",
        "latitude": "Latitude",
        "longitude": "Longitude",
        "mag": "Magnitude",
        "depth": "Depth",
    },
    axis="columns",
)


In [19]:
# Show the first five rows of the training frame after the column rename.
df_train_loaded.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Type,Depth,Magnitude,Magnitude_Type,ID
0,01/02/1965,19.246,145.616,Earthquake,131.6,6.0,MW,ISCGEM860706
1,01/04/1965,1.863,127.352,Earthquake,80.0,5.8,MW,ISCGEM860737
2,01/05/1965,-20.579,-173.972,Earthquake,20.0,6.2,MW,ISCGEM860762
3,01/08/1965,-59.076,-23.557,Earthquake,15.0,5.8,MW,ISCGEM860856
4,01/09/1965,11.938,126.427,Earthquake,15.0,5.8,MW,ISCGEM860890


In [20]:
# Show the first five rows of the test frame after the column rename.
df_test_loaded.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Magnitude,Depth
0,2017-01-01T00:04:56.020Z,32.98,-115.545833,2.68,11.5
1,2017-01-01T00:13:25.380Z,2.8327,127.5786,5.0,78.93
2,2017-01-01T00:22:02.820Z,32.973,-115.5505,2.65,9.4
3,2017-01-01T00:23:53.890Z,-5.9497,153.8988,4.1,10.0
4,2017-01-01T00:45:57.980Z,-2.9302,139.4328,4.1,49.25


In [21]:
# Build the modelling frames: the same four columns are selected from the
# test frame and the training frame (in that order).
df_testing, df_training = (
    frame[['Latitude', 'Longitude', 'Magnitude', 'Depth']]
    for frame in (df_test_loaded, df_train_loaded)
)


In [22]:
# Drop Nulls from Dataset
# DataFrame.dropna() returns a new frame and leaves the original untouched, so
# the result has to be assigned back; otherwise rows containing nulls remain in
# df_training / df_testing and flow into every later cell.
df_training = df_training.dropna()
df_testing = df_testing.dropna()

# Last expression of the cell: display the cleaned test frame.
df_testing

Unnamed: 0,Latitude,Longitude,Magnitude,Depth
0,32.980000,-115.545833,2.68,11.500
1,2.832700,127.578600,5.00,78.930
2,32.973000,-115.550500,2.65,9.400
3,-5.949700,153.898800,4.10,10.000
4,-2.930200,139.432800,4.10,49.250
...,...,...,...,...
19995,-21.459800,168.774000,4.30,10.000
19996,35.239500,-97.745300,2.60,6.364
19997,42.139833,-121.692667,2.58,6.880
19998,67.461600,-158.713600,2.80,6.500


In [23]:
# Training frame: the coordinates are the inputs, magnitude and depth the targets.
x, y = (
    df_training[['Latitude', 'Longitude']],
    df_training[['Magnitude', 'Depth']],
)

# Same input/target layout taken from the separate test frame.
x_new, y_new = (
    df_testing[['Latitude', 'Longitude']],
    df_testing[['Magnitude', 'Depth']],
)

# Split the training data, holding out 20% of the rows; the fixed random_state
# makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)