Import Libraries

In [51]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier

from tensorflow.keras.optimizers import RMSprop

ModuleNotFoundError: No module named 'tensorflow'

Read and format data

In [2]:
# Load the earthquake catalogue and turn the DateTime column into real
# timestamps so that date arithmetic can be done on it.
df = pd.read_csv("testdata.csv").assign(
    DateTime=lambda catalogue: pd.to_datetime(catalogue['DateTime'])
)
df.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,Source,EventID
0,2000-01-01 00:03:53.650,37.41667,-121.7665,5.36,1.23,Md,21,78,5,0.04,NCSN,21075021
1,2000-01-01 00:09:21.180,37.63683,-119.04967,0.098,0.95,Md,9,104,3,0.06,NCSN,21075023
2,2000-01-01 02:30:44.070,37.56633,-118.82633,2.423,1.25,Md,14,163,3,0.01,NCSN,30503920
3,2000-01-01 05:19:24.020,36.039,-120.57733,8.695,1.19,Md,13,169,4,0.01,NCSN,21075061
4,2000-01-01 06:05:57.080,35.98967,-120.54884,4.143,1.14,Md,15,133,5,0.03,NCSN,21075067


Filtering data for magnitude >= 5

In [3]:
# Keep only the strong events (Magnitude >= 5) as an independent frame, so the
# full catalogue in `df` stays untouched.
is_strong_event = df['Magnitude'] >= 5
filtered_df = df.loc[is_strong_event].copy()
filtered_df.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,Source,EventID
2380,2000-03-16 15:19:56.380,40.38867,-125.2385,4.803,5.7,Mw,139,228,77,0.29,NCSN,21086915
13604,2001-01-13 13:08:42.100,40.75566,-125.2445,2.243,5.4,Mw,155,233,83,0.26,NCSN,21143281
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.2,Mw,15,177,19,0.06,NCSN,21181820
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.2,Mw,76,111,35,0.34,NCSN,21188442
33074,2002-06-17 16:55:07.680,40.80983,-124.552,17.195,5.2,Mw,63,225,41,0.16,NCSN,21231051


Add empty columns (day0–day9) for the aftershock flags

In [4]:
# Start from an independent copy of the strong events and append one empty
# placeholder column per day (day0 ... day9).
# The scalar None is broadcast to every row. Assigning a freshly built Series
# instead would be aligned on its own 0..n-1 index, which does not match this
# frame's index labels, so the columns would only end up empty by accident.
aftershock_data = filtered_df.copy()
for i in range(0, 10):
    aftershock_data['day' + str(i)] = None
aftershock_data.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,...,day0,day1,day2,day3,day4,day5,day6,day7,day8,day9
2380,2000-03-16 15:19:56.380,40.38867,-125.2385,4.803,5.7,Mw,139,228,77,0.29,...,,,,,,,,,,
13604,2001-01-13 13:08:42.100,40.75566,-125.2445,2.243,5.4,Mw,155,233,83,0.26,...,,,,,,,,,,
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.2,Mw,15,177,19,0.06,...,,,,,,,,,,
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.2,Mw,76,111,35,0.34,...,,,,,,,,,,
33074,2002-06-17 16:55:07.680,40.80983,-124.552,17.195,5.2,Mw,63,225,41,0.16,...,,,,,,,,,,


For every larger earthquake (magnitude >= 5), flag whether an aftershock of magnitude >= 4 occurred on each of the following 10 days and store the flags as new columns

In [5]:
# For every strong event, fill day0 ... day9 with 1 if the full catalogue `df`
# contains at least one event with Magnitude >= 4 inside the matching 24-hour
# window after the main shock, otherwise with 0.
# NOTE: the windows are filtered by time and magnitude only; there is no
# spatial filter, so an event anywhere in `df` counts.
for event_id in filtered_df['EventID'].unique():
    event_data = filtered_df[filtered_df['EventID'] == event_id]

    # Origin time of the main shock (first row if the EventID occurs repeatedly).
    earthquake_datetime = event_data['DateTime'].iloc[0]

    for i in range(0, 10):
        day = earthquake_datetime + pd.Timedelta(days=i)
        day_after = day + pd.Timedelta(days=1)

        # Half-open window (day, day_after]: the strict lower bound keeps the
        # main shock itself out of day0, the inclusive upper bound makes sure
        # an event exactly on a window edge is not lost between two days.
        in_window = (df['DateTime'] > day) & (df['DateTime'] <= day_after)

        # Is there an aftershock with magnitude >= 4 in this window?
        aftershock_bigger_four = int((df.loc[in_window, 'Magnitude'] >= 4).any())

        daystring = "day" + str(i)
        aftershock_data.loc[aftershock_data["EventID"] == event_id, daystring] = aftershock_bigger_four

In [6]:
# Display the labelled events to check the day0 ... day9 flags.
aftershock_data

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,...,day0,day1,day2,day3,day4,day5,day6,day7,day8,day9
2380,2000-03-16 15:19:56.380,40.38867,-125.23850,4.803,5.70,Mw,139,228,77,0.29,...,0,0,0,0,0,0,0,0,0,0
13604,2001-01-13 13:08:42.100,40.75566,-125.24450,2.243,5.40,Mw,155,233,83,0.26,...,0,0,0,0,0,0,0,0,0,0
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.20,Mw,15,177,19,0.06,...,1,0,0,1,0,0,0,0,0,0
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.20,Mw,76,111,35,0.34,...,1,0,0,0,0,0,0,0,0,0
33074,2002-06-17 16:55:07.680,40.80983,-124.55200,17.195,5.20,Mw,63,225,41,0.16,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551535,2022-12-20 10:34:24.770,40.52500,-124.42300,17.910,6.40,Mw,47,214,9,0.18,...,1,0,0,0,1,0,0,0,0,0
552640,2023-01-01 18:35:04.510,40.40900,-123.97100,30.630,5.35,Mw,56,44,13,0.13,...,0,0,0,0,0,0,0,0,0,1
562418,2023-05-11 23:19:41.990,40.20417,-121.10950,5.850,5.48,Mw,56,46,6,0.16,...,1,0,0,0,0,0,0,0,0,1
562481,2023-05-12 10:18:41.310,40.19600,-121.09983,6.060,5.16,Mw,51,37,7,0.19,...,0,0,0,0,0,0,0,0,0,1


In [7]:
# Persist the labelled events; with pandas' default settings the frame's index
# is written as the first CSV column as well.
aftershock_data.to_csv("result_classification.csv")

Select features

In [44]:
# Feature matrix: location, depth, magnitude and azimuthal gap of each strong
# event. Target: the day0 flag, cast to integers.
features = ["Latitude","Longitude","Depth","Magnitude","Gap"]
X = aftershock_data.loc[:, features].copy()
y = aftershock_data["day0"].astype(int)

Preprocess

In [45]:
# Inspect the integer day0 target before training.
y

2380      0
13604     0
20427     1
21603     1
33074     0
         ..
551535    1
552640    0
562418    1
562481    0
563166    0
Name: day0, Length: 63, dtype: int32

Train model for day0

In [53]:
# Fit a decision tree on all selected events for the day0 flag.
# A fixed random_state makes the fitted tree (and anything derived from this
# estimator) reproducible across kernel restarts; without it the tie-breaking
# between equally good splits can differ from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

In [97]:
# Cross-validate the classifier; scoring='f1' yields one F1 score per fold.
scores = cross_val_score(model, X, y, scoring='f1')
print(scores)
# The metric is F1 (higher is better), not an RMSE, so label it accordingly.
print(f'Mean F1: {scores.mean()}')

[0.61538462 0.5        0.625      0.61538462 0.76923077]
RMSE: 0.625


Add data to predict

In [98]:
# Candidate values per feature; every combination becomes one row to predict.
# (The spelling of `Longtitude` is kept because other cells may refer to it.)
Longtitude = [-125.046387,-117.751465]
Latitude = [40.522151,37.709899]
Depth = [6.74,1.34]
Magnitude = [9.6,5.1]
Gap = [10,360]

# Build the full grid in one go instead of appending row by row with .loc,
# which re-allocates the frame on every insert. The nesting order matches the
# column order below, with Gap varying fastest.
grid_rows = [
    [long, lat, dep, mag, gap]
    for long in Longtitude
    for lat in Latitude
    for dep in Depth
    for mag in Magnitude
    for gap in Gap
]

# dtype=float keeps every column (including Gap) as floating point.
data_to_predict = pd.DataFrame(
    grid_rows,
    columns=["Longitude", "Latitude", "Depth", "Magnitude", "Gap"],
    dtype=float,
)

In [99]:
# Preview the first rows of the prediction grid.
data_to_predict.head()

Unnamed: 0,Longitude,Latitude,Depth,Magnitude,Gap
0,-125.046387,40.522151,6.74,9.6,10.0
1,-125.046387,40.522151,6.74,9.6,360.0
2,-125.046387,40.522151,6.74,5.1,10.0
3,-125.046387,40.522151,6.74,5.1,360.0
4,-125.046387,40.522151,1.34,9.6,10.0


In [100]:
# Predict the day0 flag for every grid row; selecting `features` puts the
# columns in the same order as the training matrix X.
predictions = model.predict(data_to_predict[features])

In [101]:
# Attach the predictions as a day0 column next to the inputs and display the
# complete grid.
data_to_predict["day0"] = predictions
data_to_predict

Unnamed: 0,Longitude,Latitude,Depth,Magnitude,Gap,day0
0,-125.046387,40.522151,6.74,9.6,10.0,1
1,-125.046387,40.522151,6.74,9.6,360.0,1
2,-125.046387,40.522151,6.74,5.1,10.0,0
3,-125.046387,40.522151,6.74,5.1,360.0,0
4,-125.046387,40.522151,1.34,9.6,10.0,1
5,-125.046387,40.522151,1.34,9.6,360.0,1
6,-125.046387,40.522151,1.34,5.1,10.0,0
7,-125.046387,40.522151,1.34,5.1,360.0,0
8,-125.046387,37.709899,6.74,9.6,10.0,1
9,-125.046387,37.709899,6.74,9.6,360.0,1
