Import Libraries

In [23]:
import os

import pandas as pd
from pymongo import MongoClient
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

Read and format data

In [3]:
# Load the earthquake catalogue and convert the DateTime column to pandas
# timestamps so later cells can do date arithmetic on it.
df = pd.read_csv("testdata.csv").assign(
    DateTime=lambda catalog: pd.to_datetime(catalog["DateTime"])
)
df.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,Source,EventID
0,2000-01-01 00:03:53.650,37.41667,-121.7665,5.36,1.23,Md,21,78,5,0.04,NCSN,21075021
1,2000-01-01 00:09:21.180,37.63683,-119.04967,0.098,0.95,Md,9,104,3,0.06,NCSN,21075023
2,2000-01-01 02:30:44.070,37.56633,-118.82633,2.423,1.25,Md,14,163,3,0.01,NCSN,30503920
3,2000-01-01 05:19:24.020,36.039,-120.57733,8.695,1.19,Md,13,169,4,0.01,NCSN,21075061
4,2000-01-01 06:05:57.080,35.98967,-120.54884,4.143,1.14,Md,15,133,5,0.03,NCSN,21075067


Filtering data for magnitude >= 5

In [4]:
# Keep only the events with magnitude 5 or above; the explicit copy keeps
# filtered_df independent of df.
filtered_df = df.loc[df["Magnitude"].ge(5)].copy()
filtered_df.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,Source,EventID
2380,2000-03-16 15:19:56.380,40.38867,-125.2385,4.803,5.7,Mw,139,228,77,0.29,NCSN,21086915
13604,2001-01-13 13:08:42.100,40.75566,-125.2445,2.243,5.4,Mw,155,233,83,0.26,NCSN,21143281
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.2,Mw,15,177,19,0.06,NCSN,21181820
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.2,Mw,76,111,35,0.34,NCSN,21188442
33074,2002-06-17 16:55:07.680,40.80983,-124.552,17.195,5.2,Mw,63,225,41,0.16,NCSN,21231051


Add empty columns (day0 to day9) for the per-day aftershock flags

In [5]:
# Start from a copy of the filtered events and add one placeholder column per
# day (day0 .. day9); a later cell fills them with 0/1 flags.
aftershock_data = filtered_df.copy()
for i in range(0, 10):
    # Assign a scalar so the placeholder is broadcast to every row. A freshly
    # built pd.Series would carry its own 0..n-1 index and be aligned against
    # the filtered frame's original row labels instead of its row positions.
    aftershock_data['day' + str(i)] = None
aftershock_data.head()

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,...,day0,day1,day2,day3,day4,day5,day6,day7,day8,day9
2380,2000-03-16 15:19:56.380,40.38867,-125.2385,4.803,5.7,Mw,139,228,77,0.29,...,,,,,,,,,,
13604,2001-01-13 13:08:42.100,40.75566,-125.2445,2.243,5.4,Mw,155,233,83,0.26,...,,,,,,,,,,
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.2,Mw,15,177,19,0.06,...,,,,,,,,,,
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.2,Mw,76,111,35,0.34,...,,,,,,,,,,
33074,2002-06-17 16:55:07.680,40.80983,-124.552,17.195,5.2,Mw,63,225,41,0.16,...,,,,,,,,,,


For every large earthquake, flag for each of the following 10 days whether at least one aftershock of magnitude >= 4 occurred, and store the flags in the day0 to day9 columns

In [6]:
# For every event in filtered_df, flag for each of the 10 consecutive 24-hour
# windows after it whether the full catalogue (df) contains at least one event
# of magnitude >= 4, and store the flag (0 or 1) in aftershock_data.
# NOTE(review): events are matched on time only; there is no distance filter,
# so any catalogued event inside the window counts. Confirm this is intended.
for event_id in filtered_df['EventID'].unique():
    event_data = filtered_df[filtered_df['EventID'] == event_id]

    # Origin time of the main shock (first row if the ID occurs more than once).
    earthquake_datetime = event_data['DateTime'].iloc[0]

    # Is there an aftershock >= 4 in each daily window?
    for i in range(0, 10):
        # Window i is (origin + i days, origin + i + 1 days]. The lower bound is
        # exclusive so the main shock itself is never counted; the upper bound
        # is inclusive so an event exactly on a day boundary lands in exactly
        # one window instead of being dropped by two strict comparisons.
        day = earthquake_datetime + pd.Timedelta(days=i)
        day_after = day + pd.Timedelta(days=1)
        in_window = (df['DateTime'] > day) & (df['DateTime'] <= day_after)
        aftershocks_on_day = df[in_window]

        # Vectorised check instead of scanning the rows one by one.
        aftershock_bigger_four = int((aftershocks_on_day['Magnitude'] >= 4).any())

        daystring = "day" + str(i)
        aftershock_data.loc[aftershock_data["EventID"] == event_id, daystring] = aftershock_bigger_four

In [7]:
# Display the filtered events together with the filled day0 .. day9 columns.
aftershock_data

Unnamed: 0,DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,...,day0,day1,day2,day3,day4,day5,day6,day7,day8,day9
2380,2000-03-16 15:19:56.380,40.38867,-125.23850,4.803,5.70,Mw,139,228,77,0.29,...,0,0,0,0,0,0,0,0,0,0
13604,2001-01-13 13:08:42.100,40.75566,-125.24450,2.243,5.40,Mw,155,233,83,0.26,...,0,0,0,0,0,0,0,0,0,0
20427,2001-07-17 12:07:25.830,36.01266,-117.86633,7.158,5.20,Mw,15,177,19,0.06,...,1,0,0,1,0,0,0,0,0,0
21603,2001-08-10 20:19:27.060,39.81116,-120.61667,5.011,5.20,Mw,76,111,35,0.34,...,1,0,0,0,0,0,0,0,0,0
33074,2002-06-17 16:55:07.680,40.80983,-124.55200,17.195,5.20,Mw,63,225,41,0.16,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551535,2022-12-20 10:34:24.770,40.52500,-124.42300,17.910,6.40,Mw,47,214,9,0.18,...,1,0,0,0,1,0,0,0,0,0
552640,2023-01-01 18:35:04.510,40.40900,-123.97100,30.630,5.35,Mw,56,44,13,0.13,...,0,0,0,0,0,0,0,0,0,1
562418,2023-05-11 23:19:41.990,40.20417,-121.10950,5.850,5.48,Mw,56,46,6,0.16,...,1,0,0,0,0,0,0,0,0,1
562481,2023-05-12 10:18:41.310,40.19600,-121.09983,6.060,5.16,Mw,51,37,7,0.19,...,0,0,0,0,0,0,0,0,0,1


Select features

In [8]:
# Targets: one integer series per day column, keyed "y_0" .. "y_9".
days = {
    f"y_{i}": aftershock_data[f"day{i}"].astype(int)
    for i in range(10)
}
# Predictor columns shared by all ten models.
features = ["Latitude", "Longitude", "Depth", "Magnitude", "Gap"]
X = aftershock_data.loc[:, features].copy()

Train one model per day (day0 to day9)

In [9]:
# Fit one decision tree per day: "model_<i>" learns the "y_<i>" target from
# the shared feature matrix X.
models = {}
for i in range(10):
    # A fixed random_state makes the fitted trees, and therefore the
    # cross-validation scores and predictions derived from them, reproducible
    # across runs; without it scikit-learn may break ties between equally good
    # splits differently each time.
    models["model_" + str(i)] = DecisionTreeClassifier(random_state=42)
    models["model_" + str(i)].fit(X, days["y_" + str(i)])

In [18]:
# Cross-validate every per-day model with the F1 metric and print the mean of
# the returned scores; the raw score arrays are kept under "score_<i>".
scores = {}
for day_idx in range(10):
    score_key = f"score_{day_idx}"
    estimator = models[f"model_{day_idx}"]
    target = days[f"y_{day_idx}"]
    scores[score_key] = cross_val_score(estimator, X, target, scoring="f1")
    print(f'F1-Score: {scores[score_key].mean()}')

F1-Score: 0.5956959706959707
F1-Score: 0.0
F1-Score: 0.32666666666666666
F1-Score: 0.44000000000000006
F1-Score: 0.1
F1-Score: 0.1
F1-Score: 0.26666666666666666
F1-Score: 0.0
F1-Score: 0.0
F1-Score: 0.25555555555555554


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Add data to predict

In [13]:
# Scenario grid to score: every combination of the locations, depths,
# magnitudes and gap values listed below.
gps = [[-125.046387,40.522151],[-117.751465,37.709899]]  # [longitude, latitude] pairs
Depth = [6.74,1.34]
Magnitude = [9.6,5.1]
Gap = [10,360]

# Collect all rows first and build the frame once. Growing a DataFrame row by
# row with .loc[len(...)] copies the data on every insert and leaves the column
# dtypes to depend on how the empty frame gets enlarged; here they are pinned
# to float explicitly.
rows = [
    [cor[0], cor[1], dep, mag, gap]
    for cor in gps
    for dep in Depth
    for mag in Magnitude
    for gap in Gap
]
data_to_predict = pd.DataFrame(
    rows,
    columns=["Longitude", "Latitude", "Depth", "Magnitude", "Gap"],
    dtype=float,
)

In [14]:
# Preview the first rows of the scenario grid built in the previous cell.
data_to_predict.head()

Unnamed: 0,Longitude,Latitude,Depth,Magnitude,Gap
0,-125.046387,40.522151,6.74,9.6,10.0
1,-125.046387,40.522151,6.74,9.6,360.0
2,-125.046387,40.522151,6.74,5.1,10.0
3,-125.046387,40.522151,6.74,5.1,360.0
4,-125.046387,40.522151,1.34,9.6,10.0


In [21]:
# Run every per-day model on the scenario grid; results are keyed "day0" ..
# "day9". Only the feature columns are passed, in the order given by features.
predictions = {
    f"day{day_idx}": models[f"model_{day_idx}"].predict(data_to_predict[features])
    for day_idx in range(10)
}

In [22]:
# Attach each day's predictions to the scenario table as columns day0 .. day9.
for day_idx in range(10):
    column = f"day{day_idx}"
    data_to_predict[column] = predictions[column]
data_to_predict

Unnamed: 0,Longitude,Latitude,Depth,Magnitude,Gap,day0,day1,day2,day3,day4,day5,day6,day7,day8,day9
0,-125.046387,40.522151,6.74,9.6,10.0,1,0,0,1,1,0,0,0,0,0
1,-125.046387,40.522151,6.74,9.6,360.0,1,0,0,0,0,0,0,0,0,1
2,-125.046387,40.522151,6.74,5.1,10.0,0,0,0,0,0,0,0,0,0,0
3,-125.046387,40.522151,6.74,5.1,360.0,0,0,0,0,0,0,0,0,0,1
4,-125.046387,40.522151,1.34,9.6,10.0,1,0,0,1,1,0,0,0,0,0
5,-125.046387,40.522151,1.34,9.6,360.0,1,0,0,1,0,0,0,0,0,1
6,-125.046387,40.522151,1.34,5.1,10.0,0,0,0,0,0,0,0,0,0,0
7,-125.046387,40.522151,1.34,5.1,360.0,0,0,0,1,0,0,0,0,0,1
8,-117.751465,37.709899,6.74,9.6,10.0,1,1,1,0,0,0,1,1,0,0
9,-117.751465,37.709899,6.74,9.6,360.0,1,0,1,1,0,0,1,1,0,1


In [24]:
# Connection settings can be overridden through environment variables so that
# real credentials never have to be written into the notebook. The fallbacks
# are the values this notebook used before.
MONGO_DATABASE_URI: str = os.environ.get(
    "MONGO_DATABASE_URI", "mongodb://root:example@localhost:27018"
)
MONGO_DATABASE: str = os.environ.get("MONGO_DATABASE", "disaster_information")

client = MongoClient(MONGO_DATABASE_URI)
db = client.get_database(MONGO_DATABASE)
collection = db["predictions_2"]
# One document per row of the prediction table.
data = data_to_predict.to_dict(orient="records")
# Replace the collection's contents with the fresh predictions.
# NOTE(review): delete followed by insert is not atomic; if the insert fails
# the collection is left empty.
collection.delete_many({})
collection.insert_many(data)

InsertManyResult([ObjectId('66226f9661c424ec4e2cdda8'), ObjectId('66226f9661c424ec4e2cdda9'), ObjectId('66226f9661c424ec4e2cddaa'), ObjectId('66226f9661c424ec4e2cddab'), ObjectId('66226f9661c424ec4e2cddac'), ObjectId('66226f9661c424ec4e2cddad'), ObjectId('66226f9661c424ec4e2cddae'), ObjectId('66226f9661c424ec4e2cddaf'), ObjectId('66226f9661c424ec4e2cddb0'), ObjectId('66226f9661c424ec4e2cddb1'), ObjectId('66226f9661c424ec4e2cddb2'), ObjectId('66226f9661c424ec4e2cddb3'), ObjectId('66226f9661c424ec4e2cddb4'), ObjectId('66226f9661c424ec4e2cddb5'), ObjectId('66226f9661c424ec4e2cddb6'), ObjectId('66226f9661c424ec4e2cddb7')], acknowledged=True)