In [1]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

import matplotlib.pyplot as plt

In [4]:
# Path to the GeoJSON file
data = pd.read_csv('Resources/all_month.csv')
data

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2023-08-25T09:26:27.480Z,33.769000,-116.142833,4.84,0.82,ml,17.0,104.00,0.071150,0.1900,...,2023-08-25T09:29:57.187Z,"9 km NE of Indio, CA",earthquake,0.33,0.81,0.133,13.0,automatic,ci,ci
1,2023-08-25T09:24:59.950Z,33.777333,-116.144000,0.93,0.68,ml,16.0,105.00,0.067410,0.1900,...,2023-08-25T09:28:37.994Z,"9 km NE of Indio, CA",earthquake,0.42,0.90,0.113,8.0,automatic,ci,ci
2,2023-08-25T09:24:17.150Z,33.770000,-116.139500,3.48,0.73,ml,14.0,105.00,0.073350,0.2000,...,2023-08-25T09:27:50.625Z,"9 km NE of Indio, CA",earthquake,0.46,1.91,0.151,10.0,automatic,ci,ci
3,2023-08-25T09:11:15.363Z,38.154900,-117.818400,10.10,2.00,ml,21.0,131.84,0.117000,0.3487,...,2023-08-25T09:13:49.965Z,"36 km SE of Mina, Nevada",earthquake,,0.40,0.560,14.0,automatic,nn,nn
4,2023-08-25T08:59:02.690Z,63.103900,-151.437300,3.80,1.10,ml,,,,0.7600,...,2023-08-25T09:01:42.957Z,"50 km SSE of Denali National Park, Alaska",earthquake,,0.60,,,automatic,ak,ak
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11029,2023-07-26T09:54:34.610Z,38.811668,-122.771835,1.27,0.70,md,10.0,135.00,0.007094,0.0200,...,2023-07-26T11:38:12.999Z,"4 km NNW of The Geysers, CA",earthquake,0.42,0.72,0.290,11.0,automatic,nc,nc
11030,2023-07-26T09:48:37.824Z,36.729900,-116.081200,4.00,-0.30,ml,7.0,111.00,0.028000,0.0821,...,2023-07-26T15:45:50.272Z,"40 km WNW of Indian Springs, Nevada",earthquake,,2.00,0.070,3.0,reviewed,nn,nn
11031,2023-07-26T09:47:44.670Z,37.153500,-121.525167,9.77,0.63,md,10.0,79.00,0.060550,0.0500,...,2023-07-26T23:50:12.237Z,"11 km NE of San Martin, CA",earthquake,0.36,0.82,0.347,13.0,reviewed,nc,nc
11032,2023-07-26T09:47:33.120Z,36.723167,-121.377167,2.10,0.96,md,20.0,163.00,0.029200,0.0700,...,2023-07-26T23:40:11.178Z,"9 km SW of Tres Pinos, CA",earthquake,0.30,0.52,0.260,20.0,reviewed,nc,nc


In [7]:
# Filter type to only Earthquake
data = data[data['type']=='earthquake']

In [8]:
# Check data types
data_df = data
print(data_df.dtypes)

time                object
latitude           float64
longitude          float64
depth              float64
mag                float64
magType             object
nst                float64
gap                float64
dmin               float64
rms                float64
net                 object
id                  object
updated             object
place               object
type                object
horizontalError    float64
depthError         float64
magError           float64
magNst             float64
status              object
locationSource      object
magSource           object
dtype: object


In [9]:
# Remove unwanted columns
remove = ['net','magType','id','updated','place','type','status','magSource','latitude','longitude']

earthquake_df = data.drop(columns=remove)
earthquake_df

Unnamed: 0,time,depth,mag,nst,gap,dmin,rms,horizontalError,depthError,magError,magNst,locationSource
0,2023-08-25T09:26:27.480Z,4.84,0.82,17.0,104.00,0.071150,0.1900,0.33,0.81,0.133,13.0,ci
1,2023-08-25T09:24:59.950Z,0.93,0.68,16.0,105.00,0.067410,0.1900,0.42,0.90,0.113,8.0,ci
2,2023-08-25T09:24:17.150Z,3.48,0.73,14.0,105.00,0.073350,0.2000,0.46,1.91,0.151,10.0,ci
3,2023-08-25T09:11:15.363Z,10.10,2.00,21.0,131.84,0.117000,0.3487,,0.40,0.560,14.0,nn
4,2023-08-25T08:59:02.690Z,3.80,1.10,,,,0.7600,,0.60,,,ak
...,...,...,...,...,...,...,...,...,...,...,...,...
11029,2023-07-26T09:54:34.610Z,1.27,0.70,10.0,135.00,0.007094,0.0200,0.42,0.72,0.290,11.0,nc
11030,2023-07-26T09:48:37.824Z,4.00,-0.30,7.0,111.00,0.028000,0.0821,,2.00,0.070,3.0,nn
11031,2023-07-26T09:47:44.670Z,9.77,0.63,10.0,79.00,0.060550,0.0500,0.36,0.82,0.347,13.0,nc
11032,2023-07-26T09:47:33.120Z,2.10,0.96,20.0,163.00,0.029200,0.0700,0.30,0.52,0.260,20.0,nc


In [17]:
# Convert the datetime_column to a datetime datatype
earthquake_df['time'] = pd.to_datetime(earthquake_df['time'])

# Extract date and time components into separate columns
df['date'] = df['datetime_column'].dt.date
df['time'] = df['datetime_column'].dt.time

# Drop the original datetime_column if needed
df.drop(columns=['datetime_column'], inplace=True)

KeyError: 'time'