# Exploratory Data Analysis - Last 30 Days of Earthquakes

In [None]:
import geopandas

# Relative path to the raw 30-day earthquake GeoJSON snapshot.
path_to_file = '../data/raw/earthquake-data-last-30-days.geojson'

# Read the GeoJSON features into a (Geo)DataFrame.
df = geopandas.read_file(path_to_file)

# Display only the events for which no azimuthal gap was recorded.
missing_gap = df['gap'].isna()
df.loc[missing_gap]

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame's
# columns; by default pandas restricts this to the numeric ones.
df.describe()

In [None]:
# Count the missing (NA) values in every column to see which fields are
# sparsely populated.
df.isna().sum()

In [None]:
# can drop quite a few columns: tz, dmin, net, ids, code, title, type (see below!), rms, nst, types, sources, status, mmi, 

# KEEP:

# alert - green, yellow, orange, red - nulls mean no alert level is available from the PAGER earthquake impact scale
# cdi - max reported intensity of the event
# depth - how deep the earthquake originates, but there is a lot of error around the calculation and placeholder values (e.g. 33 km as a default depth) get put in
# depthError - uncertainty of reported depths of the event in km
# detail - Link to GeoJSON detail feed from a GeoJSON summary feed
# felt - the total number of felt reports submitted to the DYFI ("Did You Feel It?") system
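# magType - the method or algorithm used to calculate the event's magnitude (e.g. ml, md); it identifies the magnitude scale rather than bucketing earthquakes by size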
# sig - significance of the event calculated based on a number of factors, including: magnitude, maximum MMI, felt reports, and estimated impact.
# tsunami - This flag is set to "1" for large events in oceanic regions and "0" otherwise. The existence or value of this flag does not indicate if a tsunami actually did or will exist. 
# If the flag value is "1", the event will include a link to the NOAA Tsunami website for tsunami information.

# GAP issue:

# gap - The largest azimuthal gap between azimuthally adjacent stations (in degrees). In general, the smaller this number, the more reliable is the calculated horizontal position of the earthquake.
# Earthquake locations in which the azimuthal gap exceeds 180 degrees typically have large location and depth uncertainties.

# anything with a gap larger than 180 degrees has large location and depth uncertainty - remove all rows without gap data or with a gap above 180 degrees

# DROP rows where type != 'earthquake'
# then drop the type column

In [None]:
# Keep only the rows whose event type is exactly 'earthquake'.
is_earthquake = df['type'] == 'earthquake'
df = df.loc[is_earthquake]

# Number of non-null event ids remaining after the filter.
df['id'].count()

In [None]:
# Column names of the current frame as a plain Python list.
list(df.columns)

In [None]:
import pandas as pd

# Relative path to a timestamp-prefixed transformed earthquake JSON file
# under data/processed.
file_path = '../data/processed/2025-08-18 19:49:17.389434_transformed_earthquake_data.json'

# Load the JSON into a pandas DataFrame. This rebinds `df`, so any frame
# previously held under that name is no longer reachable through it.
df = pd.read_json(file_path)

# Display the loaded frame.
df

In [1]:
import pandas as pd

# Relative path to a timestamp-prefixed transformed earthquake JSON file
# under data/processed.
file_path_transformed_data = '../data/processed/2025-08-19 10:40:20.057548_transformed_earthquake_data.json'

# Load the JSON into a pandas DataFrame. This rebinds `df`, so any frame
# previously held under that name is no longer reachable through it.
df = pd.read_json(file_path_transformed_data)

# Display the loaded frame.
df

Unnamed: 0,id,mag,place,time,updated,url,felt,cdi,alert,tsunami,sig,magType,longitude,latitude,depth,time_to_report,depth_group
0,ci40402506,0.72,"5 km N of Cabazon, CA",1755595887570,1755596094868,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,8,ml,-116.783167,33.964833,11.2100,207298,shallow
2,nc75225757,1.58,"15 km WNW of Lake Pillsbury, CA",1755593809030,1755593904611,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,38,md,-123.129669,39.431999,5.6300,95581,shallow
3,ci40402498,0.88,"16 km W of Searles Valley, CA",1755593005430,1755593205597,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,12,ml,-117.583833,35.767167,6.5900,200167,shallow
4,ci40402490,0.81,"6 km S of Idyllwild, CA",1755592321870,1755592523532,https://earthquake.usgs.gov/earthquakes/eventp...,1.0,0.0,,False,10,ml,-116.713000,33.682167,15.6300,201662,shallow
5,nc75225752,1.11,"6 km NNW of The Geysers, CA",1755592077940,1755592175513,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,19,md,-122.802330,38.816002,3.2700,97573,shallow
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10962,tx2025odwros,1.10,"9 km NNW of Forsan, Texas",1753004800070,1753095505766,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,19,ml,-101.403000,32.191000,6.5186,90705696,shallow
10963,us7000qe09,2.60,"12 km NNW of Stanley, Idaho",1753004755303,1755004875040,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,104,ml,-115.001000,44.316600,10.8110,2000119737,shallow
10964,av93678566,-0.40,"78 km WNW of Tyonek, Alaska",1753004508000,1753216229360,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,0,ml,-152.507500,61.316333,3.2000,211721360,shallow
10965,tx2025odwmts,1.20,"9 km NNW of Forsan, Texas",1753004465644,1753094266790,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,22,ml,-101.403000,32.189000,8.1787,89801146,shallow
