# Exploratory Data Analysis - Last 30 Days of Earthquakes

In [None]:
import geopandas

# Relative path to the raw 30-day earthquake GeoJSON file
path_to_file = '../data/raw/earthquake-data-last-30-days.geojson'
df = geopandas.read_file(path_to_file)

# Inspect the rows that have no value recorded in the 'gap' column
gap_is_missing = df['gap'].isna()
df.loc[gap_is_missing]

In [None]:
# Summary statistics of the loaded frame, using pandas' default `describe` settings
df.describe()

In [None]:
# Count the missing (NA) values in each column
df.isna().sum()

In [None]:
# Can drop quite a few columns: tz, dmin, net, ids, code, title, type (see below!), rms, nst, types, sources, status, mmi

# KEEP:

# alert - green, yellow, orange, red - nulls mean no alert level is available from the PAGER earthquake impact scale
# cdi - max reported intensity of the event
# depth - depth (in km) at which the earthquake originates; the calculation carries a lot of error, and placeholder values (e.g. a default depth of 33 km) are sometimes used
# depthError - uncertainty of reported depths of the event in km
# detail - Link to GeoJSON detail feed from a GeoJSON summary feed
# felt - the total number of felt reports submitted to DYFI? system
# magType - the method or algorithm used to calculate the preferred magnitude (e.g. ml, md, mb); it is not a magnitude bucket
# sig - significance of the event calculated based on a number of factors, including: magnitude, maximum MMI, felt reports, and estimated impact.
# tsunami - This flag is set to "1" for large events in oceanic regions and "0" otherwise. The existence or value of this flag does not indicate if a tsunami actually did or will exist. 
# If the flag value is "1", the event will include a link to the NOAA Tsunami website for tsunami information.

# GAP issue:

# gap - The largest azimuthal gap between azimuthally adjacent stations (in degrees). In general, the smaller this number, the more reliable is the calculated horizontal position of the earthquake.
# Earthquake locations in which the azimuthal gap exceeds 180 degrees typically have large location and depth uncertainties.

# Anything with a gap larger than 180 degrees has large location and depth uncertainty - remove all rows with no gap value or a gap above 180 degrees

# DROP rows where type != 'earthquake'
# then drop the type column

In [None]:
# Keep only the rows whose 'type' is exactly 'earthquake'
is_earthquake = df['type'].eq('earthquake')
df = df.loc[is_earthquake]

# Number of non-null ids remaining after the filter
df['id'].count()

In [None]:
# Show every column label currently in the frame as a plain list
list(df.columns)

In [3]:
import pandas as pd

# Path to a previously written, timestamped transformed-data JSON file
file_path = '../data/processed/2025-08-18 19:49:17.389434_transformed_earthquake_data.json'

# Load the processed JSON with pandas; this rebinds `df`, replacing the frame from the earlier cells
df = pd.read_json(file_path)

# Display the loaded frame to check its columns and values
df

Unnamed: 0,id,mag,place,time,updated,url,felt,cdi,alert,tsunami,sig,magType,longitude,latitude,depth
0,ci40401178,0.54,"6 km SSW of Idyllwild, CA",1755440362000,1755440571430,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,4,ml,-116.736500,33.686667,15.2600
1,nn00902709,2.40,"18 km SSW of Silver Peak, Nevada",1755440264026,1755440531841,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,89,ml,-117.713600,37.598200,11.2000
2,tx2025qdpddh,1.00,"11 km S of Stanton, Texas",1755438685809,1755439209131,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,15,ml,-101.785000,32.028000,5.0000
3,ci40401162,1.27,"19 km WNW of Progreso, B.C., MX",1755436003240,1755436224434,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,25,ml,-115.753667,32.667000,8.0500
4,ci40401146,1.49,"6 km SE of Bombay Beach, CA",1755435597350,1755435811490,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,34,ml,-115.692167,33.309667,4.9400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11357,uu80112156,0.63,"46 km SE of Mammoth, Wyoming",1752850249340,1752853587730,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,6,md,-110.354333,44.637667,6.1600
11361,tx2025oapazi,1.40,"34 km WSW of Ackerly, Texas",1752849765086,1752860820469,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,30,ml,-102.068000,32.433000,8.4717
11362,uw62145427,0.17,"23 km ENE of Ashford, Washington",1752849741770,1754341118860,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,0,ml,-121.760333,46.850500,-0.2600
11363,us6000qwm1,4.20,Banda Sea,1752849697672,1755000820040,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,271,mb,129.993800,-6.481100,162.6760


In [10]:
from src.transform.transform import transform

# First two arguments passed to transform(): the raw GeoJSON path and a
# timestamped path under data/processed.
# NOTE(review): presumably the second path is where transform() writes its result - confirm.
file_path_raw_data = '../data/raw/earthquake-data-last-30-days.geojson'
file_path_transformed_data = '../data/processed/2025-08-18 20:04:41.951567_transformed_earthquake_data.json'

# Column names handed to transform() as its third argument; none of them
# appear in the frame displayed below.
# NOTE(review): 'detail' is listed under KEEP in the notes cell, and 'gap' is
# described there as a row filter (null or > 180 degrees) - confirm that
# dropping both columns here is intended.
columns_to_drop = [
    'tz',
    'detail',
    'mmi',
    'status',
    'net',
    'code',
    'ids',
    'sources',
    'types',
    'nst',
    'dmin',
    'rms',
    'gap',
    'type',
    'title',
]

df = transform(file_path_raw_data, file_path_transformed_data, columns_to_drop)

# Display the transformed frame
df

Unnamed: 0,id,mag,place,time,updated,url,felt,cdi,alert,tsunami,sig,magType,longitude,latitude,depth
0,ci40401178,0.54,"6 km SSW of Idyllwild, CA",2025-08-17 14:19:22+00:00,2025-08-17 14:22:51.430000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,4,ml,-116.736500,33.686667,15.2600
1,nn00902709,2.40,"18 km SSW of Silver Peak, Nevada",2025-08-17 14:17:44.026000+00:00,2025-08-17 14:22:11.841000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,89,ml,-117.713600,37.598200,11.2000
2,tx2025qdpddh,1.00,"11 km S of Stanton, Texas",2025-08-17 13:51:25.809000+00:00,2025-08-17 14:00:09.131000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,15,ml,-101.785000,32.028000,5.0000
3,ci40401162,1.27,"19 km WNW of Progreso, B.C., MX",2025-08-17 13:06:43.240000+00:00,2025-08-17 13:10:24.434000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,25,ml,-115.753667,32.667000,8.0500
4,ci40401146,1.49,"6 km SE of Bombay Beach, CA",2025-08-17 12:59:57.350000+00:00,2025-08-17 13:03:31.490000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,34,ml,-115.692167,33.309667,4.9400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11357,uu80112156,0.63,"46 km SE of Mammoth, Wyoming",2025-07-18 14:50:49.340000+00:00,2025-07-18 15:46:27.730000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,6,md,-110.354333,44.637667,6.1600
11361,tx2025oapazi,1.40,"34 km WSW of Ackerly, Texas",2025-07-18 14:42:45.086000+00:00,2025-07-18 17:47:00.469000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,30,ml,-102.068000,32.433000,8.4717
11362,uw62145427,0.17,"23 km ENE of Ashford, Washington",2025-07-18 14:42:21.770000+00:00,2025-08-04 20:58:38.860000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,0,ml,-121.760333,46.850500,-0.2600
11363,us6000qwm1,4.20,Banda Sea,2025-07-18 14:41:37.672000+00:00,2025-08-12 12:13:40.040000+00:00,https://earthquake.usgs.gov/earthquakes/eventp...,,,,False,271,mb,129.993800,-6.481100,162.6760
