# Author: Srikar Kalle

**Student ID: C00313529**

**Date: December 13, 2024**

**Project: Earthquake Insights Analysis**

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [2]:
df = pd.read_csv('quakes-cleaned.csv')

In [3]:
most_frequent_region = df['place'].value_counts().idxmax()
most_frequent_region

'16 km NE of Milford, Utah'

Most active local area

In [4]:
top_places = df['place'].value_counts().head(25)

In [5]:
top_places

place
16 km NE of Milford, Utah         117
22 km NNE of Yerington, Nevada    110
66 km WNW of Beluga, Alaska       107
21 km NNE of Yerington, Nevada    107
65 km WNW of Beluga, Alaska        86
23 km NNE of Yerington, Nevada     83
8 km NNW of The Geysers, CA        75
15 km NE of Milford, Utah          71
20 km NNE of Yerington, Nevada     67
9 km NW of The Geysers, CA         65
7 km NW of The Geysers, CA         61
7 km WNW of Cobb, CA               61
67 km WNW of Beluga, Alaska        55
10 km NW of The Geysers, CA        53
8 km WNW of Cobb, CA               52
7 km NNW of The Geysers, CA        48
24 km NNE of Yerington, Nevada     39
6 km WNW of Cobb, CA               39
64 km WNW of Tyonek, Alaska        38
9 km WNW of Cobb, CA               36
6 km NNW of The Geysers, CA        35
2 km NNW of The Geysers, CA        33
66 km WNW of Tyonek, Alaska        32
67 km WNW of Tyonek, Alaska        31
6 km NW of The Geysers, CA         30
Name: count, dtype: int64

Finding out the Average mag

In [6]:
avg_mag = df['mag'].mean()
avg_mag

np.float64(1.6052180434363226)

Largest Earthquake by magnitude

In [7]:
largestEarthquake = df[df['mag'] == df['mag'].max()]

In [8]:
largestEarthquake

Unnamed: 0,time,latitude,longitude,depth,mag,magType,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
3711,2024-12-17 01:47:26.347000+00:00,-17.6859,168.0338,57.094,7.3,mww,us7000nzf3,2024-12-30 15:55:22.671000+00:00,"30 km W of Port-Vila, Vanuatu",earthquake,5.87,4.465,0.036,73.0,reviewed,us,us


In [9]:
df['time'] = pd.to_datetime(df['time'], errors='coerce')

In [10]:
invalid_times = df[df['time'].isna()]

In [11]:
invalid_times

Unnamed: 0,time,latitude,longitude,depth,mag,magType,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
462,NaT,39.160500,-119.052300,2.60,1.60,ml,nn00890760,2024-12-28 19:08:23.706000+00:00,"21 km NNE of Yerington, Nevada",earthquake,,0.40,0.200000,5.0,automatic,nn,nn
797,NaT,61.301167,-152.264667,1.78,-0.53,ml,av93428161,2024-12-28 02:51:04.360000+00:00,"65 km WNW of Beluga, Alaska",earthquake,0.30,1.12,0.164671,8.0,reviewed,av,av
929,NaT,60.522333,-152.705333,3.12,-0.68,ml,av93427456,2024-12-27 22:12:53.380000+00:00,"76 km W of Salamatof, Alaska",earthquake,0.78,0.92,0.148367,3.0,reviewed,av,av
1020,NaT,45.790500,-120.387000,17.35,1.47,ml,uw62060622,2024-12-27 23:06:45.060000+00:00,"14 km WNW of Roosevelt, Washington",earthquake,0.43,0.76,0.067781,8.0,reviewed,uw,uw
1136,NaT,44.324500,-115.193167,4.20,1.88,ml,mb90069578,2024-12-26 15:55:46.910000+00:00,"23 km WNW of Stanley, Idaho",earthquake,0.88,1.89,0.211147,14.0,reviewed,mb,mb
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9694,NaT,44.695000,-110.972667,1.85,0.20,md,uu80002218,2024-12-05 19:44:47.520000+00:00,"11 km ENE of West Yellowstone, Montana",earthquake,0.40,0.21,0.508681,6.0,reviewed,uu,uu
9825,NaT,38.512500,-122.490667,8.13,1.13,md,nc75093681,2024-12-13 15:07:18.370000+00:00,"2 km NW of Saint Helena, CA",earthquake,0.18,0.52,0.286000,24.0,reviewed,nc,nc
9853,NaT,38.834835,-122.798500,2.18,1.34,md,nc75093631,2024-12-01 14:54:27.544000+00:00,"7 km WNW of Cobb, CA",earthquake,0.18,0.38,0.180000,28.0,automatic,nc,nc
9938,NaT,32.793000,-115.442833,11.89,1.41,ml,ci40991208,2024-12-02 17:01:33.409000+00:00,"6 km WSW of Holtville, CA",earthquake,0.81,0.85,0.162000,11.0,reviewed,ci,ci


In [12]:
df['date'] = df['time'].dt.date
most_active_day = df['date'].value_counts().idxmax()
print("Most Active Day:", most_active_day)

Most Active Day: 2024-12-05


In [13]:
correlation = df[['depth', 'mag']].corr()
print("Depth-Magnitude Correlation:", correlation.loc['depth', 'mag'])

Depth-Magnitude Correlation: 0.37970868723246265


In [14]:
daily_trends = df.groupby(df['time'].dt.date).size()
print(daily_trends)

time
2024-11-30     77
2024-12-01    350
2024-12-02    352
2024-12-03    372
2024-12-04    371
2024-12-05    616
2024-12-06    440
2024-12-07    392
2024-12-08    389
2024-12-09    576
2024-12-10    557
2024-12-11    301
2024-12-12    279
2024-12-13    293
2024-12-14    296
2024-12-15    376
2024-12-16    332
2024-12-17    395
2024-12-18    290
2024-12-19    291
2024-12-20    335
2024-12-21    226
2024-12-22    212
2024-12-23    279
2024-12-24    225
2024-12-25    199
2024-12-26    242
2024-12-27    284
2024-12-28    325
2024-12-29    230
2024-12-30    179
dtype: int64


In [15]:
df['energy'] = 10 ** (1.5 * df['mag'] + 4.8)
total_energy = df['energy'].sum()
print("Total Energy Released:", total_energy)

Total Energy Released: 1.0827174108409248e+16


Regions with Deepest Earthquakes

In [16]:
deepest_regions = df.nlargest(5, 'depth')[['place', 'depth']]
print("Regions with Deepest Earthquakes:", deepest_regions)

Regions with Deepest Earthquakes:                          place    depth
4910               Fiji region  625.963
5681  289 km E of Levuka, Fiji  618.177
8666  228 km E of Levuka, Fiji  614.545
2403  268 km E of Levuka, Fiji  612.857
8577               Fiji region  608.784


Average Magnitude by Hemisphere:

In [17]:
df['hemisphere'] = df['latitude'].apply(lambda x: 'Northern' if x >= 0 else 'Southern')
magnitude_comparison = df.groupby('hemisphere')['mag'].mean()
print("Average Magnitude by Hemisphere:", magnitude_comparison)

Average Magnitude by Hemisphere: hemisphere
Northern    1.497524
Southern    4.686589
Name: mag, dtype: float64


correlation between magnitude and depth

In [18]:
magnitudeDepthCorrelation = df[['mag', 'depth']].corr().iloc[0, 1]
print("Correlation Between Magnitude and Depth:")
print(magnitudeDepthCorrelation)

Correlation Between Magnitude and Depth:
0.37970868723246265


Finding Most active time of day

In [19]:
def get_time_of_day(hour):
    if 6 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 18:
        return 'Afternoon'
    elif 18 <= hour < 24:
        return 'Evening'
    else:
        return 'Night'

In [20]:
df['time_of_day'] = df['time'].dt.hour.apply(get_time_of_day)
most_active_time = df['time_of_day'].value_counts().idxmax()
print(f"Most active time of day: {most_active_time}")

Most active time of day: Night


Finding Most active day of the week

In [21]:
most_active_weekday = df['time'].dt.day_name().value_counts().idxmax()
print(f"Most active day of the week: {most_active_weekday}")

Most active day of the week: Monday


Finding the Frequency of earthquakes by magnitude ranges

In [22]:
mag_bins = [0, 2, 4, 6, 8, 10]
df['mag_range'] = pd.cut(df['mag'], bins=mag_bins)
freq_by_mag_range = df['mag_range'].value_counts()
print("Frequency of earthquakes by magnitude ranges:")
print(freq_by_mag_range)

Frequency of earthquakes by magnitude ranges:
mag_range
(0, 2]     6623
(2, 4]     2033
(4, 6]      701
(6, 8]        8
(8, 10]       0
Name: count, dtype: int64


Finding out the Longest time gap between earthquakes

In [23]:
time_diffs = df['time'].sort_values().diff()
max_gap = time_diffs.max()
print(f"Longest time gap between earthquakes: {max_gap}")

Longest time gap between earthquakes: 0 days 00:53:06.128000


In [24]:
smallest_eq_regions = df.nsmallest(5, 'mag')[['place', 'mag']]
print("Regions with the smallest earthquakes:")
print(smallest_eq_regions)

Regions with the smallest earthquakes:
                                       place   mag
10082            8 km NW of Prague, Oklahoma -1.94
9544          65 km ENE of Pedro Bay, Alaska -1.49
4392            79 km W of Salamatof, Alaska -1.29
8741   9 km NNE of West Yellowstone, Montana -1.21
3214          66 km ENE of Pedro Bay, Alaska -1.17


Earthquake activity over 30 days

In [25]:
daily_activity = df['date'].value_counts().sort_index()
print("Earthquake activity over 30 days:")
print(daily_activity)

Earthquake activity over 30 days:
date
2024-11-30     77
2024-12-01    350
2024-12-02    352
2024-12-03    372
2024-12-04    371
2024-12-05    616
2024-12-06    440
2024-12-07    392
2024-12-08    389
2024-12-09    576
2024-12-10    557
2024-12-11    301
2024-12-12    279
2024-12-13    293
2024-12-14    296
2024-12-15    376
2024-12-16    332
2024-12-17    395
2024-12-18    290
2024-12-19    291
2024-12-20    335
2024-12-21    226
2024-12-22    212
2024-12-23    279
2024-12-24    225
2024-12-25    199
2024-12-26    242
2024-12-27    284
2024-12-28    325
2024-12-29    230
2024-12-30    179
Name: count, dtype: int64


Cluster Analysis - Use K-Means clustering to identify natural groupings of earthquake-prone areas.

In [26]:
import plotly.express as px
coords = df[['latitude', 'longitude']]

kmeans = KMeans(n_clusters=5, random_state=42)
df['cluster'] = kmeans.fit_predict(coords)

In [27]:
kmeans

In [28]:
df['cluster']

0        0
1        0
2        2
3        0
4        0
        ..
10152    0
10153    0
10154    0
10155    2
10156    2
Name: cluster, Length: 10157, dtype: int32

In [29]:
df

Unnamed: 0,time,latitude,longitude,depth,mag,magType,id,updated,place,type,...,magNst,status,locationSource,magSource,date,energy,hemisphere,time_of_day,mag_range,cluster
0,2024-12-30 18:44:42.850000+00:00,33.218334,-116.371834,2.64,1.23,ml,ci40830255,2024-12-30 18:46:45.271000+00:00,"4 km S of Borrego Springs, CA",earthquake,...,11.0,automatic,ci,ci,2024-12-30,4.415704e+06,Northern,Evening,"(0.0, 2.0]",0
1,2024-12-30 18:44:26.550000+00:00,33.458500,-116.476667,11.66,0.82,ml,ci40830247,2024-12-30 18:49:53.680000+00:00,"21 km ESE of Anza, CA",earthquake,...,24.0,automatic,ci,ci,2024-12-30,1.071519e+06,Northern,Evening,"(0.0, 2.0]",0
2,2024-12-30 18:41:02.595000+00:00,64.704400,-149.676400,0.00,1.70,ml,ak024gry49oz,2024-12-30 18:43:21.457000+00:00,"28 km WNW of Four Mile Road, Alaska",earthquake,...,,automatic,ak,ak,2024-12-30,2.238721e+07,Northern,Evening,"(0.0, 2.0]",2
3,2024-12-30 18:31:44.143000+00:00,39.403400,-119.933700,2.00,1.00,ml,nn00890898,2024-12-30 18:33:38.776000+00:00,"7 km E of Floriston, California",earthquake,...,3.0,automatic,nn,nn,2024-12-30,1.995262e+06,Northern,Evening,"(0.0, 2.0]",0
4,2024-12-30 18:17:32.520000+00:00,35.992001,-120.549667,3.89,1.66,md,nc75109821,2024-12-30 18:37:18.008000+00:00,"15 km NW of Parkfield, CA",earthquake,...,10.0,automatic,nc,nc,2024-12-30,1.949845e+07,Northern,Evening,"(0.0, 2.0]",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10152,2024-11-30 19:09:25.420000+00:00,38.820999,-122.853500,2.65,1.07,md,nc75093221,2024-11-30 20:32:17.857000+00:00,"10 km NW of The Geysers, CA",earthquake,...,21.0,automatic,nc,nc,2024-11-30,2.540973e+06,Northern,Evening,"(0.0, 2.0]",0
10153,2024-11-30 19:08:14.390000+00:00,38.786667,-122.767333,3.01,0.40,md,nc75093216,2024-12-12 00:22:16.359000+00:00,"1 km NNW of The Geysers, CA",earthquake,...,8.0,reviewed,nc,nc,2024-11-30,2.511886e+05,Northern,Evening,"(0.0, 2.0]",0
10154,2024-11-30 18:57:31.100000+00:00,44.061167,-110.681833,7.29,0.34,md,uu80002193,2024-12-05 21:18:33.220000+00:00,"44 km NE of Alta, Wyoming",earthquake,...,3.0,reviewed,uu,uu,2024-11-30,2.041738e+05,Northern,Evening,"(0.0, 2.0]",0
10155,2024-11-30 18:54:39.763000+00:00,61.309000,-152.258300,2.80,0.20,ml,ak024fec2q20,2024-12-03 23:38:52.140000+00:00,"65 km WNW of Beluga, Alaska",earthquake,...,,automatic,ak,ak,2024-11-30,1.258925e+05,Northern,Evening,"(0.0, 2.0]",2


In [30]:
filtered_df = df[df['cluster'].isin([0, 2])]
filtered_df

Unnamed: 0,time,latitude,longitude,depth,mag,magType,id,updated,place,type,...,magNst,status,locationSource,magSource,date,energy,hemisphere,time_of_day,mag_range,cluster
0,2024-12-30 18:44:42.850000+00:00,33.218334,-116.371834,2.64,1.23,ml,ci40830255,2024-12-30 18:46:45.271000+00:00,"4 km S of Borrego Springs, CA",earthquake,...,11.0,automatic,ci,ci,2024-12-30,4.415704e+06,Northern,Evening,"(0.0, 2.0]",0
1,2024-12-30 18:44:26.550000+00:00,33.458500,-116.476667,11.66,0.82,ml,ci40830247,2024-12-30 18:49:53.680000+00:00,"21 km ESE of Anza, CA",earthquake,...,24.0,automatic,ci,ci,2024-12-30,1.071519e+06,Northern,Evening,"(0.0, 2.0]",0
2,2024-12-30 18:41:02.595000+00:00,64.704400,-149.676400,0.00,1.70,ml,ak024gry49oz,2024-12-30 18:43:21.457000+00:00,"28 km WNW of Four Mile Road, Alaska",earthquake,...,,automatic,ak,ak,2024-12-30,2.238721e+07,Northern,Evening,"(0.0, 2.0]",2
3,2024-12-30 18:31:44.143000+00:00,39.403400,-119.933700,2.00,1.00,ml,nn00890898,2024-12-30 18:33:38.776000+00:00,"7 km E of Floriston, California",earthquake,...,3.0,automatic,nn,nn,2024-12-30,1.995262e+06,Northern,Evening,"(0.0, 2.0]",0
4,2024-12-30 18:17:32.520000+00:00,35.992001,-120.549667,3.89,1.66,md,nc75109821,2024-12-30 18:37:18.008000+00:00,"15 km NW of Parkfield, CA",earthquake,...,10.0,automatic,nc,nc,2024-12-30,1.949845e+07,Northern,Evening,"(0.0, 2.0]",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10152,2024-11-30 19:09:25.420000+00:00,38.820999,-122.853500,2.65,1.07,md,nc75093221,2024-11-30 20:32:17.857000+00:00,"10 km NW of The Geysers, CA",earthquake,...,21.0,automatic,nc,nc,2024-11-30,2.540973e+06,Northern,Evening,"(0.0, 2.0]",0
10153,2024-11-30 19:08:14.390000+00:00,38.786667,-122.767333,3.01,0.40,md,nc75093216,2024-12-12 00:22:16.359000+00:00,"1 km NNW of The Geysers, CA",earthquake,...,8.0,reviewed,nc,nc,2024-11-30,2.511886e+05,Northern,Evening,"(0.0, 2.0]",0
10154,2024-11-30 18:57:31.100000+00:00,44.061167,-110.681833,7.29,0.34,md,uu80002193,2024-12-05 21:18:33.220000+00:00,"44 km NE of Alta, Wyoming",earthquake,...,3.0,reviewed,uu,uu,2024-11-30,2.041738e+05,Northern,Evening,"(0.0, 2.0]",0
10155,2024-11-30 18:54:39.763000+00:00,61.309000,-152.258300,2.80,0.20,ml,ak024fec2q20,2024-12-03 23:38:52.140000+00:00,"65 km WNW of Beluga, Alaska",earthquake,...,,automatic,ak,ak,2024-11-30,1.258925e+05,Northern,Evening,"(0.0, 2.0]",2


Filtering based on magnitude


In [31]:
filtered_mag_df = df[df['mag'] > 4.0]
filtered_mag_df

Unnamed: 0,time,latitude,longitude,depth,mag,magType,id,updated,place,type,...,magNst,status,locationSource,magSource,date,energy,hemisphere,time_of_day,mag_range,cluster
30,2024-12-30 15:27:54.584000+00:00,12.2240,141.5148,68.222,4.8,mb,us6000pghx,2024-12-30 16:40:28.040000+00:00,"292 km NNE of Fais, Micronesia",earthquake,...,72.0,reviewed,us,us,2024-12-30,1.000000e+12,Northern,Afternoon,"(4, 6]",1
31,2024-12-30 15:25:58.132000+00:00,-24.1824,-69.5355,87.842,4.5,mb,us6000pghw,2024-12-30 16:19:35.040000+00:00,"105 km ESE of Antofagasta, Chile",earthquake,...,14.0,reviewed,us,us,2024-12-30,3.548134e+11,Southern,Afternoon,"(4, 6]",3
45,2024-12-30 14:26:52.836000+00:00,8.6333,39.9575,10.000,4.6,mb,us6000pghr,2024-12-30 15:12:03.040000+00:00,"29 km S of Metahāra, Ethiopia",earthquake,...,44.0,reviewed,us,us,2024-12-30,5.011872e+11,Northern,Afternoon,"(4, 6]",1
48,2024-12-30 14:12:34.988000+00:00,-17.8527,-178.1709,572.011,4.6,mb,us6000pghq,2024-12-30 15:01:06.040000+00:00,"267 km E of Levuka, Fiji",earthquake,...,47.0,reviewed,us,us,2024-12-30,5.011872e+11,Southern,Afternoon,"(4, 6]",4
72,2024-12-30 10:39:05.339000+00:00,-26.8647,179.2949,500.846,4.8,mb,us6000pgh3,2024-12-30 11:32:11.040000+00:00,south of the Fiji Islands,earthquake,...,111.0,reviewed,us,us,2024-12-30,1.000000e+12,Southern,Morning,"(4, 6]",1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10078,2024-12-01 00:01:58.270000+00:00,-28.7577,-176.4890,10.000,4.9,mb,us7000nv77,2024-12-20 04:25:03.040000+00:00,Kermadec Islands region,earthquake,...,43.0,reviewed,us,us,2024-12-01,1.412538e+12,Southern,Night,"(4, 6]",4
10080,2024-11-30 23:52:45.796000+00:00,-7.6570,127.7555,168.706,4.2,mb,us7000nv73,2024-12-22 23:30:41.040000+00:00,"126 km NE of Lospalos, Timor Leste",earthquake,...,16.0,reviewed,us,us,2024-11-30,1.258925e+11,Southern,Evening,"(4, 6]",1
10111,2024-11-30 22:12:26.968000+00:00,-22.5817,-12.9569,10.000,4.8,mb,us7000nv6w,2024-12-18 01:37:30.040000+00:00,southern Mid-Atlantic Ridge,earthquake,...,39.0,reviewed,us,us,2024-11-30,1.000000e+12,Southern,Evening,"(4, 6]",3
10125,2024-11-30 21:36:05.908000+00:00,13.0543,143.9264,132.912,4.7,mb,us7000nv6t,2024-12-18 01:35:09.243000+00:00,"83 km WSW of Merizo Village, Guam",earthquake,...,129.0,reviewed,us,us,2024-11-30,7.079458e+11,Northern,Evening,"(4, 6]",1


In [32]:
fig = px.scatter_geo(
    filtered_mag_df, 
    lat='latitude', 
    lon='longitude', 
    color='cluster',
    title="Cluster Analysis of Earthquake Locations based on magnitude",
    projection="natural earth"
)
fig.show()

Trend Analysis Over Time - Detect patterns or anomalies in earthquake occurrences over the study timeframe.

In [33]:
df['date'] = pd.to_datetime(df['time']).dt.date
daily_counts = df.groupby('date').size().reset_index(name='count')
daily_counts

Unnamed: 0,date,count
0,2024-11-30,77
1,2024-12-01,350
2,2024-12-02,352
3,2024-12-03,372
4,2024-12-04,371
5,2024-12-05,616
6,2024-12-06,440
7,2024-12-07,392
8,2024-12-08,389
9,2024-12-09,576


Magnitude Distribution by Region

In [34]:
region_mag = df.groupby('place')['mag'].mean().sort_values(ascending=False).head(35).reset_index()
region_mag

Unnamed: 0,place,mag
0,"2024 Offshore Cape Mendocino, California Earth...",7.0
1,"30 km W of Port-Vila, Vanuatu",6.7
2,"56 km ESE of Molina, Chile",6.4
3,"136 km W of Neiafu, Tonga",5.9
4,"37 km S of Guisa, Cuba",5.9
5,"26 km SE of Tinogasta, Argentina",5.9
6,"2024 Parker Butte, Nevada Earthquake",5.7
7,"115 km SW of Adak, Alaska",5.7
8,"255 km E of Levuka, Fiji",5.6
9,"9 km S of Conchagua, El Salvador",5.6


Event Duration Analysis

Most active time of day

In [35]:
df['hour'] = df['time'].dt.hour
timeOfDay = pd.cut(df['hour'], bins=[0, 6, 12, 18, 24], labels=['Night', 'Morning', 'Afternoon', 'Evening'])

In [36]:
mostActiveTime = timeOfDay.value_counts()

In [37]:
mostActiveTime

hour
Morning      2621
Night        2537
Afternoon    2414
Evening      2056
Name: count, dtype: int64