# Author: Srikar Kalle

**Student ID: C00313529**

**Date: December 13, 2024**

**Project: Earthquake Insights Analysis**

In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load quakes-cleaned.csv and parse its 'time' column into datetimes in one
# chained step; values that cannot be parsed become NaT instead of raising.
df = pd.read_csv('quakes-cleaned.csv').assign(
    time=lambda frame: pd.to_datetime(frame['time'], errors='coerce')
)

# Plot 1: Bar chart for most active regions

This plot shows the top 25 regions with the highest frequency of seismic activity. The color scale represents the number of seismic events, with darker colors showing higher frequencies. The bars are sorted in descending order, so the regions with the most earthquakes appear first (on the left), making earthquake-prone areas easy to identify.

In [3]:
# Count events per 'place' and keep the 25 most frequent; value_counts()
# already returns the counts in descending order.
regionCounts = df['place'].value_counts().head(25)

# Build a tidy two-column frame so the axis titles and the colour-bar title
# come from explicit column names. Passing raw index/values arrays leaves the
# naming to plotly (and to the pandas version's index name), which can make a
# labels={"x": ..., "y": ...} mapping silently fail to apply.
regionFrame = regionCounts.rename_axis('Region').reset_index(name='Frequency')

fig1 = px.bar(
    regionFrame,
    x='Region',
    y='Frequency',
    title="Top 25 Regions with Most Seismic Activity",
    color='Frequency',
    # Light lavender -> dark indigo: darker bars mean more events.
    color_continuous_scale=["#E6E6FA", "#D3D3D3", "#9370DB", "#8A2BE2", "#4B0082"],
)

fig1.update_layout(plot_bgcolor="#f0f0f0", paper_bgcolor="#ffffff")

fig1.show()

# Plot 2: Earthquake activity by time of day


This chart shows the distribution of earthquakes according to time of day. The data is divided into four segments: Night, Morning, Afternoon, and Evening. It helps to uncover if there are patterns related to the time of day when earthquakes are most likely to occur.

In [4]:
# Hour of day taken from the parsed timestamp (NaT rows give NaN and are
# excluded from the counts below).
df['hour'] = df['time'].dt.hour

# right=False makes the bins left-closed: [0, 6), [6, 12), [12, 18), [18, 24).
# With the default right-closed bins, hour 0 belongs to no bin and becomes NaN,
# so every earthquake in the midnight hour would be dropped, and hours 6, 12
# and 18 would be assigned to the preceding period.
timeOfDay = pd.cut(
    df['hour'],
    bins=[0, 6, 12, 18, 24],
    labels=['Night', 'Morning', 'Afternoon', 'Evening'],
    right=False,
)

# sort_index() orders the counts by category (Night -> Evening) instead of by
# frequency, so bar order and the colour assigned to each period are stable.
timeCounts = timeOfDay.value_counts().sort_index()

# Tidy frame with explicit column names so the axes are labelled predictably.
timeFrame = timeCounts.rename_axis('Time of Day').reset_index(name='Frequency')

fig2 = px.bar(
    timeFrame,
    x='Time of Day',
    y='Frequency',
    title="Earthquake Activity by Time of Day",
    color='Time of Day',
    color_discrete_sequence=["#FF9933", "#FFFFFF", "#138808", "#000080"],
)

fig2.update_layout(plot_bgcolor="#f0f0f0", paper_bgcolor="#ffffff")

fig2.show()

# Plot 3: Detect patterns or anomalies in earthquake occurrences over the study timeframe.

This line chart displays the daily frequency of earthquakes, enabling you to follow trends over time. By examining the ups and downs in daily seismicity, we can identify periods of unusual or heightened activity.

In [5]:
# Reduce every timestamp to its calendar date and store it on the frame.
event_times = pd.to_datetime(df['time'])
df['date'] = event_times.dt.date

# One row per date with the number of events recorded on it.
dailyCounts = df.groupby('date').size().rename('count').reset_index()

# Keyword arguments for the line chart, kept together for readability.
line_options = {
    "x": 'date',
    "y": 'count',
    "title": "Daily Earthquake Activity",
    "labels": {"date": "Date", "count": "Frequency"},
    "line_shape": 'spline',
    "color_discrete_sequence": px.colors.qualitative.Bold,
}
fig = px.line(dailyCounts, **line_options)

# Centre the title and apply the background colours.
fig.update_layout(
    title_x=0.5, autosize=True,
    plot_bgcolor="#f7f5e6", paper_bgcolor="#ffffff",
)

fig.show()

# Plot 4:  Analyze magnitude statistics for top regions.

The scatter map below shows the 35 regions with the highest average earthquake magnitude. The size and the color of each marker both encode that region's average magnitude. This plot helps the reader identify which regions experience the strongest earthquakes on average.

In [6]:
# Mean magnitude per place; keep the 35 places with the highest mean.
mean_mag_by_place = df.groupby('place')['mag'].mean()
regionMag = mean_mag_by_place.sort_values(ascending=False).head(35).reset_index()

# One coordinate pair per place: the first row seen for each place name.
coordinates = df.drop_duplicates(subset='place')[['place', 'latitude', 'longitude']]

# Attach the coordinates to the top-35 table (left join keeps all 35 rows).
regionMag = pd.merge(regionMag, coordinates, on='place', how='left')

# Marker size and colour are both driven by the mean magnitude.
geo_options = {
    "lat": 'latitude',
    "lon": 'longitude',
    "size": 'mag',
    "color": 'mag',
    "hover_name": 'place',
    "title": "Top 35 Regions by Average Earthquake Magnitude",
    "color_continuous_scale": "Plasma",
    "labels": {"mag": "Average Magnitude"},
    "projection": "natural earth",
}
fig4 = px.scatter_geo(regionMag, **geo_options)

fig4.update_layout(
    title_x=0.5,
    width=1000, height=600,
    plot_bgcolor="#f1f1f1", paper_bgcolor="#ffffff",
)

fig4.show()

# Plot 5:  Earthquake Activity Heatmap (by week)

This heatmap summarizes earthquake counts by week. Each row is an ISO week number and each column is a day of the month, so each cell holds the number of earthquakes recorded for that week/day combination, and the color of each cell represents the intensity. It gives a rapid overview of the pattern of seismic activity over the weeks.

In [7]:
# Make sure 'time' is datetime (unparseable values -> NaT) and derive the
# calendar date of every event.
df['time'] = pd.to_datetime(df['time'], errors='coerce')
df['date'] = df['time'].dt.date

# Events per calendar date, ordered chronologically, with a datetime index.
per_day = df['date'].value_counts().sort_index()
per_day.index = pd.to_datetime(per_day.index)

# Flatten to a two-column frame and tag each date with its ISO week number.
daily_activity = per_day.rename_axis('date').reset_index(name='count')
daily_activity['week'] = daily_activity['date'].dt.isocalendar().week

# Rows: ISO week number. Columns: day of month. Cells: summed event counts.
day_of_month = daily_activity['date'].dt.day
heatmap_data = daily_activity.pivot_table(
    index='week',
    columns=day_of_month,
    values='count',
    aggfunc='sum',
)

heatmap_options = {
    "title": "Earthquake Activity Heatmap (by Week)",
    "labels": {'x': 'Day of Month', 'y': 'Week Number'},
    "color_continuous_scale": 'Viridis',
}
fig = px.imshow(heatmap_data, **heatmap_options)

fig.update_layout(title_x=0.5, width=1000, height=600)
fig.show()