In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.style.use("seaborn-v0_8")


In [3]:
df = pd.read_csv("../data/processed/seismic_master_validated.csv")

df["timestamp"] = pd.to_datetime(
    df["timestamp"],
    format="mixed",
    utc=True
)

df["year_month"] = df["timestamp"].dt.to_period("M")

df.head()


  df["year_month"] = df["timestamp"].dt.to_period("M")


Unnamed: 0,timestamp,latitude,longitude,depth,magnitude,place,source,depth_log,lat_offset,lon_offset,...,month_cos,hour_sin,hour_cos,rolling_count_7d,rolling_count_30d,rolling_mean_mag_30d,days_since_last_major,is_major,month,year_month
0,1990-01-09 02:29:26.690000+00:00,28.225,88.163,79.1,5.5,"86 km NNW of Mangan, India",usgs,4.383276,-0.487576,3.192907,...,0.866025,0.5,0.866025,0,0,0.0,3650.0,1,1,1990-01
1,1990-01-10 23:01:21.960000+00:00,26.559,86.663,68.5,4.7,"8 km WNW of R?jbir?j, Nepal",usgs,4.241327,-2.153576,1.692907,...,0.866025,-0.258819,0.965926,1,1,5.5,1.855501,0,1,1990-01
2,1990-01-30 15:06:26.080000+00:00,28.599,85.714,52.4,4.5,"74 km NNW of Kod?ri??, Nepal",usgs,3.977811,-0.113576,0.743907,...,0.866025,-0.707107,-0.707107,0,2,5.1,21.525687,0,1,1990-01
3,1990-02-09 15:51:23.020000+00:00,29.925,80.73,33.0,4.6,"20 km ENE of D?rchul?, Nepal",usgs,3.526361,1.212424,-4.240093,...,0.5,-0.707107,-0.707107,0,2,4.6,31.556902,0,2,1990-02
4,1990-02-21 07:21:17.300000+00:00,28.082,82.43,33.0,4.8,"14 km ESE of Tuls?pur, Nepal",usgs,3.526361,-0.630576,-2.540093,...,0.5,0.965926,-0.258819,0,2,4.55,43.202669,0,2,1990-02


In [4]:
kathmandu_valley = ["Kathmandu", "Lalitpur", "Bhaktapur"]

western_nepal_keywords = [
    "Karnali", "Sudurpashchim", "Darchula", "Bajhang",
    "Bajura", "Achham", "Dailekh", "Jumla", "Kalikot"
]

def assign_region(place):
    place = str(place).lower()
    if any(k.lower() in place for k in kathmandu_valley):
        return "Kathmandu Valley"
    elif any(w.lower() in place for w in western_nepal_keywords):
        return "Western Nepal"
    else:
        return "Other Regions"

df["region"] = df["place"].apply(assign_region)

df["region"].value_counts()


region
Other Regions       2251
Western Nepal        440
Kathmandu Valley      42
Name: count, dtype: int64

In [5]:
regional_monthly = (
    df[df["region"].isin(["Kathmandu Valley", "Western Nepal"])]
    .groupby(["region", "year_month"])
    .agg(
        earthquake_count=("magnitude", "count"),
        avg_magnitude=("magnitude", "mean"),
        max_magnitude=("magnitude", "max")
    )
    .reset_index()
)

regional_monthly.head()


Unnamed: 0,region,year_month,earthquake_count,avg_magnitude,max_magnitude
0,Kathmandu Valley,1993-07,1,3.8,3.8
1,Kathmandu Valley,1995-11,1,3.8,3.8
2,Kathmandu Valley,1997-01,2,4.8,5.2
3,Kathmandu Valley,2001-07,1,5.0,5.0
4,Kathmandu Valley,2005-10,1,4.7,4.7


In [6]:
regional_monthly["year_month"] = regional_monthly["year_month"].dt.to_timestamp()


In [7]:
full_month_index = pd.date_range(
    start=regional_monthly["year_month"].min(),
    end=regional_monthly["year_month"].max(),
    freq="MS"
)


In [8]:
fixed_regional = []

for region in ["Kathmandu Valley", "Western Nepal"]:
    subset = regional_monthly[regional_monthly["region"] == region]
    subset = subset.set_index("year_month").reindex(full_month_index)

    subset["region"] = region
    subset["earthquake_count"] = subset["earthquake_count"].fillna(0)
    subset["avg_magnitude"] = subset["avg_magnitude"].fillna(method="ffill")

    fixed_regional.append(subset.reset_index())

regional_fixed = (
    pd.concat(fixed_regional)
      .rename(columns={"index": "year_month"})
)


  subset["avg_magnitude"] = subset["avg_magnitude"].fillna(method="ffill")
  subset["avg_magnitude"] = subset["avg_magnitude"].fillna(method="ffill")


In [9]:
regional_fixed.head()
regional_fixed["region"].value_counts()


region
Kathmandu Valley    409
Western Nepal       409
Name: count, dtype: int64