<a href="https://colab.research.google.com/github/praveena1149/project-1/blob/main/earthquake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime


# ---------------------------------------------------------------------------
# Download M3+ earthquakes from the USGS FDSN event service, one calendar
# month per request, flatten each GeoJSON feature into a row, and impute
# missing values.
#
# Produces:
#   all_records -- list of per-event dicts (one per GeoJSON feature)
#   df          -- DataFrame built from all_records with missing values filled
# ---------------------------------------------------------------------------
url = "https://earthquake.usgs.gov/fdsnws/event/1/query"

all_records = []
start_year = datetime.now().year - 5   # last 5 years
end_year = datetime.now().year

# Seconds to wait for the server before giving up on a single month.
REQUEST_TIMEOUT_S = 60

# GeoJSON "properties" entries copied verbatim into same-named columns,
# in the column order they appear in the resulting frame.
# NOTE: "sig" was previously stored under the key " sig" (leading space),
# which produced a column that could not be selected as df["sig"].
PROPERTY_FIELDS = [
    "mag", "magType", "alert", "felt", "cdi", "mmi", "code",
    "place", "status", "tsunami", "sig", "net", "nst", "dmin",
    "rms", "gap", "types", "ids", "sources",
    "type",  # Event type
]

# Event times are compared against UTC so that months which have not started
# yet are not requested at all.
utc_now = pd.Timestamp.now(tz="UTC")

for year in range(start_year, end_year + 1):
    for month in range(1, 13):
        if (year, month) > (utc_now.year, utc_now.month):
            break  # remaining months of this year are in the future

        start_date = f"{year}-{month:02d}-01"
        # The end of the window is the first day of the following month.
        end_date = f"{year+1}-01-01" if month == 12 else f"{year}-{month+1:02d}-01"

        params = {
            "format": "geojson",
            "starttime": start_date,
            "endtime": end_date,
            "minmagnitude": 3
        }

        # A network failure for one month must not discard the months that
        # were already downloaded, so report it and move on.
        try:
            response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)
        except requests.RequestException as e:
            print(f"⚠️ Request error for {start_date}: {e}")
            continue

        if response.status_code != 200:
            print(f"⚠️ Failed for {start_date}: {response.text[:200]}")
            continue

        try:
            data = response.json()
        except ValueError as e:  # requests raises a ValueError subclass on bad JSON
            print(f"⚠️ JSON error for {start_date}: {e}")
            continue

        for feature in data.get("features", []):
            props = feature.get("properties") or {}
            # Geometry may be null; coordinates are ordered [lon, lat, depth].
            coords = (feature.get("geometry") or {}).get("coordinates")
            lon, lat, depth = (list(coords) + [None] * 3)[:3] if coords else (None, None, None)

            record = {
                "id": feature.get("id"),
                "time": pd.to_datetime(props.get("time"), unit="ms"),
                "updated": pd.to_datetime(props.get("updated"), unit="ms"),
                "latitude": lat,
                "longitude": lon,
                "depth_km": depth,
            }
            record.update({name: props.get(name) for name in PROPERTY_FIELDS})
            all_records.append(record)

if not all_records:
    # Without rows the frame has no columns and the imputation below would
    # fail with an opaque KeyError.
    raise RuntimeError("No earthquake records were downloaded; check the warnings above.")

df = pd.DataFrame(all_records)

# Missing alert levels are filled with "green"; numeric gaps are filled
# with the mean of their OWN column (dmin was previously overwritten with
# the rms values).
df["alert"] = df["alert"].fillna("green")
for column in ("felt", "cdi", "mmi", "nst", "dmin", "rms", "gap"):
    df[column] = df[column].fillna(df[column].mean())
df



Unnamed: 0,id,time,updated,latitude,longitude,depth_km,mag,magType,alert,felt,...,sig,net,nst,dmin,rms,gap,types,ids,sources,type
0,us6000ddi8,2021-01-31 23:20:49.923,2021-04-16 19:02:44.040,-31.7493,-68.9337,17.270,4.7,mwr,green,12.000000,...,344,us,45.344097,0.82,0.82,42.0,",dyfi,moment-tensor,origin,phase-data,",",us6000ddi8,",",us,",earthquake
1,us6000dev6,2021-01-31 23:08:17.161,2021-04-16 19:03:47.040,-15.4902,-177.2052,426.710,4.1,mb,green,98.690541,...,259,us,45.344097,0.29,0.29,64.0,",origin,phase-data,",",us6000dev6,",",us,",earthquake
2,us6000dev5,2021-01-31 22:54:19.760,2021-04-16 19:03:47.040,19.7529,121.3159,46.730,4.7,mb,green,98.690541,...,340,us,45.344097,0.69,0.69,106.0,",origin,phase-data,",",us6000dev5,",",us,",earthquake
3,us6000ddhs,2021-01-31 22:06:00.832,2021-04-16 19:02:43.040,28.1524,57.2570,10.000,4.9,mb,green,98.690541,...,369,us,45.344097,0.61,0.61,71.0,",origin,phase-data,",",us6000ddhs,",",us,",earthquake
4,us6000dev4,2021-01-31 21:51:14.016,2021-04-16 19:03:46.040,71.3212,-3.7578,10.000,4.0,mb,green,98.690541,...,246,us,45.344097,0.50,0.50,65.0,",origin,phase-data,",",us6000dev4,",",us,",earthquake
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106045,us6000s5u8,2026-02-01 09:12:02.566,2026-02-01 13:45:33.040,-22.2362,-178.0947,376.934,4.4,mb,green,98.690541,...,298,us,34.000000,1.00,1.00,121.0,",origin,phase-data,",",us6000s5u8,",",us,",earthquake
106046,us6000s5ur,2026-02-01 06:44:49.812,2026-02-04 06:28:18.040,-4.7960,-105.8692,10.000,5.0,mww,green,98.690541,...,385,us,90.000000,0.64,0.64,158.0,",origin,phase-data,",",us6000s5ur,",",us,",earthquake
106047,us6000s5tq,2026-02-01 05:11:19.683,2026-02-04 00:11:13.784,27.4337,52.6436,10.000,5.2,mww,green,4.000000,...,418,us,65.000000,0.71,0.71,47.0,",dyfi,internal-moment-tensor,moment-tensor,ori...",",us6000s5tq,usauto6000s5tq,",",us,usauto,",earthquake
106048,us6000s5t1,2026-02-01 01:22:46.845,2026-02-02 14:00:49.983,47.5295,-111.2143,9.723,3.2,ml,green,197.000000,...,217,us,38.000000,0.90,0.90,95.0,",dyfi,origin,phase-data,",",us6000s5t1,",",us,",earthquake


In [None]:
# Distinct values found in the "alert" column.
df.loc[:, "alert"].unique()


array(['green', nan, 'yellow', 'orange', 'red'], dtype=object)

In [None]:
# Print the frame's summary: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106044 entries, 0 to 106043
Data columns (total 26 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   id         106044 non-null  object        
 1   time       106044 non-null  datetime64[ns]
 2   updated    106044 non-null  datetime64[ns]
 3   latitude   106044 non-null  float64       
 4   longitude  106044 non-null  float64       
 5   depth_km   106044 non-null  float64       
 6   mag        106044 non-null  float64       
 7   magType    106044 non-null  object        
 8   alert      4348 non-null    object        
 9   felt       106044 non-null  float64       
 10  cdi        106044 non-null  float64       
 11  mmi        106044 non-null  float64       
 12  code       106044 non-null  object        
 13  place      106044 non-null  object        
 14  status     106044 non-null  object        
 15  tsunami    106044 non-null  int64         
 16   sig       106044 no

In [None]:
# Impute missing values: absent alert levels become "green", and each numeric
# column is filled with its own mean. "dmin" is filled from "dmin" itself --
# it used to be assigned from "rms", which replaced every dmin value with the
# rms values.
df["alert"] = df["alert"].fillna("green")
for column in ("felt", "cdi", "mmi", "nst", "dmin", "rms", "gap"):
    df[column] = df[column].fillna(df[column].mean())
df


Unnamed: 0,id,time,updated,latitude,longitude,depth_km,mag,magType,alert,felt,...,sig,net,nst,dmin,rms,gap,types,ids,sources,type
0,us6000ddi8,2021-01-31 23:20:49.923,2021-04-16 19:02:44.040,-31.7493,-68.9337,17.270,4.7,mwr,unknown,12.000000,...,344,us,45.343795,0.82,0.82,42.0,",dyfi,moment-tensor,origin,phase-data,",",us6000ddi8,",",us,",earthquake
1,us6000dev6,2021-01-31 23:08:17.161,2021-04-16 19:03:47.040,-15.4902,-177.2052,426.710,4.1,mb,unknown,98.696097,...,259,us,45.343795,0.29,0.29,64.0,",origin,phase-data,",",us6000dev6,",",us,",earthquake
2,us6000dev5,2021-01-31 22:54:19.760,2021-04-16 19:03:47.040,19.7529,121.3159,46.730,4.7,mb,unknown,98.696097,...,340,us,45.343795,0.69,0.69,106.0,",origin,phase-data,",",us6000dev5,",",us,",earthquake
3,us6000ddhs,2021-01-31 22:06:00.832,2021-04-16 19:02:43.040,28.1524,57.2570,10.000,4.9,mb,unknown,98.696097,...,369,us,45.343795,0.61,0.61,71.0,",origin,phase-data,",",us6000ddhs,",",us,",earthquake
4,us6000dev4,2021-01-31 21:51:14.016,2021-04-16 19:03:46.040,71.3212,-3.7578,10.000,4.0,mb,unknown,98.696097,...,246,us,45.343795,0.50,0.50,65.0,",origin,phase-data,",",us6000dev4,",",us,",earthquake
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106041,us6000s5u8,2026-02-01 09:12:02.566,2026-02-01 13:45:33.040,-22.2362,-178.0947,376.934,4.4,mb,unknown,98.696097,...,298,us,34.000000,1.00,1.00,121.0,",origin,phase-data,",",us6000s5u8,",",us,",earthquake
106042,us6000s5ur,2026-02-01 06:44:49.812,2026-02-04 06:28:18.040,-4.7960,-105.8692,10.000,5.0,mww,unknown,98.696097,...,385,us,90.000000,0.64,0.64,158.0,",origin,phase-data,",",us6000s5ur,",",us,",earthquake
106043,us6000s5tq,2026-02-01 05:11:19.683,2026-02-04 00:11:13.784,27.4337,52.6436,10.000,5.2,mww,unknown,4.000000,...,418,us,65.000000,0.71,0.71,47.0,",dyfi,internal-moment-tensor,moment-tensor,ori...",",us6000s5tq,usauto6000s5tq,",",us,usauto,",earthquake
106044,us6000s5t1,2026-02-01 01:22:46.845,2026-02-02 14:00:49.983,47.5295,-111.2143,9.723,3.2,ml,unknown,197.000000,...,217,us,38.000000,0.90,0.90,95.0,",dyfi,origin,phase-data,",",us6000s5t1,",",us,",earthquake


In [None]:
# Number of missing values in each column.
df.isna().sum()

Unnamed: 0,0
id,0
time,0
updated,0
latitude,0
longitude,0
depth_km,0
mag,0
magType,0
alert,0
felt,0
