# USGS earthquake data

In [15]:
import pandas as pd
import requests

from pandas.api.types import is_numeric_dtype, is_datetime64_ns_dtype

In [32]:
def get_query_url(base_url, params):
    """Build a request URL by appending URL-encoded query parameters.

    Args:
        base_url: URL prefix that the query string is appended to verbatim,
            so it must already end with the query separator (e.g. ``?``).
        params: Mapping of parameter names to values. Values are converted
            to text and percent-encoded.

    Returns:
        ``base_url`` followed by ``key=value`` pairs joined with ``&``.
    """
    from urllib.parse import urlencode

    # urlencode escapes reserved characters (&, =, spaces, #, ...) in both
    # keys and values, so a parameter can never corrupt the query string.
    # For URL-safe values the output is identical to plain "key=value" joins.
    return f"{base_url}{urlencode(params)}"

# USGS earthquake event-query endpoint; the trailing "?" lets the query
# string be appended directly.
base_url = "https://earthquake.usgs.gov/fdsnws/event/1/query?"

# Query parameters sent to the API. The latitude/longitude limits form the
# bounding box used for Israel.
params = dict(
    # Response format
    format="csv",
    # Time window of the query (start date, end date)
    starttime="1900-01-01",
    endtime="2023-09-13",
    # Bounding box: latitude limits
    minlatitude=29.414,
    maxlatitude=33.523,
    # Bounding box: longitude limits
    minlongitude=33.992,
    maxlongitude=36.255,
    # Only events of at least this magnitude
    minmagnitude=2.5,
    # Restrict results to earthquake events
    eventtype="earthquake",
)

# Full URL for the API request
query_url = get_query_url(base_url=base_url, params=params)

In [33]:
# pandas fetches http(s) URLs through urllib, so network and HTTP failures
# surface as urllib.error.URLError (HTTPError is a subclass), not as
# requests exceptions.
from urllib.error import URLError

# Download the query result and keep only the columns used below.
try:
    df = pd.read_csv(query_url, usecols=('time', 'latitude', 'longitude', 'mag', 'depth'))
except URLError as e:
    # Network problems or an HTTP error status returned by the API.
    print(f"Request error: {e}")
    # Re-raise: continuing would leave `df` undefined (or stale from an
    # earlier run) and the validation cells below would check the wrong data.
    raise
except pd.errors.ParserError as e:
    # The response could not be parsed as CSV.
    print(f"CSV parsing error: {e}")
    raise

In [36]:
# No missing values.
# Use `not` rather than `~`: `~` is a bitwise operator, and on a plain Python
# bool `~True == -2` is truthy, so the assert could never fail. `not` is
# correct for both Python and NumPy booleans.
assert not df.isna().any().any()

In [41]:
# Every row must be unique: no row may be flagged as a repeat of an earlier one.
assert not df.duplicated().any()

In [69]:
# The 'time' column must be convertible to nanosecond-resolution datetimes.
# The converted values are only inspected here, not stored back into df.
time_as_datetime = pd.to_datetime(df['time'])
assert is_datetime64_ns_dtype(time_as_datetime)

# Every other column must hold numeric data.
non_numeric_columns = [
    name for name in df.columns.drop('time') if not is_numeric_dtype(df[name])
]
assert not non_numeric_columns

In [78]:
# Check for erroneous values: magnitudes above 10 and negative depths.
magnitude_too_large = df['mag'] > 10
assert not magnitude_too_large.any()

negative_depth = df['depth'] < 0
assert not negative_depth.any()

# Every event must lie inside the bounding box that was requested.
lat, lon = df['latitude'], df['longitude']

latitude_outside = (lat < params['minlatitude']) | (lat > params['maxlatitude'])
assert not latitude_outside.any()

longitude_outside = (lon < params['minlongitude']) | (lon > params['maxlongitude'])
assert not longitude_outside.any()

In [80]:
# Write the validated data to disk; the row index is written as the first column.
df.to_csv(path_or_buf='earthquakes_il.csv', index=True)