In [0]:
import requests

# IRIS FDSN event web-service endpoint.
iris_api_url = "https://service.iris.edu/fdsnws/event/1/query"

# Query window: all of 2018 through all of 2024.
# A date-only value is read by the service as midnight (00:00:00) of that day,
# so the end time is spelled out to the last second of 2024-12-31; otherwise
# every event on the final day would be left out.
query_start = "2018-01-01"
query_end = "2024-12-31T23:59:59"
query_window = "2018-01-01 through 2024-12-31"  # human-readable label for messages

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled connection and the notebook hangs.
request_timeout = (10, 300)

# Define search parameters
params = {
    "format": "text",  # Pipe-delimited text; the parsing cell reads it with sep="|"
    "starttime": query_start,
    "endtime": query_end,
    "minmagnitude": 1.0,  # Include all earthquakes from magnitude 1.0+
    "nodata": 404,  # Ask the service to answer 404 when no events match
}

# Send the API request
response = requests.get(iris_api_url, params=params, timeout=request_timeout)

# Report the outcome; `response` is consumed by the parsing cell that follows.
if response.status_code == 200:
    print(f"Successfully fetched seismic data for {query_window}.")
    print("First 500 characters of response:\n", response.text[:500])  # Show preview of data
elif response.status_code == 404:
    print(f"No earthquake events found for {query_window}.")
else:
    print(f"API request failed with status code {response.status_code}")
    print(response.text)  # Print full error response for debugging





In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from io import StringIO
import pandas as pd

# Initialize Spark Session (getOrCreate reuses an already-running session)
spark = SparkSession.builder.appName("SeismicDataProcessing").getOrCreate()

# Only a 200 response carries event rows. Parsing a 404/error body would yield
# a junk DataFrame that the later cells would write over the stored table, so
# stop here instead of continuing with bad data.
if response.status_code != 200:
    raise RuntimeError(
        f"Cannot parse seismic data: API responded with status {response.status_code}"
    )

# Extract text data from response
raw_text_data = response.text

# Convert the pipe-delimited text to a Pandas DataFrame (for initial parsing)
df_pandas = pd.read_csv(StringIO(raw_text_data), sep="|")

# Convert Pandas DataFrame to Spark DataFrame
df_spark = spark.createDataFrame(df_pandas)

# Show the first few rows
print("✅ Data successfully converted to Spark DataFrame!")
df_spark.show(5)



In [0]:
from pyspark.sql.functions import col

def _sanitize_column_name(raw_name):
    """Trim surrounding whitespace, drop '#', and turn inner spaces into underscores."""
    return raw_name.strip().replace("#", "").replace(" ", "_")


# Build one aliased column expression per source column, preserving order.
aliased_columns = []
for source_name in df_spark.columns:
    aliased_columns.append(col(source_name).alias(_sanitize_column_name(source_name)))

df_spark_clean = df_spark.select(aliased_columns)

# Inspect the resulting schema and a small sample to confirm the new names.
df_spark_clean.printSchema()
df_spark_clean.show(5)


In [0]:
# Unqualified name of the target table. The cells below prefix it with
# `tabular.dataexpert.` when truncating, writing, and reading it back.

table_name = "iris_seismic_events_bronze"


In [0]:
# Empty the target table before reloading it, so an orchestrated re-run
# replaces old rows with fresh ones. TRUNCATE removes all rows but keeps the
# table's structure & permissions (it does not drop the table).
#
# TRUNCATE TABLE raises if the table does not exist, which would break the
# very first run, so check for the table first.
full_table_name = f"tabular.dataexpert.{table_name}"

if spark.catalog.tableExists(full_table_name):
    spark.sql(f"TRUNCATE TABLE tabular.dataexpert.{table_name}")
    print(f"Truncated existing table (if any): {table_name}")
else:
    print(f"Table does not exist yet, nothing to truncate: {table_name}")



In [0]:
# Persist the cleaned DataFrame to `tabular.dataexpert` as a Delta table,
# replacing whatever the table currently holds.
delta_writer = df_spark_clean.write.format("delta")
delta_writer = delta_writer.mode("overwrite")
delta_writer.saveAsTable(f"tabular.dataexpert.{table_name}")

print(f"Data successfully stored in Delta Table: tabular.dataexpert.{table_name}")


In [0]:
# Load the table back from `tabular.dataexpert` to verify the write.
df_check = spark.read.table(f"tabular.dataexpert.{table_name}")

# Preview up to 5 rows, or report that the table came back empty.
stored_row_count = df_check.count()
if stored_row_count == 0:
    print("No earthquake events found in the table.")
else:
    df_check.show(5)
