In [10]:
import os
import requests
import pandas as pd
from io import StringIO
from dotenv import load_dotenv
from psycopg2.extras import execute_values
from utils import get_db_connection, log

# Configuration: call load_dotenv() (python-dotenv), then read the FIRMS
# endpoint and API key from the process environment. Each value is None when
# its variable is not set.
load_dotenv()
NASA_URL = os.environ.get("NASA_FIRMS_URL")
API_KEY = os.environ.get("NASA_FIRMS_API_KEY")

# Column names of the satellite_signals table, in the same order as the
# CREATE TABLE statement in load_df_to_postgres.
EXPECTED_COLUMNS = [
    "latitude",
    "longitude",
    "brightness",
    "scan",
    "track",
    "acq_date",
    "acq_time",
    "satellite",
    "confidence",
    "version",
    "bright_t31",
    "frp",
    "daynight",
]

def fetch_firms_df(country="USA", days=3, product="MODIS_NRT", timeout=30):
    """Download FIRMS detections as CSV and return them as a DataFrame.

    The request URL is built as
    ``{NASA_URL}/{API_KEY}/{product}/{country}/{days}``.

    Args:
        country: Country segment of the URL (default ``"USA"``).
        days: Day-range segment of the URL (default ``3``).
        product: Product segment of the URL (default ``"MODIS_NRT"``).
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the notebook kernel indefinitely.

    Returns:
        pandas.DataFrame parsed from the CSV response body.

    Raises:
        ValueError: if ``NASA_URL`` or ``API_KEY`` is not configured.
        RuntimeError: if the server answers with a non-200 status code.
    """
    if not NASA_URL or not API_KEY:
        raise ValueError(
            "NASA_FIRMS_URL and NASA_FIRMS_API_KEY must be set in the environment"
        )

    url = f"{NASA_URL}/{API_KEY}/{product}/{country}/{days}"

    # Never log the real URL: it embeds the API key, and notebook outputs are
    # saved and shared together with the code.
    log(f"Fetching FIRMS data: {NASA_URL}/***/{product}/{country}/{days}")

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP error {response.status_code}: {response.text}")
    return pd.read_csv(StringIO(response.text))

def load_df_to_postgres(df):
    """Insert the rows of ``df`` into the ``satellite_signals`` table.

    Column names are normalised (stripped, lower-cased, spaces replaced by
    underscores) on a copy, so the caller's DataFrame is left untouched.
    Only columns listed in ``EXPECTED_COLUMNS`` are written. Expected columns
    that are absent from ``df`` are left out of the INSERT column list, so the
    database stores NULL for them. Missing values (NaN/NaT) are sent as NULL.

    Args:
        df: pandas DataFrame holding the rows to insert.

    Returns:
        None. An empty ``df`` is logged and skipped without opening a
        database connection.

    Raises:
        ValueError: if ``df`` contains none of the expected columns.
    """
    if df.empty:
        log("⚠️ DataFrame is empty. No data to insert.")
        return

    # Standardize column names on a renamed copy; assigning to df.columns
    # would silently rename the columns of the caller's DataFrame.
    normalized = df.rename(
        columns=lambda c: c.strip().lower().replace(" ", "_")
    )

    # Retain only expected columns, in schema order.
    present = [col for col in EXPECTED_COLUMNS if col in normalized.columns]
    if not present:
        raise ValueError(
            "DataFrame contains none of the expected columns: "
            + ", ".join(EXPECTED_COLUMNS)
        )
    subset = normalized[present]

    # Cast to object first so None survives; on float columns pandas would
    # turn None straight back into NaN.
    subset = subset.astype(object).where(subset.notna(), None)
    values = list(subset.itertuples(index=False, name=None))

    # The column list must match the tuples built above. Using the full
    # EXPECTED_COLUMNS list here would fail whenever a column is missing.
    insert_sql = f"""
                INSERT INTO satellite_signals ({', '.join(present)})
                VALUES %s
            """

    with get_db_connection() as conn:
        with conn.cursor() as cur:
            log("Creating table satellite_signals if not exists...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS satellite_signals (
                    latitude FLOAT,
                    longitude FLOAT,
                    brightness FLOAT,
                    scan FLOAT,
                    track FLOAT,
                    acq_date DATE,
                    acq_time TEXT,
                    satellite TEXT,
                    confidence TEXT,
                    version TEXT,
                    bright_t31 FLOAT,
                    frp FLOAT,
                    daynight TEXT
                );
            """)
            # Commit the DDL on its own so the table exists even if the
            # insert below fails.
            conn.commit()

            log(f"Inserting {len(values)} rows using execute_values...")
            execute_values(cur, insert_sql, values)
        conn.commit()
    log("✔ Fast insert completed successfully.")

if __name__ == "__main__":
    # Fetch with the defaults declared on fetch_firms_df, then load the result
    # into Postgres. `df` is bound at module level here and is displayed by
    # the following cell, so the name must be kept.
    df = fetch_firms_df()
    load_df_to_postgres(df)


[LOG] Fetching FIRMS data: https://firms.modaps.eosdis.nasa.gov/api/country/csv/***/MODIS_NRT/USA/3
[LOG] Creating table satellite_signals if not exists...
[LOG] Inserting 2868 rows using execute_values...
[LOG] ✔ Fast insert completed successfully.


In [11]:
# Display the frame bound to `df` by the previous cell.
df

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,USA,62.93378,-156.67082,311.43,1.03,1.01,2025-07-05,13,Aqua,MODIS,34,6.1NRT,298.70,6.84,D
1,USA,63.51502,-152.14247,322.20,1.30,1.13,2025-07-05,13,Aqua,MODIS,78,6.1NRT,298.29,20.98,D
2,USA,63.77963,-144.31073,318.79,2.41,1.50,2025-07-05,13,Aqua,MODIS,74,6.1NRT,293.80,50.73,D
3,USA,63.78171,-144.26161,316.47,2.42,1.50,2025-07-05,13,Aqua,MODIS,64,6.1NRT,291.47,42.30,D
4,USA,64.17995,-147.93640,315.30,1.81,1.32,2025-07-05,13,Aqua,MODIS,39,6.1NRT,295.80,21.71,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2863,USA,40.89833,-77.70499,300.91,1.01,1.00,2025-07-07,826,Aqua,MODIS,35,6.1NRT,290.84,4.23,N
2864,USA,34.64052,-87.08317,303.38,1.99,1.38,2025-07-07,828,Aqua,MODIS,55,6.1NRT,291.26,12.11,N
2865,USA,34.64395,-87.07526,303.77,1.99,1.38,2025-07-07,828,Aqua,MODIS,57,6.1NRT,291.52,12.87,N
2866,USA,39.48615,-84.38073,302.36,1.57,1.23,2025-07-07,828,Aqua,MODIS,43,6.1NRT,291.39,6.26,N
