In [6]:
import os
import requests
import pandas as pd
from io import StringIO
from dotenv import load_dotenv
from psycopg2.extras import execute_values
from utils import get_db_connection, log

# Configuration: python-dotenv runs first, then the FIRMS settings are read
# from the process environment (either value is None when the variable is unset).
load_dotenv()
NASA_URL = os.environ.get("NASA_FIRMS_URL")
API_KEY = os.environ.get("NASA_FIRMS_API_KEY")

# Columns kept from the downloaded CSV. The names mirror the columns of the
# satellite_signals table created in load_df_to_postgres, and the order here
# is the column order used when rows are inserted.
EXPECTED_COLUMNS = [
    "latitude",
    "longitude",
    "brightness",
    "scan",
    "track",
    "acq_date",
    "acq_time",
    "satellite",
    "confidence",
    "version",
    "bright_t31",
    "frp",
    "daynight",
]

def fetch_firms_df(country="USA", days=3, product="MODIS_NRT", timeout=60):
    """Download FIRMS detections and return them as a DataFrame.

    The request URL is ``{NASA_URL}/{API_KEY}/{product}/{country}/{days}``;
    the response body is parsed as CSV.

    Args:
        country: Country path segment of the request URL.
        days: Day-range path segment of the request URL.
        product: Product path segment of the request URL.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection fails instead of blocking the notebook forever.

    Returns:
        pandas.DataFrame parsed from the CSV response body.

    Raises:
        Exception: if NASA_FIRMS_URL / NASA_FIRMS_API_KEY are not configured,
            or if the server answers with a status other than 200.
    """
    # Fail early with a clear message instead of requesting "None/None/...".
    if not NASA_URL or not API_KEY:
        raise Exception(
            "NASA_FIRMS_URL and NASA_FIRMS_API_KEY must be set in the environment"
        )

    url = f"{NASA_URL}/{API_KEY}/{product}/{country}/{days}"
    # The API key is a secret: log a masked URL so it never ends up in logs
    # or in saved notebook output.
    masked_url = f"{NASA_URL}/***/{product}/{country}/{days}"
    log(f"Fetching FIRMS data: {masked_url}")

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"HTTP error {response.status_code}: {response.text}")

    # NOTE(review): read_csv infers dtypes, so acq_time is parsed as an integer
    # and any leading zeros are lost before it reaches the TEXT column - confirm
    # whether that matters downstream.
    return pd.read_csv(StringIO(response.text))

def load_df_to_postgres(df):
    """Append the rows of ``df`` to the ``satellite_signals`` table.

    Column names are normalised (stripped, lower-cased, spaces replaced by
    underscores) on a copy, so the caller's frame is not modified. Only the
    columns listed in EXPECTED_COLUMNS are inserted; expected columns missing
    from the frame are left out of the INSERT and therefore stored as NULL.
    Missing values (NaN/NaT) are sent as NULL.

    The table is created if it does not exist. Rows are inserted with a plain
    INSERT and the table has no keys, so calling this twice with the same
    frame stores the rows twice.

    Args:
        df: pandas.DataFrame with string column names.

    Returns:
        None.

    Raises:
        ValueError: if the frame has none of the expected columns.
    """
    if df.empty:
        log("⚠️ DataFrame is empty. No data to insert.")
        return

    # Standardize column names on a copy rather than assigning to df.columns,
    # which would rename the caller's frame as a side effect.
    normalized = df.rename(columns=lambda c: c.strip().lower().replace(" ", "_"))

    # The INSERT column list must match the tuples exactly, so derive both
    # from the columns that are actually present.
    insert_columns = [col for col in EXPECTED_COLUMNS if col in normalized.columns]
    if not insert_columns:
        raise ValueError(
            f"DataFrame has none of the expected columns: {EXPECTED_COLUMNS}"
        )
    missing_columns = [col for col in EXPECTED_COLUMNS if col not in insert_columns]
    if missing_columns:
        log(f"⚠️ Missing expected columns (will be NULL): {', '.join(missing_columns)}")

    # astype(object) turns numpy scalars into plain Python values; missing
    # values become None so they are stored as NULL instead of NaN / 'NaN'.
    raw_rows = normalized[insert_columns].astype(object).to_numpy()
    values = [
        tuple(None if pd.isna(cell) else cell for cell in row)
        for row in raw_rows
    ]

    # NOTE(review): if get_db_connection returns a raw psycopg2 connection,
    # leaving this `with` block ends the transaction but does not close the
    # connection - confirm against utils.
    with get_db_connection() as conn:
        with conn.cursor() as cur:
            log("Creating table satellite_signals if not exists...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS satellite_signals (
                    latitude FLOAT,
                    longitude FLOAT,
                    brightness FLOAT,
                    scan FLOAT,
                    track FLOAT,
                    acq_date DATE,
                    acq_time TEXT,
                    satellite TEXT,
                    confidence TEXT,
                    version TEXT,
                    bright_t31 FLOAT,
                    frp FLOAT,
                    daynight TEXT
                );
            """)
            conn.commit()

            # Column names come from the EXPECTED_COLUMNS whitelist, never
            # from the downloaded data, so interpolating them is safe.
            insert_sql = f"""
                INSERT INTO satellite_signals ({', '.join(insert_columns)})
                VALUES %s
            """
            log(f"Inserting {len(values)} rows using execute_values...")
            execute_values(cur, insert_sql, values)
        conn.commit()
    log("✔ Fast insert completed successfully.")

# Entry point: download the detections with the default arguments and load
# them into Postgres. `df` is bound at module level, so it stays available
# to later cells.
if __name__ == "__main__":
    df = fetch_firms_df()
    load_df_to_postgres(df)


[LOG] Fetching FIRMS data: https://firms.modaps.eosdis.nasa.gov/api/country/csv/***/MODIS_NRT/USA/3
[LOG] Creating table satellite_signals if not exists...
[LOG] Inserting 771 rows using execute_values...
[LOG] ✔ Fast insert completed successfully.


In [7]:
# Display the frame fetched by the previous cell.
df

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,USA,19.40279,-155.29152,339.55,2.11,1.41,2025-06-23,28,Aqua,MODIS,89,6.1NRT,313.33,111.76,D
1,USA,19.40621,-155.27267,336.65,2.10,1.41,2025-06-23,28,Aqua,MODIS,81,6.1NRT,309.66,101.23,D
2,USA,62.76897,-145.07637,322.81,3.90,1.84,2025-06-23,39,Aqua,MODIS,71,6.1NRT,292.11,133.32,D
3,USA,62.77476,-145.07990,321.21,3.90,1.84,2025-06-23,39,Aqua,MODIS,70,6.1NRT,291.69,120.92,D
4,USA,64.14719,-147.20775,313.81,3.30,1.72,2025-06-23,39,Aqua,MODIS,26,6.1NRT,291.86,46.32,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
766,USA,38.95609,-81.92807,305.16,1.22,1.10,2025-06-25,854,Aqua,MODIS,63,6.1NRT,294.32,7.35,N
767,USA,42.68795,-111.59177,304.96,1.10,1.10,2025-06-25,1031,Aqua,MODIS,95,6.1URT,277.00,13.02,N
768,USA,37.36302,-113.44041,315.61,1.10,1.10,2025-06-25,1032,Aqua,MODIS,75,6.1URT,284.31,19.67,N
769,USA,37.36477,-113.45398,320.03,1.10,1.10,2025-06-25,1032,Aqua,MODIS,75,6.1URT,285.51,24.19,N
