In [8]:
import os
import requests
import pandas as pd
from io import StringIO
from dotenv import load_dotenv
from psycopg2.extras import execute_values
from utils import get_db_connection, log

# Load settings via python-dotenv, then read the FIRMS endpoint and API key
# from the process environment (either is None when the variable is unset).
load_dotenv()
NASA_URL = os.environ.get("NASA_FIRMS_URL")
API_KEY = os.environ.get("NASA_FIRMS_API_KEY")

# Columns kept from the FIRMS CSV, listed in the same order as the columns of
# the satellite_signals table created in load_df_to_postgres.
EXPECTED_COLUMNS = [
    "latitude",
    "longitude",
    "brightness",
    "scan",
    "track",
    "acq_date",
    "acq_time",
    "satellite",
    "confidence",
    "version",
    "bright_t31",
    "frp",
    "daynight",
]

def fetch_firms_df(country="USA", days=3, product="MODIS_NRT", timeout=30):
    """Download a FIRMS CSV extract and return it as a pandas DataFrame.

    The request URL is built as
    ``{NASA_URL}/{API_KEY}/{product}/{country}/{days}``.

    Args:
        country: Country path segment of the request URL.
        days: Day-range path segment of the request URL.
        product: Product path segment of the request URL.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        DataFrame parsed from the CSV text of the response body.

    Raises:
        Exception: If the response status code is not 200. The message
            contains the status code and the response body.
        requests.exceptions.RequestException: Propagated from ``requests``
            on connection failures or when ``timeout`` is exceeded.
    """
    url = f"{NASA_URL}/{API_KEY}/{product}/{country}/{days}"
    # The API key is a path segment of the URL, so never log the real URL:
    # notebook outputs and log files get shared and committed.
    log(f"Fetching FIRMS data: {NASA_URL}/<redacted>/{product}/{country}/{days}")
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"HTTP error {response.status_code}: {response.text}")
    return pd.read_csv(StringIO(response.text))

def load_df_to_postgres(df):
    """Bulk-insert FIRMS rows from ``df`` into the ``satellite_signals`` table.

    Column names are normalised (stripped, lower-cased, spaces replaced by
    underscores) on a copy, only columns listed in ``EXPECTED_COLUMNS`` are
    kept, and the rows are inserted with ``execute_values``. The table is
    created first if it does not already exist.

    Args:
        df: DataFrame such as the one returned by ``fetch_firms_df``. The
            caller's frame is not modified.

    Returns:
        None. Returns early, without opening a database connection, when
        ``df`` is empty.

    Raises:
        ValueError: If ``df`` has none of the columns in ``EXPECTED_COLUMNS``.
    """
    if df.empty:
        log("⚠️ DataFrame is empty. No data to insert.")
        return

    # Standardize column names on a renamed copy so the caller's DataFrame
    # keeps its original labels.
    normalized = df.rename(columns=lambda c: c.strip().lower().replace(" ", "_"))

    # Retain only expected columns. The INSERT below must name exactly these
    # columns, otherwise the column list and the row tuples disagree in length.
    present = [col for col in EXPECTED_COLUMNS if col in normalized.columns]
    if not present:
        raise ValueError(
            "DataFrame has none of the expected columns: "
            + ", ".join(EXPECTED_COLUMNS)
        )
    missing = [col for col in EXPECTED_COLUMNS if col not in present]
    if missing:
        log(f"⚠️ Missing columns will be left NULL: {', '.join(missing)}")

    # Cast to object so cells become plain Python scalars, and turn NaN into
    # None so missing values are stored as SQL NULL rather than NaN / 'NaN'.
    subset = normalized[present]
    subset = subset.astype(object).where(subset.notna(), None)
    values = list(subset.itertuples(index=False, name=None))

    with get_db_connection() as conn:
        with conn.cursor() as cur:
            log("Creating table satellite_signals if not exists...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS satellite_signals (
                    latitude FLOAT,
                    longitude FLOAT,
                    brightness FLOAT,
                    scan FLOAT,
                    track FLOAT,
                    acq_date DATE,
                    acq_time TEXT,
                    satellite TEXT,
                    confidence TEXT,
                    version TEXT,
                    bright_t31 FLOAT,
                    frp FLOAT,
                    daynight TEXT
                );
            """)
            conn.commit()

            # Column names come from the EXPECTED_COLUMNS whitelist, never from
            # the incoming data, so interpolating them into the SQL is safe.
            insert_sql = f"""
                INSERT INTO satellite_signals ({', '.join(present)})
                VALUES %s
            """
            log(f"Inserting {len(values)} rows using execute_values...")
            execute_values(cur, insert_sql, values)
        conn.commit()
    log("✔ Fast insert completed successfully.")

# Entry point: fetch the FIRMS extract with the default arguments and load it
# into Postgres. `df` stays at module scope, and the following notebook cell
# (In [9]) displays it.
if __name__ == "__main__":
    df = fetch_firms_df()
    load_df_to_postgres(df)


[LOG] Fetching FIRMS data: https://firms.modaps.eosdis.nasa.gov/api/country/csv/<redacted>/MODIS_NRT/USA/3
[LOG] Creating table satellite_signals if not exists...
[LOG] Inserting 460 rows using execute_values...
[LOG] ✔ Fast insert completed successfully.


In [9]:
# Display the DataFrame assigned to `df` by the previous cell.
df

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,USA,69.51749,-154.81786,306.10,1.52,1.22,2025-06-25,22,Aqua,MODIS,63,6.1NRT,291.93,7.79,D
1,USA,27.01926,-81.00515,311.92,1.00,1.00,2025-06-25,230,Terra,MODIS,82,6.1NRT,294.93,9.36,N
2,USA,28.67287,-82.04452,306.01,1.00,1.00,2025-06-25,232,Terra,MODIS,31,6.1NRT,295.14,3.86,N
3,USA,28.68025,-82.05610,308.97,1.00,1.00,2025-06-25,232,Terra,MODIS,73,6.1NRT,295.34,6.04,N
4,USA,28.68164,-82.04613,306.51,1.00,1.00,2025-06-25,232,Terra,MODIS,63,6.1NRT,295.15,4.43,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,USA,19.40753,-155.27623,310.83,2.14,1.42,2025-06-27,705,Terra,MODIS,60,6.1NRT,291.09,41.28,N
456,USA,41.12299,-85.34764,301.51,1.56,1.23,2025-06-27,833,Aqua,MODIS,42,6.1NRT,291.09,7.64,N
457,USA,33.41731,-110.85141,308.17,1.40,1.20,2025-06-27,1013,Aqua,MODIS,95,6.1URT,292.18,13.72,N
458,USA,33.41888,-110.85654,313.95,1.40,1.20,2025-06-27,1013,Aqua,MODIS,75,6.1URT,292.82,19.52,N
