In [2]:
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
from netCDF4 import Dataset
from pyproj import Proj, Transformer
from tqdm import tqdm

# ----------------------------
# CONFIG
# ----------------------------
# Folder whose ".nc" files main() collects (top level only, via os.listdir).
INPUT_DIR = r"E:\GOES-R Lightning Data\Test Batch"
# Path of the GeoJSON FeatureCollection that main() writes.
OUTPUT_FILE = r"E:\GOES-R Lightning Data\Processed\new_conversation_v2.geojson"
# Thread count handed to ThreadPoolExecutor in main().
MAX_WORKERS = 3   # Adjust based on CPU cores

# ----------------------------
# Helpers
# ----------------------------
def is_east_file(ds):
    """Tell whether *ds* identifies itself as a GOES-East product.

    Reads the ``orbital_slot`` attribute of *ds* (treated as an empty string
    when absent) and reports whether it contains "east", ignoring case.
    """
    orbital_slot = getattr(ds, "orbital_slot", "")
    lowered = orbital_slot.lower()
    return lowered.find("east") >= 0

def process_file(file_path):
    """Extract the brightest flash location from one GOES-West file.

    Args:
        file_path: Path of a NetCDF file that provides an ``orbital_slot``
            global attribute, a ``goes_imager_projection`` variable, ``x`` and
            ``y`` coordinate variables and a ``Total_Optical_energy`` grid.

    Returns:
        A GeoJSON ``Feature`` dict (Point geometry plus ``file``, ``lon``,
        ``lat`` and ``energy`` properties) for the pixel with the highest
        energy, or ``None`` when the file is skipped: it is not a GOES-West
        file, it holds no positive energy value, the pixel does not map to a
        finite lon/lat, or reading/processing the file failed.
    """
    try:
        with Dataset(file_path, "r") as ds:
            # Only GOES-West is wanted; east and unknown slots are skipped.
            slot = getattr(ds, "orbital_slot", "").strip().lower()
            if "east" in slot or "west" not in slot:
                return None

            # Projection parameters.
            proj_info = ds.variables["goes_imager_projection"]
            lon_origin = float(proj_info.longitude_of_projection_origin)
            # PROJ's geos "h" is the height of the view point above the
            # ellipsoid (perspective_point_height), not the distance from the
            # Earth's centre, so semi_major_axis must not be added to it.
            sat_height = float(proj_info.perspective_point_height)
            r_eq = float(proj_info.semi_major_axis)
            r_pol = float(proj_info.semi_minor_axis)
            # PROJ defaults to sweep=y; GOES-R scans with sweep axis x, so the
            # axis is taken from the file and falls back to "x".
            sweep = str(getattr(proj_info, "sweep_angle_axis", "x"))

            # Coordinate axes (scan angles in radians).
            x = ds.variables["x"][:]
            y = ds.variables["y"][:]

            # netCDF4 returns masked arrays by default; turn masked pixels
            # into NaN so the nan-aware reductions below can ignore them.
            energy_var = ds.variables["Total_Optical_energy"]
            data = np.ma.filled(np.ma.asarray(energy_var[:]).astype(float), np.nan)
            # Covers files read with auto-masking disabled.
            fill_value = getattr(energy_var, "_FillValue", None)
            if fill_value is not None:
                data[data == fill_value] = np.nan

            if np.all(np.isnan(data)):
                return None

            # Brightest pixel; the grid is indexed as (y, x).
            iy, ix = np.unravel_index(np.nanargmax(data), data.shape)
            max_value = data[iy, ix]
            if not max_value > 0:  # also rejects NaN
                return None

            x_rad = float(x[ix])
            y_rad = float(y[iy])

            # Geostationary projection -> geographic lon/lat.
            p = Proj(
                proj="geos",
                h=sat_height,
                lon_0=lon_origin,
                a=r_eq,
                b=r_pol,
                sweep=sweep,
                units="m",
            )
            transformer = Transformer.from_proj(p, "epsg:4326", always_xy=True)

            # Projection coordinate (m) = scan angle (rad) * h.
            lon, lat = transformer.transform(x_rad * sat_height, y_rad * sat_height)

            # Pixels that look past the Earth's limb have no finite lon/lat.
            if not np.isfinite(lon) or not np.isfinite(lat):
                return None

            lon = float(lon)
            lat = float(lat)
            return {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [lon, lat]},
                "properties": {
                    "file": os.path.basename(file_path),
                    "lon": lon,
                    "lat": lat,
                    "energy": float(max_value)
                }
            }

    except Exception as exc:
        # Best effort: one bad file must not stop the batch, but the reason is
        # logged so real failures can be told apart from intentional skips.
        logging.getLogger(__name__).warning(
            "Skipping %s: %s: %s", file_path, type(exc).__name__, exc
        )
        return None

# ----------------------------
# Main
# ----------------------------
def main():
    """Convert every ``.nc`` file in INPUT_DIR into one GeoJSON file.

    Each file is handed to ``process_file`` on a pool of MAX_WORKERS threads.
    Every non-empty result becomes a feature of the FeatureCollection written
    to OUTPUT_FILE; empty results are counted as skipped. Progress and the
    final counts are printed.
    """
    files = sorted(
        os.path.join(INPUT_DIR, name)
        for name in os.listdir(INPUT_DIR)
        if name.endswith(".nc")
    )
    print(f"Found {len(files)} NetCDF files")

    # Create the output folder before the long run so that a missing
    # directory cannot discard the results after every file was processed.
    out_dir = os.path.dirname(OUTPUT_FILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    results = []
    skipped = 0

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(process_file, path) for path in files]
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing files"):
            res = future.result()
            if res:
                results.append(res)
            else:
                skipped += 1

    # as_completed yields in completion order, which varies between runs;
    # sort by file name so the written output is reproducible.
    results.sort(key=lambda feature: feature["properties"]["file"])

    # Save GeoJSON
    geojson = {"type": "FeatureCollection", "features": results}
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(geojson, f)

    print(f"Processed {len(results)} features")
    print(f"Skipped {skipped} files")

# Start the batch conversion only when this file runs as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Found 1959 NetCDF files


Processing files: 100%|██████████| 1959/1959 [06:56<00:00,  4.71it/s]

Processed 314 features
Skipped 1645 files



