# Processing FRP and Temperature Data for ROI

This code processes FRP and Temperature data from NetCDF files for a specified region of interest (ROI), calculates emission estimates, and saves the results in CSV files.

1. Get Files Function  
The `get_files` function retrieves relevant satellite files within a specified year, day, and hour range. It handles missing files gracefully using a `try-except` block.

2. Get Indexes and Matrix Function  
   `get_indexes_v2` creates a 0.5°x0.5° grid within the ROI (the grid on which the FEER coefficients are defined), generating a matrix of grid-cell centers and a mask of valid data points for the whole ROI and for each grid cell.

3. Process FRP and Temperature Function  
   Processes FRP and Temperature data for the entire ROI and saves the results to CSV files.  
   **Input**: List of files and valid indices  
   **Output**: CSV files with valid FRP and Temperature values

In [None]:
# Import necessary libraries
# os: Provides functions to interact with the operating system, such as file and directory operations
import os
# BytesIO: Enables reading and writing of binary data in memory as if it were a file
from io import BytesIO
# s3fs: A Pythonic interface to Amazon S3, allowing for easy file operations on S3 buckets
import s3fs
# xarray: A powerful library for working with multi-dimensional arrays, particularly for geospatial and time-series data
import xarray as xr
# numpy: Provides support for large, multi-dimensional arrays and matrices, along with a collection of mathematical functions
import numpy as np
# glob: Used for finding all pathnames matching a specified pattern, useful for file pattern matching in directories
import glob
# pyproj: Provides tools for working with projections and coordinate transformations, such as converting between latitude/longitude and projected coordinates
from pyproj import Proj
# pandas: A data analysis library that provides data structures like DataFrame for handling structured data, useful for working with time-series and tabular data
import pandas as pd
# warnings: Used to issue warnings to the user, often to alert about potential issues or deprecated features
import warnings

In [3]:
# Define input and output folders and files.
# This directory receives the CSV files with the processed data.
datadir = '/home/jovyan/Article_review/Data/'

# Output directory, year to process and output file names.
# Running this algorithm one year at a time is recommended for the later plots.
outdir = datadir
Year = 2022
outfile = outdir + 'array_T_data_' + str(Year) + '_150_350.csv'   # Temperature values
outfile2 = outdir + 'array_P_data_' + str(Year) + '_150_350.csv'  # FRP values

# File headers (each CSV holds a single column)
aux1 = 'array_temp\n'
aux2 = 'array_frp\n'
header = aux1
header2 = aux2

outstring = ''

# Create both CSV files with only their header line. The context managers
# close each file even if the write fails; `outfn` / `outfn2` stay bound to
# the (closed) file objects afterwards, as before.
with open(outfile, 'w') as outfn:
    outfn.write(header)
with open(outfile2, 'w') as outfn2:
    outfn2.write(header2)

# Initialize S3 file system (anon=True: no credentials are supplied)
fs = s3fs.S3FileSystem(anon=True)

In [None]:
# Initialize geometric variables

def get_lat_lon(file_system):
    """Compute the latitude/longitude matrices of the satellite grid.

    The geostationary projection parameters and the x/y coordinates are read
    from the first file listed for 2022, day 200, hour 15 in the
    'noaa-goes16/ABI-L2-FDCF' directory, and every (x, y) grid point is
    converted to geographic coordinates.

    Parameters
    ----------
    file_system
        File-system object providing ``ls(path)`` and ``open(path, mode)``
        (the notebook passes its s3fs handle). It is used for both the
        listing and the read.

    Returns
    -------
    rlat, rlon
        Latitude and longitude arrays built from ``np.meshgrid(x, y)``, i.e.
        one value per grid point.
    """
    # Reference directory used only to read the grid geometry
    listing_path = ('noaa-goes16/ABI-L2-FDCF/2022/'
                    + str(200).zfill(3) + '/' + str(15).zfill(2) + '/')
    files = file_system.ls(listing_path)

    # Read the first file into memory through the file system that was passed
    # in (not the module-level handle), then parse it with the 'h5netcdf' engine.
    with file_system.open(files[0], 'rb') as f:
        payload = BytesIO(f.read())

    with xr.open_dataset(payload, engine='h5netcdf') as ds0:
        # Satellite geometric parameters that define the projection
        projection = ds0.goes_imager_projection
        sat_h = projection.perspective_point_height          # Satellite height
        sat_lon = projection.longitude_of_projection_origin  # Longitude of the projection origin
        sat_sweep = projection.sweep_angle_axis              # Sweep angle axis

        # Scale the x and y coordinates by the satellite height for the projection
        X = np.array(ds0.x) * sat_h
        Y = np.array(ds0.y) * sat_h

    # Geostationary projection used to convert (x, y) into (longitude, latitude)
    p = Proj(proj='geos', h=sat_h, lon_0=sat_lon, sweep=sat_sweep)

    # Mesh grids of the projected coordinates
    XX, YY = np.meshgrid(X, Y)

    # Inverse projection: projected coordinates -> geographic coordinates
    rlon, rlat = p(XX, YY, inverse=True)

    return rlat, rlon

In [None]:
# Function to collect the file names for the period of interest, with error
# handling for hour directories that do not exist in the bucket.

def get_files(s_year, e_year, s_day, e_day, s_hour, e_hour):
    """List the NOAA GOES-16 ABI-L2-FDCF files for a year/day/hour range.

    Parameters
    ----------
    s_year, e_year : int
        First and last year to list (``e_year`` is included).
    s_day, e_day : int
        Day-of-year range; ``e_day`` itself is NOT included.
    s_hour, e_hour : int
        Hour range; ``e_hour`` itself is NOT included (0, 24 covers a full day).

    Returns
    -------
    numpy.ndarray of str, or an empty list
        The listed file names in listing order. When nothing was listed the
        empty list is returned.

    Notes
    -----
    Uses the module-level ``fs`` file system. Hour directories that raise
    ``FileNotFoundError`` are reported and skipped.
    """
    print('Getting file names')
    found = []  # Plain list: appending is O(1), unlike np.append which copies everything
    for y in range(s_year, e_year + 1):
        for d in range(s_day, e_day):
            for j in range(s_hour, e_hour):
                # One directory per year / day-of-year / hour
                folder = ('noaa-goes16/ABI-L2-FDCF/' + str(y) + '/'
                          + str(d).zfill(3) + '/' + str(j).zfill(2) + '/')
                try:
                    found.extend(fs.ls(folder))
                except FileNotFoundError as e:
                    # Missing directory: report it and move on to the next hour
                    print(f"FileNotFoundError file {folder}: {e}. Skipping this file.")

    if not found:
        return found
    # Convert once at the end so callers still receive a NumPy array of names
    return np.array(found)

In [6]:
# Function to create a 0.5° x 0.5° grid and return the corresponding indices for a given region of interest (ROI)
def get_indexes_v2(min_lon, max_lon, min_lat, max_lat, rlat, rlon):
    """Build the 0.5° x 0.5° grid of the ROI and the pixel indices it selects.

    Parameters
    ----------
    min_lon, max_lon, min_lat, max_lat : float
        ROI bounds in degrees. Only these arguments are used; the function
        does not read any module-level ROI variables.
    rlat, rlon : numpy.ndarray
        Latitude and longitude of every pixel (same shape).

    Returns
    -------
    index_list : list
        ``index_list[0]`` is the ``np.where`` result for the whole ROI;
        ``index_list[k + 1]`` is the ``np.where`` result for the grid cell
        whose center is ``matrix[k]`` (center ± 0.25° in both directions).
    matrix : numpy.ndarray
        Array of shape (n_cells, 2) holding (latitude, longitude) of each
        cell center, ordered latitude by latitude.
    """
    # Two cells per degree; rounding the doubled span also counts the last
    # cell of half-degree spans (e.g. 1.5° -> 3 cells).
    n_lon = int(round((max_lon - min_lon) * 2))
    n_lat = int(round((max_lat - min_lat) * 2))

    # Centers of each 0.5° x 0.5° grid element within the ROI
    centers_lon = np.linspace(min_lon + 0.25, max_lon - 0.25, num=n_lon)
    centers_lat = np.linspace(min_lat + 0.25, max_lat - 0.25, num=n_lat)

    # (latitude, longitude) pairs: every latitude is paired with all longitudes
    matrix = np.column_stack((np.repeat(centers_lat, n_lon),
                              np.tile(centers_lon, n_lat)))

    # First entry: pixels inside the whole ROI
    index_list = [np.where((rlat >= min_lat) & (rlat <= max_lat) &
                           (rlon >= min_lon) & (rlon <= max_lon))]

    # Following entries: pixels inside each grid cell, in matrix order
    for lat_c, lon_c in matrix:
        index_list.append(np.where((rlat >= lat_c - 0.25) & (rlat <= lat_c + 0.25) &
                                   (rlon >= lon_c - 0.25) & (rlon <= lon_c + 0.25)))

    return index_list, matrix

In [8]:
# Main function to process files: collects the valid FRP (Fire Radiative Power)
# and Temperature values inside the ROI and saves them in CSV files for
# further analysis
def process_data_v6(files, indexes):
    """Write the valid FRP and Temperature values of the ROI to the CSV files.

    For each file, the ``Power`` and ``Temp`` variables are read, restricted
    to ``indexes``, stripped of NaN values, and written one value per line to
    ``outfile`` (Temperature) and ``outfile2`` (FRP).

    Parameters
    ----------
    files : sequence of str
        Paths of the files to read through the module-level ``fs``.
    indexes
        Index used to select the ROI pixels from the ``Power`` / ``Temp``
        arrays (the notebook passes an ``np.where`` result).

    Returns
    -------
    None

    Notes
    -----
    A file that raises ``OSError`` while being read, parsed or written is
    reported with ``print`` and skipped. Relies on the module-level names
    ``fs``, ``outdir``, ``outfile`` and ``outfile2``.
    """
    # Change the directory to save the CSV files
    os.chdir(outdir)

    # NOTE(review): mode 'w' truncates both files, so any header line written
    # to them beforehand is discarded -- confirm whether the header is wanted.
    with open(outfile, 'w') as outfn, open(outfile2, 'w') as outfn2:
        for path in files:
            try:
                # Download the whole file into memory, then parse it
                with fs.open(path, 'rb') as f:
                    payload = BytesIO(f.read())

                with xr.open_dataset(payload, engine='h5netcdf') as ds:
                    # Extract the datetime information from the file name
                    prodbase = path.split('/')[5][:23]
                    starttime = path.split(prodbase)[1].split('_')[0]
                    year, julian, hhmm = starttime[:4], starttime[4:7], starttime[7:11]
                    print(f'Processing year: {year}, day: {julian}, hour: {hhmm}', end='\r')

                    # Load both matrices before the dataset is closed
                    P = np.array(ds.Power)  # FRP (Power) matrix
                    T = np.array(ds.Temp)   # Temperature matrix

                # Keep only the pixels of the region of interest (ROI)
                P_box_amazon = P[indexes]
                T_box_amazon = T[indexes]

                # Keep only the valid (non-NaN) values
                array_P_box_amazon = P_box_amazon[~np.isnan(P_box_amazon)]
                array_T_box_amazon = T_box_amazon[~np.isnan(T_box_amazon)]

                # One value per line: Temperature in the first file, FRP in the second
                outfn.writelines(f'{value}\n' for value in array_T_box_amazon)
                outfn2.writelines(f'{value}\n' for value in array_P_box_amazon)

            # Report the failure and continue with the next file
            except OSError as error:
                print(error)

    # Print completion message
    print('Done')


In [9]:
###############################################################################
# Region of interest (ROI), bounds in degrees
minlon, maxlon = -57, -54  # Amazon small box (longitude)
minlat, maxlat = -9, -6    # Amazon small box (latitude)
# Alternative ROIs (minlon, maxlon, minlat, maxlat):
#   -56, -54, -9, -7              Amazon small box - corrected
#   -72, -48, -11, -3             Amazon ROI
#   -57.5, -56.5, -17.5, -16.5    cerrado big box

# Starting message
print('Compiling statistics')

# Latitude / longitude of every satellite pixel
rlat, rlon = get_lat_lon(fs)

# ROI indices and the matrix of 0.5° x 0.5° cell centers
Indexes, M = get_indexes_v2(
    min_lon=minlon,
    max_lon=maxlon,
    min_lat=minlat,
    max_lat=maxlat,
    rlat=rlat,
    rlon=rlon,
)
print('Got indexes and matrix')

# Period of interest: a single year, days 150-350, hours 0-24
start_year = end_year = Year
start_day, end_day = 150, 350
start_hour, end_our = 0, 24
data_list = get_files(
    s_year=start_year,
    e_year=end_year,
    s_day=start_day,
    e_day=end_day,
    s_hour=start_hour,
    e_hour=end_our,
)
print('Data listed')

Compiling statistics
Got indexes and matrix
Getting file names
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/11/: noaa-goes16/ABI-L2-FDCF/2022/256/11. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/12/: noaa-goes16/ABI-L2-FDCF/2022/256/12. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/13/: noaa-goes16/ABI-L2-FDCF/2022/256/13. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/14/: noaa-goes16/ABI-L2-FDCF/2022/256/14. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/15/: noaa-goes16/ABI-L2-FDCF/2022/256/15. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/16/: noaa-goes16/ABI-L2-FDCF/2022/256/16. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/17/: noaa-goes16/ABI-L2-FDCF/2022/256/17. Skipping this file.
FileNotFoundError file noaa-goes16/ABI-L2-FDCF/2022/256/18/: noaa-goes16/ABI-L2-FDCF/2022/256/18. Skipping this

In [10]:
print('Starting process data')
# This version of the code only needs the index of the whole ROI (first entry);
# the 0.5x0.5 grid indices are not necessary here.
process_data_v6(files=data_list, indexes=Indexes[0])

Starting process data
Doneessing year: 2022, day: 349, hour: 2350
