# In [None]:
import os
import netCDF4 as nc
import numpy as np

def extract_nc_files(base_dir, target_month='001'):
    """Collect the paths of all ``.nc`` files two levels below ``base_dir``.

    The layout walked is ``base_dir/<target_month>/<sub-directory>/<file>.nc``.
    Only entries whose name ends with ``.nc`` and that sit directly inside a
    sub-directory of the month path are returned.

    NOTE(review): the default ``'001'`` and the example path suggest this
    level may actually be a day-of-year folder (with hour folders beneath it)
    rather than a month -- confirm against the data layout.

    Args:
        base_dir: Root directory that contains the per-month folders.
        target_month: Name of the folder inside ``base_dir`` to scan.

    Returns:
        A list of file paths, ordered by sub-directory name and then by file
        name. The list is empty when the month path is missing, is not a
        directory, or holds no ``.nc`` files.
    """
    nc_files = []
    month_path = os.path.join(base_dir, target_month)

    # isdir (not exists): a regular file at this path would otherwise make
    # os.listdir raise NotADirectoryError below.
    if not os.path.isdir(month_path):
        print(f"Month path does not exist: {month_path}")
        return nc_files

    # os.listdir yields entries in arbitrary order; sort both levels so the
    # resulting sequence is deterministic across runs and file systems.
    for day in sorted(os.listdir(month_path)):
        day_path = os.path.join(month_path, day)
        if not os.path.isdir(day_path):
            continue
        nc_files.extend(
            os.path.join(day_path, name)
            for name in sorted(os.listdir(day_path))
            if name.endswith('.nc')
        )

    print(f"Found {len(nc_files)} .nc files for month {target_month}")
    return nc_files

def extract_metadata(file_path):
    """Read satellite position and time-coverage metadata from a NetCDF file.

    Args:
        file_path: Path of the NetCDF file to open.

    Returns:
        A dict with the keys ``latitude``, ``longitude`` and ``elevation``
        (the full contents of the ``nominal_satellite_subpoint_lat``,
        ``nominal_satellite_subpoint_lon`` and ``nominal_satellite_height``
        variables) plus ``time_coverage_start`` and ``time_coverage_end``
        (the dataset attributes of the same names). Returns ``None`` if the
        file cannot be opened or any of those entries is missing; the error
        is printed rather than raised.
    """
    try:
        # The context manager closes the file even when a variable or
        # attribute lookup fails, so a bad file no longer leaks its handle.
        with nc.Dataset(file_path) as dataset:
            return {
                'latitude': dataset.variables['nominal_satellite_subpoint_lat'][:],
                'longitude': dataset.variables['nominal_satellite_subpoint_lon'][:],
                'elevation': dataset.variables['nominal_satellite_height'][:],
                'time_coverage_start': dataset.time_coverage_start,
                'time_coverage_end': dataset.time_coverage_end
            }
    except Exception as e:
        # Best-effort by design: callers treat None as "skip this file".
        print(f"Error extracting metadata from file {file_path}: {e}")
        return None

def extract_LST_data(file_path):
    """Compute a land-surface-temperature (LST) estimate from a NetCDF file.

    The ``CMI_C14`` and ``CMI_C15`` variables (T14, T15) are combined with
    fixed coefficients as::

        Ts = T14 + C*(T14 - T15)
             + A1*(T14 - T15) * exp(-A2/T14) * (1 - exp(-A3/T15))
             + D*(1/cos(theta) - 1)

    where ``theta`` is read from ``nominal_satellite_subpoint_lat`` and
    converted from degrees to radians.

    NOTE(review): the coefficients look like placeholders, and the secant
    term uses the satellite sub-point latitude as its angle -- confirm both
    are intended before relying on the absolute values.

    Args:
        file_path: Path of the NetCDF file to open.

    Returns:
        The array ``Ts`` computed by the formula above, or ``None`` if the
        file cannot be opened, a variable is missing, or the computation
        fails; the error is printed rather than raised.
    """
    try:
        # The context manager closes the file even when a variable lookup
        # fails, so a bad file no longer leaks its handle. ``[:]`` reads the
        # values into memory, so they remain usable after the file is closed.
        with nc.Dataset(file_path) as dataset:
            T14 = dataset.variables['CMI_C14'][:]
            T15 = dataset.variables['CMI_C15'][:]
            theta = dataset.variables['nominal_satellite_subpoint_lat'][:]

        # Fixed coefficients of the formula in the docstring.
        C = 1.0
        A1 = 0.5
        A2 = 0.1
        A3 = 0.2
        D = 0.5

        Ts = T14 + C * (T14 - T15) + A1 * (T14 - T15) * np.exp(-A2 / T14) * (1 - np.exp(-A3 / T15)) + D * (1 / np.cos(np.deg2rad(theta)) - 1)

        return Ts
    except Exception as e:
        # Best-effort by design: callers receive None for unreadable files.
        print(f"Error extracting LST data from file {file_path}: {e}")
        return None

def create_input_sequence(nc_files):
    """Build one record per readable file, in the order the paths are given.

    For every path, the metadata is extracted first; files whose metadata
    cannot be read are reported and skipped. For the remaining files the LST
    data is extracted and stored next to the metadata fields. The LST entry
    is kept as returned by ``extract_LST_data``, so it is ``None`` when that
    extraction failed.

    Args:
        nc_files: Iterable of NetCDF file paths.

    Returns:
        A list of dicts with the keys ``latitude``, ``longitude``,
        ``elevation``, ``time_coverage_start``, ``time_coverage_end`` and
        ``LST_data``.
    """
    metadata_keys = (
        'latitude',
        'longitude',
        'elevation',
        'time_coverage_start',
        'time_coverage_end',
    )
    records = []

    for path in nc_files:
        meta = extract_metadata(path)
        if meta is None:
            print(f"Skipping file {path} due to missing metadata.")
            continue

        lst = extract_LST_data(path)

        # Copy only the known metadata fields, then attach the LST result.
        record = {key: meta[key] for key in metadata_keys}
        record['LST_data'] = lst
        records.append(record)

    return records

# Example usage: scan one folder of the local GOES-16 MCMIPC archive and,
# when it contains any .nc files, turn them into an input sequence.
base_dir = "/Users/srivarshiniksheerasagar/data/noaa-goes16/ABI-L2-MCMIPC/2022"
target_month = '001'
nc_files = extract_nc_files(base_dir, target_month)

if nc_files:
    input_sequence = create_input_sequence(nc_files)
    print("Input sequence created successfully.")
else:
    # Nothing to process; input_sequence is intentionally left undefined.
    print(f"No .nc files found for the specified month {target_month}.")
