### What this script does (short)

- Reads all .dat high-frequency sonic files in a given day’s folder.

- Splits data into 30-minute chunks (for 20 Hz → 30 min = 36 000 samples).

- Computes turbulent covariances/fluxes per chunk: wT_Flux, wrhoqv_Flux, wrhoco2_Flux, and momentum fluxes uw, vw, uv.

- Computes per-chunk averages of temperature (raw & corrected, in K), H₂O/CO₂ density, and wind components (Ux, Uy, Uz).

- Derives sensible heat flux (SHF) and latent heat flux (LHF).

- Saves flux_data_30min.csv in the same day folder.

Edit before running:
  
  1) Base folder for the specific day you want to process: e.g. data_dir = r"C:\path\to\your\Sonic\2024-05\2024-05-26"

  2) (Optional) Sampling-rate assumption for 30-min chunks:
     e.g. At 20 Hz → 30 min = 20 * 60 * 30 = 36000 samples
     
       If your sampling rate differs, update the step size below:
       
       for i in range(0, len(data), 36000):


Everything else can remain as-is. The script discovers all .dat files in data_dir, computes the 30-min statistics/fluxes, and writes the results to flux_data_30min.csv in that same folder.

In [None]:
import os
import pandas as pd

In [None]:
# Physical constants shared by the cells below.
Cp = 1005    # specific heat capacity of dry air at constant pressure [J/(kg K)]
g = 9.81     # gravitational acceleration [m/s^2]
rho = 1.2    # air density, fixed value (presumably kg/m^3 -- confirm)
Lv = 2.5e6   # latent heat of vaporization [J/kg]

In [None]:
# 1) Base folder for the specific day you want to process
# ⚠️ Edit this to your local path (year/month/day).
# Example: data_dir = r"D:\Thesis\data\Sonic\2024-05\2024-05-26"
data_dir = r"C:\path\to\your\Sonic\2024-05\2024-05-26"

# 2) (Optional) Sampling-rate assumption for 30-min chunks:
#    At 20 Hz → 30 min = 20 * 60 * 30 = 36000 samples
#    If your sampling rate differs, update this step size; the chunk loop
#    below takes its length from here.
SAMPLES_PER_30MIN = 20 * 60 * 30

# Columns that are coerced to numeric before any statistics are computed.
NUMERIC_COLUMNS = [
    'Ux', 'Uy', 'Uz',
    'T_SONIC', 'T_SONIC_corr',
    'H2O_density', 'CO2_density',
]

# Initialize lists to store 30-minute fluxes and corresponding timestamps
timestamps_30min = []
wT_fluxes_30min = []
wrhoqv_fluxes_30min = []
wrhoco2_fluxes_30min = []
average_temperatures_30min = []
average_temperatures_corr_30min = []
average_h2o_density_30min = []
average_co2_density_30min = []
average_wind_ux_30min = []
average_wind_uy_30min = []
average_wind_uz_30min = []
uw_flux_30min = []
vw_flux_30min = []
uv_flux_30min = []

# Loop through all .dat files in the directory.
# os.listdir() returns names in arbitrary order; sorting makes the row order
# of the output reproducible from run to run.
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith('.dat'):
        continue

    file_path = os.path.join(data_dir, filename)

    # Load data from the .dat file, skipping the first row
    data = pd.read_csv(file_path, skiprows=1, delimiter=',', encoding='latin1', low_memory=False)

    # Drop any rows with missing values
    data.dropna(inplace=True)

    # Convert the timestamp to datetime and the measurement columns to
    # numbers; anything unparseable becomes NaT / NaN instead of raising.
    data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'], errors='coerce')
    for column in NUMERIC_COLUMNS:
        data[column] = pd.to_numeric(data[column], errors='coerce')

    # Rows whose timestamp could not be parsed are not samples (e.g. extra
    # header rows such as units lines, if the logger file has them). They must
    # go *after* the coercion above, otherwise they shift the chunk boundaries
    # and can label a chunk with NaT.
    data = data.dropna(subset=['TIMESTAMP'])

    # Loop through each 30-minute chunk in the file. Chunks are cut by sample
    # count, so the last chunk of a file may cover less than 30 minutes.
    for i in range(0, len(data), SAMPLES_PER_30MIN):
        thirty_minute_data = data.iloc[i:i + SAMPLES_PER_30MIN]

        # Calculate 30-minute means (NaN samples are skipped by pandas)
        mean_wind_ux = thirty_minute_data['Ux'].mean()
        mean_wind_uy = thirty_minute_data['Uy'].mean()
        mean_wind_uz = thirty_minute_data['Uz'].mean()
        mean_T_sonic = thirty_minute_data['T_SONIC'].mean()
        mean_T_sonic_corr = thirty_minute_data['T_SONIC_corr'].mean()
        mean_h2o_density = thirty_minute_data['H2O_density'].mean()
        mean_co2_density = thirty_minute_data['CO2_density'].mean()

        # Perturbations (deviation of each sample from the chunk mean)
        u_prime = thirty_minute_data['Ux'] - mean_wind_ux
        v_prime = thirty_minute_data['Uy'] - mean_wind_uy
        w_prime = thirty_minute_data['Uz'] - mean_wind_uz
        t_sonic_prime = thirty_minute_data['T_SONIC_corr'] - mean_T_sonic_corr
        rhoqv_prime = thirty_minute_data['H2O_density'] - mean_h2o_density
        rhoco2_prime = thirty_minute_data['CO2_density'] - mean_co2_density

        # w'T' covariance. A temperature *perturbation* has the same value in
        # degC and in K, so no 273.15 offset belongs here: adding it would
        # contribute 273.15 * mean(w') over the jointly valid samples, which is
        # only zero when Uz and T_SONIC_corr have no mismatched gaps.
        wT_flux = (w_prime * t_sonic_prime).mean()

        # w'rho_qv' and w'rho_CO2' covariances
        wrhoqv_flux = (w_prime * rhoqv_prime).mean()
        wrhoco2_flux = (rhoco2_prime * w_prime).mean()

        # Momentum-flux covariances
        uw_flux = (u_prime * w_prime).mean()
        vw_flux = (v_prime * w_prime).mean()
        uv_flux = (u_prime * v_prime).mean()

        # Store data in lists. The chunk is labelled with its first sample's
        # timestamp rounded down to the half hour ('30min' replaces the
        # deprecated '30T' alias).
        timestamps_30min.append(thirty_minute_data['TIMESTAMP'].iloc[0].floor('30min'))
        wT_fluxes_30min.append(wT_flux)
        wrhoqv_fluxes_30min.append(wrhoqv_flux)
        wrhoco2_fluxes_30min.append(wrhoco2_flux)
        # Mean temperatures are shifted by 273.15 for output (degC -> K)
        average_temperatures_30min.append(mean_T_sonic + 273.15)
        average_temperatures_corr_30min.append(mean_T_sonic_corr + 273.15)
        average_h2o_density_30min.append(mean_h2o_density)
        average_co2_density_30min.append(mean_co2_density)
        average_wind_ux_30min.append(mean_wind_ux)
        average_wind_uy_30min.append(mean_wind_uy)
        average_wind_uz_30min.append(mean_wind_uz)
        uw_flux_30min.append(uw_flux)
        vw_flux_30min.append(vw_flux)
        uv_flux_30min.append(uv_flux)

# Create DataFrame for the collected data (one row per chunk)
flux_data_30min = pd.DataFrame({
    'TIMESTAMP': timestamps_30min,
    'wT_Flux': wT_fluxes_30min,
    'wrhoqv_Flux': wrhoqv_fluxes_30min,
    'Average_Temperature': average_temperatures_30min,
    'Average_Temperature_Corr': average_temperatures_corr_30min,
    'Average_H2O_Density': average_h2o_density_30min,
    'Average_CO2_Density': average_co2_density_30min,
    'wrhoCO2_Flux': wrhoco2_fluxes_30min,
    'Average_Wind_Ux': average_wind_ux_30min,
    'Average_Wind_Uy': average_wind_uy_30min,
    'Average_Wind_Uz': average_wind_uz_30min,
    'uw_flux': uw_flux_30min,
    'vw_flux': vw_flux_30min,
    'uv_flux': uv_flux_30min,
})

print(flux_data_30min)


In [None]:
# Sensible heat flux (SHF): the w'T' covariance scaled by air density and Cp
flux_data_30min['SHF'] = flux_data_30min['wT_Flux'].mul(rho).mul(Cp)

# Latent heat flux (LHF): first express the water-vapour density flux per unit
# air density, then scale by rho * Lv. The final division by 1000 is presumably
# a g -> kg conversion of H2O_density -- confirm against the logger's units.
flux_data_30min['wqv_Flux'] = flux_data_30min['wrhoqv_Flux'].div(rho)
flux_data_30min['LHF'] = flux_data_30min['wqv_Flux'].mul(rho).mul(Lv).div(1000)

print(flux_data_30min)

In [None]:
# The result file is written into the same day folder the raw data came from
output_file_path = os.path.join(data_dir, 'flux_data_30min.csv')

# index=False keeps the positional row index out of the file; TIMESTAMP is
# already an ordinary column
flux_data_30min.to_csv(path_or_buf=output_file_path, index=False)

print(f"Data successfully saved to {output_file_path}")