### What the script does

- Opens all .NC files for a given day (MET, TPC, HPC, BLH).

- Converts sensor time to datetimes (epoch 2001-01-01).

- Builds tidy tables:

    - MET: surface pressure/temperature vs time.

    - TPC: temperature profiles (T), computes saturation vapor pressure es(T).

    - HPC: relative humidity profiles (RH).

    - Merges MET+TPC+HPC by (Time, Altitude).

- Computes:

    - Hydrostatic pressure profile with a trapezoidal integration from surface pressure.

    - Vapor pressure ev = RH * es / 100.

    - Specific humidity qv from ev and pressure.

    - Virtual potential temperature θv (two ways: direct + alternative using θ).

- Resamples profiles to 10-minute means and saves a Parquet dataset.

- Makes several quicklook plots (profiles at selected times, comparison of θv methods, etc.).

- Loads BLH product and plots BLH time series.

- Estimates CBL height z_i from θv with:

    - Gradient method (max dθv/dz, smoothed).

    - Parcel method (first z where θv(z) > θv_surface).

- Saves z_i time series to CSV.

#### Lines / places you should change

- Input folder (where the day’s .NC files live): folder_path = r"C:\path\to\your\Microwave_radiometer\2024-05\2024-05-13"


In [None]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt
from datetime import datetime, timedelta
from scipy.ndimage import gaussian_filter1d

from matplotlib.backends.backend_pdf import PdfPages


In [None]:
# Constants shared by all thermodynamic helpers in this notebook
C_p = 1005  # Specific heat capacity of dry air at constant pressure (J/kg/K)
g = 9.81    # Gravitational acceleration (m/s^2)
Ttrip = 273.16  # Triple point temperature in Kelvin
Rd=287.04  # Gas constant used in the hydrostatic step and in the Rd/C_p exponent (presumably dry air, J/kg/K)
Rv=461.5  # Gas constant paired with Rd in the humidity formulas (presumably water vapour, J/kg/K)
epsilon=(Rv/Rd)-1  # Dimensionless factor (about 0.608) multiplying humidity in the theta_v formulas

In [None]:
# Turn "seconds since the epoch" values into datetime objects
def convert_time(base_time, time_array):
    """Return one datetime per entry of time_array.

    Each entry is truncated to whole seconds with int() and added to
    base_time as a timedelta. The result is a plain list in input order.
    """
    stamps = []
    for seconds in time_array:
        offset = timedelta(seconds=int(seconds))
        stamps.append(base_time + offset)
    return stamps
# Saturation vapor pressure (es) as a function of temperature (T)
def calculate_saturation_vapor_pressure(T):
    """Return es for temperature T using an exponential fit around Ttrip.

    T may be a scalar or a NumPy array; the result has the same shape.
    NOTE(review): the 610.78 prefactor suggests es is in Pa for T in kelvin;
    confirm against the instrument documentation.
    """
    exponent = 17.2694 * (T - Ttrip) / (T - 35.86)
    return 610.78 * np.exp(exponent)
# Specific humidity (qv) from vapor pressure (ev) and atmospheric pressure (p)
def calculate_specific_humidity(ev, p):
    """Return specific humidity scaled by 1000 (i.e. g/kg for a kg/kg ratio).

    p is multiplied by 100 before use, so it is expected in hPa while ev is
    used as given (same unit as p * 100).
    """
    pressure_scaled = p*100
    gas_ratio_excess = (Rv / Rd) - 1
    denominator = pressure_scaled + (gas_ratio_excess * (pressure_scaled - ev))
    return ev * 1000 / denominator


In [None]:
'''
# Function to calculate pressure at different heights using numerical integration
def calculate_pressure_at_heights(df):
    calculated_pressures = []  # To store pressure values for all times and altitudes
    
    # Group the dataframe by Time
    for time, group in df.groupby('Time'):
        group = group.sort_values('Altitude').reset_index(drop=True)  # Sort by altitude
        
        # Start with surface pressure at the first (lowest) altitude
        P0 = group['Surface Pressure'].iloc[0] #* 100  # Convert hPa to Pa
        pressures = [P0]
        
        # Perform numerical integration for each altitude step
        for i in range(1, len(group)):
            z1 = group['Altitude'].iloc[i-1]
            z2 = group['Altitude'].iloc[i]
            T1 = group['Temperature'].iloc[i-1]
            
            # Calculate pressure difference using trapezoidal rule
            delta_z = z2 - z1
            P_prev = pressures[-1]
            delta_P = -g * P_prev * delta_z / (Rd * T1)
            P_next = P_prev + delta_P
            pressures.append(P_next)
        
        # Store the calculated pressures for this group
        calculated_pressures.extend(pressures)
    
    return calculated_pressures
'''



In [None]:
def calculate_pressure_at_heights(df):
    """Integrate the hydrostatic equation upward from the surface per time step.

    Rows are grouped by 'Time'. Within a group the levels are ordered by
    'Altitude' and the pressure is advanced level by level with a
    predictor-corrector (trapezoidal) step, starting from the
    'Surface Pressure' of the lowest level.

    Parameters:
        df: DataFrame with the columns 'Time', 'Altitude', 'Temperature' and
            'Surface Pressure'. Temperature enters as g / (Rd * T), so it is
            expected in kelvin with altitude in metres.

    Returns:
        list with one pressure per row of df, in the same unit as
        'Surface Pressure'. The values are returned in the ROW ORDER of df,
        so the list can be assigned straight back as a column even when df is
        not sorted by (Time, Altitude). Rows whose 'Time' is missing belong to
        no group and get NaN.
    """
    altitude = df['Altitude'].to_numpy()
    temperature = df['Temperature'].to_numpy()
    surface_pressure = df['Surface Pressure'].to_numpy()

    # One slot per input row; filled group by group at the rows' own positions.
    calculated_pressures = np.full(len(df), np.nan)

    # GroupBy.indices maps each time to the integer positions of its rows.
    for positions in df.groupby('Time').indices.values():
        positions = np.asarray(positions)
        # Positions of this profile's rows, lowest altitude first.
        ordered = positions[np.argsort(altitude[positions], kind='stable')]
        z = altitude[ordered]
        T = temperature[ordered]

        # The surface pressure of the lowest level seeds the integration.
        pressures = [surface_pressure[ordered[0]]]

        for z1, z2, T1, T2 in zip(z[:-1], z[1:], T[:-1], T[1:]):
            P1 = pressures[-1]
            delta_z = z2 - z1

            # Predictor: exponential step using the lower-level temperature.
            P2_guess = P1 * np.exp(-g * delta_z / (Rd * T1))

            # Corrector: average of dP/dz at both ends of the layer.
            delta_P = 0.5 * (-(g * P1 / (Rd * T1)) + -(g * P2_guess / (Rd * T2))) * delta_z
            pressures.append(P1 + delta_P)

        calculated_pressures[ordered] = pressures

    return calculated_pressures.tolist()



In [None]:
# Direct theta_v estimate for one row of the combined table
def calculate_theta_v(row):
    """Return theta_v for a row with 'Temperature', 'Altitude' and
    'Specific Humidity (qv)'.

    The temperature is raised by g * z / C_p and then multiplied by the
    moisture factor 1 + epsilon * qv / 1000 (qv is divided by 1000 here, so
    it is expected per thousand, e.g. g/kg).
    """
    height_adjusted_temperature = row['Temperature'] + g * (row['Altitude'] / C_p)
    moisture_factor = 1 + (epsilon * row['Specific Humidity (qv)'] / 1000)
    return height_adjusted_temperature * moisture_factor


In [None]:


# Potential temperature of a level relative to a reference (surface) pressure
def calculate_potential_temperature(T, P_i, P_0):
    """
    Return theta = T * (P_0 / P_i) ** (Rd / C_p).

    T is the temperature at the level, P_i the pressure at that level and P_0
    the reference pressure; only the ratio P_0 / P_i is used, so both
    pressures just need to share a unit.
    """
    kappa = Rd / C_p
    pressure_ratio = P_0 / P_i
    return T * pressure_ratio ** kappa

# Second route to theta_v: potential temperature first, moisture factor afterwards
def calculate_theta_v_alternative_by_altitude(df):
    """
    Build theta and theta_v profiles for one set of altitude levels.

    The rows are ordered by 'Altitude'. The 'Calculated Pressure' of the
    lowest row is the reference pressure for every level above it; at the
    lowest level itself theta is simply the temperature.

    Returns a tuple (theta_list, theta_v_alternative) of two lists, both in
    ascending-altitude order.
    """
    ordered = df.sort_values('Altitude').reset_index(drop=True)
    temperatures = ordered['Temperature'].to_numpy()
    pressures = ordered['Calculated Pressure'].to_numpy()
    humidities = ordered['Specific Humidity (qv)'].to_numpy()

    theta_list = []
    theta_v_alternative = []

    for level, (T, P_i, qv) in enumerate(zip(temperatures, pressures, humidities)):
        if level == 0:
            # Lowest level: it is its own reference, so theta equals T.
            theta = T
        else:
            theta = calculate_potential_temperature(T, P_i, pressures[0])

        theta_list.append(theta)
        # Moisture factor turns theta into theta_v (qv is per thousand here).
        theta_v_alternative.append(theta * (1 + (epsilon * qv / 1000)))

    return theta_list, theta_v_alternative


In [None]:
#Edit before running!!
# Folder that holds one day's radiometer files
folder_path = r"C:\path\to\your\Microwave_radiometer\2024-05\2024-05-12" #or other day

# All netCDF files of the day. The extension test ignores case, matching the
# case-insensitive '.MET.NC' / '.TPC.NC' / ... lookups used further down, and
# the names are sorted so the processing order does not depend on the OS.
nc_files = sorted(f for f in os.listdir(folder_path) if f.upper().endswith('.NC'))

# Open datasets keyed by file name; they stay open until the BLH cell closes them
datasets = {}

for file_name in nc_files:
    file_path = os.path.join(folder_path, file_name)
    dataset = nc.Dataset(file_path, mode='r')
    datasets[file_name] = dataset

### MET FILE PROCESSING ###
# Epoch of the instrument's time axis. It is defined before any file is read
# because the TPC and HPC sections below use it as well, even when the MET
# file is missing or has no 'time' variable.
base_time = datetime(2001, 1, 1, 0, 0, 0)

met_file = next((file_name for file_name in datasets.keys() if file_name.upper().endswith('.MET.NC')), None)
surf_p_values = None
surf_t_values = None
time_met = None

if met_file in datasets:
    met_dataset = datasets[met_file]

    try:
        # Extract Surf_P, Surf_T, and time; each one is optional
        if 'Surf_P' in met_dataset.variables:
            surf_p_values = met_dataset.variables['Surf_P'][:]

        if 'Surf_T' in met_dataset.variables:
            surf_t_values = met_dataset.variables['Surf_T'][:]

        if 'time' in met_dataset.variables:
            times = met_dataset.variables['time'][:]
            time_met = convert_time(base_time, times)

    except Exception as e:
        print(f"Error accessing data in {met_file}: {e}")

# Create DataFrame for MET data (empty when no time axis could be read)
df_met = pd.DataFrame({
    'Time': time_met,
    'Surface Temperature': surf_t_values,
    'Surface Pressure': surf_p_values
}) if time_met is not None else pd.DataFrame()

### TPC FILE PROCESSING ###
tpc_file = next((file_name for file_name in datasets.keys() if file_name.upper().endswith('.TPC.NC')), None)
time_tpc = None
altitude_tpc = None
t_profs = None
es_profiles = None

if tpc_file is not None:
    tpc_dataset = datasets[tpc_file]

    try:
        # Time axis, altitude grid and temperature profiles
        time_tpc = tpc_dataset.variables['time'][:]
        altitude_tpc = tpc_dataset.variables['altitude'][:]
        t_profs = tpc_dataset.variables['T_prof'][:]

        # Saturation vapor pressure, evaluated profile by profile
        es_profiles = np.array([calculate_saturation_vapor_pressure(T) for T in t_profs])

    except Exception as e:
        print(f"Error accessing data in {tpc_file}: {e}")

# Long-format table: one row per (time, altitude) pair
if time_tpc is None:
    df_tpc = pd.DataFrame()
else:
    times_tpc = convert_time(base_time, time_tpc)

    data = []
    for i, t in enumerate(times_tpc):
        # Pair every altitude with this profile's temperature and es value
        for alt, temp, es in zip(altitude_tpc, t_profs[i, :], es_profiles[i, :]):
            data.append([t, alt, temp, es])

    df_tpc = pd.DataFrame(data, columns=['Time', 'Altitude', 'Temperature', 'Saturation Vapor Pressure'])

### HPC FILE PROCESSING ###
hpc_file = next((file_name for file_name in datasets.keys() if file_name.upper().endswith('.HPC.NC')), None)
time_hpc = None
altitude_hpc = None
rh_profiles = None

if hpc_file is not None:
    hpc_dataset = datasets[hpc_file]

    try:
        # Time axis, altitude grid and relative humidity profiles
        time_hpc = hpc_dataset.variables['time'][:]
        altitude_hpc = hpc_dataset.variables['altitude'][:]
        rh_profiles = hpc_dataset.variables['RH_prof'][:]

    except Exception as e:
        print(f"Error accessing data in {hpc_file}: {e}")

# Long-format table: one row per (time, altitude) pair
if time_hpc is None:
    df_hpc = pd.DataFrame()
else:
    times_hpc = convert_time(base_time, time_hpc)

    data = []
    for i, t in enumerate(times_hpc):
        # Pair every altitude with this profile's relative humidity
        for alt, rh in zip(altitude_hpc, rh_profiles[i, :]):
            data.append([t, alt, rh])

    df_hpc = pd.DataFrame(data, columns=['Time', 'Altitude', 'Relative Humidity'])



In [None]:
### MERGE DATAFRAMES ###

# Humidity and temperature profiles share rows where both Time and Altitude match
df_combined = df_hpc.merge(df_tpc, on=['Time', 'Altitude'], how='inner')

# Surface values are attached to every level of the matching Time
df_combined_pressure = df_met.merge(df_combined, on='Time', how='inner')

# Quick look at the top of the combined table
print(df_combined_pressure.head())


In [None]:
# Print the merged frame built in the previous cell for a visual check.
print(df_combined_pressure)


In [None]:

# Apply the pressure calculation for each time and altitude.
# The returned list is assigned by position, so it has to hold one value per
# row in the frame's current row order.
df_combined_pressure['Calculated Pressure'] = calculate_pressure_at_heights(df_combined_pressure)

# Display the updated DataFrame with calculated pressures
print(df_combined_pressure)

In [None]:
# Vapor pressure from relative humidity (in percent) and saturation vapor pressure
df_combined_pressure['ev'] = df_combined_pressure['Relative Humidity'] * df_combined_pressure['Saturation Vapor Pressure'] / 100
# Specific humidity. The helper only uses element-wise arithmetic, so the two
# columns are passed whole instead of calling it once per row through apply().
df_combined_pressure['Specific Humidity (qv)'] = calculate_specific_humidity(
    df_combined_pressure['ev'], df_combined_pressure['Calculated Pressure']
)
print(df_combined_pressure)

In [None]:
# Evaluate calculate_theta_v once per row (axis=1) and store the result in a
# new 'Theta_v' column.
df_combined_pressure['Theta_v'] = df_combined_pressure.apply(calculate_theta_v, axis=1)
print(df_combined_pressure)

In [None]:
# Apply the altitude-based theta / theta_v calculation for each time step.
# 1) One call per time step; each result is a (theta_list, theta_v_list) tuple
#    whose lists are ordered by ascending altitude.
grouped = df_combined_pressure.groupby("Time") \
            .apply(calculate_theta_v_alternative_by_altitude)

# 2) Flatten the per-time lists into two float Series. The values are now in
#    (Time, Altitude) order, which is not necessarily the frame's row order.
theta_series = pd.Series(
    [value for theta_list, _ in grouped for value in theta_list], dtype=float
)
theta_v_series = pd.Series(
    [value for _, theta_v_list in grouped for value in theta_v_list], dtype=float
)

# 3) Row positions of the frame when sorted by (Time, Altitude): entry k of
#    the flattened Series belongs to row row_order[k].
row_order = (
    df_combined_pressure.reset_index(drop=True)
      .sort_values(["Time", "Altitude"], kind="stable")
      .index.to_numpy()
)

# 4) Scatter the values back to their own rows and store them as float
#    columns (the previous explode()-based version produced object columns
#    and relied on the frame already being sorted).
theta_column = np.empty(len(df_combined_pressure), dtype=float)
theta_v_column = np.empty(len(df_combined_pressure), dtype=float)
theta_column[row_order] = theta_series.to_numpy()
theta_v_column[row_order] = theta_v_series.to_numpy()

df_combined_pressure["Theta"] = theta_column
df_combined_pressure["Theta_v_Alternative"]   = theta_v_column

# Display the updated DataFrame with the alternative theta_v column
print(df_combined_pressure[['Time', 'Altitude', 'Theta','Theta_v', 'Theta_v_Alternative']].head())


In [None]:
'''
# Select data for a single time step (e.g., the first unique time in the dataset)
selected_time = df_combined_pressure['Time'].unique()[600]  # Get the first unique time

# Filter the DataFrame for the selected time
df_single_time = df_combined_pressure[df_combined_pressure['Time'] == selected_time]

# Create a figure and axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# Plot Specific Humidity
ax1.set_xlabel('Specific Humidity (g/kg)', color='blue')
ax1.set_ylabel('Altitude (m)', color='blue')
ax1.plot(df_single_time['Specific Humidity (qv)'], df_single_time['Altitude'], label='Specific Humidity (qv)', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis for Theta_v
ax2 = ax1.twinx()
ax2.set_ylabel('Theta_v (K)', color='red')
ax2.plot(df_single_time['Theta_v'], df_single_time['Altitude'], label='Theta_v', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Set the title and grid
plt.title(f'Vertical Profile of Specific Humidity and Theta_v\nTime: {selected_time}')
ax1.grid(True)

plt.show()
'''

In [None]:
# Select one time step: unique time number 800 (0-based), or the last
# available one when the day holds fewer time steps.
unique_times = df_combined_pressure['Time'].unique()
selected_time = unique_times[min(800, len(unique_times) - 1)]

# Filter the DataFrame for the selected time
df_single_time = df_combined_pressure[df_combined_pressure['Time'] == selected_time]

# Create a figure and axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# Specific humidity on the bottom x-axis, altitude on the shared y-axis
ax1.set_xlabel('Specific Humidity (g/kg)', color='blue')
ax1.set_ylabel('Altitude (m)')
ax1.plot(df_single_time['Specific Humidity (qv)'], df_single_time['Altitude'], label='Specific Humidity (qv)', color='blue')
ax1.tick_params(axis='x', labelcolor='blue')

# Theta_v needs its own X scale (kelvin vs. g/kg) while sharing the altitude
# axis, hence twiny(). twinx() would put both quantities on one x-axis and
# label the altitude axis as Theta_v.
ax2 = ax1.twiny()
ax2.set_xlabel('Theta_v (K)', color='red')

# Plot Theta_v (original) in red
ax2.plot(df_single_time['Theta_v'], df_single_time['Altitude'], label='Theta_v', color='red')

# Plot Theta_v_Alternative (new method) in green
ax2.plot(df_single_time['Theta_v_Alternative'], df_single_time['Altitude'], label='Theta_v_Alternative', color='green', linestyle='--')

# Colour the top x-axis ticks like the Theta_v curve
ax2.tick_params(axis='x', labelcolor='red')

# Legend for the two Theta_v curves
ax2.legend(loc='upper left')

# Set the title and grid
ax1.set_title(f'Vertical Profile of Specific Humidity and Theta_v\nTime: {selected_time}')
ax1.grid(True)

# Make room for the title above the top x-axis, then show the plot
fig.tight_layout()
plt.show()


In [None]:
# Select data for a single time step: unique time number 600 (0-based).
# An IndexError here means the day holds fewer than 601 time steps.
selected_time = df_combined_pressure['Time'].unique()[600]  # Get the selected time

# Filter the DataFrame for the selected time
df_single_time = df_combined_pressure[df_combined_pressure['Time'] == selected_time]

# Create a figure and axis
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the measured temperature in blue
ax.plot(df_single_time['Temperature'], df_single_time['Altitude'], label='Temperature (K)', color='blue')

# Plot Theta_v (original) in red
ax.plot(df_single_time['Theta_v'], df_single_time['Altitude'], label='Theta_v (Original)', color='red')

# Plot Theta_v_Alternative (new method) in green
ax.plot(df_single_time['Theta_v_Alternative'], df_single_time['Altitude'], label='Theta_v (Alternative)', color='green', linestyle='--')

# Set axis labels (the x-axis carries all three curves, not only Theta_v)
ax.set_xlabel('Theta_v (K)', color='black')
ax.set_ylabel('Altitude (m)', color='black')

# Add a legend to distinguish the three curves
ax.legend(loc='upper left')

# Set the title and grid
plt.title(f'Vertical Profile of Theta_v (Original and Alternative)\nTime: {selected_time}')
ax.grid(True)
# Show only the lowest 450 m
plt.ylim(0,450)
# Show plot
plt.show()


In [None]:
# Ensure the 'Time' column is a datetime type
df_combined_pressure['Time'] = pd.to_datetime(df_combined_pressure['Time'])

# Collapse the frame to one row per time step: every other column becomes a
# Python list holding that time step's values in their current row order.
df_aggregated = df_combined_pressure.groupby('Time').agg(lambda x: list(x)).reset_index()

# Display the first few rows of the aggregated DataFrame
print(df_aggregated.head())

In [None]:
# List the column names of the per-time-step frame.
print(df_aggregated.columns)

In [None]:

# Element-wise mean over a collection of equally long profiles
def compute_mean_profile(profiles):
    """Return the mean along the first axis of profiles.

    An empty collection yields an empty list; otherwise the profiles are
    stacked into a NumPy array and averaged level by level.
    """
    if len(profiles) < 1:
        return []
    stacked = np.array(profiles)
    return stacked.mean(axis=0)

# Resample and average function
def resample_and_average(df, interval='10min'):
    """Average the per-time-step profiles of df over fixed time bins.

    Parameters:
        df: DataFrame indexed by time whose cells hold one list per time
            step (one value per altitude level).
        interval: pandas frequency string for the bin width. The default
            '10min' is the same 10-minute bin as the deprecated '10T' alias.

    Returns:
        DataFrame with one row per bin: 'Altitude' is taken from the first
        time step of the bin, every other column is the level-by-level mean
        computed by compute_mean_profile.
    """
    # Columns averaged level by level; the order fixes the output column order.
    profile_columns = [
        'Surface Temperature',
        'Surface Pressure',
        'Relative Humidity',
        'Temperature',
        'Saturation Vapor Pressure',
        'Calculated Pressure',
        'ev',
        'Specific Humidity (qv)',
        'Theta',
        'Theta_v',
        'Theta_v_Alternative',
    ]

    def mean_of_bin(series):
        # Each element of the bin is one profile (a list of level values).
        return compute_mean_profile(list(series))

    # Assuming altitude doesn't change frequently, the first grid of a bin is kept.
    aggregations = {'Altitude': 'first'}
    aggregations.update({column: mean_of_bin for column in profile_columns})

    resampled = df.resample(interval).agg(aggregations)
    return resampled

# resample() needs a time index, so 'Time' becomes the index (in place)
df_aggregated.set_index('Time', inplace=True)

# Apply the resampling function with its default interval
df_10min_avg = resample_and_average(df_aggregated)

# Turn 'Time' back into an ordinary column; rows are then numbered 0, 1, 2, ...
df_10min_avg.reset_index(inplace=True)

# Display the first few rows of the averaged DataFrame
print(df_10min_avg.head())


In [None]:

# 1. Build the output path inside the same folder as the input files
parquet_path = os.path.join(folder_path, "MWR_vertical_dataset_10min.parquet")

# 2. Save the 10-minute averaged frame to Parquet (pyarrow engine, index not written)
df_10min_avg.to_parquet(parquet_path, engine="pyarrow", index=False)

print(f"Saved DataFrame to: {parquet_path}")

In [None]:

# Select row 60 (0-based) of the averaged DataFrame
example_data = df_10min_avg.iloc[60]  # 0-based position, i.e. the 61st averaged profile

# Create subplots in one row; all panels share the altitude axis
fig, axs = plt.subplots(1, 4, figsize=(20, 5), sharey=True)

# Plot Temperature and Theta together
axs[0].plot(example_data['Temperature'], example_data['Altitude'], label='Temperature (K)', color='red')
axs[0].plot(example_data['Theta'], example_data['Altitude'], label='Theta (K)', color='green')

axs[0].set_title('Temperature (K)', fontsize=14)
axs[0].legend()   # legend distinguishes Temperature from Theta

axs[0].grid()

# Plot Theta_v
axs[1].plot(example_data['Theta_v'], example_data['Altitude'], label='Theta_v (K)', color='orange')
axs[1].set_title('Theta_v (K)', fontsize=14)
axs[1].grid()

# Plot Relative Humidity (RH)
axs[2].plot(example_data['Relative Humidity'], example_data['Altitude'], label='Relative Humidity (%)', color='blue')
axs[2].set_title('Relative Humidity (%)', fontsize=14)
axs[2].grid()

# Plot Specific Humidity (qv)
axs[3].plot(example_data['Specific Humidity (qv)'], example_data['Altitude'], label='Specific Humidity (g/kg)', color='green')
axs[3].set_title('Specific Humidity (g/kg)', fontsize=14)
axs[3].set_xlabel('Value')
axs[3].grid()

# Set shared y-axis limits
axs[0].set_ylim(0,2000)  # Show 0-2000 m; applies to every panel because sharey=True
axs[0].set_ylabel('Altitude (m)')

plt.tight_layout()
# The figure title is placed slightly above the panels (y=1.02)
plt.suptitle(f'Profile at Time: {example_data["Time"]}', fontsize=16, y=1.02)
plt.show()


In [None]:
# Select row 99 (0-based) of the averaged DataFrame
example_data = df_10min_avg.iloc[99]  # 0-based position, i.e. the 100th averaged profile

# Create a single plot for Specific Humidity
fig, ax = plt.subplots(figsize=(8, 6))  # Larger figure for presentation

# Plot Specific Humidity (qv)
ax.plot(example_data['Specific Humidity (qv)'], example_data['Altitude'], label='Specific Humidity (g/kg)', color='green')

# Set the title and labels with larger font sizes
ax.set_title('Specific Humidity (g/kg)', fontsize=18)
ax.set_xlabel('Specific Humidity (g/kg)', fontsize=16)
ax.set_ylabel('Altitude (m)', fontsize=16)

# Increase tick label font size
ax.tick_params(axis='both', which='major', labelsize=14)

# Add a grid for better visibility
ax.grid()

# Fit labels and title inside the figure
plt.tight_layout()

# Display the plot
plt.show()


In [None]:
# Select row 99 (0-based) of the averaged DataFrame
example_data = df_10min_avg.iloc[99]  # 0-based position, i.e. the 100th averaged profile

# Create a single plot for Theta_v and Theta_v_Alternative
fig, ax = plt.subplots(figsize=(8, 6))  # Larger figure for presentation

# Plot Theta_v
ax.plot(example_data['Theta_v'], example_data['Altitude'], label='Theta_v (K)', color='orange')

# Plot Theta_v_Alternative
ax.plot(example_data['Theta_v_Alternative'], example_data['Altitude'], label='Theta_v_Alternative (K)', color='purple', linestyle='--')

# Set the title and labels with larger font sizes
ax.set_title('Vertical Profile of Potential Temperature', fontsize=18)
ax.set_xlabel('Theta_v (K)', fontsize=16)
ax.set_ylabel('Altitude (m)', fontsize=16)

# Increase tick label font size
ax.tick_params(axis='both', which='major', labelsize=14)

# Add a grid for better visibility
ax.grid()

# Add legend with larger font size for better presentation visibility
ax.legend(fontsize=14)

# Fit labels and title inside the figure
plt.tight_layout()

# Display the plot
plt.show()


In [None]:

# Select row 76 (0-based) of the averaged DataFrame
example_data = df_10min_avg.iloc[76]  # 0-based position, i.e. the 77th averaged profile

# Create subplots in one row; all panels share the altitude axis
fig, axs = plt.subplots(1, 4, figsize=(20, 5), sharey=True)

# Scatter plot for Temperature
axs[0].scatter(example_data['Temperature'], example_data['Altitude'], color='red')
axs[0].set_title('Temperature (K)', fontsize=14)
axs[0].set_xlabel('Value')
axs[0].grid()

# Scatter plot for Theta_v
axs[1].scatter(example_data['Theta_v'], example_data['Altitude'], color='orange')
axs[1].set_title('Theta_v (K)', fontsize=14)
axs[1].set_xlabel('Value')
axs[1].grid()

# Scatter plot for Relative Humidity (RH)
axs[2].scatter(example_data['Relative Humidity'], example_data['Altitude'], color='blue')
axs[2].set_title('Relative Humidity (%)', fontsize=14)
axs[2].set_xlabel('Value')
axs[2].grid()

# Scatter plot for Specific Humidity (qv)
axs[3].scatter(example_data['Specific Humidity (qv)'], example_data['Altitude'], color='green')
axs[3].set_title('Specific Humidity (g/kg)', fontsize=14)
axs[3].set_xlabel('Value')
axs[3].grid()

# Add a horizontal line at 2 km to every panel
for ax in axs:
    ax.axhline(y=2000, color='red', linestyle='--')
    ax.set_ylim(0, 3000)  # Show 0-3000 m (the axis is not reversed)
    ax.set_ylabel('Altitude (m)')

plt.tight_layout()
# The figure title is placed slightly above the panels (y=1.02)
plt.suptitle(f'Profile at Time: {example_data["Time"]}', fontsize=16, y=1.02)
plt.show()


In [None]:
# Select row 36 (0-based) of the averaged DataFrame
example_data = df_10min_avg.iloc[36]  # 0-based position, i.e. the 37th averaged profile

# Create a figure for Temperature and Theta_v
plt.figure(figsize=(10, 6))

# Scatter plot for Temperature
plt.scatter(example_data['Temperature'], example_data['Altitude'], color='red', label='Temperature (K)', alpha=0.7)

# Scatter plot for Theta_v
plt.scatter(example_data['Theta_v'], example_data['Altitude'], color='orange', label='Theta_v (K)', alpha=0.7)

# Add a horizontal line at 2 km
plt.axhline(y=2000, color='red', linestyle='--', label='2 km Threshold')

# Set labels and title
plt.title(f'Temperature and Theta_v Profile at Time: {example_data["Time"]}', fontsize=16)
plt.xlabel('Value (K)', fontsize=14)
plt.ylabel('Altitude (m)', fontsize=14)

# Add a legend
plt.legend()
# Optional zoom to the lowest 1500 m (disabled)
#plt.ylim(0,1500)
# Set grid
plt.grid()

# Show the plot
plt.tight_layout()
plt.show()


### BLH

In [None]:
# Load the Boundary Layer Height (BLH) data from the file ending in '.BLH.NC'
blh_file = next((file_name for file_name in datasets.keys() if file_name.upper().endswith('.BLH.NC')), None)
time_blh = None
blh_values = None

# blh_file is None when no such file was opened; None is never a key of datasets
if blh_file in datasets:
    blh_dataset = datasets[blh_file]

    try:
        # Extract time and BLH
        if 'time' in blh_dataset.variables:
            time_var = blh_dataset.variables['time'][:]
            # Seconds since 2001-01-01, truncated to whole seconds
            time_blh = [datetime(2001, 1, 1) + timedelta(seconds=int(t)) for t in time_var]

        if 'BLH' in blh_dataset.variables:
            blh_values = blh_dataset.variables['BLH'][:]

    except Exception as e:
        print(f"Error accessing data in {blh_file}: {e}")

# Create DataFrame for BLH data (empty unless both time and BLH were read)
df_blh = pd.DataFrame({
    'Time': time_blh,
    'BLH': blh_values
}) if time_blh is not None and blh_values is not None else pd.DataFrame()

# Close every dataset stored in `datasets`; presumably any cell that still
# reads from the files has to run before this point.
for dataset in datasets.values():
    dataset.close()
    
print(df_blh)

In [None]:

# Plot the BLH time series, or report that there is nothing to plot
if not df_blh.empty:
    plt.figure(figsize=(10, 6))
    plt.plot(df_blh['Time'], df_blh['BLH'], marker='o', linestyle='-', color='b')
    plt.title('Boundary Layer Height Over Time')
    plt.xlabel('Time')
    plt.ylabel('Boundary Layer Height (m)')
    plt.xticks(rotation=45)
    plt.grid()
    plt.tight_layout()  # Adjust layout to prevent clipping of tick-labels
    plt.show()
else:
    print("No data to plot.")


In [None]:
def estimate_zi_from_theta_v(theta_v, z):
    """Estimate CBL height from a theta_v profile using the gradient method.

    The vertical gradient d(theta_v)/dz is smoothed with a Gaussian filter
    (sigma of one level) and the height of its maximum is returned.

    Parameters:
        theta_v: sequence of theta_v values, one per level.
        z: sequence of the corresponding heights (same length).

    Returns:
        The element of z at the maximum smoothed gradient, or NaN when no
        estimate is possible: fewer than two levels, theta_v and z of
        different length, non-numeric input (e.g. an empty averaging bin),
        or a smoothed gradient that is NaN everywhere.
    """
    try:
        theta_arr = np.atleast_1d(np.asarray(theta_v, dtype=float))
        z_arr = np.atleast_1d(np.asarray(z, dtype=float))
    except (TypeError, ValueError):
        return np.nan

    # np.gradient needs matching 1-D inputs with at least two points.
    if theta_arr.ndim != 1 or theta_arr.shape != z_arr.shape or theta_arr.size < 2:
        return np.nan

    dthetav_dz = np.gradient(theta_arr, z_arr)
    dthetav_dz_smooth = gaussian_filter1d(dthetav_dz, sigma=1)  # Optional smoothing

    if np.all(np.isnan(dthetav_dz_smooth)):
        return np.nan

    # nanargmax ignores NaN levels; plain argmax would report the first NaN.
    zi_index = int(np.nanargmax(dthetav_dz_smooth))
    return z[zi_index]

In [None]:
# Gradient-method CBL height for every 10-minute profile
zi_times = []
zi_list = []

for idx, row in df_10min_avg.iterrows():
    # Height grid and the already-computed theta_v profile of this bin
    z, theta_v = row['Altitude'], row['Theta_v_Alternative']

    zi = estimate_zi_from_theta_v(theta_v, z)
    zi_times.append(row['Time'])
    zi_list.append(zi)

zi_df = pd.DataFrame(dict(Time=zi_times, zi=zi_list))


In [None]:
def estimate_zi_parcel_method(theta_v_profile, z_profile, theta_v_surface):
    """
    Parcel-method estimate of the boundary layer height (zi).

    Scans the profile from its first level and stops at the first level whose
    theta_v is strictly greater than the surface value.

    Parameters:
        theta_v_profile: array-like of virtual potential temperature with height
        z_profile: array-like of the matching height levels (same length)
        theta_v_surface: surface virtual potential temperature (scalar)

    Returns:
        The height at the first such level, or NaN when no level exceeds
        theta_v_surface.
    """
    crossing = next(
        (level for level, theta_env in enumerate(theta_v_profile)
         if theta_env > theta_v_surface),
        None,
    )
    if crossing is None:
        return np.nan  # No level warmer than the surface parcel
    return z_profile[crossing]

In [None]:
# Parcel-method CBL height for every 10-minute profile
zi_parcel_list = []
zi_parcel_times = []

for idx, row in df_10min_avg.iterrows():
    z = row['Altitude']                # height levels
    theta_v = row['Theta_v_Alternative']  # profile (list or array)

    if np.ndim(theta_v) == 0 or np.size(theta_v) == 0:
        # A 10-minute bin without data has no profile: record NaN instead of
        # failing on theta_v[0].
        zi = np.nan
    else:
        theta_v_surface = theta_v[0]  # assume surface is first level
        zi = estimate_zi_parcel_method(theta_v, z, theta_v_surface)

    zi_parcel_list.append(zi)
    zi_parcel_times.append(row['Time'])

zi_parcel_df = pd.DataFrame({'Time': zi_parcel_times, 'zi_parcel': zi_parcel_list})

In [None]:

# Day shown in the title is taken from the data, so it always matches the
# processed folder instead of a hard-coded date.
day_label = zi_df['Time'].iloc[0].strftime('%B %d, %Y') if len(zi_df) else 'unknown date'

plt.figure(figsize=(10, 5))
plt.plot(zi_df['Time'], zi_df['zi'], label='CBL Height $z_i$', color='blue')
plt.xlabel('Time (UTC)')
plt.ylabel('Height (m)')
plt.title(f'Estimated CBL Height on {day_label}')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Day shown in the title is taken from the data, so it always matches the
# processed folder instead of a hard-coded date.
day_label = zi_df['Time'].iloc[0].strftime('%B %d, %Y') if len(zi_df) else 'unknown date'

plt.figure(figsize=(10, 5))

# Plot gradient method
plt.plot(zi_df['Time'], zi_df['zi'], label='CBL Height $z_i$ (Gradient Method)', color='blue')

# Plot parcel method (zi_parcel_df comes from the parcel-method cell)
plt.plot(zi_parcel_df['Time'], zi_parcel_df['zi_parcel'], 
         label='CBL Height $z_i$ (Parcel Method)', color='orange', linestyle='--')

plt.xlabel('Time (UTC)')
plt.ylabel('Height (m)')
plt.title(f'Estimated CBL Height on {day_label}')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Print the gradient-method CBL heights (columns 'Time' and 'zi').
print(zi_df)

In [None]:
# Add parcel method CBL height to the DataFrame (rows are matched by index)
zi_df['zi_parcel'] = zi_parcel_df['zi_parcel']

# Name the CSV after the day folder (e.g. '2024-05-12'), so changing
# folder_path at the top is enough and the file name cannot get out of date.
day_tag = os.path.basename(os.path.normpath(folder_path))
output_file = os.path.join(folder_path, f'cbl_height_{day_tag}.csv')

# Save the DataFrame
zi_df.to_csv(output_file, index=False)
print(f"CBL height saved to: {output_file}")
print(f"CBL height saved to: {output_file}")