### What this script does

- Opens LV0/LV1 cloud‐radar NetCDF files and prints dataset metadata (dims, vars, attrs).

- Scans a chosen day folder, loads time series (SurfTemp, RH, WS, LWP, SurfPres, Rain), and makes per-file plots + an all-day plot.

- Loads vertical profiles (TProf, AHProf, RHProf) with their altitude grids (TAlts, HAlts) and renders per-file profile images and a combined day view.

- Builds a per-timestamp profile table (temperature/humidity profiles + surface P/T), converting radar time (seconds, plus milliseconds, since 2001-01-01 00:00:00) to UTC timestamps.

- Computes pressure profiles by hydrostatic integration upward from the surface pressure, then derives saturation vapor pressure, vapor pressure, specific humidity, and virtual potential temperature (θv) profiles.

- Resamples all profiles to 10-minute averages and saves to a Parquet dataset.

- Extracts cloud-base height (CBH) or Rain time series from LV1 files, plots, resamples Rain to 10-min, and saves to CSV.

- Provides a few presentation plots of a selected 10-min profile (Temperature, θv, RH, qv vs altitude).

#### Lines you should change:

- Single LV0 file you inspect: file_path = r"C:\path\to\your\Cloud_radar\2024-05\2024-05-03\240503_000002_P00_ZEN.LV0.NC"

- Folder holding LV1 files for a day: folder_path = r"C:\path\to\your\Cloud_radar\2024-05\2024-05-03"

- Base + month + day blocks used in several sections (CBH, Rain, vertical profiles): 
    - base_dir_cr = r"C:\path\to\your\Cloud_radar"
    - month       = "2024-05"       <- change to target month
    - day         = "2024-05-02"    <- change to target day
    - day_folder_path = os.path.join(base_dir_cr, month, day)



In [None]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime, timedelta
from netCDF4 import Dataset
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
import matplotlib.dates as mdates



In [None]:
# Physical constants shared by the thermodynamic helpers below
C_p = 1005        # Specific heat capacity of dry air at constant pressure (J/kg/K)
g = 9.81          # Gravitational acceleration (m/s^2)
Ttrip = 273.16    # Triple point temperature in Kelvin
Rd = 287.04       # Specific gas constant of dry air (J/kg/K)
Rv = 461.5        # Specific gas constant of water vapour (J/kg/K)
epsilon = Rv / Rd - 1   # ~0.608, the humidity factor used in the theta_v formula

In [None]:
# Hydrostatic pressure profile for every row of the profile table
def calculate_pressure_at_heights(df):
    """Integrate pressure upward from the surface for each profile in *df*.

    Every row must provide ``temperature_altitudes``, ``temperature_profile``
    and ``surf_pres``.  Levels are processed in order of increasing altitude;
    the lowest level is assigned the surface pressure itself and each higher
    level is obtained from the one below with a predictor-corrector step of
    the hydrostatic equation dP/dz = -g * P / (Rd * T).

    Returns a list with one list of pressures per row, in the same unit as
    ``surf_pres`` (hPa according to the comments elsewhere in this file).
    """
    all_profiles = []

    for _, record in df.iterrows():
        # Order the levels by altitude in case the grid is not ascending
        order = np.argsort(record['temperature_altitudes'])
        heights = np.array(record['temperature_altitudes'])[order]
        temps = np.array(record['temperature_profile'])[order]

        # The first (lowest) level carries the measured surface pressure
        column = [record['surf_pres']]

        for level in range(1, len(heights)):
            dz = heights[level] - heights[level - 1]
            t_low = temps[level - 1]
            t_high = temps[level]
            p_low = column[-1]

            # Predictor: isothermal layer at the lower temperature
            p_pred = p_low * np.exp(-g * dz / (Rd * t_low))

            # Corrector: trapezoidal average of the slopes at both ends
            slope_low = -(g * p_low / (Rd * t_low))
            slope_high = -(g * p_pred / (Rd * t_high))
            column.append(p_low + 0.5 * (slope_low + slope_high) * dz)

        all_profiles.append(column)

    return all_profiles

# Saturation vapour pressure from temperature
def calculate_saturation_vapor_pressure(T):
    """Return the saturation vapour pressure for temperature *T* (Kelvin).

    Evaluates 610.78 * exp(17.2694 * (T - Ttrip) / (T - 35.86)).  The result
    is later combined with pressures converted to Pa in
    ``calculate_specific_humidity``, i.e. it is treated as a value in Pa.
    """
    exponent = 17.2694 * (T - Ttrip) / (T - 35.86)
    return 610.78 * np.exp(exponent)

# Specific humidity (qv) from vapour pressure and total pressure
def calculate_specific_humidity(ev, p):
    """Return the specific humidity for vapour pressure *ev* and pressure *p*.

    *p* is multiplied by 100 before use (hPa -> Pa when *p* is in hPa, as the
    callers in this file pass it), so *ev* is expected in the converted unit.
    The factor 1000 scales the result from kg/kg to g/kg.
    """
    pressure_pa = p * 100
    gas_ratio_excess = (Rv / Rd) - 1
    return ev * 1000 / (pressure_pa + (gas_ratio_excess * (pressure_pa - ev)))


# Potential temperature referenced to the surface pressure
def calculate_potential_temperature(T, P_i, P_0):
    """
    Return the potential temperature T * (P_0 / P_i) ** (Rd / C_p).

    T is the temperature (K) at the level, P_i the pressure at that level and
    P_0 the reference (surface) pressure; both pressures must share one unit
    (hPa in this file) because only their ratio is used.
    """
    kappa = Rd / C_p
    pressure_ratio = P_0 / P_i
    return T * pressure_ratio ** kappa

# Virtual potential temperature (theta_v) at every altitude level of one row
def calculate_theta_v_alternative_by_altitude(row):
    """Return the theta_v profile (list, one value per level) for *row*.

    *row* must provide ``temperature_profile`` (K), ``calculated_pressures``,
    ``specific_humidity`` and ``surf_pres``.  At the first level theta is
    taken to be the temperature itself; above it theta is referenced to the
    surface pressure via ``calculate_potential_temperature``.

    The humidity is divided by 1000 before use: ``specific_humidity`` as
    produced by ``calculate_specific_humidity`` is scaled to g/kg, while the
    (1 + epsilon * q) factor needs kg/kg.
    """
    temps = row['temperature_profile']
    level_pressures = row['calculated_pressures']
    humidity = row['specific_humidity']

    profile = []
    for level, T in enumerate(temps):
        P_i = level_pressures[level]   # pressure at this level
        qv = humidity[level]           # specific humidity at this level (g/kg)
        P_0 = row['surf_pres']         # reference pressure, same for all levels

        # The lowest level is the reference level, so theta equals T there
        theta = T if level == 0 else calculate_potential_temperature(T, P_i, P_0)

        profile.append(theta * (1 + (epsilon * qv/1000)))

    return profile

In [None]:
# Edit before running!!
# Path of the single LV0 file to inspect (any other day works as well)
file_path = r"C:\path\to\your\Cloud_radar\2024-05\2024-05-03\240503_000002_P00_ZEN.LV0.NC"
# Open read-only; the handle stays open because the next cell prints from it
dataset = nc.Dataset(file_path, 'r')


In [None]:
# Dump the structure of the open dataset: summary, global attributes,
# dimensions, and the metadata of every variable.
print("NetCDF Dataset Information:")
print(dataset)

# Global attributes, one "name: value" line each
print("\nGlobal Attributes:")
for global_attr in dataset.ncattrs():
    global_value = getattr(dataset, global_attr)
    print(f"{global_attr}: {global_value}")

# Dimension names with their lengths
print("\nDimensions:")
for dim_label, dimension in dataset.dimensions.items():
    dim_size = len(dimension)
    print(f"{dim_label}: {dim_size}")

# Per-variable layout followed by the variable's own attributes
print("\nVariables:")
for var_label, variable in dataset.variables.items():
    print(f"\nVariable Name: {var_label}")
    print(f"Dimensions: {variable.dimensions}")
    print(f"Shape: {variable.shape}")
    print(f"Data Type: {variable.dtype}")
    for var_attr in variable.ncattrs():
        var_attr_value = getattr(variable, var_attr)
        print(f"    {var_attr}: {var_attr_value}")

In [None]:
#Edit this before running!!!
# Folder holding the LV1 files for one day
folder_path = r"C:\path\to\your\Cloud_radar\2024-05\2024-05-03" #or other day
# Match the extension case-insensitively (as the later cells of this notebook
# do) and sort the names: os.listdir() returns entries in arbitrary order, and
# the file names start with a date/time stamp, so sorting is expected to give
# chronological order.
file_list = sorted(f for f in os.listdir(folder_path) if f.lower().endswith("lv1.nc"))

print("Files found in the folder:")
print(len(file_list))

# The cell after this one prints dataset['TAlts'], so the handle of the last
# file is deliberately left open; every other handle is closed once its data
# has been read into memory.
last_file_name = file_list[-1] if file_list else None

# One PNG with six surface time series per file
for file_name in file_list:
    file_path = os.path.join(folder_path, file_name)
    dataset = Dataset(file_path)

    # Surface variables ([:] copies the data into memory)
    surf_temp_data = dataset.variables['SurfTemp'][:]
    rh_data = dataset.variables['SurfRelHum'][:]
    surf_ws_data = dataset.variables['SurfWS'][:]
    lwp_data = dataset.variables['LWP'][:]
    surf_pres_data = dataset.variables['SurfPres'][:]
    rain_rate_data = dataset.variables['Rain'][:]

    # Time is stored as whole seconds ('Time') plus milliseconds ('Timems')
    time_data = dataset.variables['Time'][:]
    timems_data = dataset.variables['Timems'][:]

    if file_name != last_file_name:
        dataset.close()

    # Seconds are counted from 2001-01-01 00:00:00
    start_time = datetime(2001, 1, 1, 0, 0, 0)
    time = [
        start_time + timedelta(seconds=int(sec), milliseconds=int(msec))
        for sec, msec in zip(time_data, timems_data)
    ]

    # (values, line colour, title, y-axis label) for the 3x2 panel grid
    panels = [
        (surf_temp_data, 'b', 'Surface Temperature', 'Temperature (K)'),
        (rh_data, 'r', 'Relative Humidity', 'Relative Humidity (%)'),
        (surf_ws_data, 'g', 'Surface Wind Speed', 'Wind Speed (m/s)'),
        (lwp_data, 'm', 'Liquid Water Path', 'LWP (g/m^2)'),
        (surf_pres_data, 'c', 'Surface Pressure', 'Pressure (hPa)'),
        (rain_rate_data, 'y', 'Rain Rate', 'Rain Rate (mm/hr)'),
    ]

    plt.figure(figsize=(12, 8))
    for position, (values, colour, heading, y_label) in enumerate(panels, start=1):
        plt.subplot(3, 2, position)
        plt.plot(time, values, color=colour)
        plt.title(heading)
        plt.xlabel('Time')
        plt.ylabel(y_label)

    plt.tight_layout()

    # Save next to the input file and free the figure
    plot_name = os.path.splitext(file_name)[0] + '_plot.png'
    plt.savefig(os.path.join(folder_path, plot_name))
    plt.close()
    
  

In [None]:
# Print the 'TAlts' variable (used further below as the altitude axis of the
# temperature profiles) of whichever file `dataset` currently refers to.
# Requires that handle to still be open.
print(dataset['TAlts'][:])

In [None]:
# Accumulators for the whole-day surface time series
all_surf_temp_data = []
all_rh_data = []
all_surf_ws_data = []
all_lwp_data = []
all_surf_pres_data = []
all_rain_rate_data = []
all_time = []
# Accumulators for the whole-day profiles (one array per file) and their times
all_TProf_data = []
all_AHProf_data = []
all_RHProf_data = []
all_profile_time_utc = []

# Process the files in sorted (expected chronological) order so that the
# accumulated day series and the concatenated profiles are ordered in time.
for file_name in sorted(file_list):
    file_path = os.path.join(folder_path, file_name)

    # The context manager closes the handle as soon as everything is read
    with Dataset(file_path) as dataset:
        # Surface variables
        surf_temp_data = dataset.variables['SurfTemp'][:]
        rh_data = dataset.variables['SurfRelHum'][:]
        surf_ws_data = dataset.variables['SurfWS'][:]
        lwp_data = dataset.variables['LWP'][:]
        surf_pres_data = dataset.variables['SurfPres'][:]
        rain_rate_data = dataset.variables['Rain'][:]

        # Time: whole seconds plus milliseconds
        time_data = dataset.variables['Time'][:]
        timems_data = dataset.variables['Timems'][:]

        # Profile variables and their altitude grids
        TProf_data = dataset.variables['TProf'][:]
        AHProf_data = dataset.variables['AHProf'][:]
        RHProf_data = dataset.variables['RHProf'][:]
        TAlt_data = dataset.variables['TAlts'][:]
        HAlt_data = dataset.variables['HAlts'][:]

    # Seconds are counted from 2001-01-01 00:00:00
    start_time = datetime(2001, 1, 1, 0, 0, 0)
    time = [
        start_time + timedelta(seconds=int(sec), milliseconds=int(msec))
        for sec, msec in zip(time_data, timems_data)
    ]

    # Append the surface data of this file to the day series
    all_surf_temp_data.extend(surf_temp_data)
    all_rh_data.extend(rh_data)
    all_surf_ws_data.extend(surf_ws_data)
    all_lwp_data.extend(lwp_data)
    all_surf_pres_data.extend(surf_pres_data)
    all_rain_rate_data.extend(rain_rate_data)
    all_time.extend(time)

    # The profiles share the 'Time' variable; their timestamps are kept at
    # whole-second resolution (milliseconds are not added here).
    profile_time_data = time_data
    profile_start_time = datetime(2001, 1, 1, 0, 0, 0)
    profile_time_utc = [profile_start_time + timedelta(seconds=int(t)) for t in profile_time_data]

    all_TProf_data.append(TProf_data)
    all_profile_time_utc.extend(profile_time_utc)
    all_AHProf_data.append(AHProf_data)
    all_RHProf_data.append(RHProf_data)

    # Per-file profile images: (data, altitude grid, title, colour-bar label)
    profile_panels = [
        (TProf_data, TAlt_data, 'Temperature Profile', 'Temperature (K)'),
        (AHProf_data, HAlt_data, 'Absolute Humidity Profile', 'Absolute Humidity (g/m^3)'),
        (RHProf_data, HAlt_data, 'Relative Humidity Profile', 'Relative Humidity (%)'),
    ]
    first_time = np.min(profile_time_utc)
    final_time = np.max(profile_time_utc)

    fig, axs = plt.subplots(3, 1, figsize=(15, 15))
    for axis, (field, alt_grid, heading, bar_label) in zip(axs, profile_panels):
        # Transposed so that time runs along x and altitude along y
        image = axis.imshow(
            field.T,
            extent=[first_time, final_time, np.min(alt_grid), np.max(alt_grid)],
            aspect='auto',
            origin='lower',
        )
        axis.set_title(heading)
        axis.set_xlabel('Time (UTC)')
        axis.set_ylabel('Altitude (m)')
        plt.colorbar(image, ax=axis, label=bar_label)

    plt.tight_layout()
    # Save next to the input file and free the figure
    profile_plot_name = os.path.splitext(file_name)[0] + '_profile_plot.png'
    plt.savefig(os.path.join(folder_path, profile_plot_name))
    plt.close()

print("Plots created for all files.")

In [None]:

# Whole-day overview: six surface time series on a 3x2 grid.
# Each entry: (values, line colour, title, y-axis label)
daily_series = (
    (all_surf_temp_data, 'b', 'Surface Temperature', 'Temperature (K)'),
    (all_rh_data, 'r', 'Relative Humidity', 'Relative Humidity (%)'),
    (all_surf_ws_data, 'g', 'Surface Wind Speed', 'Wind Speed (m/s)'),
    (all_lwp_data, 'm', 'Liquid Water Path', 'LWP (g/m^2)'),
    (all_surf_pres_data, 'c', 'Surface Pressure', 'Pressure (hPa)'),
    (all_rain_rate_data, 'y', 'Rain Rate', 'Rain Rate (mm/hr)'),
)

plt.figure(figsize=(12, 8))

for position, (values, colour, heading, y_label) in enumerate(daily_series, start=1):
    plt.subplot(3, 2, position)
    plt.plot(all_time, values, color=colour)
    plt.title(heading)
    plt.xlabel('Time')
    plt.ylabel(y_label)
    plt.xticks(rotation=45)

plt.tight_layout()

# Written into the day folder; the figure is left open so it is also displayed
plot_name = 'daily_plots.png'
plt.savefig(os.path.join(folder_path, plot_name))

In [None]:
# Combine the per-file profile arrays along the time axis (axis 0) into one
# array per variable for the whole day.
# The isinstance guards make this cell safe to re-run: feeding an
# already-combined 2-D array to np.concatenate again would treat its rows as
# separate 1-D arrays and silently flatten the result.
if isinstance(all_TProf_data, list):
    all_TProf_data = np.concatenate(all_TProf_data, axis=0)
if isinstance(all_AHProf_data, list):
    all_AHProf_data = np.concatenate(all_AHProf_data, axis=0)
if isinstance(all_RHProf_data, list):
    all_RHProf_data = np.concatenate(all_RHProf_data, axis=0)


In [None]:

# Whole-day profile images, one panel per variable.
# TAlt_data / HAlt_data are the altitude grids read from the last processed file.
# Each entry: (data, altitude grid, title, colour-bar label)
combined_panels = (
    (all_TProf_data, TAlt_data,
     'Combined Temperature Profile for the Whole Day', 'Temperature (K)'),
    (all_AHProf_data, HAlt_data,
     'Combined Absolute Humidity Profile for the Whole Day', 'Absolute Humidity (g/m^3)'),
    (all_RHProf_data, HAlt_data,
     'Combined Relative Humidity Profile for the Whole Day', 'Relative Humidity (%)'),
)

day_start = np.min(all_profile_time_utc)
day_end = np.max(all_profile_time_utc)

fig, axs = plt.subplots(3, 1, figsize=(15, 15))

for axis, (field, alt_grid, heading, bar_label) in zip(axs, combined_panels):
    # Transposed so that time runs along x and altitude along y
    image = axis.imshow(
        field.T,
        extent=[day_start, day_end, np.min(alt_grid), np.max(alt_grid)],
        aspect='auto',
        origin='lower',
    )
    axis.set_title(heading)
    axis.set_xlabel('Time (UTC)')
    axis.set_ylabel('Altitude (m)')
    plt.colorbar(image, ax=axis, label=bar_label)

plt.tight_layout()

# Save into the day folder and free the figure
combined_profile_plot_name = 'combined_daily_profile_plot.png'
plt.savefig(os.path.join(folder_path, combined_profile_plot_name))
plt.close()

### Explore cloud base

In [None]:
# Disabled cell, kept as a string literal so nothing in it runs: it would
# write a PDF report listing the variables and attributes of one LV0 file.
# The literal is raw (r''') so that the Windows path and the "\n" sequences
# inside it do not trigger invalid-escape-sequence warnings at compile time.
# Remove the surrounding quotes to re-enable the code.
r'''
#Edit before running!!
# Base directory containing monthly folders
base_dir_cr = r"C:\path\to\your\Cloud_radar"
month       = "2024-05"       # <- change to target month
day         = "2024-05-02"    # <- change to target day
day_folder_path = os.path.join(base_dir_cr, month, day)


# Construct the directory path for the specific date
day_folder_path = os.path.join(base_dir_cr, month, day)

# Print the constructed path for debugging
print(f"Constructed day folder path: {day_folder_path}")

# Define the specific file name you want to focus on
file_name = '240503_000002_P00_ZEN.LV0.nc'  # Replace with the actual file name if known
file_path = os.path.join(day_folder_path, file_name)

# Create a PDF file
pdf_path = 'LV0_NC_Variables_Report.pdf'
doc = SimpleDocTemplate(pdf_path, pagesize=letter)
story = []

# Define styles
styles = getSampleStyleSheet()
title_style = styles['Title']
normal_style = styles['Normal']

# Check if the file exists
if not os.path.exists(file_path):
    print(f"File {file_path} does not exist.")
else:
    print(f"File {file_path} exists. Generating report...")

    # Open the NetCDF file
    dataset = Dataset(file_path, 'r')

    # Add file name as title
    story.append(Paragraph(f'File: {file_name}', title_style))
    story.append(Spacer(1, 12))

    # Add variable information
    story.append(Paragraph('Variables and Attributes:', title_style))
    story.append(Spacer(1, 12))
    
    for var_name, var in dataset.variables.items():
        var_info = f"Variable Name: {var_name}\n"
        var_info += f"   Dimensions: {var.dimensions}\n"
        var_info += f"   Shape: {var.shape}\n"
        var_info += f"   Data Type: {var.dtype}\n"
        
        # Add attributes
        for attr_name in var.ncattrs():
            var_info += f"    {attr_name}: {getattr(var, attr_name)}\n"
        
        var_info += "\n"
        
        story.append(Paragraph(var_info, normal_style))

    # Close the dataset
    dataset.close()

    # Build the PDF
    doc.build(story)

   # print(f"PDF saved to {pdf_path}")
'''

In [None]:
# Disabled cell, kept as a string literal so nothing in it runs: it would
# read 'CBH' and the time variables from every LV1 file in `day_folder_path`
# and combine them into `df_cbh_combined`. Remove the surrounding quotes to
# re-enable it (`day_folder_path` must be defined first).
'''
# Initialize a list to store the CBH data
cbh_data = []

# Check if the directory exists
if not os.path.exists(day_folder_path):
    print(f"Directory {day_folder_path} does not exist.")
else:
    print(f"Directory {day_folder_path} exists. Searching for LV1.nc files...")

    # List all files in the specific date folder for debugging
    all_files = os.listdir(day_folder_path)
    print(f"All files in {day_folder_path}: {all_files}")

    # Search for LV1.nc files in the specific date folder
    for file_name in all_files:
        if file_name.lower().endswith('lv1.nc'):
            file_path = os.path.join(day_folder_path, file_name)
            print(f"Found file: {file_path}")  # Debug: Print the file path
            
            # Open the NetCDF file
            dataset = Dataset(file_path, 'r')
            
            # Extract the CBH variable and the time variable
            cbh = dataset.variables['CBH'][:]
            time_data = dataset.variables['Time'][:]
            timems_data = dataset.variables['Timems'][:]
            start_time = datetime(2001, 1, 1, 0, 0, 0)
            time = [start_time + timedelta(seconds=int(time_data[i]), milliseconds=int(timems_data[i])) for i in range(len(time_data))]
            
            # Create a DataFrame for this file's data
            df = pd.DataFrame({'Timestamp': time, 'CBH': cbh})
            
            # Append the DataFrame to the list
            cbh_data.append(df)
            
            # Close the dataset
            dataset.close()

# Combine all DataFrames into a single DataFrame if any data was collected
if cbh_data:
    df_cbh_combined = pd.concat(cbh_data, ignore_index=True)
    
    # Print the combined DataFrame
    print(df_cbh_combined)
    
    # Optionally, save the combined DataFrame to a CSV or Parquet file
    #output_csv_path = os.path.join(base_dir_cr, 'CBH_Data_2024-05-03.csv')
    #df_cbh_combined.to_csv(output_csv_path, index=False)
    #print(f"Combined data saved to {output_csv_path}")
else:
    print("No LV1.nc files were found in the directory.")
'''

### Load Rain 

In [None]:
#Edit before running!!
# Base directory containing monthly folders
base_dir_cr = r"C:\path\to\your\Cloud_radar"
month       = "2024-05"       # <- change to target month
day         = "2024-05-23"    # <- change to target day

# Directory holding the files of the selected day
day_folder_path = os.path.join(base_dir_cr, month, day)

# One DataFrame (TIMESTAMP, Rain) per LV1 file
rain_data_list = []

if not os.path.exists(day_folder_path):
    print(f"Directory {day_folder_path} does not exist.")
else:
    print(f"Directory {day_folder_path} exists. Searching for LV1.nc files...")

    # List all files in the specific date folder for debugging
    all_files = os.listdir(day_folder_path)
    print(f"All files in {day_folder_path}: {all_files}")

    # os.listdir() order is arbitrary; iterate in sorted (expected
    # chronological) order so the combined series is ordered in time.
    for file_name in sorted(all_files):
        if not file_name.lower().endswith('lv1.nc'):
            continue

        file_path = os.path.join(day_folder_path, file_name)
        print(f"Found file: {file_path}")  # Debug: Print the file path

        # The context manager closes the file even if a variable is missing
        with Dataset(file_path, 'r') as dataset:
            rain_data = dataset.variables['Rain'][:]
            time_data = dataset.variables['Time'][:]
            timems_data = dataset.variables['Timems'][:]

        # Seconds (plus milliseconds) are counted from 2001-01-01 00:00:00
        start_time = datetime(2001, 1, 1, 0, 0, 0)
        time = [
            start_time + timedelta(seconds=int(sec), milliseconds=int(msec))
            for sec, msec in zip(time_data, timems_data)
        ]

        df = pd.DataFrame({'TIMESTAMP': time, 'Rain': rain_data})
        rain_data_list.append(df)

# Combine all DataFrames into a single DataFrame if any data was collected
if rain_data_list:
    df_rain_combined = pd.concat(rain_data_list, ignore_index=True)

    # Print the combined DataFrame (optional)
    print(df_rain_combined)

    # Presentation-ready plot: rain rate vs time
    fig, ax = plt.subplots(figsize=(8, 6))

    # Thicken all spines (axis borders)
    for spine in ax.spines.values():
        spine.set_linewidth(1.5)

    # Rain-rate series with a bold line
    ax.plot(df_rain_combined['TIMESTAMP'], df_rain_combined['Rain'],
            label='Rain Rate', linewidth=2, color='blue')

    # Tick parameters for major & minor ticks
    ax.tick_params(axis='both', which='major', labelsize=12, width=1.5, length=6)
    ax.tick_params(axis='both', which='minor', width=1.0, length=4)

    # Format the x-axis as HH:MM, major tick every 2 h, minor every 30 min
    date_format = mdates.DateFormatter('%H:%M')
    ax.xaxis.set_major_formatter(date_format)
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=2))
    ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=30))

    # Labels and title with bold font
    ax.set_title('Rain Rate vs Time', fontsize=18, fontweight='bold')
    ax.set_xlabel('Time', fontsize=16, fontweight='bold')
    ax.set_ylabel('Rain Rate (mm/h)', fontsize=16, fontweight='bold')

    # Y-axis tick font size
    ax.tick_params(axis='y', labelsize=12)

    # Grid styling
    ax.grid(True, which='major', linestyle='--', linewidth=0.8, alpha=0.7)
    ax.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

    # Rotate x-tick labels
    plt.xticks(rotation=45)

    # Legend inside the plot
    legend = ax.legend(fontsize=14, frameon=True)
    legend.get_frame().set_linewidth(1.5)

    # Tight layout, then save into the day folder and display
    plt.tight_layout()
    file_path = os.path.join(day_folder_path, 'RR_report.png')
    plt.savefig(file_path, dpi=300)
    plt.show()
else:
    print("No rain data available.")

In [None]:
# Resample the rain rate to 10-minute averages.
# The guard keeps the cell re-runnable: after the first run 'TIMESTAMP' is
# already the index, and setting it again would raise a KeyError.
if 'TIMESTAMP' in df_rain_combined.columns:
    df_rain_combined.set_index('TIMESTAMP', inplace=True)

# '10min' is the current spelling of the deprecated '10T' frequency alias
df_rain_resampled = df_rain_combined.resample('10min').mean().reset_index()

# Print the resampled DataFrame
print(df_rain_resampled)

In [None]:

# Write the 10-minute rain averages as CSV into the folder of the selected day
csv_file_name = 'Rain_10min_Averages.csv'
output_csv_path = os.path.join(day_folder_path, csv_file_name)
df_rain_resampled.to_csv(output_csv_path, index=False)
print(f"10-minute averages saved to {output_csv_path}")

In [None]:
# 10-minute averaged rain rate on a wide figure
plt.figure(figsize=(12, 6))

rain_times = df_rain_resampled['TIMESTAMP']
rain_values = df_rain_resampled['Rain']
plt.plot(rain_times, rain_values, label='Rain Rate', linewidth=2, color='steelblue')

# Title and axis labels
plt.title('10-Minute Average Rain Rate vs Time', fontsize=16, weight='bold')
plt.xlabel('Time', fontsize=14)
plt.ylabel('Rain Rate (mm/h)', fontsize=14)

# Tick labels; the x labels are rotated because they are timestamps
plt.xticks(fontsize=12, rotation=45)
plt.yticks(fontsize=12)

# Grid, legend and layout
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(fontsize=12)
plt.tight_layout()

plt.show()

### Vertical Profiles

In [None]:

#Edit before running!!
# Base directory containing monthly folders
base_dir_cr = r"C:\path\to\your\Cloud_radar"
month       = "2024-05"       # <- change to target month
day         = "2024-05-02"    # <- change to target day
day_folder_path = os.path.join(base_dir_cr, month, day)

# One dict per timestamp: profiles, altitude grids and surface values
profiles_with_time = []

# Altitude grids; read once from the first file and reused for every row
temperature_altitudes = None
humidity_altitudes = None

if not os.path.exists(day_folder_path):
    print(f"Directory {day_folder_path} does not exist.")
else:
    print(f"Directory {day_folder_path} exists. Searching for LV1.nc files...")

    # List all files in the specific date folder for debugging
    all_files = os.listdir(day_folder_path)
    print(f"All files in {day_folder_path}: {all_files}")

    # os.listdir() order is arbitrary; iterate in sorted (expected
    # chronological) order so the table rows are ordered in time.
    for file_name in sorted(all_files):
        if not file_name.lower().endswith('lv1.nc'):
            continue

        file_path = os.path.join(day_folder_path, file_name)
        print(f"Found file: {file_path}")  # Debug: Print the file path

        try:
            # The context manager closes the file even when a read fails
            with Dataset(file_path, 'r') as dataset:
                # Altitude grids (only once)
                if temperature_altitudes is None:
                    temperature_altitudes = dataset.variables['TAlts'][:]  # Altitude for temperature
                    humidity_altitudes = dataset.variables['HAlts'][:]  # Altitude for humidity

                # Profiles and surface data
                temperature_data = dataset.variables['TProf'][:]  # Temperature profiles
                abs_humidity_data = dataset.variables['AHProf'][:]  # Absolute humidity profiles
                rel_humidity_data = dataset.variables['RHProf'][:]  # Relative humidity profiles
                surf_temp_data = dataset.variables['SurfTemp'][:]  # Surface temperature
                surf_pres_data = dataset.variables['SurfPres'][:]  # Surface pressure
                time_data = dataset.variables['Time'][:]
                timems_data = dataset.variables['Timems'][:]
        except OSError as e:
            print(f"Error reading file {file_path}: {e}")
            continue

        # Seconds (plus milliseconds) are counted from 2001-01-01 00:00:00
        start_time = datetime(2001, 1, 1, 0, 0, 0)
        times = [
            start_time + timedelta(seconds=int(sec), milliseconds=int(msec))
            for sec, msec in zip(time_data, timems_data)
        ]

        # One row per time step; the first axis of the profile arrays is time
        for i, time in enumerate(times):
            profiles_with_time.append({
                'timestamp': time,
                'temperature_altitudes': temperature_altitudes,
                'temperature_profile': temperature_data[i, :],
                'humidity_altitudes': humidity_altitudes,
                'abs_humidity_profile': abs_humidity_data[i, :],
                'rel_humidity_profile': rel_humidity_data[i, :],
                'surf_temp': surf_temp_data[i],  # Surface temperature for this timestamp
                'surf_pres': surf_pres_data[i]   # Surface pressure for this timestamp
            })

# Create a DataFrame from the list of profiles.
# 'timestamp' stays a regular column here; it becomes the index later, in the
# resampling cell.
df = pd.DataFrame(profiles_with_time)

# Print the first few rows to verify the structure
print(df.head())


In [None]:
# Derived thermodynamic columns; each cell holds one list per timestamp.

def _saturation_profile(temp_profile):
    """Saturation vapour pressure at every level of one temperature profile."""
    return [calculate_saturation_vapor_pressure(temp) for temp in temp_profile]


def _vapor_pressure_profile(row):
    """Vapour pressure per level: relative humidity (%) times saturation pressure."""
    pairs = zip(row['rel_humidity_profile'], row['saturation_vapor_pressure'])
    return [rh * es / 100 for rh, es in pairs]


def _specific_humidity_profile(row):
    """Specific humidity per level from vapour pressure and calculated pressure."""
    pairs = zip(row['vapor_pressure'], row['calculated_pressures'])
    return [calculate_specific_humidity(ev, p) for ev, p in pairs]


# Pressure at every level, integrated upward from the surface pressure
df['calculated_pressures'] = calculate_pressure_at_heights(df)

# Saturation vapour pressure from the temperature profile
df['saturation_vapor_pressure'] = df['temperature_profile'].apply(_saturation_profile)

# Vapour pressure from relative humidity and saturation vapour pressure
df['vapor_pressure'] = df.apply(_vapor_pressure_profile, axis=1)

# Specific humidity for every altitude and time step
df['specific_humidity'] = df.apply(_specific_humidity_profile, axis=1)



In [None]:
# Virtual potential temperature profile for every row (one list per timestamp)
theta_v_column = df.apply(calculate_theta_v_alternative_by_altitude, axis=1)
df['theta_v'] = theta_v_column
print(df.head())

In [None]:
# Element-wise mean of a collection of equally long profiles
def compute_mean_profile(profiles):
    """Return the level-by-level mean of *profiles* as a plain list.

    *profiles* is a sequence of equally long sequences (one per time step).
    An empty input yields an empty list.
    """
    if len(profiles) < 1:
        return []
    stacked = np.asarray(profiles)  # rows = time steps, columns = levels
    # Average over the time steps; .tolist() gives plain Python numbers
    return stacked.mean(axis=0).tolist()

# Resample the profile table to fixed time bins
def resample_and_average(df, interval='10min'):
    """Average the profile table *df* over time bins of width *interval*.

    *df* must be indexed by timestamp and contain the columns listed below.
    Altitude grids take the first value of each bin, surface values their
    mean, and every profile column the level-by-level mean computed by
    ``compute_mean_profile``.

    *interval* is any pandas frequency string.  The default is '10min', the
    current spelling of the deprecated '10T' alias (same 10-minute bins).

    Returns the resampled DataFrame, indexed by bin start time, with the
    columns in the order given below.
    """
    def mean_profile(series):
        # Each cell of a profile column holds one list/array per timestamp
        return compute_mean_profile(list(series))

    aggregations = {
        'temperature_altitudes': 'first',  # Assuming this does not change often
        'temperature_profile': mean_profile,
        'humidity_altitudes': 'first',  # Same assumption
        'abs_humidity_profile': mean_profile,
        'rel_humidity_profile': mean_profile,
        'surf_temp': 'mean',  # Average surface temperature
        'surf_pres': 'mean',  # Average surface pressure
        'calculated_pressures': mean_profile,
        'saturation_vapor_pressure': mean_profile,
        'vapor_pressure': mean_profile,
        'specific_humidity': mean_profile,
        'theta_v': mean_profile,
    }
    return df.resample(interval).agg(aggregations)

# Make 'timestamp' the (datetime) index that resampling needs.
# The guard keeps the cell re-runnable: after the first run 'timestamp' is
# already the index and no longer a column.
if 'timestamp' in df.columns:
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

# Average every column over the default 10-minute bins (pass another
# interval to resample_and_average to change the bin width)
df_10min_avg = resample_and_average(df)

# Bring 'timestamp' back as a regular column
df_10min_avg.reset_index(inplace=True)

# Display the first few rows of the averaged DataFrame
print(df_10min_avg.head())


In [None]:

# Output file lives in the folder of the selected day
parquet_file_name = "cloud_radar_vertical_dataset_10min.parquet"
parquet_path = os.path.join(day_folder_path, parquet_file_name)

# Save the 10-minute averaged table (`df_10min_avg`) as Parquet via pyarrow
df_10min_avg.to_parquet(parquet_path, engine="pyarrow", index=False)

print(f"Saved DataFrame to: {parquet_path}")

In [None]:

# Presentation plot of one 10-minute averaged profile.
# Row position in df_10min_avg (0-based), i.e. the 80th 10-minute bin
profile_position = 79
example_data = df_10min_avg.iloc[profile_position]
print(example_data)

# Four panels side by side sharing the altitude axis
fig, axs = plt.subplots(1, 4, figsize=(20, 5), sharey=True)

# The temperature altitude grid is used as the y values of all four panels
# (this assumes the humidity quantities are on the same grid)
altitude = np.array(example_data['temperature_altitudes'])

# (column, caption used as both legend label and title, marker colour)
panel_specs = [
    ('temperature_profile', 'Temperature (K)', 'red'),
    ('theta_v', 'Theta_v (K)', 'orange'),
    ('rel_humidity_profile', 'Relative Humidity (%)', 'blue'),
    ('specific_humidity', 'Specific Humidity (g/kg)', 'green'),
]
for panel, (column, caption, colour) in zip(axs, panel_specs):
    panel.scatter(example_data[column], altitude, label=caption, color=colour)
    panel.set_title(caption, fontsize=14)
    panel.set_xlabel('Value')
    panel.grid()

# Label the shared y-axis on the first panel, then fix the layout
axs[0].set_ylabel('Altitude (m)')

plt.tight_layout()

# Mark 2 km on every panel and label each y-axis
for ax in axs:
    ax.axhline(y=2000, color='red', linestyle='--')
    ax.set_ylabel('Altitude (m)')

# The title shows the bin's timestamp, taken from the 'timestamp' column
plt.suptitle(f'Profile at Time:  {example_data["timestamp"]}', fontsize=16, y=1.02)
plt.show()
