### What this script does 

- Walks through Microwave Radiometer folders (Mar–Jun 2024), reads NetCDF files:

- From *LWP.nc: collects LWP time series and Min_LWP per day

- From *IWV.nc: collects IWV time series

- Builds tidy DataFrames (df_lwp, df_iwv, df_min_lwp_mwr), prints heads, and plots Min_LWP vs date (saved as PNG).

- Walks through Cloud Radar folders, reads *LV1.nc, extracts LWP time series, saves to Parquet, computes daily Min LWP, and plots MWR vs Cloud-Radar Min LWP.

- Optionally explores a BLH file for a chosen day, prints dataset metadata, creates a BLH vs time plot.

#### Edit before running
1) Base folder for Microwave Radiometer NetCDFs  (safe placeholder)
   
   base_dir = r"C:\path\to\your\Microwave_radiometer"

2) (Optional) Months to scan (currently Mar–Jun 2024):

   target_dirs = [os.path.join(base_dir, f'2024-{month:02d}') for month in [3, 4, 5, 6]]

3) Base folder for Cloud Radar NetCDFs 
   
   base_cr = r"C:\path\to\your\Cloud_radar"

4) Months to scan for Cloud Radar (currently Mar–May):

   target_dirs = [os.path.join(base_cr, f'2024-{month:02d}') for month in [3, 4, 5]]

5) (Optional) Where to save outputs
    - MWR plot: plot_save_path = os.path.join(base_dir, 'Min_LWP_allmonths.png')
    - Cloud Radar Parquet: parquet_file_cr = os.path.join(base_cr, 'CR_LWP_Data.parquet')
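    - Microwave radiometer LWP full dataset: parquet_file = os.path.join(base_dir, 'LWP_Data.parquet')
    - Minimum Microwave radiometer LWP per day: parquet_file_min_lwp = os.path.join(base_dir, 'Min_LWP_Data.parquet')
    - IWV from Microwave radiometer full dataset: parquet_file_iwv = os.path.join(base_dir, 'IWV_Data.parquet')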
    - Combined Min_LWP plot (MWR + Cloud Radar): plot_save_path = os.path.join(base_cr, 'Min_LWP_MWR_and_CloudRadar.png')

6) BLH exploration target (pick month/day that exist in your data): 
   for example:
   
    month = '2024-05'
    
    day   = '2024-05-03'
    
    day_folder_path = os.path.join(base_dir, month, day)  # BLH lives under Microwave_radiometer tree


In [None]:
import os
import glob
import netCDF4 as nc
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta

from netCDF4 import Dataset


In [None]:
# --- User configuration: edit before running! ---
# 1) Root folder that holds the Microwave Radiometer NetCDF files (placeholder).
#    Example: base_dir = r"D:\Thesis\data\Microwave_radiometer"
base_dir = r"C:\path\to\your\Microwave_radiometer"


# One sub-folder per month to scan: 2024-03 through 2024-06
target_dirs = [
    os.path.join(base_dir, f'2024-{month:02d}')
    for month in range(3, 7)
]

# Accumulators filled while walking the folders:
#   lwp_data / iwv_data -> (timestamp, value) tuples
#   min_lwp_mwr         -> {date: Min_LWP}
lwp_data = list()
iwv_data = list()
min_lwp_mwr = dict()

# Turn "seconds since base_time" offsets into absolute datetimes
def convert_time(base_time, time_array):
    """Return one datetime per entry of *time_array*.

    Each entry is truncated to a whole number of seconds with ``int()`` and
    added to *base_time*.
    """
    stamps = []
    for seconds in time_array:
        offset = timedelta(seconds=int(seconds))
        stamps.append(base_time + offset)
    return stamps

# Traverse the month folders and collect LWP / IWV samples from the NetCDF files.
# Only files whose name ends with LWP.NC or IWV.NC (case-insensitive) are opened;
# every opened dataset is closed again by the `with` block, even when reading fails.
base_time = datetime(2001, 1, 1)  # epoch this script assumes for the 'time' variable
for target_dir in target_dirs:
    for root, dirs, files in os.walk(target_dir):
        for file in files:
            file_upper = file.upper()
            is_lwp_file = file_upper.endswith('LWP.NC')
            is_iwv_file = file_upper.endswith('IWV.NC')
            if not (is_lwp_file or is_iwv_file):
                # Not one of the two products we read: skip without opening it
                continue

            file_path = os.path.join(root, file)
            try:
                with nc.Dataset(file_path, 'r') as dataset:
                    timestamps = convert_time(base_time, dataset.variables['time'][:])

                    if is_lwp_file:
                        lwp_values = dataset.variables['LWP'][:]
                        # Min_LWP is assumed to hold a single value per file
                        min_lwp = dataset.variables['Min_LWP'][:].item()

                        # Collect LWP values with timestamps
                        lwp_data.extend(zip(timestamps, lwp_values))

                        # Track Min_LWP per day; the date of the first sample is
                        # assumed to apply to the whole file
                        date = timestamps[0].date()
                        min_lwp_mwr[date] = min_lwp
                    else:
                        iwv_values = dataset.variables['IWV'][:]

                        # Collect IWV values with timestamps
                        iwv_data.extend(zip(timestamps, iwv_values))

            except Exception as e:
                # Best effort: report the broken file and carry on with the rest
                print(f"Error processing file {file_path}: {e}")

# Build the time-series DataFrames, ordered by timestamp with a fresh 0..n-1 index
df_lwp = (
    pd.DataFrame(lwp_data, columns=['TIMESTAMP', 'LWP'])
    .sort_values(by='TIMESTAMP')
    .reset_index(drop=True)
)
df_iwv = (
    pd.DataFrame(iwv_data, columns=['TIMESTAMP', 'IWV'])
    .sort_values(by='TIMESTAMP')
    .reset_index(drop=True)
)

# One row per day with that day's Min_LWP, ordered by date
df_min_lwp_mwr = (
    pd.DataFrame(list(min_lwp_mwr.items()), columns=['Date', 'Min_LWP'])
    .sort_values(by='Date')
    .reset_index(drop=True)
)

# Show the first rows of each DataFrame as a quick sanity check
print(df_lwp.head(), df_iwv.head(), df_min_lwp_mwr.head(), sep='\n')

# Optional CSV export (uncomment if needed)
#df_lwp.to_csv(os.path.join(base_dir, 'LWP_Data.csv'), index=False)
#df_iwv.to_csv(os.path.join(base_dir, 'IWV_Data.csv'), index=False)
#df_min_lwp_mwr.to_csv(os.path.join(base_dir, 'Min_LWP_Per_Day.csv'), index=False)

In [None]:
# Print the Microwave Radiometer LWP DataFrame for a quick visual check
print(df_lwp)

In [None]:
# Output locations for the three gzip-compressed Parquet files (all under base_dir)
parquet_file = os.path.join(base_dir, 'LWP_Data.parquet')
parquet_file_min_lwp = os.path.join(base_dir, 'Min_LWP_Data.parquet')
parquet_file_iwv = os.path.join(base_dir, 'IWV_Data.parquet')

# Full LWP time series
df_lwp.to_parquet(path=parquet_file, compression='gzip')
print(f"DataFrame saved to {parquet_file}")

# Per-day minimum LWP
df_min_lwp_mwr.to_parquet(path=parquet_file_min_lwp, compression='gzip')
print(f"Min LWP DataFrame saved to {parquet_file_min_lwp}")

# Full IWV time series
df_iwv.to_parquet(path=parquet_file_iwv, compression='gzip')
print(f"DataFrame saved to {parquet_file_iwv}")

# Optional CSV export (uncomment if needed)
#df_lwp.to_csv(os.path.join(base_dir, 'LWP_Data.csv'), index=False)
#df_iwv.to_csv(os.path.join(base_dir, 'IWV_Data.csv'), index=False)
#df_min_lwp_mwr.to_csv(os.path.join(base_dir, 'Min_LWP_Per_Day.csv'), index=False)

In [None]:
# Load the Microwave Radiometer LWP DataFrame back from its Parquet file
parquet_file = os.path.join(base_dir, 'LWP_Data.parquet')
df_lwp = pd.read_parquet(parquet_file)
print("DataFrame loaded from Parquet file")
print(df_lwp)

In [None]:
# Print the per-day Min_LWP table, then the smallest Min_LWP across all days
print(df_min_lwp_mwr)
min_lwp_value_mwr = df_min_lwp_mwr['Min_LWP'].min()
print(f"Minimum Min_LWP value: {min_lwp_value_mwr} g/m^2")

In [None]:
# Line plot of the daily MWR Min_LWP values against date
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_min_lwp_mwr['Date'],
    df_min_lwp_mwr['Min_LWP'],
    marker='o',
    linestyle='-',
    markersize=4,
    alpha=0.7,
)
ax.set_xlabel('Date')
ax.set_ylabel('Min_LWP (g/m^2)')
ax.set_title('Min_LWP over Dates for March, April, May, and June 2024')
ax.grid(True)
plt.xticks(rotation=45)  # slanted date labels
fig.tight_layout()
# Write the figure next to the MWR data, then display it
plot_save_path = os.path.join(base_dir, 'Min_LWP_allmonths.png')
fig.savefig(plot_save_path)
plt.show()



### Cloud Radar data

In [None]:
# --- User configuration: edit this before running! ---
# 3) Root folder that holds the Cloud Radar NetCDF files (placeholder).
#    Example: base_cr = r"D:\Thesis\data\Cloud_radar"
base_cr = r"C:\path\to\your\Cloud_radar"

# One sub-folder per month to scan: 2024-03 through 2024-05
target_dirs = [
    os.path.join(base_cr, f'2024-{month:02d}')
    for month in range(3, 6)
]

# Accumulator for (timestamp, LWP) tuples read from the Cloud Radar files
lwp_data = list()


# Function to convert (seconds, milliseconds) offsets from a base date to datetime
def convert_time(base_time, time_array, timems_array):
    """Return one datetime per (seconds, milliseconds) pair.

    Args:
        base_time: datetime that the offsets are counted from.
        time_array: whole-second offsets; each entry is passed through ``int()``.
        timems_array: millisecond offsets matching *time_array* entry by entry;
            each entry is passed through ``int()``.

    Returns:
        List of ``base_time + seconds + milliseconds``, in input order.

    Raises:
        ValueError: if the two arrays differ in length, since the pairing of
            seconds and milliseconds would then be ambiguous.
    """
    if len(time_array) != len(timems_array):
        raise ValueError(
            f"time_array has {len(time_array)} entries but timems_array has {len(timems_array)}"
        )
    return [
        base_time + timedelta(seconds=int(seconds), milliseconds=int(millis))
        for seconds, millis in zip(time_array, timems_array)
    ]

# Traverse the month folders and collect LWP samples from every *LV1.nc file.
# Each dataset is opened in a `with` block so it is closed even when reading fails.
start_time = datetime(2001, 1, 1, 0, 0, 0)  # epoch this script assumes for 'Time'/'Timems'
for target_dir in target_dirs:
    for root, dirs, files in os.walk(target_dir):
        for file in files:
            if not file.endswith('LV1.nc'):
                continue

            file_path = os.path.join(root, file)
            try:
                with nc.Dataset(file_path, 'r') as dataset:
                    # Extract LWP data
                    lwp = dataset.variables['LWP'][:]

                    # Extract time data (whole seconds plus a millisecond part)
                    time_data = dataset.variables['Time'][:]
                    timems_data = dataset.variables['Timems'][:]

                time_converted = convert_time(start_time, time_data, timems_data)

                # Store (timestamp, LWP) pairs in the list
                lwp_data.extend(zip(time_converted, lwp))

            except Exception as e:
                # Best effort: report the broken file and carry on with the rest
                print(f"Error processing file {file_path}: {e}")

# Create a DataFrame from the collected (timestamp, LWP) tuples
df_lwp = pd.DataFrame(lwp_data, columns=['Datetime', 'LWP'])

# Force a datetime dtype: when no file was read the column is empty with
# object dtype, and the .dt accessor below would raise AttributeError
df_lwp['Datetime'] = pd.to_datetime(df_lwp['Datetime'])

# Extract the calendar date of each sample (used for per-day grouping and plotting)
df_lwp['Date'] = df_lwp['Datetime'].dt.date



In [None]:
# Print df_lwp, which at this point holds the Cloud Radar LWP samples
print(df_lwp)

In [None]:
# Define the file path for the Parquet file (stored under the Cloud Radar base folder)

parquet_file_cr = os.path.join(base_cr, 'CR_LWP_Data.parquet')
# Write the Cloud Radar LWP DataFrame gzip-compressed and report where it went
df_lwp.to_parquet(parquet_file_cr, compression='gzip')
print(f"DataFrame saved to {parquet_file_cr}")

In [None]:

# Build the Cloud Radar DataFrame from the collected (timestamp, LWP) tuples.
# The timestamp column is named 'Datetime'; 'Date' holds only the calendar day.
df_lwp_cr = pd.DataFrame(lwp_data, columns=['Datetime', 'LWP'])
# pd.to_datetime keeps the .dt accessor usable even when lwp_data is empty
df_lwp_cr['Date'] = pd.to_datetime(df_lwp_cr['Datetime']).dt.date

# Group by date and find the minimum LWP value for each day.
# Only 'LWP' is aggregated: a per-day minimum of 'Datetime' would be the first
# sample time of the day, not the time at which the minimum LWP occurred.
df_min_lwp_cr = df_lwp_cr.groupby('Date')['LWP'].min().reset_index()

In [None]:
# Print the per-day Cloud Radar minimum LWP table, then the smallest value across all days
print(df_min_lwp_cr)
min_lwp_cr = df_min_lwp_cr['LWP'].min()
print(f"Minimum Min_LWP value: {min_lwp_cr} g/m^2")

In [None]:

# Overlay the daily minimum LWP from the MWR and from the Cloud Radar on one axis
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df_min_lwp_mwr['Date'],
    df_min_lwp_mwr['Min_LWP'],
    marker='o',
    linestyle='-',
    markersize=4,
    alpha=0.7,
    label='MWR Min LWP',
)
ax.plot(
    df_min_lwp_cr['Date'],
    df_min_lwp_cr['LWP'],
    marker='s',
    linestyle='-',
    markersize=4,
    alpha=0.7,
    label='Cloud Radar Min LWP',
)
ax.set_xlabel('Date')
ax.set_ylabel('Min_LWP (g/m^2)')
ax.set_title('Min_LWP over Dates for March, April, May, and June 2024')
ax.grid(True)
plt.xticks(rotation=45)  # slanted date labels
ax.legend()
fig.tight_layout()
# Write the figure next to the Cloud Radar data, then display it
plot_save_path = os.path.join(base_cr, 'Min_LWP_MWR_and_CloudRadar.png')
fig.savefig(plot_save_path)
plt.show()

### Explore BLH

In [None]:
month = '2024-05'  # Month folder to look in (May 2024)
day = '2024-05-03'  # Day folder inside that month

# Construct the directory path for the specific date
day_folder_path = os.path.join(base_dir, month, day)

# Path of the BLH file once found; stays None when nothing matches
file_path = None

# Search for the first file ending with 'blh.nc' (case-insensitive) in the day folder.
# A missing folder is reported like "no file found" instead of crashing in os.listdir.
if os.path.isdir(day_folder_path):
    for file_name in os.listdir(day_folder_path):
        if file_name.lower().endswith('blh.nc'):
            file_path = os.path.join(day_folder_path, file_name)
            break

if file_path is not None:
    # The context manager closes the NetCDF file even if printing raises
    with Dataset(file_path, 'r') as dataset:
        # Print file information
        print(f"File: {file_path}")

        # Print name, dimensions, shape, dtype and units of every variable
        print("\nVariables in the dataset:")
        for var_name, variable in dataset.variables.items():
            units = variable.units if 'units' in variable.ncattrs() else 'N/A'
            print(f"Variable Name: {var_name}")
            print(f"   Dimensions: {variable.dimensions}")
            print(f"   Shape: {variable.shape}")
            print(f"   Data Type: {variable.dtype}")
            print(f"   Units: {units}")
            print()

        # Print the global attributes of the dataset
        print("Attributes of the dataset:")
        for attr_name in dataset.ncattrs():
            print(f"   {attr_name}: {getattr(dataset, attr_name)}")

        # Print the values of the 'BLH' variable when the file has one
        if 'BLH' in dataset.variables:
            blh_data = dataset.variables['BLH'][:]
            print(f"\nData for 'BLH' variable:\n{blh_data}")
else:
    print(f"No file ending with 'blh.nc' was found in {day_folder_path}.")


In [None]:
# Fail with a clear message when the search for the BLH file found nothing
if file_path is None:
    raise FileNotFoundError(f"No BLH file was found in {day_folder_path}.")

# Load BLH data and time variable; the file is closed as soon as both are read
with Dataset(file_path, 'r') as dataset:
    blh_data = dataset.variables['BLH'][:]
    time_data = dataset.variables['time'][:]

# Convert time data to datetime (assuming time is in seconds since 2001-01-01 00:00:00)
base_time = pd.Timestamp('2001-01-01 00:00:00')
time_data = pd.to_timedelta(time_data, unit='s') + base_time


# Create a DataFrame with one row per BLH sample
df_blh = pd.DataFrame({'Timestamp': time_data, 'BLH': blh_data})

# Print the DataFrame
print(df_blh)


In [None]:
# Plot BLH vs. Time for the selected day
plt.figure(figsize=(12, 6))
plt.plot(df_blh['Timestamp'], df_blh['BLH'], marker='o', linestyle='-')
plt.xlabel('Time')
plt.ylabel('BLH (m)')
# Take the date from `day` so the title always matches the folder that was read
plt.title(f'BLH vs Time on {day}')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()

# Save the plot as a PNG file (uncomment if needed)
#plot_save_path = f'BLH_vs_Time_{day}.png'
#plt.savefig(plot_save_path, dpi=300, format='png')  # Save with 300 DPI for high quality
plt.show()