### What this script does

- Loads LWP time series from Microwave Radiometer (MWR) and Cloud Radar (CR) Parquet files, plus daily Min LWP (MWR).

- Merges the daily Min LWP into the MWR time series and computes a simple offset correction for MWR LWP (offset = Min LWP − (−5 g/m²), i.e. relative to a −5 g/m² baseline).

- Resamples both datasets to 1-minute means and merges on timestamps.

- Produces comparison plots:

    - Min LWP (MWR) vs date

    - LWP time series (MWR vs CR)

    - Zoomed scatter (0–300 g/m²)

    - Scatter with metrics (Pearson r, RMSE, mean Δ, σ)

- (Optionally) saves corrected MWR LWP to Corrected_LWP_Data.parquet.

#### Edit before running
1) Base folders (replace with your local paths):

- base_dir_mwr = r"C:\path\to\your\Microwave_radiometer"

- base_cr      = r"C:\path\to\your\Cloud_radar"

2) Expected Parquet filenames (change only if your files are named differently)

- parquet_file_mwr     = os.path.join(base_dir_mwr, 'LWP_Data.parquet')
- parquet_file_min_mwr = os.path.join(base_dir_mwr, 'Min_LWP_Data.parquet')
- parquet_file_cr      = os.path.join(base_cr,      'CR_LWP_Data.parquet')

3) Offset rule (edit if your Min LWP baseline is not -5 g/m²)

```python
def calculate_offset(min_lwp_mwr):
    if min_lwp_mwr != -5:
        return min_lwp_mwr - (-5)
    return 0
```

4) (Optional) Enable saving corrected MWR LWP:

parquet_save_path = os.path.join(base_dir_mwr, 'Corrected_LWP_Data.parquet')

df_to_save.to_parquet(parquet_save_path, compression='gzip')  # <- uncomment to save

5) (Optional) Resampling cadence (currently 1 minute)

- df_mwr_resampled = df_merged.resample('1T').mean()
- df_cr_resampled  = df_cr_lwp.resample('1T').mean()
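- Change '1T' to '5T' or '10T' if you want coarser aggregations. (The 'T' alias is deprecated since pandas 2.2; the equivalent spellings are '1min', '5min' and '10min'.)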

6) (Optional) Output figure filenames
- plot_save_path            = 'LWP_minimum_mwr.png'
- plot_save_path_timeseries = 'LWP_Comparison_corrected_offset.png'
- plot_save_path_zoomed     = 'LWP_Comparison_Zoomed.png'
- scatter_plot_save_path    = 'LWP_MWR_vs_CR_Scatter.png'


That’s it. After updating the base folders (and filenames if needed), run the script to load, correct, resample, merge, and plot MWR vs CR LWP — plus print correlation metrics.

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import numpy as np



In [None]:
# --- Edit before running ---------------------------------------------------
# 1) Point the two base folders at your local data directories, e.g.
#      base_dir_mwr = r"D:\Thesis\data\Microwave_radiometer"
#      base_cr      = r"D:\Thesis\data\Cloud_radar"
base_dir_mwr = r"C:\path\to\your\Microwave_radiometer"
base_cr = r"C:\path\to\your\Cloud_radar"

# Full paths of the three Parquet inputs (MWR LWP, CR LWP, daily MWR Min LWP).
parquet_file_mwr = os.path.join(base_dir_mwr, 'LWP_Data.parquet')
parquet_file_cr = os.path.join(base_cr, 'CR_LWP_Data.parquet')
parquet_file_min_mwr = os.path.join(base_dir_mwr, 'Min_LWP_Data.parquet')

# Read each file into its own DataFrame (same read order as the paths above).
df_mwr_lwp, df_cr_lwp, df_mwr_lwp_min = (
    pd.read_parquet(parquet_path)
    for parquet_path in (parquet_file_mwr, parquet_file_cr, parquet_file_min_mwr)
)

# Print a short preview of every input as a quick sanity check.
for heading, frame in (
    ("Microwave Radiometer LWP Data:", df_mwr_lwp),
    ("Microwave Radiometer min LWP Data:", df_mwr_lwp_min),
    ("\nCloud Radar LWP Data:", df_cr_lwp),
):
    print(heading)
    print(frame.head())


In [None]:
# Parse the MWR timestamps once and derive a calendar-date key from them.
mwr_timestamps = pd.to_datetime(df_mwr_lwp['TIMESTAMP'])
df_mwr_lwp['TIMESTAMP'] = mwr_timestamps
df_mwr_lwp['Date'] = mwr_timestamps.dt.date

# Bring the Min-LWP table's key to the same type (datetime.date) so the
# join keys compare equal.
df_mwr_lwp_min['Date'] = pd.to_datetime(df_mwr_lwp_min['Date']).dt.date

# Left join keeps every MWR sample; samples whose date has no Min LWP entry
# get NaN in the joined columns. The helper 'Date' key is dropped afterwards
# and the minimum column gets an instrument-specific name.
df_merged = (
    df_mwr_lwp.merge(df_mwr_lwp_min, on='Date', how='left')
    .drop(columns=['Date'])
    .rename(columns={'Min_LWP': 'Min_LWP_MWR'})
)

# Show the merged result.
print("Merged Microwave Radiometer LWP Data with Min LWP:")
print(df_merged)


In [None]:
# Calculate the offset
def calculate_offset(min_lwp_mwr, baseline=-5):
    """Return how far a Min LWP value sits above the expected baseline.

    Args:
        min_lwp_mwr: Min LWP value (g/m²) for one sample.
        baseline: Min LWP expected from a bias-free instrument. Defaults to
            -5, the value this script has always used; pass another value
            instead of editing the function body.

    Returns:
        ``min_lwp_mwr - baseline``. This is 0 when the minimum equals the
        baseline, and NaN propagates unchanged when the minimum is missing.
    """
    # The former `!= -5` branch was redundant: the difference is already 0
    # when the minimum equals the baseline.
    return min_lwp_mwr - baseline

# Per-sample offset derived from that sample's Min LWP value.
offset_per_sample = df_merged['Min_LWP_MWR'].map(calculate_offset)
df_merged['Offset'] = offset_per_sample

# Corrected LWP = measured LWP with the offset removed.
df_merged['LWP_Corrected'] = df_merged['LWP'].sub(df_merged['Offset'])

print(df_merged)

In [None]:


# Destination of the (optional) corrected-LWP export.
parquet_save_path = os.path.join(base_dir_mwr, 'Corrected_LWP_Data.parquet')

# Only the timestamp and the offset-corrected LWP are exported.
df_to_save = df_merged[['TIMESTAMP', 'LWP_Corrected']]

# Saving is optional and disabled by default. To enable it, uncomment BOTH
# lines below so that the status message reflects what actually happened.
corrected_lwp_saved = False
#df_to_save.to_parquet(parquet_save_path, compression='gzip')
#corrected_lwp_saved = True

# Report truthfully: the previous version always printed "saved" even though
# nothing was written while the call above was commented out.
if corrected_lwp_saved:
    print(f"DataFrame saved to {parquet_save_path}")
else:
    print(f"Saving is disabled; nothing was written to {parquet_save_path}")


In [None]:
# Resample the corrected Microwave Radiometer data to 1-minute means.

# Make sure TIMESTAMP is datetime; unparseable values become NaT and those
# rows are discarded.
df_merged['TIMESTAMP'] = pd.to_datetime(df_merged['TIMESTAMP'], errors='coerce')
df_merged.dropna(subset=['TIMESTAMP'], inplace=True)

# Same treatment for the corrected LWP: non-numeric values become NaN and
# those rows are discarded.
df_merged['LWP_Corrected'] = pd.to_numeric(df_merged['LWP_Corrected'], errors='coerce')
df_merged.dropna(subset=['LWP_Corrected'], inplace=True)

# resample() needs a datetime index.
df_merged.set_index('TIMESTAMP', inplace=True)

# '1min' is the same 1-minute cadence as the legacy '1T' alias, which is
# deprecated since pandas 2.2. Use '5min' / '10min' for coarser bins.
df_mwr_resampled = df_merged.resample('1min').mean()

# Drop minutes in which any column is NaN (e.g. bins without samples), then
# turn TIMESTAMP back into a regular column for the later merge.
df_mwr_resampled.dropna(inplace=True)
df_mwr_resampled.reset_index(inplace=True)

print(df_mwr_resampled)

In [None]:
# Resample the Cloud Radar data to 1-minute means.

# Parse the timestamps; unparseable values become NaT.
df_cr_lwp['Datetime'] = pd.to_datetime(df_cr_lwp['Datetime'], errors='coerce')

# Coerce LWP to numeric; non-numeric values become NaN.
df_cr_lwp['LWP'] = pd.to_numeric(df_cr_lwp['LWP'], errors='coerce')

# Discard rows with an invalid timestamp OR an invalid LWP. The NaT rows were
# previously kept and ended up in the index; the MWR branch already drops
# them, so both instruments are now cleaned the same way.
df_cr_lwp.dropna(subset=['Datetime', 'LWP'], inplace=True)

# The separate 'Date' column is not needed for the comparison.
df_cr_lwp.drop(columns=['Date'], inplace=True)

# resample() needs a datetime index.
df_cr_lwp.set_index('Datetime', inplace=True)

# 1-minute means, matching the MWR cadence. '1min' is the same cadence as the
# legacy '1T' alias, which is deprecated since pandas 2.2.
df_cr_resampled = df_cr_lwp.resample('1min').mean()

# Drop minutes in which any column is NaN (e.g. bins without samples).
df_cr_resampled.dropna(inplace=True)

# Back to a regular column, renamed so it lines up with the MWR frame.
df_cr_resampled.reset_index(inplace=True)
df_cr_resampled.rename(columns={'Datetime': 'TIMESTAMP', 'LWP': 'LWP_CR'}, inplace=True)

print(df_cr_resampled)

In [None]:
# Keep only the timestamps present in BOTH resampled frames (inner join).
# The suffixes only take effect for column names that occur in both frames.
df_combined_resampled = df_cr_resampled.merge(
    df_mwr_resampled,
    how='inner',
    on='TIMESTAMP',
    suffixes=('_CR', '_MWR'),
)
print(df_combined_resampled)

In [None]:
# Figure: Min LWP (MWR) against time, over the merged time range.
fig, ax = plt.subplots(figsize=(10, 6))

# Note: the plotted column is the Min LWP value, not the LWP series itself.
ax.plot(
    df_combined_resampled['TIMESTAMP'],
    df_combined_resampled['Min_LWP_MWR'],
    color='blue',
    linestyle='-',
    marker='',
    alpha=0.7,
    linewidth=2,
)

# Date axis: day-month labels, one major tick every 6 days, rotated labels.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=6))
fig.autofmt_xdate()

# Axis labels and title (regular weight, enlarged).
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('LWP_min (g/m²)', fontsize=18)
ax.set_title('Minimum LWP (MWR)', fontsize=22)

# No legend: the figure shows a single, unlabelled series.

# Light dashed grid.
ax.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)

# Leave a little head-room above the axes for the title.
fig.tight_layout(rect=[0, 0, 1, 0.96])

# Write a 300-dpi PNG; bbox_inches='tight' prevents clipped labels.
# (To zoom the y-axis, call ax.set_ylim(-6, 0) before saving.)
plot_save_path = 'LWP_minimum_mwr.png'
fig.savefig(plot_save_path, dpi=300, bbox_inches='tight', format='png')

# Display the plot.
plt.show()


In [None]:


# Global font sizes for presentation slides (regular weight). This changes
# matplotlib's rcParams, so it also affects every figure created afterwards.
plt.rcParams.update({
    'font.size': 16,        # default font size
    'axes.titlesize': 20,   # title size
    'axes.labelsize': 18,   # axis-label size
    'legend.fontsize': 16,  # legend font size
    'xtick.labelsize': 14,  # x-tick label size
    'ytick.labelsize': 14,  # y-tick label size
})

# Create the figure
plt.figure(figsize=(10, 6))

# MWR series: plot the OFFSET-CORRECTED LWP. The figure is saved as
# '..._corrected_offset.png' and every other comparison in this script uses
# 'LWP_Corrected'; the raw 'LWP' column was plotted here before.
plt.plot(df_combined_resampled['TIMESTAMP'], df_combined_resampled['LWP_Corrected'],
         label='LWP (Microwave Radiometer)', color='blue', linestyle='-', marker='', alpha=0.7, linewidth=2)

# Cloud Radar series
plt.plot(df_combined_resampled['TIMESTAMP'], df_combined_resampled['LWP_CR'],
         label='LWP (Cloud Radar)', color='red', linestyle='-', marker='', alpha=0.7, linewidth=2)

# Date axis: day-month labels, one major tick every 6 days, rotated labels
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d-%m'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=6))
plt.gcf().autofmt_xdate()

# Labels and title (regular weight, enlarged)
plt.xlabel('Date', fontsize=18)
plt.ylabel('LWP (g/m²)', fontsize=18)
plt.title('LWP Comparison: Microwave Radiometer vs Cloud Radar', fontsize=22)

# Legend
plt.legend(loc='best', fontsize=16)

# Light dashed grid
plt.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)

# Leave a little head-room above the axes for the title
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Write a 300-dpi PNG; bbox_inches='tight' prevents clipped labels
plot_save_path = 'LWP_Comparison_corrected_offset.png'
plt.savefig(plot_save_path, dpi=300, bbox_inches='tight', format='png')

# Display the plot
plt.show()


In [None]:
# Figure: both LWP series as point clouds, with the y-axis zoomed in.
fig, ax = plt.subplots(figsize=(12, 6))

timestamps = df_combined_resampled['TIMESTAMP']

# Corrected MWR LWP; s=0.1 gives very small markers.
ax.scatter(
    timestamps,
    df_combined_resampled['LWP_Corrected'],
    label='LWP (Microwave Radiometer)',
    color='blue',
    alpha=0.7,
    s=0.1,
)

# Cloud Radar LWP, drawn with the same marker size.
ax.scatter(
    timestamps,
    df_combined_resampled['LWP_CR'],
    label='LWP (Cloud Radar)',
    color='red',
    alpha=0.7,
    s=0.1,
)

# Date axis: day-month labels, one major tick every 6 days, rotated labels.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=6))
fig.autofmt_xdate()

# Zoom: show -10 to 300 on the y-axis (slightly below zero is kept visible).
ax.set_ylim(-10, 300)

# Axis labels and title (regular weight, enlarged).
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('LWP (g/m²)', fontsize=18)
ax.set_title('LWP Comparison: Microwave Radiometer vs Cloud Radar (0-300 g/m²)', fontsize=22)

# Legend.
ax.legend(loc='best', fontsize=16)

# Light dashed grid.
ax.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)

# Leave a little head-room above the axes for the title.
fig.tight_layout(rect=[0, 0, 1, 0.96])

# Write a 300-dpi PNG; bbox_inches='tight' prevents clipped labels.
plot_save_path_zoomed = 'LWP_Comparison_Zoomed.png'
fig.savefig(plot_save_path_zoomed, dpi=300, bbox_inches='tight', format='png')

# Display the zoomed-in scatter plot.
plt.show()

In [None]:
# Paired series used throughout this cell.
lwp_cr = df_combined_resampled['LWP_CR']
lwp_mwr = df_combined_resampled['LWP_Corrected']

# Agreement metrics: Pearson correlation and root-mean-square error.
correlation_coefficient, _ = pearsonr(lwp_cr, lwp_mwr)
rmse = np.sqrt(mean_squared_error(lwp_mwr, lwp_cr))

# Figure: MWR (y) against Cloud Radar (x).
fig, ax = plt.subplots(figsize=(12, 6))

# Points are coloured by the corrected MWR LWP (the y value).
sc = ax.scatter(lwp_cr, lwp_mwr, c=lwp_mwr, cmap='viridis', alpha=0.7, s=10)

# Colour bar for the point colours.
cbar = fig.colorbar(sc, ax=ax)
cbar.set_label('LWP (g/m²)', fontsize=16)

# 1:1 reference line from the origin to the largest value of either series.
max_value = max(lwp_mwr.max(), lwp_cr.max())
ax.plot([0, max_value], [0, max_value], linestyle='--', color='black', label='y = x (Reference)')

# Axis labels and title (regular weight, enlarged).
ax.set_xlabel('LWP (Cloud Radar) (g/m²)', fontsize=18)
ax.set_ylabel('LWP (MWR) (g/m²)', fontsize=18)
ax.set_title('LWP Comparison: Microwave Radiometer vs Cloud Radar', fontsize=22)

# Legend.
ax.legend(loc='lower right', fontsize=16)

# Explicit axis limits (an equal aspect ratio is deliberately not enforced).
ax.set_xlim(-100, max_value)
ax.set_ylim(0, max_value)

# Light dashed grid.
ax.grid(True, linestyle='--', linewidth=0.6, alpha=0.7)

# Metrics annotation in axes coordinates (top-left corner).
ax.text(0.05, 0.95, f'Correlation Coefficient: {correlation_coefficient:.2f}',
        fontsize=14, transform=ax.transAxes)
ax.text(0.05, 0.90, f'RMSE: {rmse:.2f} g/m²',
        fontsize=14, transform=ax.transAxes)

# Leave a little head-room above the axes for the title.
fig.tight_layout(rect=[0, 0, 1, 0.96])

# Write a 300-dpi PNG; bbox_inches='tight' prevents clipped labels.
scatter_plot_save_path = 'LWP_MWR_vs_CR_Scatter.png'
fig.savefig(scatter_plot_save_path, dpi=300, bbox_inches='tight', format='png')

# Display the scatter plot.
plt.show()

In [None]:

# The "all data" and "< 300 g/m²" analyses run the same metric and plotting
# code; it lives in the two private helpers below instead of being written
# out twice.


def _agreement_metrics(cr_values, mwr_values):
    """Return ``(pearson_r, rmse, mean_diff, sigma)`` for paired LWP arrays.

    Args:
        cr_values: Cloud Radar LWP values (NumPy array).
        mwr_values: Corrected MWR LWP values, paired element-wise with
            ``cr_values``.

    Returns:
        Pearson correlation coefficient, root-mean-square error, mean of
        (MWR - CR) and the population standard deviation (ddof=0) of
        (MWR - CR).
    """
    pearson_r, _ = pearsonr(cr_values, mwr_values)
    rmse_value = np.sqrt(mean_squared_error(mwr_values, cr_values))
    delta = mwr_values - cr_values
    return pearson_r, rmse_value, np.mean(delta), np.std(delta, ddof=0)


def _scatter_mwr_vs_cr(frame, metrics, title, axis_max, x_min, save_path=None):
    """Draw and show the MWR-vs-CR scatter plot with a metrics annotation.

    Args:
        frame: DataFrame with 'LWP_CR' and 'LWP_Corrected' columns.
        metrics: ``(pearson_r, rmse, mean_diff, sigma)`` to annotate.
        title: Axes title.
        axis_max: Upper limit of both axes and end point of the y = x line.
        x_min: Lower limit of the x-axis (the y-axis always starts at -10).
        save_path: Optional output file. Nothing is written when None, which
            is the default. Avoid characters such as '<' in the file name:
            they are not allowed in Windows file names.

    Returns:
        The ``(fig, ax)`` pair of the created figure.
    """
    pearson_r, rmse_value, mean_delta, sigma = metrics

    fig, ax = plt.subplots(figsize=(10, 6))

    # Points are coloured by the corrected MWR LWP (the y value).
    points = ax.scatter(
        frame['LWP_CR'],
        frame['LWP_Corrected'],
        c=frame['LWP_Corrected'],
        cmap='jet',
        alpha=1,
        s=3
    )
    colorbar = plt.colorbar(points, ax=ax)
    colorbar.set_label('LWP (MWR) [g/m²]', fontsize=14)

    # Reference line y = x
    ax.plot([0, axis_max], [0, axis_max],
            linestyle='--', color='black', linewidth=1.5,
            label='y = x (Reference)')

    # Labels and title
    ax.set_xlabel('LWP (Cloud Radar) [g/m²]', fontsize=16, fontweight='bold')
    ax.set_ylabel('LWP (MWR) [g/m²]', fontsize=16, fontweight='bold')
    ax.set_title(title, fontsize=20, fontweight='bold')

    # Metrics annotation, one line per metric, in axes coordinates.
    annotation_rows = (
        (0.95, f"Pearson r = {pearson_r:.2f}"),
        (0.90, f"RMSE = {rmse_value:.2f} g/m²"),
        (0.85, f"Mean Δ = {mean_delta:.2f} g/m²"),
        (0.80, f"σ = {sigma:.2f} g/m²"),
    )
    for y_position, text in annotation_rows:
        ax.text(0.02, y_position, text, transform=ax.transAxes, fontsize=14)

    # Thicker frame
    for spine in ax.spines.values():
        spine.set_linewidth(1.5)

    # Ticks
    ax.tick_params(axis='both', which='major', labelsize=14, width=1.5, length=6)
    ax.tick_params(axis='both', which='minor', width=1.0, length=4)

    # Grid
    ax.grid(True, which='major', linestyle='--', linewidth=0.8, alpha=0.7)
    ax.grid(True, which='minor', linestyle=':', linewidth=0.5, alpha=0.5)

    # Legend
    legend_box = ax.legend(loc='lower right', fontsize=12, title='Reference',
                           title_fontsize=12, frameon=True)
    legend_box.get_frame().set_linewidth(1.5)

    # Axes limits
    ax.set_xlim(left=x_min, right=axis_max)
    ax.set_ylim(bottom=-10, top=axis_max)

    fig.tight_layout()

    # Optional export; skipped unless a path was supplied.
    if save_path is not None:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Scatter saved to: {save_path}")

    plt.show()
    return fig, ax


# 1) Compute metrics on the full dataset
cr_all = df_combined_resampled['LWP_CR'].values
mwr_all = df_combined_resampled['LWP_Corrected'].values

r_all, rmse_all, mean_diff_all, sigma_all = _agreement_metrics(cr_all, mwr_all)

print(f"[All data] Pearson r = {r_all:.3f}, RMSE = {rmse_all:.3f}, Mean Δ = {mean_diff_all:.3f}, σ = {sigma_all:.3f} g/m²")


# 2) Scatter plot for all data
max_val_all = max(mwr_all.max(), cr_all.max())

# To also save the figure, pass e.g. save_path='LWP_MWR_vs_CR_Scatter_All.png'.
fig, ax = _scatter_mwr_vs_cr(
    df_combined_resampled,
    (r_all, rmse_all, mean_diff_all, sigma_all),
    'LWP Comparison: MWR vs Cloud Radar (All Data)',
    axis_max=max_val_all,
    x_min=-50,
)


# 3) Filter to LWP < 300 g/m² (both instruments) and recompute metrics
mask_lt300 = (df_combined_resampled['LWP_CR'] < 300) & (df_combined_resampled['LWP_Corrected'] < 300)
df_lt300 = df_combined_resampled[mask_lt300]

cr_lt = df_lt300['LWP_CR'].values
mwr_lt = df_lt300['LWP_Corrected'].values

r_lt, rmse_lt, mean_diff_lt, sigma_lt = _agreement_metrics(cr_lt, mwr_lt)

print(f"[<300] Pearson r = {r_lt:.3f}, RMSE = {rmse_lt:.3f}, Mean Δ = {mean_diff_lt:.3f}, σ = {sigma_lt:.3f} g/m²")


# 4) Scatter plot for filtered data
max_val_lt = 300  # since filtered

# To also save the figure, pass e.g. save_path='LWP_MWR_vs_CR_Scatter_lt300.png'.
fig, ax = _scatter_mwr_vs_cr(
    df_lt300,
    (r_lt, rmse_lt, mean_diff_lt, sigma_lt),
    'LWP Comparison (<300 g/m²)',
    axis_max=max_val_lt,
    x_min=-10,
)



