### What this script does
- Loads corrected LWP parquet and reads all radiometer .dat files (SWR↓=SR15D1Dn_Irr, LWR↓=IR20Dn, LWR↑=IR20Up) for Mar–Jun.

- Computes solar position/zenith with pvlib for each timestamp.

- Aggregates and saves combined SWR and LWR parquet files.

- Merges radiation with solar geometry and LWP_Corrected, keeps only samples with SWR↓ > 10 W/m², and makes scatter plots:

  - SWR↓ vs solar zenith (colored by LWP_Corrected; several styles).

- Computes clear-sky GHI (Ineichen model), plots measured SWR vs clear-sky GHI vs zenith, and derives Clear-Sky Index (CSI).

#### Edit before running
1) Path to corrected LWP parquet (MWR):
- base_dir_mwr = r"C:\path\to\your\Microwave_radiometer"
- filename_lwp_corrected = "Corrected_LWP_Data.parquet"
- parquet_path_lwp_corrected = os.path.join(base_dir_mwr, filename_lwp_corrected)

2) Base folder with daily radiometer .dat files

   base_folder = r"C:\path\to\your\radiometer"

3) Months you want to scan: 
     - for month_folder in ['2024-03', '2024-04', '2024-05', '2024-06']:

       ...

4) Site coordinates (used by pvlib for solar geometry / clear-sky GHI)
- latitude  = 52.6324
- longitude = 4.7534
- altitude  = 0

5) Column names in your .dat files (change if your logger uses different labels)
- swr_column   = 'SR15D1Dn_Irr'  # Shortwave down (W/m^2)
- lwr_column   = 'IR20Dn'        # Longwave down (W/m^2)
- lwr_up_column = 'IR20Up'       # Longwave up (W/m^2)

6) Output parquet filenames (saved under base_folder)
- parquet_file_swr = os.path.join(base_folder, 'Combined_SWR_Data.parquet')
- parquet_file_lwr = os.path.join(base_folder, 'Combined_LWR_Data.parquet')

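7) Plot/filter settings
- SWR threshold: only samples with SR15D1Dn_Irr > 10 W/m^2 are kept for the plots
- LWP range and colour bins: LWP_Corrected between 0 and 50 g/m², binned with bin_size = 5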


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime

!pip install pvlib

from matplotlib.dates import DateFormatter
from datetime import time
import matplotlib.colors as mcolors


In [None]:
import pvlib


In [None]:
#Edit before running!!
# 1) Folder that holds the corrected LWP parquet (MWR) and the file's name
# Example folder: r"D:\Thesis\data\Microwave_radiometer"
filename_lwp_corrected = 'Corrected_LWP_Data.parquet'
base_dir_mwr = r"C:\path\to\your\Microwave_radiometer"

# Full path of the parquet file = folder + file name
parquet_path_lwp_corrected = os.path.join(base_dir_mwr, filename_lwp_corrected)

# Read the corrected LWP table and preview its first rows as a sanity check
df_lwp_corrected = pd.read_parquet(parquet_path_lwp_corrected)
print("Loaded Corrected LWP Data:", df_lwp_corrected.head(), sep="\n")



In [None]:
# Show the corrected LWP table (pandas prints a truncated view for long frames)
print(df_lwp_corrected)


In [None]:
#Edit before running!!
## 2) Base folder with daily radiometer .dat files, laid out as
##    <base_folder>/<YYYY-MM>/<day folder>/*.dat
# Example: r"D:\Thesis\data\radiometer"
base_folder = r"C:\path\to\your\radiometer"

# Month sub-folders to scan (add '2024-07' etc. to extend the period)
month_folders = ['2024-03', '2024-04', '2024-05', '2024-06']

# Location information for Alkmaar, Netherlands
latitude = 52.6324  # degrees North
longitude = 4.7534  # degrees East
altitude = 0  # altitude above sea level in meters (assuming ground level)

# Column names in the logger files (adjust if your logger uses other labels)
swr_column = 'SR15D1Dn_Irr'  # short-wave down radiation
lwr_column = 'IR20Dn'  # long-wave down radiation
lwr_up_column = 'IR20Up'  # long-wave up radiation

# Per-file pieces, appended in the same order so that the three combined
# tables built below stay aligned row by row
all_data_swr_dn = []
all_data_lwr_dn = []
all_solar_angles = []

for month_folder in month_folders:
    month_folder_path = os.path.join(base_folder, month_folder)

    # Missing months are skipped, but say so instead of failing silently
    if not os.path.isdir(month_folder_path):
        print(f"Skipping missing month folder: {month_folder_path}")
        continue

    # Sorted so the row order does not depend on the file system
    for day_folder in sorted(os.listdir(month_folder_path)):
        day_folder_path = os.path.join(month_folder_path, day_folder)

        # A stray file next to the day folders would make os.listdir() raise
        if not os.path.isdir(day_folder_path):
            continue

        data_files_solar = sorted(
            file for file in os.listdir(day_folder_path) if file.endswith('.dat')
        )

        for file in data_files_solar:
            file_path = os.path.join(day_folder_path, file)

            # skiprows=1: the first line is skipped and the second line
            # supplies the column names
            data = pd.read_csv(file_path, skiprows=1, delimiter=',', encoding='latin1')

            # Rows whose TIMESTAMP cannot be parsed become NaT and are dropped
            data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'], errors='coerce')
            data.dropna(subset=['TIMESTAMP'], inplace=True)

            # Nothing usable in this file: skip it rather than hand pvlib an
            # empty time axis
            if data.empty:
                continue

            # Non-numeric readings become NaN
            data[swr_column] = pd.to_numeric(data[swr_column], errors='coerce')
            data[lwr_column] = pd.to_numeric(data[lwr_column], errors='coerce')
            data[lwr_up_column] = pd.to_numeric(data[lwr_up_column], errors='coerce')

            # Solar position (zenith, apparent zenith, ...) per timestamp.
            # NOTE(review): pvlib assumes UTC for timestamps without a time
            # zone - confirm the logger clock is in UTC.
            times = data['TIMESTAMP']
            solar_position = pvlib.solarposition.get_solarposition(times, latitude, longitude, altitude)

            all_data_swr_dn.append(data[['TIMESTAMP', swr_column]])
            all_data_lwr_dn.append(data[['TIMESTAMP', lwr_column, lwr_up_column]])
            all_solar_angles.append(solar_position)

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not all_data_swr_dn:
    raise ValueError(
        f"No usable .dat files found under {base_folder} for months {month_folders}"
    )

# Combine data from all files. ignore_index=True gives every table the same
# 0..n-1 index, so row i of the solar-angle table belongs to row i of the SWR
# and LWR tables; later cells rely on this positional alignment.
all_data_combined_swr = pd.concat(all_data_swr_dn, ignore_index=True)
all_data_combined_lwr = pd.concat(all_data_lwr_dn, ignore_index=True)
all_solar_angles_df = pd.concat(all_solar_angles, ignore_index=True)


In [None]:
# Output locations for the combined tables (stored inside base_folder)
parquet_file_swr = os.path.join(base_folder, 'Combined_SWR_Data.parquet')
parquet_file_lwr = os.path.join(base_folder, 'Combined_LWR_Data.parquet')

# Write the short-wave table as gzip-compressed parquet
all_data_combined_swr.to_parquet(path=parquet_file_swr, compression='gzip')
print(f"Combined SWR DataFrame saved to {parquet_file_swr}")

# Write the long-wave table the same way
all_data_combined_lwr.to_parquet(path=parquet_file_lwr, compression='gzip')
print(f"Combined LWR DataFrame saved to {parquet_file_lwr}")

In [None]:
# Show the combined solar-position table (one row per radiometer sample)
print(all_solar_angles_df)

In [None]:
# Keep only samples with a usable short-wave signal (> 10 W/m^2). The
# comparison is False for NaN, so missing readings are dropped as well.
filtered_data = all_data_combined_swr[all_data_combined_swr['SR15D1Dn_Irr'] > 10]
# The solar-angle table shares the 0..n-1 index of the SWR table, so the
# surviving index labels select the matching solar positions.
filtered_angles = all_solar_angles_df.loc[filtered_data.index]

# Scatter of measured SWR against apparent zenith for the filtered samples
plt.figure(figsize=(10, 6))

# 'ko' = black circles; tiny, nearly transparent markers keep dense regions readable
plt.plot(filtered_angles['apparent_zenith'], filtered_data['SR15D1Dn_Irr'], 'ko', markersize=0.1, alpha=0.1)

plt.xlabel('Apparent Solar Zenith Angle (degrees)')
plt.ylabel('Shortwave Down Radiation (W/m^2)')
plt.title('Shortwave Down Radiation vs Solar Zenith Angle in Alkmaar, Netherlands (March-June 2024)')
# Optional: save the figure as a 300 dpi PNG
#output_file = 'shortwave_radiation_vs_zenith_angle.png'
#plt.savefig(output_file, format='png', dpi=300)

# Display the figure (a single call is enough)
plt.show()

In [None]:
# Attach the apparent zenith angle to the filtered SWR rows. Both inputs carry
# the same index labels, so the column-wise concatenation lines up row by row.
merged_df = pd.concat(
    (filtered_data, filtered_angles['apparent_zenith']),
    axis='columns',
)

# Show the result under a heading
print("\nMerged DataFrame:", merged_df, sep="\n")

In [None]:
# Both join keys must be real datetimes, otherwise equal instants would not match
merged_df['TIMESTAMP'] = pd.to_datetime(merged_df['TIMESTAMP'])
df_lwp_corrected['TIMESTAMP'] = pd.to_datetime(df_lwp_corrected['TIMESTAMP'])

# Inner join: keep only timestamps present in both the radiation and LWP tables
merged_final_df = merged_df.merge(df_lwp_corrected, how='inner', on='TIMESTAMP')

# Preview the joined table under a heading
print("\nFinal Merged DataFrame:", merged_final_df.head(), sep="\n")

In [None]:
# Restrict to the LWP range of interest (0-50). The explicit copy makes
# filtered_df an independent frame, so adding the LWP_Range column below does
# not trigger pandas' SettingWithCopyWarning or depend on view-vs-copy rules.
filtered_df = merged_final_df[
    (merged_final_df['LWP_Corrected'] >= 0) & (merged_final_df['LWP_Corrected'] <= 50)
].copy()

# LWP bin edges (0, 5, ..., 50) and one 'low-high' label per bin
bin_size = 5
lwp_bins = np.arange(0, 51, bin_size)
lwp_labels = [f'{b}-{b + bin_size}' for b in lwp_bins[:-1]]

# Label every row with its LWP bin; include_lowest=True keeps LWP == 0 in the first bin
filtered_df['LWP_Range'] = pd.cut(filtered_df['LWP_Corrected'], bins=lwp_bins, labels=lwp_labels, include_lowest=True)

# Discrete colour mapping: one colour band of the colormap per LWP bin
cmap = plt.get_cmap('viridis')
norm = mcolors.BoundaryNorm(boundaries=lwp_bins, ncolors=cmap.N)

# Plotting
plt.figure(figsize=(12, 8))

scatter = plt.scatter(
    filtered_df['apparent_zenith'],
    filtered_df['SR15D1Dn_Irr'],
    c=filtered_df['LWP_Corrected'],
    cmap=cmap,
    norm=norm,
    s=0.5,
    alpha=0.7
)

# Colorbar with one segment per bin
cbar = plt.colorbar(scatter, boundaries=lwp_bins, ticks=lwp_bins, format='%1.1f')
cbar.set_label('LWP Corrected (g/m²)')

# Put one tick in the middle of each bin and label it with the bin's range
cbar.set_ticks(lwp_bins[:-1] + bin_size / 2)
cbar.set_ticklabels(lwp_labels)

# Labels and title
plt.xlabel('Apparent Zenith (degrees)')
plt.ylabel('Radiation (SR15D1Dn_Irr)')
plt.title('Radiation vs Apparent Zenith with LWP Color Mapping')

# Show grid
plt.grid(True)

# Show plot
plt.tight_layout()
plt.show()


In [None]:
# Show the LWP-filtered table used for the colour-mapped plots
print(filtered_df)

In [None]:
# SWR vs apparent zenith where both colour and opacity follow LWP: each point's
# alpha is its LWP divided by the largest LWP in filtered_df.
fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(
    filtered_df['apparent_zenith'],
    filtered_df['SR15D1Dn_Irr'],
    c=filtered_df['LWP_Corrected'],
    cmap='viridis',
    alpha=filtered_df['LWP_Corrected'] / filtered_df['LWP_Corrected'].max(),
    s=0.5,
)

# Colour scale for LWP
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('LWP Corrected (g/m²)')

# Axis labels and title
ax.set_xlabel('Apparent Zenith (degrees)')
ax.set_ylabel('Radiation (SR15D1Dn_Irr)')
ax.set_title('Radiation vs Apparent Zenith with Opacity-Based LWP Mapping')

fig.tight_layout()
plt.show()


In [None]:
# SWR vs apparent zenith, coloured by LWP on a continuous viridis scale
fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(
    filtered_df['apparent_zenith'],
    filtered_df['SR15D1Dn_Irr'],
    c=filtered_df['LWP_Corrected'],
    s=0.5,
    alpha=0.7,
    cmap='viridis',
)

# Colour scale for LWP
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('LWP Corrected (g/m²)')

# Axis labels and title
ax.set_xlabel('Apparent Zenith (degrees)')
ax.set_ylabel('Radiation (SR15D1Dn_Irr)')
ax.set_title('Radiation vs Apparent Zenith with Continuous LWP Color Mapping')

# Render the figure
fig.tight_layout()
plt.show()


In [None]:
# Disabled plot of the unfiltered SWR data vs zenith: the code below is wrapped
# in a bare triple-quoted string, so it is not executed.
'''
# Plot shortwave down radiation vs solar zenith angle

plt.figure(figsize=(10, 6))

# Adjust marker size (s), color (c), and transparency (alpha)
plt.scatter(all_solar_angles_df['apparent_zenith'], all_data_combined_swr['SR15D1Dn_Irr'], marker='o', s=0.1, c='black', alpha=0.1)

plt.xlabel('Apparent Solar Zenith Angle (degrees)')
plt.ylabel('Shortwave Down Radiation (W/m^2)')
plt.title('Shortwave Down Radiation vs Solar Zenith Angle in Alkmaar, Netherlands (April-May 2024)')
#plt.grid(True)  # Enable grid for better visualization of data density
plt.show()
'''

In [None]:
def calculate_clear_sky_sw_down_again(latitude, longitude, timestamps, altitude=None):
    """Return the clear-sky global horizontal irradiance (GHI) for a site.

    Parameters
    ----------
    latitude, longitude : float
        Site coordinates, passed to ``pvlib.location.Location``.
    timestamps : pandas.DatetimeIndex or datetime-like sequence
        Times at which to evaluate the clear-sky model. Anything that is not
        already a ``DatetimeIndex`` (e.g. a Series of datetimes) is converted
        to one first.
    altitude : float, optional
        Site altitude, forwarded to ``pvlib.location.Location`` when given.
        When omitted, pvlib's own default is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        Columns ``'TIMESTAMP'`` and ``'ghi'`` on a default 0..n-1 index, one
        row per input timestamp in input order.
    """
    # Location.get_clearsky needs a DatetimeIndex; accept looser inputs too
    if not isinstance(timestamps, pd.DatetimeIndex):
        timestamps = pd.DatetimeIndex(timestamps)

    # Only pass altitude when the caller supplied one, so existing calls keep
    # pvlib's default handling
    location_kwargs = {} if altitude is None else {'altitude': altitude}
    location = pvlib.location.Location(latitude, longitude, **location_kwargs)

    # Clear-sky irradiance from the Ineichen model; only GHI is kept
    clearsky = location.get_clearsky(timestamps, model='ineichen')
    ghi = clearsky['ghi']

    # Flatten to plain columns so callers can select rows by position
    return pd.DataFrame({
        'TIMESTAMP': ghi.index,
        'ghi': ghi.values,
    })


In [None]:
# Site coordinates of Alkmaar, Netherlands (latitude, longitude)
latitude, longitude = 52.6324, 4.7534

# Time axis of the combined SWR table as a DatetimeIndex (unparseable values
# would become NaT because of errors='coerce')
timestamps = pd.DatetimeIndex(
    pd.to_datetime(all_data_combined_swr['TIMESTAMP'], errors='coerce')
)

# Clear-sky GHI for every SWR sample
clear_sky_df = calculate_clear_sky_sw_down_again(latitude, longitude, timestamps)

In [None]:
# Build a pvlib Location from latitude/longitude only (pvlib's defaults apply
# to everything else) and compute the solar position for every timestamp in
# `timestamps`.
location = pvlib.location.Location(latitude, longitude)
solar_position = location.get_solarposition(timestamps)
# Alternative functional call, kept for reference:
#solar_position=pvlib.solarposition.get_solarposition(timestamps, latitude, longitude)

In [None]:
# Modelled clear-sky GHI against the solar zenith angle, one small marker per sample
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(solar_position['zenith'], clear_sky_df['ghi'], 'o', markersize=1)
ax.set_xlabel('Solar Zenith Angle (degrees)')
ax.set_ylabel('Clear-Sky GHI (W/m^2)')
ax.set_title('Clear-Sky GHI vs. Solar Zenith Angle')
ax.grid(True)
plt.show()

In [None]:
# Recompute the solar position, this time only for the rows that passed the
# SWR > 10 filter (filtered_data). No filtering happens in this cell; it
# overwrites the solar_position computed for all timestamps.
solar_position = pvlib.solarposition.get_solarposition(filtered_data['TIMESTAMP'], latitude, longitude)



In [None]:
# Show the solar-position table computed for the filtered rows
print(solar_position)

In [None]:
# Compare measured short-wave down radiation with the modelled clear-sky GHI,
# both plotted against the apparent solar zenith angle of the same rows.
plt.figure(figsize=(10, 6))

# Measured SW_dn: tiny, nearly transparent black dots
plt.plot(merged_df['apparent_zenith'], merged_df['SR15D1Dn_Irr'], 'ko', markersize=0.1, alpha=0.1, label='Measured SW_dn')

# Clear-sky GHI: red dots drawn on top (zorder=5). clear_sky_df uses the same
# 0..n-1 row index as the combined SWR table, so merged_df.index selects the
# matching clear-sky rows.
plt.plot(merged_df['apparent_zenith'], clear_sky_df.loc[merged_df.index, 'ghi'], 'ro', markersize=0.1, alpha=0.5, label='Clear-Sky GHI', zorder=5)
plt.xlabel('Solar Zenith Angle (degrees)')
plt.ylabel('Shortwave Down Radiation (W/m^2)')
# The loaded months are March to June (July is not scanned), so label the
# period accordingly
plt.title('Shortwave Down Radiation vs Solar Zenith Angle in Alkmaar, Netherlands (March-June 2024)')
#plt.legend()
plt.grid(True)
# Optional: save the figure as a 300 dpi PNG
#output_file = 'shortwave_radiation_vs_zenith_angle_vs_csmodel.png'
#plt.savefig(output_file, format='png', dpi=300)
plt.show()


In [None]:

# Add the modelled clear-sky GHI to merged_df. clear_sky_df uses the same
# 0..n-1 row index as the combined SWR table, so merged_df.index selects the
# matching clear-sky rows.
merged_df['ghi'] = clear_sky_df.loc[merged_df.index, 'ghi']

# Clear-Sky Index = measured SWR / clear-sky GHI. Where the modelled GHI is
# not positive the ratio is undefined, so store NaN there instead of the
# +/-inf a plain division would produce.
merged_df['CSI'] = (merged_df['SR15D1Dn_Irr'] / merged_df['ghi']).where(merged_df['ghi'] > 0)

# Check the first few rows to confirm the new columns
print(merged_df[['TIMESTAMP', 'SR15D1Dn_Irr', 'ghi', 'CSI']].head())

In [None]:
# Inner-join the clear-sky results onto the LWP-filtered rows by timestamp, so
# only times present in both tables remain.
# NOTE(review): 'SR15D1Dn_Irr' exists in both inputs, so pandas returns it as
# 'SR15D1Dn_Irr_x' / 'SR15D1Dn_Irr_y' in the result - confirm this is intended.
final_merged_df = filtered_df.merge(
    merged_df[['TIMESTAMP', 'SR15D1Dn_Irr', 'ghi', 'CSI']],
    how='inner',
    on='TIMESTAMP',
)

# Preview the joined table
print(final_merged_df.head())