In [None]:
import numpy as np  
import pandas as pd 
import matplotlib.pyplot as plt  
import matplotlib.cm as cm  

# Define a TaylorDiagram class
class TaylorDiagram:
    """Simplified Taylor diagram drawn on a quarter-circle polar axes.

    The radial coordinate is a standard deviation and the angular
    coordinate is ``arccos(correlation)``. Angle zero is placed at the top
    of the axes and angles increase clockwise. The angular limits are
    ``[0, pi/2]`` (correlations from 1 down to 0) and the radial limits are
    ``[0, 1.5 * refstd]``.
    """

    def __init__(self, refstd, fig=None, rect=111, label=''):
        """Create the axes and draw the reference point and its contour.

        Args:
            refstd: Standard deviation of the reference (observed) data.
                It is drawn as a black dot at angle 0 and as a dashed arc
                of constant radius.
            fig: Figure to draw into. A new figure is created with
                ``plt.figure()`` when this is ``None``.
            rect: Subplot specification forwarded to ``fig.add_subplot``.
            label: Legend label for the reference point.
        """
        self.refstd = refstd
        self.fig = fig if fig is not None else plt.figure()
        self.ax = self.fig.add_subplot(rect, polar=True)
        self.ax.set_theta_zero_location('N')
        self.ax.set_theta_direction(-1)

        # Plot reference point and standard deviation contour.
        self.ax.plot([0], [refstd], 'ko', label=label)
        # Size the radius array from the angle array so the two can never
        # get out of step with np.linspace's default sample count.
        arc_angles = np.linspace(0, np.pi/2)
        ref_arc, = self.ax.plot(arc_angles, np.full_like(arc_angles, refstd), 'k--')
        ref_arc.set_dashes([2, 2])

        # Set axis limits.
        self.ax.set_ylim(0, 1.5*refstd)
        self.ax.set_xlim(0, np.pi/2)

        # Angular ticks are labelled with cos(angle), i.e. the correlation
        # coefficient; radial ticks show the standard deviation.
        self.ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{np.cos(x):.2f}'))
        self.ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.2f}'))

        # Labels for the diagram
        self.ax.set_xlabel('Standard Deviation', fontsize=14, fontweight='bold')
        self.ax.set_ylabel('Standard Deviation', fontsize=14, fontweight='bold')

        # Add correlation axis label
        self.add_correlation_axis_label()

    def add_correlation_axis_label(self):
        """Write the text 'Correlation' at angle pi/4 and radius 1.2 * refstd."""
        self.ax.text(np.pi/4, self.refstd * 1.2, 'Correlation', fontsize=14, fontweight='bold', color='black', ha='center', va='center')

    def add_sample(self, stddev, corrcoef, rmse, label=None, **kwargs):
        """Plot one model as a marker at (arccos(corrcoef), stddev).

        Args:
            stddev: Standard deviation of the model (radial coordinate).
            corrcoef: Correlation between model and reference. Values are
                clipped to ``[-1, 1]`` before taking the arc cosine.
            rmse: Accepted for interface compatibility; not used when
                placing the marker.
            label: Legend label for the marker.
            **kwargs: Extra keyword arguments forwarded to ``Axes.plot``
                (for example ``color``).

        Note:
            A negative correlation maps to an angle greater than pi/2,
            which lies beyond the angular limit set in ``__init__``.
        """
        # Round-off can push a correlation marginally outside [-1, 1];
        # arccos would then return NaN and the marker would silently vanish.
        theta = np.arccos(np.clip(corrcoef, -1.0, 1.0))
        self.ax.plot(theta, stddev, 'o', label=label, **kwargs)

    def add_grid(self):
        """Turn on the axes grid."""
        self.ax.grid(True)

    def add_legend(self):
        """Add a legend anchored to the right of the axes with bold 14pt black text."""
        legend = self.ax.legend(loc='upper right', bbox_to_anchor=(1.65, 0.95))
        for text in legend.get_texts():
            text.set_fontsize(14)
            text.set_fontweight('bold')
            text.set_color('black')

# Function to compute standard deviation, correlation coefficient, and RMSE
def compute_metrics(obs, model):
    """Return the Taylor-diagram statistics of *model* against *obs*.

    Args:
        obs: Observed values (any array-like convertible to float).
        model: Modelled values, same shape as *obs*.

    Returns:
        A tuple ``(stddev, corrcoef, rmse)`` where ``stddev`` is the
        population standard deviation of *model*, ``corrcoef`` is the
        Pearson correlation between *obs* and *model*, and ``rmse`` is the
        root-mean-square difference between them. NaN values in either
        input propagate to the results.

    Raises:
        ValueError: If *obs* and *model* do not have the same shape.
    """
    # Coerce to float arrays so plain lists work too; ``obs - model`` on
    # Python lists would otherwise raise TypeError.
    obs = np.asarray(obs, dtype=float)
    model = np.asarray(model, dtype=float)
    if obs.shape != model.shape:
        raise ValueError(
            f"obs and model must have the same shape, got {obs.shape} and {model.shape}"
        )

    stddev = np.std(model)
    corrcoef = np.corrcoef(obs, model)[0, 1]
    rmse = np.sqrt(np.mean((obs - model)**2))
    return stddev, corrcoef, rmse

In [None]:
# Validation script: computes standard deviation, correlation and RMSE of
# every model column against the observed column and draws them on a
# Taylor diagram.
#
# Assuming you have a DataFrame named 'df' containing all the datasets
# that you want to use for model validation, including the observed variable

observed_column = "Observed_Variable"            ### Replace "Observed_Variable" with your observed variable name ###
# Replace df with your actual DataFrame
model_columns = [col for col in df.columns if col not in [observed_column, 'datetime', 'month']]

print("Model columns:", model_columns)

# Convert only the columns used for validation to numeric (errors='coerce'
# turns non-numeric entries into NaN). 'datetime' and 'month' are left
# untouched so they are not destroyed by the coercion. Work on a copy so the
# caller's original frame is not modified.
numeric_columns = [observed_column] + model_columns
df = df.copy()
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Check for NaN or Inf values in the observed data
nan_count = df[observed_column].isna().sum()
inf_count = np.isinf(df[observed_column]).sum()

print(f"NaN values in observed data: {nan_count}")
print(f"Inf values in observed data: {inf_count}")

if nan_count > 0 or inf_count > 0:
    # Treat +/-Inf as missing first so the fill value is the mean of the
    # finite observations only, then assign the result back explicitly
    # (chained ``fillna(..., inplace=True)`` does not update ``df`` under
    # pandas Copy-on-Write). Only the observed column is filled; model
    # columns containing NaN/Inf are reported and skipped below.
    observed_finite = df[observed_column].replace([np.inf, -np.inf], np.nan)
    df[observed_column] = observed_finite.fillna(observed_finite.mean())

# Compute metrics for observation and each model
obs = df[observed_column].values
obs_std = np.std(obs)

print(f"Standard deviation of observed data: {obs_std}")

# Check if obs_std is a valid number
if not np.isfinite(obs_std):
    raise ValueError("Standard deviation of observed data is not finite.")

models_metrics = {}  # Dictionary to store metrics for each model
# plt.get_cmap replaces cm.get_cmap, which is deprecated since Matplotlib 3.7
# and removed in 3.9. At least one colour is requested so an empty model list
# does not ask for a zero-sized colormap.
colors = plt.get_cmap('tab20', max(len(model_columns), 1))

for col in model_columns:
    model_values = df[col].values
    std, corr, rmse = compute_metrics(obs, model_values)

    # Skip models with NaN values
    if np.isnan(std) or np.isnan(corr) or np.isnan(rmse):
        print(f"Skipping {col} due to NaN values")
        continue

    print(f"Processing {col} - stddev: {std}, correlation: {corr}, rmse: {rmse}")
    models_metrics[col] = (std, corr, rmse)

# Plotting Taylor diagram
fig = plt.figure(figsize=(12, 8))  # Create figure with specified size
dia = TaylorDiagram(obs_std, fig=fig, label='Observation')

for idx, (model, (std, corr, rmse)) in enumerate(models_metrics.items()):
    print(f"Plotting {model} with stddev: {std}, correlation: {corr}")
    dia.add_sample(std, corr, rmse, label=model, color=colors(idx))

dia.add_grid()
dia.add_legend()
plt.show()