In [None]:
import os

import matplotlib.pyplot as plt
import seaborn as sns

def create_measurement_pairplots(
    df,
    output_dir='pairplots',
    locations=None,
    max_points=7500,
    random_state=42,
):
    """Save one location-vs-location seaborn pair plot per measurement type.

    For each measurement suffix (``airtemp_degF``, ``windspeed_mph``,
    ``windgust_mph``, ``rh_percent``, ``precip_in``) the columns named
    ``"<location>_<measurement>"`` are selected from ``df``, relabelled with
    the bare location code, optionally down-sampled, drawn with
    ``sns.pairplot`` and written to
    ``<output_dir>/pairplot_<measurement>.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with one ``"<location>_<measurement>"`` column for every
        location/measurement combination.
    output_dir : str, optional
        Directory that receives the PNG files; created if it does not exist.
    locations : sequence of str, optional
        Location codes used as column prefixes and as plot labels. Defaults
        to the ten codes originally hard-coded in this function.
    max_points : int or None, optional
        Maximum number of rows plotted per measurement. Larger frames are
        randomly sampled down to this size; ``None`` disables sampling.
    random_state : int, optional
        Seed passed to ``DataFrame.sample`` so the sample is repeatable.

    Raises
    ------
    ValueError
        If ``locations`` is empty.
    KeyError
        If ``df`` lacks any required column. This is checked before the
        output directory is created or the plotting theme is changed.

    Notes
    -----
    Calls ``sns.set_theme(font_scale=1.5)``, which changes the global
    seaborn/matplotlib style for the rest of the session.
    """
    # Measurement suffix -> figure title. Iteration order defines plot order.
    titles = {
        'airtemp_degF': 'Temperature (F) Comparison',
        'windspeed_mph': 'Wind Speed (mph) Comparison',
        'windgust_mph': 'Wind Gust (mph) Comparison',
        'rh_percent': 'Relative Humidity Comparison',
        'precip_in': 'Precipitation (in) Comparison',
    }

    if locations is None:
        locations = ['BURN', 'NCAT', 'SALI', 'MITC', 'SASS',
                     'FRYI', 'JEFF', 'BEAR', 'WINE', 'UNCA']
    else:
        locations = list(locations)
    if not locations:
        raise ValueError("locations must contain at least one location code")

    # Source column -> short label, per measurement.
    columns_by_measure = {
        measure: {f"{loc}_{measure}": loc for loc in locations}
        for measure in titles
    }

    # Fail fast: report every missing column before touching the filesystem
    # or the global plotting theme, instead of failing part-way through.
    missing = [
        col
        for rename_map in columns_by_measure.values()
        for col in rename_map
        if col not in df.columns
    ]
    if missing:
        raise KeyError(f"DataFrame is missing required columns: {missing}")

    os.makedirs(output_dir, exist_ok=True)

    # Larger font scale for readability (global side effect).
    sns.set_theme(font_scale=1.5)

    # Final figure size in inches, and the precipitation cut-off that is used
    # both for row filtering and for the axis limits.
    fig_width, fig_height = 12, 10
    precip_cap = 0.6

    for measure, rename_map in columns_by_measure.items():
        # New frame with just this measurement's columns, labelled by location.
        measure_df = df[list(rename_map)].rename(columns=rename_map)

        # For precipitation, keep only rows where every location is at or
        # below the cap (filtering happens before sampling).
        if measure == 'precip_in':
            measure_df = measure_df[measure_df.max(axis=1) <= precip_cap]

        # Down-sample large frames to keep plotting time reasonable.
        if max_points is not None and len(measure_df) > max_points:
            measure_df = measure_df.sample(n=max_points, random_state=random_state)

        # `height` is the size of ONE facet; seaborn multiplies it by the
        # number of variables. Deriving it from the target size avoids
        # building a huge intermediate figure that is then shrunk.
        g = sns.pairplot(
            measure_df,
            diag_kind='kde',
            height=fig_height / len(locations),
            aspect=fig_width / fig_height,
            plot_kws={'alpha': 0.08, 's': 10},
            diag_kws={'alpha': 0.4},
        )

        try:
            # Pin the final size explicitly (also covers the case where
            # seaborn plots fewer variables than there are locations).
            g.fig.set_size_inches(fig_width, fig_height)

            g.fig.suptitle(titles[measure], y=1.02, size=24, weight='bold')

            for ax in g.axes.flat:
                if measure == 'airtemp_degF':
                    ax.set_xticks([10, 60])
                    ax.set_yticks([10, 60])
                elif measure == 'precip_in':
                    # Axes span 0..cap with reference ticks at 0 and 0.4.
                    ax.set_xlim(0, precip_cap)
                    ax.set_ylim(0, precip_cap)
                    ax.set_xticks([0, 0.4])
                    ax.set_yticks([0, 0.4])

                # Larger, bold, padded axis labels (text itself unchanged).
                ax.set_xlabel(ax.get_xlabel(), fontsize=14, weight='bold', labelpad=15)
                ax.set_ylabel(ax.get_ylabel(), fontsize=14, weight='bold', labelpad=15)

                ax.tick_params(labelsize=12)
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

            g.fig.subplots_adjust(
                left=0.1,
                right=0.97,
                bottom=0.1,
                top=0.97,
                wspace=0.072,
                hspace=0.072,
            )

            # Save at high resolution.
            save_path = os.path.join(output_dir, f'pairplot_{measure}.png')
            g.fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0.5)
        finally:
            # Close this specific figure even if formatting or saving fails,
            # so figures do not accumulate in the kernel.
            plt.close(g.fig)

        print(f"Saved {measure} pair plot to {save_path}")

In [None]:
# Render and save the pair plots. `df` is not defined in this cell, so it must
# already exist in the kernel from an earlier cell.
create_measurement_pairplots(df)