In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Open the Excel workbook once; the next cell parses a sheet from this handle.
# The path is relative, so the notebook has to be run from the directory that
# contains the '2000 iterations' folder.
file_path = '2000 iterations/remind_SSP2-PkBudg1150-SPS1_2050.xlsx'
excel_data = pd.ExcelFile(file_path)

In [3]:
# Read the "Technology shares" sheet from the opened workbook. The column
# names listed in `categories` are selected from this frame when plotting.
df = excel_data.parse('Technology shares')

In [4]:
# Technology categories: each key names a category (it is also used in the
# figure title and output file name), each value lists the sheet columns that
# are plotted together for that category.
categories = {
    'Wind': [
        'Wind - onshore-DDPM',
        'Wind - onshore-Gearbox',
        'Wind - offshore-DDPM',
        'Wind - offshore-Gearbox',
    ],
    'PV': [
        'PV - c-Si',
        'PV - CdTe',
        'PV - CIGS',
        'PV - a-Si',
        'PV - Perovskite',
        'PV - GaAs',
    ],
    'CSP': [
        'CSP - Parabolic trough',
        'CSP - Solar tower',
    ],
    'Fuel cell, stationary': [
        'Fuel cell - Stationary - PEMFC',
        'Fuel cell - Stationary - SOFC',
    ],
    'Electrolyzer': [
        'Electrolyzer - PEM',
        'Electrolyzer - Alkaline',
        'Electrolyzer - HTEL',
    ],
    'Battery, mobile': [
        'Battery-Mobile - NMC111',
        'Battery-Mobile - NMC523',
        'Battery-Mobile - NMC622',
        'Battery-Mobile - NMC811',
        'Battery-Mobile - NMC955',
        'Battery-Mobile - NCA',
        'Battery-Mobile - LFP',
        'Battery-Mobile - LiS',
        'Battery-Mobile - LiO2',
        'Battery-Mobile - SIB',
    ],
    'Battery, stationary': [
        'Battery-Stationary - NMC111-Stationary',
        'Battery-Stationary - NMC523-Stationary',
        'Battery-Stationary - NMC622-Stationary',
        'Battery-Stationary - NMC811-Stationary',
        'Battery-Stationary - NMC955-Stationary',
        'Battery-Stationary - Lead-Acid-Stationary',
        'Battery-Stationary - LFP-Stationary',
        'Battery-Stationary - SIB-Stationary',
        'Battery-Stationary - Redox-Flow-Stationary',
    ],
}

In [5]:
def clean_column_name(col_name, category_name):
    """Turn a raw sheet column name into a short technology label for plots.

    The ``'<category_name> - '`` prefix is stripped when present, remaining
    ``' - '`` separators become single spaces and every ``'-Stationary'``
    fragment is removed. The result is then formatted by the first matching
    rule:

    * contains ``'DDPM'``: title-cased with the acronym kept upper-case,
      e.g. ``'Wind - onshore-DDPM'`` -> ``'Onshore-DDPM'``;
    * contains ``'PEMFC'`` or ``'SOFC'``: the last word, unchanged;
    * contains ``'PEM'`` or ``'HTEL'``: the last word, upper-cased;
    * ``col_name`` contains ``'Battery-Mobile'`` or ``'Battery-Stationary'``:
      the last word, upper-cased;
    * otherwise: a fixed spelling for known mixed-case names (GaAs, CdTe,
      CIGS, c-Si, a-Si), falling back to title case.

    Args:
        col_name: Column name as it appears in the data.
        category_name: Category key the column belongs to; only used to strip
            the ``'<category_name> - '`` prefix.

    Returns:
        str: The cleaned label.
    """
    # Spellings that str.title() would mangle ('Gaas', 'Cdte', 'Cigs', 'C-Si',
    # 'A-Si'); keyed by the lower-cased cleaned name.
    preserved_spellings = {
        'gaas': 'GaAs',
        'cdte': 'CdTe',
        'cigs': 'CIGS',
        'c-si': 'c-Si',
        'a-si': 'a-Si',
    }

    # Remove category prefix and separators.
    cleaned_name = col_name.replace(category_name + ' - ', '').replace(' - ', ' ')
    cleaned_name = cleaned_name.replace('-Stationary', '').strip()

    if 'DDPM' in cleaned_name:
        # Title-case like the other labels (e.g. 'Onshore-Gearbox'), then
        # restore the acronym that str.title() turns into 'Ddpm'. Without the
        # title() call the replace never matches and the label stays lower-case.
        return cleaned_name.title().replace('Ddpm', 'DDPM')
    if 'PEMFC' in cleaned_name or 'SOFC' in cleaned_name:
        # Checked before the plain 'PEM' test because 'PEMFC' contains 'PEM'.
        return cleaned_name.split()[-1]
    if 'PEM' in cleaned_name or 'HTEL' in cleaned_name:
        return cleaned_name.split()[-1].upper()
    if 'Battery-Mobile' in col_name or 'Battery-Stationary' in col_name:
        # The category prefix is not stripped for battery columns (the keys
        # use 'Battery, mobile' / 'Battery, stationary'), so keep only the
        # last word, i.e. the chemistry name.
        # NOTE(review): upper() also rewrites mixed-case chemistries such as
        # 'LiS' -> 'LIS'; confirm that is the intended label.
        tech_name = cleaned_name.split()[-1].upper()
        return tech_name.replace('STATIONARY', '').strip()
    return preserved_spellings.get(cleaned_name.lower(), cleaned_name.title())

def create_and_save_pairwise_scatter_plots(data, columns, category_name):
    """Draw a seaborn pair plot for one technology category and save it as PNG.

    The selected columns are relabelled with ``clean_column_name`` so the axes
    show short technology names. The figure is written to
    ``pairwise_scatter_<category_name>_2000it.png`` (a relative path) and then
    closed.

    Args:
        data: DataFrame that contains every column listed in ``columns``.
        columns: Column names belonging to the category.
        category_name: Category label; used for the prefix clean-up of the
            column names, the figure title and the output file name.

    Returns:
        None.
    """
    # Rename columns to short sub-technology labels for the axis titles.
    renamed_columns = {col: clean_column_name(col, category_name) for col in columns}
    data_renamed = data[columns].rename(columns=renamed_columns)

    # Create the pairwise scatter plot and keep a handle on its own figure.
    pairplot = sns.pairplot(data_renamed, plot_kws={'s': 10})
    fig = pairplot.fig

    # Upper-case only the first character: str.capitalize() also lower-cases
    # the rest, which turned the acronym categories 'PV' and 'CSP' into
    # 'Pv' and 'Csp' in the title.
    title_label = category_name[:1].upper() + category_name[1:]
    fig.suptitle(f'Pairwise Scatter Plots for {title_label}', y=1.02, fontsize=16)

    # Adjust font sizes for axis titles and tick labels.
    for ax in pairplot.axes.flatten():
        ax.set_xlabel(ax.get_xlabel(), fontsize=12)
        ax.set_ylabel(ax.get_ylabel(), fontsize=12)
        ax.tick_params(axis='both', which='major', labelsize=10)

    # Set a consistent figure size.
    fig.set_size_inches(10, 10)

    # Save and close this grid's figure explicitly instead of relying on
    # pyplot's implicit "current figure", which may be a different figure if
    # others are open in the kernel.
    fig.savefig(f'pairwise_scatter_{category_name}_2000it.png', bbox_inches='tight')
    plt.close(fig)

In [6]:
# Render and save one pair-plot figure per technology category.
for category, columns in categories.items():
    create_and_save_pairwise_scatter_plots(
        data=df,
        columns=columns,
        category_name=category,
    )