In [6]:
# ==============================================================
# Comprehensive Sensitivity Analysis for Hydrogen Delivery Models
# (Low Demand Scenario)
# ==============================================================
# Author: John Cadiz
# Purpose: Compute Pearson, Spearman, Standardized Regression Coefficients (SRC),
#          and Partial Rank Correlation Coefficients (PRCC)
# ==============================================================

import pandas as pd
import numpy as np
from itertools import product
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pingouin as pg

# -----------------------------
# 1. LOAD AND CLEAN HEADERS
# -----------------------------
file = "All_Simulation_Data - Low Demand 100 km.csv"
df = pd.read_csv(file, header=[0, 1,2])
# Flatten the two header levels into one string (e.g. "New Haven_Gas Truck_Electricity Cost")
df.columns = ['_'.join([str(c1), str(c2), str(c3)]).strip().replace('Unnamed: ', '').replace('  ', ' ')
              for c1, c2,c3 in df.columns]

def clean_header(h):
    if not isinstance(h, str):
        return h
    h = h.strip()
    h = h.replace("Electtricity", "Electricity")
    h = h.replace("Liquiefaction", "Liquefaction")
    h = h.replace("Commpressor", "Compressor")
    h = h.replace("Trans Leage", "Trans Leakage")
    h = h.replace("length", "Length")
    h = h.replace("Cap Cost", "Capital Cost")
    return h

df.columns = [clean_header(c) for c in df.columns]

# -----------------------------
# 2. SPLIT INTO CITY + MODE BLOCKS
# -----------------------------
cities = ["New Haven", "Fresno", "Atlanta", "Miami"]
modes = ["Gas", "Liquid", "Pipeline"]

num_blocks = len(cities) * len(modes)
cols_per_block = df.shape[1] // num_blocks

blocks = [df.iloc[:, i*cols_per_block:(i+1)*cols_per_block].copy() for i in range(num_blocks)]

labeled_blocks = []
i = 0
for city in cities:
    for mode in modes:
        block = blocks[i]
        block["City"] = city
        block["Mode"] = mode
        labeled_blocks.append(block)
        i += 1

tidy = pd.concat(labeled_blocks, ignore_index=True)
print("✅ Data reshaped:", tidy.shape)

# -----------------------------
# 3. DEFINE INPUTS + OUTPUTS
# -----------------------------
outputs = ["Total Cost", "Total GHG Cost"]
exclude_cols = outputs + ["City", "Mode"]
inputs = [c for c in tidy.columns if c not in exclude_cols]

results = []

# -----------------------------
# 4. ANALYSIS LOOP
# -----------------------------
for city, mode in product(cities, modes):
    subset = tidy[(tidy["City"] == city) & (tidy["Mode"] == mode)].dropna()

    # Skip empty sets
    if subset.empty:
        continue

    for output in outputs:
        # Drop NA for current output
        subset_clean = subset[inputs + [output]].dropna()

        # ---- (1) Pearson & Spearman ----
        pearson_corr = subset_clean.corr(method="pearson")[output].drop(output)
        spearman_corr = subset_clean.corr(method="spearman")[output].drop(output)

        # ---- (2) Standardized Regression Coefficients ----
        X = subset_clean[inputs]
        y = subset_clean[output]
        X_scaled = StandardScaler().fit_transform(X)
        y_scaled = StandardScaler().fit_transform(y.values.reshape(-1, 1)).ravel()
        model = LinearRegression().fit(X_scaled, y_scaled)
        src = pd.Series(model.coef_, index=inputs)

        # ---- (3) Partial Rank Correlation Coefficients ----
        prcc_values = []
        for var in inputs:
            try:
                prcc = pg.partial_corr(
                    data=subset_clean,
                    x=var,
                    y=output,
                    covar=[v for v in inputs if v != var],
                    method="spearman"
                )
                prcc_values.append(prcc["r"].values[0])
            except Exception:
                prcc_values.append(np.nan)
        prcc = pd.Series(prcc_values, index=inputs)

        # ---- Combine Results ----
        df_temp = pd.DataFrame({
            "City": city,
            "Mode": mode,
            "Output": output,
            "Variable": inputs,
            "Pearson_r": pearson_corr.reindex(inputs).values,
            "Spearman_r": spearman_corr.reindex(inputs).values,
            "SRC": src.reindex(inputs).values,
            "PRCC": prcc.reindex(inputs).values
        })

        results.append(df_temp)

# Combine all results
final_results = pd.concat(results, ignore_index=True)

# -----------------------------
# 5. SORT + EXPORT
# -----------------------------
for metric in ["Pearson_r", "Spearman_r", "SRC", "PRCC"]:
    final_results[f"abs_{metric}"] = final_results[metric].abs()

final_sorted = final_results.sort_values(
    by=["City", "Mode", "Output", "abs_Pearson_r"], ascending=[True, True, True, False]
)

# Save summary
#final_sorted.to_csv("Low_Demand_Sensitivity_Analysis.csv", index=False)
print("\n✅ Sensitivity analysis complete.")
print("Saved as 'Low_Demand_Sensitivity_Analysis.csv'")
print("Columns:", list(final_sorted.columns))


✅ Data reshaped: (100128, 302)


ValueError: No objects to concatenate

In [7]:
df

Unnamed: 0,Low Demand (New Haven)_Gas Truck_Electricity Cost,1_level_0_1_level_1_Utilization,2_level_0_2_level_1_Fuel Price,3_level_0_3_level_1_Gas Leak,4_level_0_4_level_1_Trans Length,5_level_0_5_level_1_Gas Station Leak,6_level_0_6_level_1_Gas Trans Leak,7_level_0_7_level_1_Stations,8_level_0_8_level_1_Fuel Economy,9_level_0_9_level_1_Cab Cost,...,293_level_0_293_level_1_Liquefaction Efficiency,294_level_0_294_level_1_Compression Efficiency,295_level_0_295_level_1_Transmission Cost,296_level_0_296_level_1_Distribution Cost,297_level_0_297_level_1_Station Cost,298_level_0_298_level_1_Total Cost,299_level_0_299_level_1_Transmission GHG Cost,300_level_0_300_level_1_Distribution GHG Cost,301_level_0_301_level_1_Station GHG Cost,302_level_0_302_level_1_Total GHG Cost
0,0.189904,51.458566,7.854899,0.002049,100,0.001931,0.011285,4,9.995054,249616.7541,...,1.072016,1.052182,2.0398,9.2136,3.199329,14.452729,257.592523,184.597605,2354.82960,2797.01973
1,0.098573,61.919246,7.167697,0.001535,100,0.004188,0.004921,4,10.441106,255008.0895,...,1.175323,0.857934,1.9974,8.3681,2.323959,12.689459,214.680534,232.769072,1600.22762,2047.67722
2,0.220006,44.503319,10.670201,0.002267,100,0.001892,0.004934,5,10.649487,239255.5118,...,0.918826,1.184116,2.0380,9.8704,3.075492,14.983892,105.571455,78.021886,1086.11547,1269.70881
3,0.220594,60.410585,14.694990,0.001332,100,0.003213,0.004764,4,8.918251,210514.8435,...,1.048229,0.846735,2.0872,8.5340,2.562501,13.183701,193.193473,130.636171,1421.26522,1745.09486
4,0.097197,34.687278,8.536070,0.001455,100,0.005314,0.006170,6,10.716705,273926.7707,...,1.144617,1.180669,1.9740,10.9849,2.949251,15.908151,103.136398,126.087749,1057.97501,1287.19916
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8339,0.215006,58.476191,10.609373,0.001757,100,0.005469,0.004578,4,9.320119,377487.5760,...,0.870676,0.977052,2.0634,8.6276,2.646949,13.337949,124.453037,86.812058,1056.47281,1267.73791
8340,0.104031,84.287002,13.961806,0.002044,100,0.005081,0.003795,3,10.636764,354049.5276,...,0.971683,0.928375,2.0005,7.2167,1.770179,10.987379,300.500566,185.961488,2423.84003,2910.30208
8341,0.203053,37.806687,10.207252,0.001339,100,0.004463,0.016612,6,8.330031,242882.7882,...,1.162431,1.135868,2.0366,10.6543,3.394447,16.085348,162.118338,129.335999,1599.90839,1891.36273
8342,0.155586,75.709898,8.988350,0.002364,100,0.006258,0.010919,3,7.989173,352961.0515,...,0.920730,1.094223,2.0114,7.5931,2.328439,11.932939,199.553922,126.893485,1897.14832,2223.59572
