In [None]:
#(1) Find a unified regularization parameter and output a precision matrix
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import GraphicalLassoCV, graphical_lasso, empirical_covariance
import warnings

# Ignore every warning of category RuntimeWarning for the rest of the session
warnings.simplefilter("ignore", RuntimeWarning)

# Folder that will receive the per-year precision matrices
output_dir = r"D:\Desktop\GraphicalLasso\2-Precision-Matrices"
os.makedirs(output_dir, exist_ok=True)

# Yearly input-output workbooks; the file stem (e.g. "2005") is used as the year key
input_files = [
    r"D:\Desktop\GraphicalLasso\1-39x39-Input-Output-Matrices\2005.xlsx",
    r"D:\Desktop\GraphicalLasso\1-39x39-Input-Output-Matrices\2010.xlsx",
    r"D:\Desktop\GraphicalLasso\1-39x39-Input-Output-Matrices\2015.xlsx",
    r"D:\Desktop\GraphicalLasso\1-39x39-Input-Output-Matrices\2020.xlsx"
]

# data_dict:   year -> cleaned (unscaled) DataFrame, reused when estimating precision matrices
# merged_data: list of standardized arrays, stacked later to learn a single alpha
data_dict = {}
merged_data = []

for file_path in input_files:
    # Year label is the file name without its extension
    year, _ = os.path.splitext(os.path.basename(file_path))

    # First spreadsheet column holds the row labels
    raw_table = pd.read_excel(file_path, index_col=0)

    # Treat +/-inf as missing, then drop every row that contains a missing value
    df_cleaned = raw_table.replace([np.inf, -np.inf], np.nan).dropna()
    data_dict[year] = df_cleaned

    # Each year is standardized on its own (zero mean, unit variance per column)
    merged_data.append(StandardScaler().fit_transform(df_cleaned))

    print(f"✅ Processed data for {year}")

# Stack the standardized yearly arrays row-wise and let cross-validation
# pick one regularization strength (alpha) shared by all years.
merged_data = np.vstack(merged_data)
model = GraphicalLassoCV(max_iter=1000, tol=1e-3).fit(merged_data)  # fit() returns the estimator
best_alpha = model.alpha_

print(f"\n🎯 Optimal alpha determined: {best_alpha}")
print(f"✅ Model iterations: {model.n_iter_}")

# Estimate and export one precision matrix per year, starting from the shared alpha.
# Diagonal jitter added to the empirical covariance to keep it positive definite
epsilon = 1e-4

for year, df_cleaned in data_dict.items():
    # Re-standardize this year's cleaned table
    data_standardized = StandardScaler().fit_transform(df_cleaned)

    # Empirical covariance plus a small multiple of the identity
    emp_cov = empirical_covariance(data_standardized)
    emp_cov = emp_cov + epsilon * np.identity(emp_cov.shape[0])

    # Back off to progressively larger penalties when the solver raises
    # FloatingPointError; the `else` branch runs only if no factor succeeded.
    for factor in (1, 2, 4, 8, 16):
        try_alpha = best_alpha * factor
        try:
            covariance, precision = graphical_lasso(emp_cov, alpha=try_alpha, max_iter=500, tol=1e-3)
        except FloatingPointError:
            print(f"⚠️ {year}: Failed with alpha={try_alpha}, trying a larger alpha...")
            continue
        print(f"✅ {year}: Precision matrix successfully estimated with alpha={try_alpha}")
        break
    else:
        raise RuntimeError(f"❌ {year}: All alpha values failed, unable to estimate precision matrix")

    # Label rows and columns with the original column names and write to Excel
    sector_labels = df_cleaned.columns
    precision_df = pd.DataFrame(precision, index=sector_labels, columns=sector_labels)
    output_file = os.path.join(output_dir, f"{year}-Precision-Matrix.xlsx")
    precision_df.to_excel(output_file)
    print(f"📁 Precision matrix saved to: {output_file}")

print("\n🎉 All files processed successfully!")

In [None]:
#(2) Find the optimal attenuation factor and calculate Katz-Bonacich centrality
import pandas as pd
import numpy as np
import networkx as nx
import os
import warnings

# Ignore every warning of category RuntimeWarning
warnings.simplefilter("ignore", RuntimeWarning)

# Years to process; each must have a "<year>-Precision-Matrix.xlsx" in input_dir
years = ["2005", "2010", "2015", "2020"]

# Precision matrices are read from input_dir; centrality tables go to output_dir
input_dir = r"D:\Desktop\GraphicalLasso\2-Precision-Matrices"
output_dir = r"D:\Desktop\GraphicalLasso\3-Undirected-Graph-KB-Centrality"
os.makedirs(output_dir, exist_ok=True)

# year -> attenuation factor selected for that year's graph
alpha_dict = dict()

def calculate_best_alpha_and_katz(file_path):
    """Select an attenuation factor and compute Katz-Bonacich centrality.

    The precision matrix stored in ``file_path`` (first spreadsheet column is
    the row index) is turned into an unweighted, undirected graph: two sectors
    are linked when their strictly-upper-triangle entry is non-zero. Twenty
    candidate attenuation factors, evenly spaced between 0.1 and 0.99 times
    ``1 / lambda_max`` of the adjacency matrix, are scanned, and the candidate
    whose centrality scores have the smallest standard deviation is kept.

    NOTE(review): picking the *smallest* spread favours the least
    discriminating alpha; confirm that this is the intended criterion.

    Parameters
    ----------
    file_path : str
        Path to an Excel workbook holding a labelled precision matrix.

    Returns
    -------
    tuple
        ``(best_alpha, katz)`` where ``best_alpha`` is the selected
        attenuation factor and ``katz`` maps each sector label to its
        centrality score. Every row label of the matrix is present in
        ``katz``, including sectors that have no edges.

    Raises
    ------
    ValueError
        If the matrix has no non-zero off-diagonal entry.
    networkx.PowerIterationFailedConvergence
        If the power iteration fails for the selected (i.e. every) candidate.
    """
    katz_max_iter = 1000
    precision_matrix = pd.read_excel(file_path, index_col=0)

    values = precision_matrix.to_numpy()
    row_labels = precision_matrix.index.tolist()
    col_labels = precision_matrix.columns.tolist()

    # Strictly-upper-triangle positions (j > i), visited in row-major order
    rows, cols = np.triu_indices(values.shape[0], k=1, m=values.shape[1])
    nonzero = values[rows, cols] != 0
    edges = [(row_labels[i], col_labels[j]) for i, j in zip(rows[nonzero], cols[nonzero])]

    if not edges:
        raise ValueError("❌ No valid edges found!")

    # Register every sector first: a graph built from edges alone silently
    # drops sectors without any link, so they would be missing from the output.
    G = nx.Graph()
    G.add_nodes_from(row_labels)
    G.add_edges_from(edges)

    # Largest adjacency eigenvalue bounds the usable attenuation factors.
    # Take real parts before comparing instead of ordering complex numbers.
    spectrum = nx.adjacency_spectrum(G, weight=None)
    max_eigenvalue = float(np.max(np.real(spectrum)))
    alpha_candidates = np.linspace(0.1, 0.99, 20) * (1 / max_eigenvalue)

    def evaluate(alpha):
        """Return the centrality dict for ``alpha``, or None if it did not converge."""
        try:
            return nx.katz_centrality(G, alpha=alpha, beta=1.0, max_iter=katz_max_iter, tol=1e-6)
        except nx.PowerIterationFailedConvergence:
            return None

    results = [evaluate(a) for a in alpha_candidates]
    # Non-converged candidates get an infinite spread so they are never preferred
    stds = [np.inf if scores is None else np.std(list(scores.values())) for scores in results]

    best_index = int(np.argmin(stds))
    katz = results[best_index]
    if katz is None:
        # argmin over all-inf picks index 0, which means nothing converged
        raise nx.PowerIterationFailedConvergence(katz_max_iter)

    # Reuse the scores computed during the scan instead of recomputing them
    best_alpha = alpha_candidates[best_index]
    return best_alpha, katz

# For each year: read its precision matrix, score the sectors, rank them, export.
for year in years:
    input_path = os.path.join(input_dir, f"{year}-Precision-Matrix.xlsx")
    output_path = os.path.join(output_dir, f"KB-Centrality_{year}.xlsx")

    print(f"\n📘 Processing data for {year}...")

    # Keep the selected attenuation factor so it can be inspected afterwards
    alpha, katz_centrality = calculate_best_alpha_and_katz(input_path)
    alpha_dict[year] = alpha

    # One row per sector, highest centrality first; ties share the lowest
    # rank number (method="min"), and rank 1 is the most central sector.
    df_katz = (
        pd.DataFrame({
            "Sector_Code": list(katz_centrality),
            "Katz_Bonacich_Centrality": list(katz_centrality.values()),
        })
        .sort_values(by="Katz_Bonacich_Centrality", ascending=False)
        .assign(
            Rank=lambda frame: frame["Katz_Bonacich_Centrality"]
            .rank(ascending=False, method="min")
            .astype(int)
        )
    )

    df_katz.to_excel(output_path, index=False)
    print(f"✅ Katz-Bonacich centrality for {year} saved to: {output_path}")
    print(f"🔍 Optimal attenuation factor alpha: {alpha:.5f}")

print("\n🎉 Katz-Bonacich centrality calculations completed for all years!")

In [None]:
#(3) Obtain the ranking results of sector importance for each year
import pandas as pd
import os

# Folder holding the per-year "KB-Centrality_<year>.xlsx" workbooks
base_dir = r"D:\Desktop\GraphicalLasso\3-Undirected-Graph-KB-Centrality"
years = ['2005', '2010', '2015', '2020']
file_paths = {year: os.path.join(base_dir, f"KB-Centrality_{year}.xlsx") for year in years}

# year -> two-column frame: "Sector_Code" plus that year's rank (column named after the year)
rank_dfs = {}

for year in years:
    df = pd.read_excel(file_paths[year])
    # rename() returns a new frame, so no in-place mutation is needed
    rank_dfs[year] = df[["Sector_Code", "Rank"]].rename(columns={"Rank": year})

# Join the yearly rankings on "Sector_Code"; the outer join keeps sectors
# that are absent from some years.
merged_df = rank_dfs[years[0]]
for year in years[1:]:
    merged_df = pd.merge(merged_df, rank_dfs[year], on="Sector_Code", how="outer")

# A sector missing in some year gets NaN there, which would turn the whole
# rank column into floats (1.0, 2.0, ...). The nullable integer dtype keeps
# ranks as integers and leaves the gaps empty in the exported sheet.
merged_df = merged_df.astype({year: "Int64" for year in years})

# Sort by sector code (optional)
merged_df = merged_df.sort_values(by="Sector_Code").reset_index(drop=True)

# Save to Excel
output_path = os.path.join(base_dir, "Sector_KB_Centrality_Rankings_Summary.xlsx")
merged_df.to_excel(output_path, index=False)

print(f"\n✅ Successfully generated rankings summary file: {output_path}")