In [None]:
import pandas as pd
import numpy as np

In [None]:
def load_kegg_to_hgnc_mapping(mapping_csv_path):
    """Build a KEGG-ID -> HGNC-symbol dictionary from a mapping CSV.

    The CSV must contain a 'Symbol' column (the HGNC symbol) and a 'Gene_ID'
    column (the identifier that is placed after the 'hsa:' prefix to form the
    KEGG ID).

    Parameters
    ----------
    mapping_csv_path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    dict
        Maps ``'hsa:<Gene_ID>'`` to the matching ``Symbol``. Rows in which
        either field is missing are skipped. If a Gene_ID occurs more than
        once, the last row wins.

    Raises
    ------
    KeyError
        If the 'Symbol' or 'Gene_ID' column is absent from the CSV.
    """
    # Read every column as text. With the default type inference a single
    # blank Gene_ID cell turns the whole column into floats, and the keys
    # would then be rendered as 'hsa:7157.0' instead of 'hsa:7157'.
    mapping_df = pd.read_csv(mapping_csv_path, dtype=str)

    # Incomplete rows cannot contribute a usable entry (they would otherwise
    # yield an 'hsa:nan' key or a NaN symbol).
    mapping_df = mapping_df.dropna(subset=['Symbol', 'Gene_ID'])

    return {
        f'hsa:{gene_id}': symbol
        for symbol, gene_id in zip(mapping_df['Symbol'], mapping_df['Gene_ID'])
    }

def update_and_sort_matrix_with_hgnc_symbols(matrix, kegg_to_hgnc_mapping):
    """Return a relabelled, sorted copy of a gene matrix.

    Every row and column label of ``matrix`` is looked up in
    ``kegg_to_hgnc_mapping``; labels without an entry become the literal
    string 'unknown'. Because all unmapped labels share that one name, the
    result may contain duplicate row/column labels.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Matrix whose index and columns hold KEGG gene IDs. It is left
        unmodified.
    kegg_to_hgnc_mapping : dict
        Lookup from KEGG gene ID to HGNC symbol.

    Returns
    -------
    pandas.DataFrame
        A new frame with translated labels, sorted by row label and then by
        column label.
    """
    # Work on a copy: assigning to .index/.columns on the argument itself
    # would relabel the caller's frame, and a second call on that frame would
    # then map every label to 'unknown'.
    relabelled = matrix.copy()
    relabelled.index = [
        kegg_to_hgnc_mapping.get(gene_id, 'unknown') for gene_id in matrix.index
    ]
    relabelled.columns = [
        kegg_to_hgnc_mapping.get(gene_id, 'unknown') for gene_id in matrix.columns
    ]

    # Sort rows first, then columns, by their new labels.
    return relabelled.sort_index().sort_index(axis=1)

In [None]:
# Build the KEGG-ID -> HGNC-symbol lookup from the mapping CSV
mapping_csv_path = 'NCBI_ID-Symbol.csv'  # Replace with your actual file path
kegg_to_hgnc_mapping = load_kegg_to_hgnc_mapping(mapping_csv_path=mapping_csv_path)


In [None]:
# 'merged_gene_relation_network_matrix.csv' is assumed to be the merged matrix
# file generated earlier; its first CSV column supplies the row labels
merged_matrix_df = pd.read_csv(
    'merged_gene_relation_network_matrix.csv',
    index_col=0,
)

# Translate the KEGG IDs on both axes to HGNC symbols and sort the axes
updated_matrix_df = update_and_sort_matrix_with_hgnc_symbols(
    matrix=merged_matrix_df,
    kegg_to_hgnc_mapping=kegg_to_hgnc_mapping,
)


In [None]:
# Show the relabelled, sorted matrix as this cell's output
updated_matrix_df

In [None]:
# Write the HGNC-labelled matrix to a CSV file, then report the destination
output_csv_path_with_hgnc = 'updated_gene_relation_network_matrix_with_hgnc.csv'
updated_matrix_df.to_csv(path_or_buf=output_csv_path_with_hgnc)
print(f'The updated gene relation network matrix with HGNC symbols has been saved to {output_csv_path_with_hgnc}')