<div style="background-color: #D2B48C; padding: 10px;">
    <h2><center>Cox Proportional Hazards Analysis (PH vs. Heart Failure)</center></h2>
</div>

### <center> <span style="background-color:#D2B48C; padding:5px;">Cox Proportional Hazards Analysis (PH vs. death)</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# Heart-failure cohort: read the pre-, post- and common-diagnosis extracts
# (adjust paths as necessary).
(
    Pre_heart_failure_dataframe,
    Post_heart_failure_dataframe,
    Common_heart_failure_dataframe,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Pre heart_failure COMMON with comorbidities.csv',
        'Post heart_failure COMMON with comorbidities.csv',
        'Combined heart_failure Common with comorbidities.csv',
    )
)

# Stack the extracts (pre, common, post) on a fresh index and discard rows
# that are exact duplicates of one another.
combined_dataframe_heart_failure = pd.concat(
    [
        Pre_heart_failure_dataframe,
        Common_heart_failure_dataframe,
        Post_heart_failure_dataframe,
    ],
    ignore_index=True,
).drop_duplicates()

# Collapse the specific heart-failure ICD-10 codes into one "heart_failure" label.
heart_failure_conditions = ['I50.0', 'I50.1']
combined_dataframe_heart_failure['Combined ICD10 Codes'] = (
    combined_dataframe_heart_failure['Combined ICD10 Codes']
    .replace(heart_failure_conditions, 'heart_failure')
)

# NOTE(review): more statements follow in this cell, so this preview is
# presumably never rendered -- confirm whether it is still wanted.
combined_dataframe_heart_failure.head(3)


# PH cohort: read the pre-, post- and common-diagnosis extracts
# (adjust paths as necessary).
Pre_PH_dataframe, Post_PH_dataframe, Common_PH_dataframe = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Pre-PH Common PH Icd10 Codes.csv',
        'Post-PH Common PH Icd10 Codes.csv',
        'Common-PH Common PH Icd10 Codes.csv',
    )
)

# Stack the extracts (pre, common, post) on a fresh index and discard rows
# that are exact duplicates of one another.
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the specific PH ICD-10 codes into one "PH" label.
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
combined_dataframe_PH['ICD10 Codes'] = (
    combined_dataframe_PH['ICD10 Codes'].replace(ph_conditions, 'PH')
)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Parse the per-record diagnosis dates so records can be ordered in time
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_PH['Diagnosis Date']
)
combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date']
)

# Step 2: Conditions of interest, as description -> code.
# Both cohorts track the same seven comorbidities; each adds its own index label.
shared_comorbidities = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}
ph_conditions = {**shared_comorbidities, "PH": "PH"}  # Specific for PH cohort
heart_failure_conditions = {
    **shared_comorbidities,
    "heart_failure": "heart_failure",  # Specific for heart_failure cohort
}
ph_condition_codes = list(ph_conditions.values())
heart_failure_condition_codes = list(heart_failure_conditions.values())

# Steps 2-3: keep only rows carrying a code of interest, ordered by
# participant and then by diagnosis date.
ph_code_mask = combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)
filtered_ph_dataframe = combined_dataframe_PH[ph_code_mask].sort_values(
    by=['Participant ID', 'Diagnosis Date']
)

heart_failure_code_mask = combined_dataframe_heart_failure['Combined ICD10 Codes'].isin(
    heart_failure_condition_codes
)
filtered_heart_failure_dataframe = combined_dataframe_heart_failure[
    heart_failure_code_mask
].sort_values(by=['Participant ID', 'Combined ICD10 Diagnosis Date'])


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Parse the date columns of the PH dataframe; unparseable values become NaT
filtered_ph_dataframe['Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')

# Parse the date columns of the heart_failure dataframe; unparseable values become NaT
filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce')
filtered_heart_failure_dataframe['heart_failure Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['heart_failure Diagnosis Date'], errors='coerce')

# PH cohort: keep only conditions recorded strictly before the PH diagnosis.
# Rows with a NaT on either side compare False and are therefore dropped.
# .copy() makes the result an independent frame, so the column assignments
# performed later in this cell do not raise SettingWithCopyWarning.
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Diagnosis Date'] < filtered_ph_dataframe['PH Diagnosis Date']
].copy()

# heart_failure cohort: same rule against the heart_failure diagnosis date.
filtered_heart_failure_dataframe = filtered_heart_failure_dataframe[
    filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] < filtered_heart_failure_dataframe['heart_failure Diagnosis Date']
].copy()

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis).
# The code column is selected before aggregating, so the grouping column is
# never handed to the aggregation function (DataFrameGroupBy.apply over the
# whole frame is deprecated for that in recent pandas). Rows keep the order
# they already have in the filtered dataframes.

# For PH cohort
ph_sequences = filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].agg(list)
ph_traces = (
    ph_sequences[ph_sequences.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

# For heart_failure cohort
heart_failure_sequences = (
    filtered_heart_failure_dataframe.groupby('Participant ID')['Combined ICD10 Codes'].agg(list)
)
heart_failure_traces = (
    heart_failure_sequences[heart_failure_sequences.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

from collections import Counter

# Step 6: Find common pathways between the two cohorts.
# Each trace is converted to a tuple (hashable) and tallied once per cohort, so
# the per-pathway participant counts are dictionary lookups rather than a full
# rescan of every trace for every pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
heart_failure_trace_counts = Counter(map(tuple, heart_failure_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
heart_failure_trace_set = set(heart_failure_trace_counts)

# Find common pathways
common_pathways = ph_trace_set.intersection(heart_failure_trace_set)

# Iterate in sorted order so the table below is identical from run to run
# (set iteration order is not stable across interpreter sessions).
ordered_common_pathways = sorted(common_pathways)

# Participant counts for each common pathway, per cohort
ph_participant_counts = [ph_trace_counts[trace] for trace in ordered_common_pathways]
hf_participant_counts = [heart_failure_trace_counts[trace] for trace in ordered_common_pathways]

# Common pathways in a readable "A -> B -> C" string format, with counts
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(trace) for trace in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'HF Participants': hf_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and Heart Failure Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold
min_deaths = 1        # Minimum number of death events required (NOTE(review): not applied anywhere in the visible code)

# Step 1: Filter pathways with sufficient participants in both PH and HF cohorts
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['HF Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Only the pathway labels are needed from here on
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'HF Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter

# Step 1: Ensure all date columns are in datetime format (unparseable values become NaT)
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['Date of Death'] = pd.to_datetime(filtered_ph_dataframe['Date of Death'], errors='coerce')

# Step 2: Set a global Censor Date for all alive participants
global_censor_date = pd.to_datetime("2021-09-29")  # Adjust this date as per your dataset or study

# Step 3: Compute Death_Followup_Time in days since the PH diagnosis.
# The column starts as float NaN (not None) so it stays numeric instead of
# becoming an object column; rows that are neither 'Dead' nor 'Alive' keep NaN.
filtered_ph_dataframe['Death_Followup_Time'] = float('nan')

# For deceased participants: diagnosis -> date of death
is_dead = filtered_ph_dataframe['Alive / Dead'] == 'Dead'
filtered_ph_dataframe.loc[is_dead, 'Death_Followup_Time'] = (
    (filtered_ph_dataframe['Date of Death'] - filtered_ph_dataframe['PH Diagnosis Date']).dt.days
)

# For alive participants: diagnosis -> global censor date
is_alive = filtered_ph_dataframe['Alive / Dead'] == 'Alive'
filtered_ph_dataframe.loc[is_alive, 'Death_Followup_Time'] = (
    (global_censor_date - filtered_ph_dataframe['PH Diagnosis Date']).dt.days
)

# Step 4: Label every row with its participant's full pathway string
# (codes joined in current row order), then keep only the retained pathways
filtered_ph_dataframe['Pathway'] = filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].transform(lambda x: ' -> '.join(x))

filtered_ph_dataframe = filtered_ph_dataframe[filtered_ph_dataframe['Pathway'].isin(filtered_pathways['Pathway'].tolist())]

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Per pathway, is there at least one row with a recorded "Date of Death"?
pathways_with_death = (
    filtered_ph_dataframe['Date of Death']
    .notna()
    .groupby(filtered_ph_dataframe['Pathway'])
    .any()
)

# Steps 2-3: Broadcast the per-pathway flag to every row and store it as 1/0
dead_flag_per_row = filtered_ph_dataframe['Pathway'].map(pathways_with_death)
filtered_ph_dataframe['Dead Check'] = dead_flag_per_row
filtered_ph_dataframe['Dead Check'] = np.where(filtered_ph_dataframe['Dead Check'], 1, 0)

# Step 4: Display the updated DataFrame
display(filtered_ph_dataframe[['Pathway', 'Date of Death', 'Dead Check']])

# Rows belonging to pathways flagged with at least one death
pathways_with_dead = filtered_ph_dataframe.loc[filtered_ph_dataframe['Dead Check'].eq(1)]

# Distinct pathway labels among those rows
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Print the labels one per line
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# Same labels as a one-column DataFrame
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})
display(unique_pathways_df)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################



# Step 5: Remove rows with missing or non-positive follow-up times.
# .copy() gives an independent frame so the encodings below do not raise
# SettingWithCopyWarning.
filtered_ph_dataframe = filtered_ph_dataframe.dropna(subset=['Death_Followup_Time'])
filtered_ph_dataframe = filtered_ph_dataframe[filtered_ph_dataframe['Death_Followup_Time'] > 0].copy()

# Step 6: Encode categorical variables (values outside the mapping become NaN)
filtered_ph_dataframe['Sex'] = filtered_ph_dataframe['Sex'].map({'Male': 1, 'Female': 0})  # Encode Sex as numeric
filtered_ph_dataframe['Alive / Dead'] = filtered_ph_dataframe['Alive / Dead'].map({'Dead': 1, 'Alive': 0})  # Encode Alive/Dead as binary

# Step 7: Build the model table with ONE row per participant.
# The dataframe holds one row per participant *and* prior diagnosis, and every
# retained pathway has at least two codes, so fitting on it directly would
# enter each participant into the model several times and overstate the
# sample size (artificially narrow confidence intervals).
# NOTE(review): assumes follow-up time, status, Age and Sex are constant
# across a participant's rows -- confirm, in particular for 'Age'.
participant_level = filtered_ph_dataframe.drop_duplicates(subset='Participant ID')
cox_data = participant_level[['Death_Followup_Time', 'Alive / Dead', 'Age', 'Sex', 'Pathway']]

# One-hot encode the pathways (first category is the reference level) and make
# every column numeric before fitting.
cox_data = pd.get_dummies(cox_data, columns=['Pathway'], drop_first=True).astype(float)

# Step 8: Fit the Cox Proportional Hazards Model
cox_model = CoxPHFitter(penalizer=0.1)  # Adding penalizer to handle high-dimensional data
cox_model.fit(cox_data, duration_col='Death_Followup_Time', event_col='Alive / Dead')
cox_model.print_summary()  # Print detailed summary of the model

# Step 9: Plot the hazard ratios
plt.figure(figsize=(8, 20))  # Adjust the plot size for better visibility
cox_model.plot()
plt.title("Cox Proportional Hazards Analysis: Mortality Risk by PH Pathways", fontsize=16)
plt.xlabel("log(HR) (95% CI)", fontsize=12)
plt.ylabel("")  # Remove y-label for a cleaner presentation
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()

### <center> <span style="background-color:#D2B48C; padding:5px;">Cox Proportional Hazards Analysis: Mortality Risk by heart_failure Pathways</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# Heart-failure cohort: read the pre-, post- and common-diagnosis extracts
# (adjust paths as necessary).
(
    Pre_heart_failure_dataframe,
    Post_heart_failure_dataframe,
    Common_heart_failure_dataframe,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Pre heart_failure COMMON with comorbidities.csv',
        'Post heart_failure COMMON with comorbidities.csv',
        'Combined heart_failure Common with comorbidities.csv',
    )
)

# Stack the extracts (pre, common, post) on a fresh index and discard rows
# that are exact duplicates of one another.
combined_dataframe_heart_failure = pd.concat(
    [
        Pre_heart_failure_dataframe,
        Common_heart_failure_dataframe,
        Post_heart_failure_dataframe,
    ],
    ignore_index=True,
).drop_duplicates()

# Collapse the specific heart-failure ICD-10 codes into one "heart_failure" label.
heart_failure_conditions = ['I50.0', 'I50.1']
combined_dataframe_heart_failure['Combined ICD10 Codes'] = (
    combined_dataframe_heart_failure['Combined ICD10 Codes']
    .replace(heart_failure_conditions, 'heart_failure')
)

# PH cohort: read the pre-, post- and common-diagnosis extracts
# (adjust paths as necessary).
Pre_PH_dataframe, Post_PH_dataframe, Common_PH_dataframe = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Pre-PH Common PH Icd10 Codes.csv',
        'Post-PH Common PH Icd10 Codes.csv',
        'Common-PH Common PH Icd10 Codes.csv',
    )
)

# Stack the extracts (pre, common, post) on a fresh index and discard rows
# that are exact duplicates of one another.
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the specific PH ICD-10 codes into one "PH" label.
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
combined_dataframe_PH['ICD10 Codes'] = (
    combined_dataframe_PH['ICD10 Codes'].replace(ph_conditions, 'PH')
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Parse the per-record diagnosis dates so records can be ordered in time
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_PH['Diagnosis Date']
)
combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date']
)

# Step 2: Conditions of interest, as description -> code.
# Both cohorts track the same seven comorbidities; each adds its own index label.
shared_comorbidities = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}
ph_conditions = {**shared_comorbidities, "PH": "PH"}  # Specific for PH cohort
heart_failure_conditions = {
    **shared_comorbidities,
    "heart_failure": "heart_failure",  # Specific for heart_failure cohort
}
ph_condition_codes = list(ph_conditions.values())
heart_failure_condition_codes = list(heart_failure_conditions.values())

# Steps 2-3: keep only rows carrying a code of interest, ordered by
# participant and then by diagnosis date.
ph_code_mask = combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)
filtered_ph_dataframe = combined_dataframe_PH[ph_code_mask].sort_values(
    by=['Participant ID', 'Diagnosis Date']
)

heart_failure_code_mask = combined_dataframe_heart_failure['Combined ICD10 Codes'].isin(
    heart_failure_condition_codes
)
filtered_heart_failure_dataframe = combined_dataframe_heart_failure[
    heart_failure_code_mask
].sort_values(by=['Participant ID', 'Combined ICD10 Diagnosis Date'])


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Parse the date columns of the PH dataframe; unparseable values become NaT
filtered_ph_dataframe['Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')

# Parse the date columns of the heart_failure dataframe; unparseable values become NaT
filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce')
filtered_heart_failure_dataframe['heart_failure Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['heart_failure Diagnosis Date'], errors='coerce')

# PH cohort: keep only conditions recorded strictly before the PH diagnosis.
# Rows with a NaT on either side compare False and are therefore dropped.
# .copy() makes the result an independent frame, so column assignments made
# on it later do not raise SettingWithCopyWarning.
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Diagnosis Date'] < filtered_ph_dataframe['PH Diagnosis Date']
].copy()

# heart_failure cohort: same rule against the heart_failure diagnosis date.
filtered_heart_failure_dataframe = filtered_heart_failure_dataframe[
    filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] < filtered_heart_failure_dataframe['heart_failure Diagnosis Date']
].copy()

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis).
# The code column is selected before aggregating, so the grouping column is
# never handed to the aggregation function (DataFrameGroupBy.apply over the
# whole frame is deprecated for that in recent pandas). Rows keep the order
# they already have in the filtered dataframes.

# For PH cohort
ph_sequences = filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].agg(list)
ph_traces = (
    ph_sequences[ph_sequences.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

# For heart_failure cohort
heart_failure_sequences = (
    filtered_heart_failure_dataframe.groupby('Participant ID')['Combined ICD10 Codes'].agg(list)
)
heart_failure_traces = (
    heart_failure_sequences[heart_failure_sequences.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

from collections import Counter

# Step 6: Find common pathways between the two cohorts.
# Each trace is converted to a tuple (hashable) and tallied once per cohort, so
# the per-pathway participant counts are dictionary lookups rather than a full
# rescan of every trace for every pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
heart_failure_trace_counts = Counter(map(tuple, heart_failure_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
heart_failure_trace_set = set(heart_failure_trace_counts)

# Find common pathways
common_pathways = ph_trace_set.intersection(heart_failure_trace_set)

# Iterate in sorted order so the table below is identical from run to run
# (set iteration order is not stable across interpreter sessions).
ordered_common_pathways = sorted(common_pathways)

# Participant counts for each common pathway, per cohort
ph_participant_counts = [ph_trace_counts[trace] for trace in ordered_common_pathways]
hf_participant_counts = [heart_failure_trace_counts[trace] for trace in ordered_common_pathways]

# Common pathways in a readable "A -> B -> C" string format, with counts
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(trace) for trace in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'HF Participants': hf_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and Heart Failure Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold
min_deaths = 1        # Minimum number of death events required (NOTE(review): not applied anywhere in the visible code)

# Step 1: Filter pathways with sufficient participants in both PH and HF cohorts
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['HF Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Only the pathway labels are needed from here on
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'HF Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter

# Step 1: Ensure all date columns are in datetime format (unparseable values become NaT)
filtered_heart_failure_dataframe['heart_failure Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['heart_failure Diagnosis Date'], errors='coerce')
filtered_heart_failure_dataframe['Date of Death'] = pd.to_datetime(filtered_heart_failure_dataframe['Date of Death'], errors='coerce')

# Step 2: Set a global Censor Date for all alive participants
global_censor_date = pd.to_datetime("2021-09-29")  # Adjust this date as per your dataset or study

# Step 3: Compute Death_Followup_Time in days since the heart_failure diagnosis.
# The column starts as float NaN (not None) so it stays numeric instead of
# becoming an object column; rows that are neither 'Dead' nor 'Alive' keep NaN.
filtered_heart_failure_dataframe['Death_Followup_Time'] = float('nan')

# For deceased participants: diagnosis -> date of death
is_dead = filtered_heart_failure_dataframe['Alive / Dead'] == 'Dead'
filtered_heart_failure_dataframe.loc[is_dead, 'Death_Followup_Time'] = (
    (filtered_heart_failure_dataframe['Date of Death'] - filtered_heart_failure_dataframe['heart_failure Diagnosis Date']).dt.days
)

# For alive participants: diagnosis -> global censor date
is_alive = filtered_heart_failure_dataframe['Alive / Dead'] == 'Alive'
filtered_heart_failure_dataframe.loc[is_alive, 'Death_Followup_Time'] = (
    (global_censor_date - filtered_heart_failure_dataframe['heart_failure Diagnosis Date']).dt.days
)

# Step 4: Label every row with its participant's full pathway string
# (codes joined in current row order), then keep only the retained pathways
filtered_heart_failure_dataframe['Pathway'] = filtered_heart_failure_dataframe.groupby('Participant ID')['Combined ICD10 Codes'].transform(lambda x: ' -> '.join(x))

filtered_heart_failure_dataframe = filtered_heart_failure_dataframe[filtered_heart_failure_dataframe['Pathway'].isin(filtered_pathways['Pathway'].tolist())]

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Per pathway, is there at least one row with a recorded "Date of Death"?
pathways_with_death = (
    filtered_heart_failure_dataframe['Date of Death']
    .notna()
    .groupby(filtered_heart_failure_dataframe['Pathway'])
    .any()
)

# Steps 2-3: Broadcast the per-pathway flag to every row and store it as 1/0
dead_flag_per_row = filtered_heart_failure_dataframe['Pathway'].map(pathways_with_death)
filtered_heart_failure_dataframe['Dead Check'] = dead_flag_per_row
filtered_heart_failure_dataframe['Dead Check'] = np.where(filtered_heart_failure_dataframe['Dead Check'], 1, 0)

# Rows belonging to pathways flagged with at least one death
pathways_with_dead = filtered_heart_failure_dataframe.loc[
    filtered_heart_failure_dataframe['Dead Check'].eq(1)
]

# Distinct pathway labels among those rows
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Print the labels one per line
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# Same labels as a one-column DataFrame (built but not displayed in this cell)
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################



# Step 5: Remove rows with missing or non-positive follow-up times.
# .copy() gives an independent frame so the encodings below do not raise
# SettingWithCopyWarning.
filtered_heart_failure_dataframe = filtered_heart_failure_dataframe.dropna(subset=['Death_Followup_Time'])
filtered_heart_failure_dataframe = filtered_heart_failure_dataframe[filtered_heart_failure_dataframe['Death_Followup_Time'] > 0].copy()

# Step 6: Encode categorical variables (values outside the mapping become NaN)
filtered_heart_failure_dataframe['Sex'] = filtered_heart_failure_dataframe['Sex'].map({'Male': 1, 'Female': 0})  # Encode Sex as numeric
filtered_heart_failure_dataframe['Alive / Dead'] = filtered_heart_failure_dataframe['Alive / Dead'].map({'Dead': 1, 'Alive': 0})  # Encode Alive/Dead as binary

# Step 7: Build the model table with ONE row per participant.
# The dataframe holds one row per participant *and* prior diagnosis, and every
# retained pathway has at least two codes, so fitting on it directly would
# enter each participant into the model several times and overstate the
# sample size (artificially narrow confidence intervals).
# NOTE(review): assumes follow-up time, status, Individual Age and Sex are
# constant across a participant's rows -- confirm.
participant_level = filtered_heart_failure_dataframe.drop_duplicates(subset='Participant ID')
cox_data = participant_level[['Death_Followup_Time', 'Alive / Dead', 'Individual Age', 'Sex', 'Pathway']]

# One-hot encode the pathways (first category is the reference level) and make
# every column numeric before fitting.
cox_data = pd.get_dummies(cox_data, columns=['Pathway'], drop_first=True).astype(float)

# Step 8: Fit the Cox Proportional Hazards Model
cox_model = CoxPHFitter(penalizer=0.1)  # Adding penalizer to handle high-dimensional data
cox_model.fit(cox_data, duration_col='Death_Followup_Time', event_col='Alive / Dead')
cox_model.print_summary()  # Print detailed summary of the model

# Step 9: Plot the hazard ratios
plt.figure(figsize=(8, 20))  # Adjust the plot size for better visibility
cox_model.plot()
plt.title("Cox Proportional Hazards Analysis: Mortality Risk by heart_failure Pathways", fontsize=16)
plt.xlabel("log(HR) (95% CI)", fontsize=12)
plt.ylabel("")  # Remove y-label for a cleaner presentation
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()


### <center> <span style="background-color:#D2B48C; padding:5px;">Cox Proportional Hazards Analysis: PH vs heart_failure Pathways</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# --- Heart-failure cohort -----------------------------------------------------
# Read the three heart-failure extracts (adjust paths as necessary).
Pre_heart_failure_dataframe = pd.read_csv('Pre heart_failure COMMON with comorbidities.csv')
Post_heart_failure_dataframe = pd.read_csv('Post heart_failure COMMON with comorbidities.csv')
Common_heart_failure_dataframe = pd.read_csv('Combined heart_failure Common with comorbidities.csv')

# Stack the extracts (pre, combined, post) and discard exact duplicate rows.
combined_dataframe_heart_failure = pd.concat(
    [Pre_heart_failure_dataframe, Common_heart_failure_dataframe, Post_heart_failure_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed heart-failure ICD-10 codes into the single label "heart_failure".
heart_failure_conditions = ['I50.0', 'I50.1']
combined_dataframe_heart_failure['Combined ICD10 Codes'] = (
    combined_dataframe_heart_failure['Combined ICD10 Codes'].replace(heart_failure_conditions, 'heart_failure')
)

# --- PH cohort ----------------------------------------------------------------
# Read the three PH extracts (adjust paths as necessary).
Pre_PH_dataframe = pd.read_csv('Pre-PH Common PH Icd10 Codes.csv')
Post_PH_dataframe = pd.read_csv('Post-PH Common PH Icd10 Codes.csv')
Common_PH_dataframe = pd.read_csv('Common-PH Common PH Icd10 Codes.csv')

# Stack the extracts (pre, common, post) and discard exact duplicate rows.
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed PH ICD-10 codes into the single label "PH".
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
combined_dataframe_PH['ICD10 Codes'] = combined_dataframe_PH['ICD10 Codes'].replace(ph_conditions, 'PH')


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Parse the diagnosis dates so rows can be ordered chronologically.
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(combined_dataframe_PH['Diagnosis Date'])
combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_heart_failure['Combined ICD10 Diagnosis Date']
)

# Step 2: Keep only the conditions of interest.
# Both cohorts track the same seven comorbidities; each adds its own index
# condition (the collapsed "PH" / "heart_failure" label) at the end.
_shared_comorbidities = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}

# PH cohort: description -> code mapping and the plain list of codes.
ph_conditions = {**_shared_comorbidities, "PH": "PH"}
ph_condition_codes = list(ph_conditions.values())
_ph_row_mask = combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)

# heart_failure cohort: description -> code mapping and the plain list of codes.
heart_failure_conditions = {**_shared_comorbidities, "heart_failure": "heart_failure"}
heart_failure_condition_codes = list(heart_failure_conditions.values())
_hf_row_mask = combined_dataframe_heart_failure['Combined ICD10 Codes'].isin(heart_failure_condition_codes)

# Step 3: Order the retained rows by participant, then by diagnosis date.
filtered_ph_dataframe = combined_dataframe_PH[_ph_row_mask].sort_values(
    by=['Participant ID', 'Diagnosis Date']
)
filtered_heart_failure_dataframe = combined_dataframe_heart_failure[_hf_row_mask].sort_values(
    by=['Participant ID', 'Combined ICD10 Diagnosis Date']
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Parse the date columns of the PH frame; errors='coerce' turns values that
# cannot be parsed into NaT instead of raising.
filtered_ph_dataframe['Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')

# Same for the heart_failure frame.
filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce')
filtered_heart_failure_dataframe['heart_failure Diagnosis Date'] = pd.to_datetime(filtered_heart_failure_dataframe['heart_failure Diagnosis Date'], errors='coerce')

# Keep only conditions recorded strictly before the cohort's index diagnosis.
# Rows where either date is NaT compare as False and are dropped as well.
# .copy() makes each result an independent frame: later cells add columns to
# these frames, which would otherwise raise pandas' SettingWithCopyWarning.
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Diagnosis Date'] < filtered_ph_dataframe['PH Diagnosis Date']
].copy()

filtered_heart_failure_dataframe = filtered_heart_failure_dataframe[
    filtered_heart_failure_dataframe['Combined ICD10 Diagnosis Date'] < filtered_heart_failure_dataframe['heart_failure Diagnosis Date']
].copy()

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis)
def _collect_traces(frame, code_column):
    """Return a one-column ``Traces`` frame of per-participant code sequences.

    Rows of *frame* are grouped by ``'Participant ID'`` and the values of
    *code_column* are gathered into a list per participant, in the row order
    of *frame*. Participants with fewer than two rows are discarded. The
    result has a fresh 0..n-1 index and a single column named ``'Traces'``.

    Aggregating only the selected column (instead of ``DataFrameGroupBy.apply``
    over whole sub-frames) avoids the deprecated handling of grouping columns
    in ``apply`` and the ``None``/``dropna`` round trip.
    """
    codes_per_participant = frame.groupby('Participant ID')[code_column].agg(list)
    has_two_or_more = codes_per_participant.map(len) >= 2
    return (
        codes_per_participant[has_two_or_more]
        .reset_index(drop=True)
        .to_frame(name='Traces')  # name the column explicitly rather than via the constructor
    )


# For PH cohort
ph_traces = _collect_traces(filtered_ph_dataframe, 'ICD10 Codes')

# For heart_failure cohort
heart_failure_traces = _collect_traces(filtered_heart_failure_dataframe, 'Combined ICD10 Codes')

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

from collections import Counter

# Step 6: Find common pathways between the two cohorts.
# Each trace (a list of codes) is turned into a tuple so it is hashable, and
# counted once per cohort. The counters give both the distinct pathways
# (their keys) and the number of participants following each pathway, so the
# traces are scanned a single time instead of once per common pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
heart_failure_trace_counts = Counter(map(tuple, heart_failure_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
heart_failure_trace_set = set(heart_failure_trace_counts)

# Pathways that occur in both cohorts
common_pathways = ph_trace_set.intersection(heart_failure_trace_set)

# Sets have no stable iteration order; sort so the table is identical on
# every run.
ordered_common_pathways = sorted(common_pathways)

# One row per common pathway: readable string plus participant counts per cohort
ph_participant_counts = [ph_trace_counts[pathway] for pathway in ordered_common_pathways]
hf_participant_counts = [heart_failure_trace_counts[pathway] for pathway in ordered_common_pathways]
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(pathway) for pathway in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'HF Participants': hf_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and Heart Failure Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold (applied to each cohort separately)
min_deaths = 1        # Minimum number of death events required (not used in this step)

# Step 1: Filter pathways with sufficient participants in both PH and HF cohorts
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['HF Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Only the pathway strings are needed from here on
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'HF Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter




# Tag every row with its cohort and standardize the column names so the two
# frames can be stacked. assign()/rename() return new frames, so no column is
# written into a filtered slice.
filtered_ph_dataframe = filtered_ph_dataframe.assign(Cohort='PH').rename(columns={
    'Diagnosis Date': 'Diagnosis_Date',
    'ICD10 Codes': 'Disease_Code',
    'PH followup Time': 'Followup_Time'
})
# The Cox step reads an 'Age' column. The heart_failure frame gets it from
# 'Individual Age' below; give the PH frame the same treatment when it only
# has 'Individual Age', otherwise every PH row would carry NaN in 'Age' after
# the concat. The guard leaves the frame untouched if 'Age' already exists or
# 'Individual Age' is absent.
if 'Age' not in filtered_ph_dataframe.columns and 'Individual Age' in filtered_ph_dataframe.columns:
    filtered_ph_dataframe = filtered_ph_dataframe.rename(columns={'Individual Age': 'Age'})

filtered_heart_failure_dataframe = filtered_heart_failure_dataframe.assign(Cohort='HF').rename(columns={
    'Combined ICD10 Diagnosis Date': 'Diagnosis_Date',
    'Combined ICD10 Codes': 'Disease_Code',
    'heart_failure Matched followup Time': 'Followup_Time',
    'Individual Age':'Age'
})

# Combine dataframes (PH rows first, then HF rows)
combined_data = pd.concat([filtered_ph_dataframe, filtered_heart_failure_dataframe], ignore_index=True, sort=False)

# Ensure all diagnosis dates are in datetime format
combined_data['Diagnosis_Date'] = pd.to_datetime(combined_data['Diagnosis_Date'], errors='coerce')

# Build each participant's pathway string and repeat it on all of their rows.
# Grouping by cohort as well as participant keeps the sequences separate for
# a participant who appears in both cohorts; grouping by 'Participant ID'
# alone would join their PH codes and HF codes into one string.
combined_data['Pathway'] = (
    combined_data
    .groupby(['Cohort', 'Participant ID'])['Disease_Code']
    .transform(lambda codes: ' -> '.join(codes))
)

# Keep only rows on one of the common, sufficiently populated pathways.
# .copy() so that later column assignments act on an independent frame.
combined_data = combined_data[combined_data['Pathway'].isin(filtered_pathways['Pathway'].tolist())].copy()


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: For every pathway, is there at least one row with a recorded
# "Date of Death"? Result: boolean Series indexed by pathway.
pathways_with_death = (
    combined_data['Date of Death']
    .notna()
    .groupby(combined_data['Pathway'])
    .any()
)

# Steps 2-3: Broadcast the pathway-level flag back onto the rows and store it
# as 1/0. Note this is a per-pathway flag, not a per-participant event.
_dead_flag_per_row = combined_data['Pathway'].map(pathways_with_death)
combined_data['Dead Check'] = np.where(_dead_flag_per_row, 1, 0)

# Step 4: Display the updated DataFrame
#display(combined_data[['Pathway', 'Date of Death', 'Dead Check']])

# Rows belonging to a pathway with at least one death
pathways_with_dead = combined_data.loc[combined_data['Dead Check'].eq(1)]

# Distinct pathway strings among those rows, in order of first appearance
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Display the unique pathways
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# The same list as a one-column DataFrame
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})
#len(unique_pathways_df)
#display(unique_pathways_df)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################





# Encode the categorical columns as numbers: Sex (Male=1, Female=0) and
# Cohort (PH=1, HF=0). Labels outside these mappings become NaN.
combined_data = combined_data.assign(
    Sex=combined_data['Sex'].map({'Male': 1, 'Female': 0}),
    Cohort=combined_data['Cohort'].map({'PH': 1, 'HF': 0}),
)
display(combined_data[['Followup_Time', 'Dead Check', 'Cohort']].head())

# Model matrix: duration, event indicator, covariates, and one indicator
# column per pathway (drop_first=True leaves one pathway out as reference).
# Rows with any missing value are removed before fitting.
_model_columns = ['Followup_Time', 'Cohort', 'Age', 'Sex', 'Pathway']
cox_data = pd.get_dummies(
    combined_data[_model_columns],
    columns=['Pathway'],
    drop_first=True,
).dropna()

# Step 8: Fit the Cox Proportional Hazards Model
# NOTE(review): the event indicator here is 'Cohort' (1 = PH, 0 = HF), not a
# death flag; 'Dead Check' is computed above but does not enter the model.
# Confirm this is the intended PH-vs-HF contrast.
# NOTE(review): combined_data appears to hold one row per diagnosis, so a
# participant may contribute several rows to the fit - verify.
cox_model = CoxPHFitter()
cox_model.fit(cox_data, duration_col='Followup_Time', event_col='Cohort')
cox_model.print_summary()


# Step 9: Plot the fitted coefficients (log hazard ratios with 95% CI)
plt.figure(figsize=(8, 15))  # Extra height so the y-axis labels stay readable
cox_model.plot()
plt.title("Cox Proportional Hazards Analysis: PH vs HF Pathways", fontsize=16)
plt.xlabel("log(HR) (95% CI)", fontsize=12)
plt.ylabel("")  # Remove default ylabel to clean up
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()  # Ensure everything fits nicely
plt.show()


### <center> <span style="background-color:#D2B48C; padding:5px;">Cox Proportional Hazards Analysis: Combined Pathways</span></center>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



# Colour of the points for every pathway that starts with the given ICD-10
# code. The key order is also the top-to-bottom drawing order of the groups.
_PATHWAY_GROUP_COLORS = {
    'E11.9': '#228B22',  # Forest Green
    'E78.0': '#D2691E',  # Chocolate
    'I10': '#7D3C98',    # Amethyst
    'I25.1': '#FF69B4',  # Hot Pink
    'I48': '#4682B4',    # Steel Blue
    'J45.9': '#FF8C00',  # Orange
}


def _draw_grouped_hazard_ratios(ax, frame, x_ticks, title):
    """Draw colour-grouped log(HR) points with 95% CI error bars on *ax*.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on. Only methods of *ax* are used, so the result does not
        depend on which axes pyplot currently considers active.
    frame : pandas.DataFrame
        Must have the columns ``'coef'``, ``'coef lower 95%'`` and
        ``'coef upper 95%'`` and a string index whose labels start with
        ``"Pathway_<first code>"``. Rows whose label matches none of the
        prefixes in ``_PATHWAY_GROUP_COLORS`` are not drawn.
    x_ticks : sequence of numbers
        Tick positions for the x-axis; the labels are the same numbers in
        ``%g`` formatting (e.g. ``-0.5``, ``0``, ``1.5``).
    title : str
        Axes title.

    Rows are placed on consecutive integer y positions starting at 0, group by
    group and in *frame* order within a group. No gap is inserted between
    groups (the previous ``int(0.8)`` spacing always evaluated to 0), and the
    y-axis is inverted so row 0 is at the top.
    """
    next_row = 0
    used_rows = []

    for start_code, colour in _PATHWAY_GROUP_COLORS.items():
        rows = frame.loc[frame.index.str.startswith(f"Pathway_{start_code}")]
        if rows.empty:
            continue  # nothing to draw for this starting condition

        y_ticks = range(next_row, next_row + len(rows))
        ax.errorbar(
            rows['coef'],
            y_ticks,
            # asymmetric error bars: distance from the estimate to each CI bound
            xerr=[
                rows['coef'] - rows['coef lower 95%'],
                rows['coef upper 95%'] - rows['coef'],
            ],
            fmt='o',
            capsize=4.0,
            color=colour,
        )
        used_rows.extend(y_ticks)
        next_row += len(rows)

    # Half a row of padding above and below; skipped when nothing matched so
    # an empty selection does not raise on min()/max().
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    ax.set_xticks(list(x_ticks))
    ax.set_xticklabels([f"{tick:g}" for tick in x_ticks], fontsize=10)
    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title(title, fontsize=13)

    # Rows are identified by their position, not by y tick labels
    ax.set_yticks([])


def create_hazard_ratio_plot(ax, plot_data):
    """Draw the PH-vs-HF hazard-ratio panel for *plot_data* on *ax*.

    See ``_draw_grouped_hazard_ratios`` for the expected columns and index of
    *plot_data*. The x-axis label is set on *ax* itself (previously it went
    through pyplot and therefore to whichever axes was current).
    """
    _draw_grouped_hazard_ratios(
        ax,
        plot_data,
        x_ticks=(-1, -0.5, 0, 0.5, 1, 1.5, 2),
        title="Cox Proportional Hazards Analysis\n (PH v/s HF)",
    )


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """Draw the mortality hazard-ratio panel for the PH pathways on *ax*.

    See ``_draw_grouped_hazard_ratios`` for the expected columns and index of
    *plot_data_ph*.
    """
    _draw_grouped_hazard_ratios(
        ax,
        plot_data_ph,
        x_ticks=(-2, -1.5, -1, -0.5, 0, 0.5, 1),
        title="Cox Proportional Hazards Analysis\n (Mortality Risk with PH Pathways)",
    )


def create_hazard_ratio_plot_heart_failure(ax, plot_data_heart_failure):
    """Draw the mortality hazard-ratio panel for the HF pathways on *ax*.

    See ``_draw_grouped_hazard_ratios`` for the expected columns and index of
    *plot_data_heart_failure*.
    """
    _draw_grouped_hazard_ratios(
        ax,
        plot_data_heart_failure,
        x_ticks=(-1, -0.5, 0, 0.5, 1, 1.5),
        title="Cox Proportional Hazards Analysis\n (Mortality Risk with HF Pathways)",
    )
    
    
    
########################################################################################################################    

    





# Define the pathways and colors
# Each entry is one disease pathway: ICD-10 codes joined by " -> " and ending
# in the shared "PH/HF" node. The drawing loop below places the first entry in
# the top row and the last entry in the bottom row. Entries are grouped by
# their first code (E11.9, E78.0, I10, I25.1, I48, J45.9).
pathways = [
    # Pathways starting with E11.9
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> E78.0 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> PH/HF",
    "E11.9 -> I10 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> J45.9 -> PH/HF",
    "E11.9 -> I25.1 -> PH/HF",
    
    # Pathways starting with E78.0
    "E78.0 -> I10 -> PH/HF",
    "E78.0 -> I10 -> E11.9 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> I48 -> PH/HF",
    "E78.0 -> I10 -> I48 -> PH/HF",
    "E78.0 -> I10 -> J45.9 -> PH/HF",
    "E78.0 -> I25.1 -> PH/HF",
    
    
    # Pathways starting with I10
    "I10 -> E11.9 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> PH/HF",
    "I10 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> I48 -> PH/HF",
    "I10 -> I34.0 -> PH/HF",
    "I10 -> I48 -> PH/HF",
    "I10 -> I48 -> E11.9 -> PH/HF",
    "I10 -> I48 -> E78.0 -> PH/HF",
    "I10 -> I48 -> I34.0 -> PH/HF",
    "I10 -> I48 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> E11.9 -> PH/HF",
    "I10 -> J45.9 -> E78.0 -> PH/HF",
    
    # Pathways starting with I25.1
    "I25.1 -> E78.0 -> I10 -> PH/HF",
    "I25.1 -> I10 -> PH/HF",
    "I25.1 -> I48 -> PH/HF",
    
    # Pathways starting with I48
    "I48 -> E78.0 -> I10 -> PH/HF",
    "I48 -> I10 -> PH/HF",
    "I48 -> J45.9 -> PH/HF",

    # Pathways starting with J45.9
    "J45.9 -> E11.9 -> I10 -> PH/HF",
    "J45.9 -> E78.0 -> I10 -> PH/HF",
    "J45.9 -> I10 -> PH/HF"   
]


# Fill colour of the node box for each condition label. The drawing loop
# looks labels up with a 'white' fallback, so a label missing here is drawn
# as a white box.
color_dict = {
    'PH/HF': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3',
    'J45.9': '#F4A460',
    'I34.0':'#FFECB3'
}




# Create the figure that holds the pathway diagram (left) and the inset
# hazard-ratio panel (right).
fig, ax = plt.subplots(figsize=(16, 12))
gap = 0.001  # Horizontal gap between nodes
row_gap = 1  # Vertical gap between rows

# Arrow appearance shared by every connector between two nodes
_arrow_style = dict(
    arrowstyle="->,head_width=0.2,head_length=0.3",  # Arrowhead size
    color='black',
    lw=0.7,
    shrinkA=0,  # No shortening at the arrow start (in points)
    shrinkB=0,  # No shortening at the arrow end (in points)
)
_row_count = len(pathways)

# One row per pathway (first pathway at the top), one box per condition
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    y_pos = _row_count - row + 1  # same for every node of this row
    _last_col = len(conditions)

    for col, condition in enumerate(conditions, start=1):
        x_pos = col + (col - 1) * gap

        # Node box, coloured by condition (white if the label has no colour)
        node_box = plt.Rectangle(
            (x_pos - 0.5, y_pos - 0.5), 0.6, 1.2,
            facecolor=color_dict.get(condition, 'white'), edgecolor='black',
        )
        ax.add_patch(node_box)
        ax.text(x_pos - 0.17, y_pos, condition, ha='center', va='center', fontsize=10)

        # The last node of a pathway has no outgoing arrow
        if col == _last_col:
            continue

        next_x_pos = x_pos + 1 + gap
        ax.annotate(
            '',
            xy=(next_x_pos - 0.5, y_pos),  # Arrow tip: left edge of the next box
            xytext=(x_pos + 0.11, y_pos),  # Arrow tail: just right of this box
            arrowprops=dict(_arrow_style),
        )


# Extent of the inset area in data coordinates.
# NOTE: `conditions` is the loop variable left over from the LAST pathway, so
# the horizontal placement depends on the number of nodes in that pathway.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5      # Ending y position for the box


# Axes for the hazard ratio plot, anchored (in data coordinates of `ax`) to
# the box defined above, shifted right by 5.0 and up by 0.1.
_inset_anchor = (
    box_start_x + 5.0,
    box_start_y + 0.1,
    box_end_x - box_start_x,
    box_end_y - box_start_y,
)
inset = inset_axes(
    ax,
    width="60%",      # Width of the inset relative to the anchor box
    height="100.1%",  # Height of the inset relative to the anchor box
    bbox_to_anchor=_inset_anchor,
    bbox_transform=ax.transData,
    loc='center'
)


# Hazard ratio data for the inset plot: one record per pathway holding the
# coefficient (log hazard ratio) and the bounds of its 95% confidence interval.
# The values are hard-coded; replace them with your actual results when the
# underlying model changes. The record order matches the order of `pathways`.
_plot_data_records = [
    # (index label, coef, coef lower 95%, coef upper 95%)
    ("Pathway_E11.9 -> E78.0 -> I10 -> I25.1", 0.58, 0.17, 0.98),
    ("Pathway_E11.9 -> E78.0 -> I10 -> I48", 0.56, 0.05, 1.07),
    ("Pathway_E11.9 -> I10", 0.10, -0.23, 0.43),
    ("Pathway_E11.9 -> I10 -> E78.0", 0.55, 0.16, 0.94),
    ("Pathway_E11.9 -> I10 -> I25.1", 0.54, 0.15, 0.92),
    ("Pathway_E11.9 -> I10 -> I25.1 -> E78.0", 1.19, 0.74, 1.65),
    ("Pathway_E11.9 -> I10 -> I48", 0.82, 0.41, 1.23),
    ("Pathway_E11.9 -> I10 -> J45.9", -0.05, -0.63, 0.52),
    ("Pathway_E11.9 -> I25.1", 0.20, -0.47, 0.88),

    ("Pathway_E78.0 -> I10", 0.16, -0.17, 0.49),
    ("Pathway_E78.0 -> I10 -> E11.9", 1.13, 0.59, 1.66),
    ("Pathway_E78.0 -> I10 -> I25.1", 0.33, -0.04, 0.69),
    ("Pathway_E78.0 -> I10 -> I25.1 -> I48", 0.56, 0.05, 1.08),
    ("Pathway_E78.0 -> I10 -> I48", 0.71, 0.32, 1.11),
    ("Pathway_E78.0 -> I10 -> J45.9", 1.07, 0.61, 1.54),
    ("Pathway_E78.0 -> I25.1", -0.41, -0.97, 0.14),

    ("Pathway_I10 -> E11.9", -0.29, -0.80, 0.22),
    ("Pathway_I10 -> E11.9 -> E78.0", 0.52, 0.08, 0.95),
    ("Pathway_I10 -> E11.9 -> E78.0 -> I25.1", 1.53, 1.05, 2.01),
    ("Pathway_I10 -> E78.0", 0.70, 0.36, 1.04),
    ("Pathway_I10 -> E78.0 -> I25.1", 0.55, 0.14, 0.96),
    ("Pathway_I10 -> E78.0 -> I48", 0.83, 0.43, 1.23),
    ("Pathway_I10 -> I25.1", 0.15, -0.24, 0.53),
    ("Pathway_I10 -> I25.1 -> E78.0", 0.62, 0.12, 1.13),
    ("Pathway_I10 -> I25.1 -> E78.0 -> I48", 1.22, 0.71, 1.73),
    ("Pathway_I10 -> I25.1 -> I48", 0.63, 0.16, 1.11),
    ("Pathway_I10 -> I34.0", -0.23, -0.70, 0.24),
    ("Pathway_I10 -> I48", 0.66, 0.35, 0.96),
    ("Pathway_I10 -> I48 -> E11.9", 0.56, 0.10, 1.02),
    ("Pathway_I10 -> I48 -> E78.0", 0.71, 0.34, 1.09),
    ("Pathway_I10 -> I48 -> I34.0", 1.24, 0.79, 1.69),
    ("Pathway_I10 -> I48 -> J45.9", 0.60, 0.17, 1.03),
    ("Pathway_I10 -> J45.9", 0.38, 0.04, 0.72),
    ("Pathway_I10 -> J45.9 -> E11.9", 0.85, 0.34, 1.35),
    ("Pathway_I10 -> J45.9 -> E78.0", 0.24, -0.30, 0.77),

    ("Pathway_I25.1 -> E78.0 -> I10", 0.73, 0.30, 1.16),
    ("Pathway_I25.1 -> I10", 0.66, 0.22, 1.09),
    ("Pathway_I25.1 -> I48", 1.10, 0.61, 1.60),

    ("Pathway_I48 -> E78.0 -> I10", 0.98, 0.41, 1.56),
    ("Pathway_I48 -> I10", 0.91, 0.57, 1.25),
    ("Pathway_I48 -> J45.9", -0.47, -1.15, 0.20),

    ("Pathway_J45.9 -> E11.9 -> I10", 0.15, -0.42, 0.72),
    ("Pathway_J45.9 -> E78.0 -> I10", 0.54, -0.03, 1.11),
    ("Pathway_J45.9 -> I10", 0.63, 0.25, 1.01),
]

# Split every record into its index label and its three numeric columns
plot_data = pd.DataFrame(
    [record[1:] for record in _plot_data_records],
    columns=['coef', 'coef lower 95%', 'coef upper 95%'],
    index=[record[0] for record in _plot_data_records],
)

# Disabled option: mirror the inset horizontally by inverting its x-axis.
# (The vertical flip is done inside create_hazard_ratio_plot via invert_yaxis.)
#inset.invert_xaxis()

# Draw the PH-vs-HF hazard ratio points and error bars into the inset axes.
create_hazard_ratio_plot(inset, plot_data)







###################################################################################

# Log hazard ratios (with 95% confidence bounds) for the PH pathways; this is
# the table drawn by create_hazard_ratio_plot_ph.
# One row per pathway: label -> [coef, coef lower 95%, coef upper 95%].
plot_data_ph = pd.DataFrame.from_dict(
    {
        # Pathways starting with E11.9
        "Pathway_E11.9 -> E78.0 -> I10 -> I25.1": [0.27, -0.16, 0.70],
        "Pathway_E11.9 -> E78.0 -> I10 -> I48": [-0.00, -0.55, 0.54],
        "Pathway_E11.9 -> I10": [0.16, -0.13, 0.45],
        "Pathway_E11.9 -> I10 -> E78.0": [0.68, 0.34, 1.02],
        "Pathway_E11.9 -> I10 -> I25.1": [0.00, -0.39, 0.39],
        "Pathway_E11.9 -> I10 -> I25.1 -> E78.0": [0.43, -0.12, 0.98],
        "Pathway_E11.9 -> I10 -> I48": [-0.11, -0.54, 0.33],
        "Pathway_E11.9 -> I10 -> J45.9": [0.13, -0.62, 0.88],
        "Pathway_E11.9 -> I25.1": [-0.72, -1.75, 0.31],
        # Pathways starting with E78.0
        "Pathway_E78.0 -> I10": [0.04, -0.26, 0.35],
        "Pathway_E78.0 -> I10 -> E11.9": [-0.02, -0.73, 0.69],
        "Pathway_E78.0 -> I10 -> I25.1": [-0.33, -0.73, 0.08],
        "Pathway_E78.0 -> I10 -> I25.1 -> I48": [0.38, -0.18, 0.93],
        "Pathway_E78.0 -> I10 -> I48": [0.00, -0.39, 0.40],
        "Pathway_E78.0 -> I10 -> J45.9": [0.20, -0.29, 0.70],
        "Pathway_E78.0 -> I25.1": [-0.41, -1.20, 0.39],
        # Pathways starting with I10
        "Pathway_I10 -> E11.9": [0.40, -0.20, 1.00],
        "Pathway_I10 -> E11.9 -> E78.0": [0.72, 0.28, 1.16],
        "Pathway_I10 -> E11.9 -> E78.0 -> I25.1": [-0.05, -0.67, 0.57],
        "Pathway_I10 -> E78.0": [-0.14, -0.49, 0.21],
        "Pathway_I10 -> E78.0 -> I25.1": [0.34, -0.07, 0.74],
        "Pathway_I10 -> E78.0 -> I48": [-0.56, -1.03, -0.08],
        "Pathway_I10 -> I25.1": [-0.05, -0.45, 0.35],
        "Pathway_I10 -> I25.1 -> E78.0": [0.31, -0.24, 0.86],
        "Pathway_I10 -> I25.1 -> E78.0 -> I48": [0.26, -0.29, 0.81],
        "Pathway_I10 -> I25.1 -> I48": [0.30, -0.20, 0.80],
        "Pathway_I10 -> I34.0": [-1.18, -1.90, -0.46],
        "Pathway_I10 -> I48": [-0.24, -0.49, 0.02],
        "Pathway_I10 -> I48 -> E11.9": [0.16, -0.37, 0.70],
        "Pathway_I10 -> I48 -> E78.0": [-0.69, -1.17, -0.22],
        "Pathway_I10 -> I48 -> I34.0": [-0.22, -0.74, 0.29],
        "Pathway_I10 -> I48 -> J45.9": [0.54, 0.12, 0.95],
        "Pathway_I10 -> J45.9": [-0.13, -0.47, 0.22],
        "Pathway_I10 -> J45.9 -> E11.9": [0.51, 0.00, 1.02],
        "Pathway_I10 -> J45.9 -> E78.0": [0.20, -0.53, 0.93],
        # Pathways starting with I25.1
        "Pathway_I25.1 -> E78.0 -> I10": [0.08, -0.44, 0.61],
        "Pathway_I25.1 -> I10": [0.39, -0.05, 0.83],
        "Pathway_I25.1 -> I48": [0.07, -0.47, 0.61],
        # Pathways starting with I48
        "Pathway_I48 -> E78.0 -> I10": [0.46, -0.18, 1.10],
        "Pathway_I48 -> I10": [0.04, -0.27, 0.34],
        "Pathway_I48 -> J45.9": [-0.93, -1.93, 0.07],
        # Pathways starting with J45.9
        "Pathway_J45.9 -> E11.9 -> I10": [0.29, -0.46, 1.04],
        "Pathway_J45.9 -> E78.0 -> I10": [0.53, -0.04, 1.10],
        "Pathway_J45.9 -> I10": [0.20, -0.21, 0.61],
    },
    orient='index',
    columns=['coef', 'coef lower 95%', 'coef upper 95%'],
)

# Scaled copy of the PH table (every 'coef' multiplied by 1.1), left over from
# a demonstration.
# NOTE(review): the plotting call below uses plot_data_ph, not this copy —
# confirm plot_data_2 is still needed anywhere.
plot_data_2 = plot_data_ph.assign(coef=plot_data_ph['coef'] * 1.1)

# Horizontal extent reserved for the PH inset, measured from the end of the
# first box.
box_start_x_ph = box_end_x + 1.0
box_end_x_ph = box_start_x_ph + 4.8

# Inset axes for the PH estimates; the bounding box is expressed in data
# coordinates of the main axes.
inset_ph = inset_axes(
    ax,
    width="60%",
    height="100.1%",
    bbox_to_anchor=(
        box_start_x_ph - 6.8,            # left
        box_start_y + 0.1,               # bottom
        box_end_x_ph - box_start_x_ph,   # width
        box_end_y - box_start_y,         # height
    ),
    bbox_transform=ax.transData,
    loc='center',
)

# Draw the PH table into its inset.
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)





####################################################################################

# Log hazard ratios (with 95% confidence bounds) for the heart-failure
# pathways; this is the table drawn by create_hazard_ratio_plot_heart_failure.
# One row per pathway: label -> [coef, coef lower 95%, coef upper 95%].
plot_data_heart_failure = pd.DataFrame.from_dict(
    {
        # Pathways starting with E11.9
        "Pathway_E11.9 -> E78.0 -> I10 -> I25.1": [-0.08, -0.35, 0.19],
        "Pathway_E11.9 -> E78.0 -> I10 -> I48": [0.08, -0.30, 0.45],
        "Pathway_E11.9 -> I10": [0.29, 0.07, 0.51],
        "Pathway_E11.9 -> I10 -> E78.0": [0.09, -0.22, 0.41],
        "Pathway_E11.9 -> I10 -> I25.1": [-0.10, -0.43, 0.22],
        "Pathway_E11.9 -> I10 -> I25.1 -> E78.0": [-0.30, -0.70, 0.10],
        "Pathway_E11.9 -> I10 -> I48": [-0.07, -0.43, 0.29],
        "Pathway_E11.9 -> I10 -> J45.9": [0.55, -0.02, 1.12],
        "Pathway_E11.9 -> I25.1": [-0.51, -1.15, 0.13],
        # Pathways starting with E78.0
        "Pathway_E78.0 -> I10": [-0.39, -0.64, -0.15],
        "Pathway_E78.0 -> I10 -> E11.9": [0.59, 0.12, 1.06],
        "Pathway_E78.0 -> I10 -> I25.1": [-0.10, -0.34, 0.14],
        "Pathway_E78.0 -> I10 -> I25.1 -> I48": [-0.62, -1.01, -0.23],
        "Pathway_E78.0 -> I10 -> I48": [0.47, 0.17, 0.78],
        "Pathway_E78.0 -> I10 -> J45.9": [-0.09, -0.81, 0.63],
        "Pathway_E78.0 -> I25.1": [-0.74, -1.16, -0.31],
        # Pathways starting with I10
        "Pathway_I10 -> E11.9": [-0.11, -0.41, 0.20],
        "Pathway_I10 -> E11.9 -> E78.0": [-0.28, -0.60, 0.04],
        "Pathway_I10 -> E11.9 -> E78.0 -> I25.1": [0.08, -0.45, 0.62],
        "Pathway_I10 -> E78.0": [0.25, -0.03, 0.53],
        "Pathway_I10 -> E78.0 -> I25.1": [-0.38, -0.78, 0.02],
        "Pathway_I10 -> E78.0 -> I48": [0.41, 0.04, 0.78],
        "Pathway_I10 -> I25.1": [-0.11, -0.40, 0.18],
        "Pathway_I10 -> I25.1 -> E78.0": [-0.47, -0.81, -0.13],
        "Pathway_I10 -> I25.1 -> E78.0 -> I48": [0.15, -0.33, 0.63],
        "Pathway_I10 -> I25.1 -> I48": [-0.30, -0.83, 0.22],
        "Pathway_I10 -> I34.0": [0.42, -0.10, 0.94],
        "Pathway_I10 -> I48": [-0.27, -0.51, -0.03],
        "Pathway_I10 -> I48 -> E11.9": [0.90, 0.42, 1.38],
        "Pathway_I10 -> I48 -> E78.0": [0.26, -0.24, 0.76],
        "Pathway_I10 -> I48 -> I34.0": [-0.25, -0.86, 0.36],
        "Pathway_I10 -> I48 -> J45.9": [-0.16, -0.69, 0.38],
        "Pathway_I10 -> J45.9": [-0.22, -0.55, 0.11],
        "Pathway_I10 -> J45.9 -> E11.9": [-0.39, -1.07, 0.30],
        "Pathway_I10 -> J45.9 -> E78.0": [0.39, -0.01, 0.80],
        # Pathways starting with I25.1
        "Pathway_I25.1 -> E78.0 -> I10": [-0.55, -0.92, -0.18],
        "Pathway_I25.1 -> I10": [0.15, -0.23, 0.54],
        "Pathway_I25.1 -> I48": [-0.08, -0.64, 0.47],
        # Pathways starting with I48
        "Pathway_I48 -> E78.0 -> I10": [0.36, -0.19, 0.92],
        "Pathway_I48 -> I10": [-0.60, -0.99, -0.21],
        "Pathway_I48 -> J45.9": [-0.22, -0.82, 0.39],
        # Pathways starting with J45.9
        "Pathway_J45.9 -> E11.9 -> I10": [0.27, -0.28, 0.83],
        "Pathway_J45.9 -> E78.0 -> I10": [-0.16, -0.78, 0.45],
        "Pathway_J45.9 -> I10": [-0.30, -0.78, 0.19],
    },
    orient='index',
    columns=['coef', 'coef lower 95%', 'coef upper 95%'],
)




# Scaled copy of the heart-failure table (every 'coef' multiplied by 1.1),
# left over from a demonstration.
# NOTE(review): the plotting call below uses plot_data_heart_failure, not this
# copy — confirm plot_data_3 is still needed anywhere.
plot_data_3 = plot_data_heart_failure.assign(coef=plot_data_heart_failure['coef'] * 1.1)

# Horizontal extent reserved for the heart-failure inset, measured from the
# end of the first box.
box_start_x_heart_failure = box_end_x + 1.0
box_end_x_heart_failure = box_start_x_heart_failure + 4.8

# Inset axes for the heart-failure estimates; the bounding box is expressed in
# data coordinates of the main axes.
inset_heart_failure = inset_axes(
    ax,
    width="60%",
    height="100.1%",
    bbox_to_anchor=(
        box_start_x_heart_failure - 3.8,                        # left
        box_start_y + 0.1,                                      # bottom
        box_end_x_heart_failure - box_start_x_heart_failure,    # width
        box_end_y - box_start_y,                                # height
    ),
    bbox_transform=ax.transData,
    loc='center',
)

# Draw the heart-failure table into its inset.
create_hazard_ratio_plot_heart_failure(inset_heart_failure, plot_data_heart_failure)

#######################################################################################



# Size the main axes: x covers the node columns plus extra room to the right,
# y covers one row per pathway.
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # nodes in the longest pathway
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)
ax.set_ylim(0.5, len(pathways) + 0.7)

# No y ticks or y tick labels on the main axes.
ax.set_yticks([])
ax.set_yticklabels([])

# Hand-placed x-ticks numbering the steps of the disease sequence.
ax.set_xticks([0.8, 1.80, 2.80, 3.82, 4.8])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Legend: one colour swatch per condition.
# NOTE(review): the swatch colours are hard-coded here rather than taken from
# the palette used to fill the node boxes — confirm the two agree.
# NOTE(review): the hyperlipidemia entry is labelled "E78" while the pathway
# labels use "E78.0" — confirm which code is intended.
handles = [
    mpatches.Patch(color=color, label=full_name)
    for full_name, color in {
        "PH and HF as Index Conditions": '#b3b3b3',
        "Type 2 Diabetes Mellitus - E11.9": '#88cc88',
        "Hypertension - I10": '#a993cc',
        "Hyperlipidemia - E78": '#d2b48c',
        "Chronic Ischemic Heart Disease - I25.1": '#FFC0CB',
        "Atrial Fibrillation and Flutter - I48": '#add8e6',
        "Mitral Valve Disorder - I34.0": '#FFECB3',
        "Asthma - J45.9": '#ffa07a',  # stray comma removed from the label
    }.items()
]

# Place the legend below the axes, in four frameless columns.
ax.legend(handles=handles, bbox_to_anchor=(0.6, -0.07), loc='upper center', ncol=4, fontsize=14, frameon=False)

# Axis titles; the x label is shifted left so it sits under the node columns.
ax.set_xlabel("Disease Sequence Progression", fontsize=15, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/HF", fontsize=15, labelpad=15)
ax.xaxis.set_label_coords(0.2, -0.03)

# Hide the top and right borders of the main axes.
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Lay out, write the PNG, then display.
plt.tight_layout()
plt.savefig("pathways_plot.png", dpi=1200)

plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws log hazard ratios with 95% CI error bars for each pathway onto `ax`.

    Rows are grouped by the first condition of their "Pathway_<code> -> ..."
    index label; each group gets its own colour and rows are stacked one per
    integer y position (row 0 ends up at the top because the y-axis is
    inverted).

    Parameters:
    - ax: The axis to draw on. Every call below targets this object explicitly
      rather than pyplot's "current" axes.
    - plot_data: DataFrame indexed by pathway label with the columns 'coef',
      'coef lower 95%' and 'coef upper 95%'.

    Rows whose label starts with none of the six conditions listed below are
    not drawn.
    """
    # Starting condition -> marker colour, listed in drawing order.
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate
        'I10': '#7D3C98',  # Amethyst
        'I25.1': '#FF69B4',  # Hot Pink
        'I48': '#4682B4',  # Steel Blue
        'J45.9': '#FF8C00',  # Dark Orange
    }

    # Groups are stacked without any gap so that the i-th plotted row sits at
    # y == i; add_annotations_box places its text at range(len(plot_data)) and
    # depends on that.
    current_y = 0
    y_positions = []  # every y position that received a row

    for group, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks = range(current_y, current_y + len(data))
        ax.errorbar(
            data['coef'],
            group_y_ticks,
            xerr=[
                data['coef'] - data['coef lower 95%'],  # distance down to the lower bound
                data['coef upper 95%'] - data['coef'],  # distance up to the upper bound
            ],
            fmt='o',
            capsize=4.0,
            color=color,
        )
        y_positions.extend(group_y_ticks)
        current_y += len(data)

    # Tight y-limits with half a row of padding. Skipped when no row matched,
    # so a table without known pathways does not fail on min()/max().
    if y_positions:
        ax.set_ylim(min(y_positions) - 0.5, max(y_positions) + 0.5)

    ax.invert_yaxis()  # first row at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) == 0 reference line

    # Fixed x-ticks for log(HR)
    x_ticks = [-1, -0.5, 0, 0.5, 1, 1.5, 2]
    x_labels = ["-1", "-0.5", "0", "0.5", "1", "1.5", "2"]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_labels, fontsize=10)

    # Label through the axes object so the text lands on `ax` even when a
    # different axes is current in pyplot.
    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s HF", fontsize=13)

def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a separate box of per-row annotations (p-value and hazard ratio)
    next to a hazard ratio plot.

    Parameters:
    - ax: The axis holding the hazard ratio bars; the box is anchored to it and
      copies its y-limits.
    - plot_data: DataFrame with a 'p' column (p-values) and a 'coef' column;
      the hazard ratio shown is exp(coef).
    - box_title: Title of the annotation box.
    - x_offset: Horizontal anchor of the box, in axes-fraction units of `ax`.

    Row i of `plot_data` is written at y == i, so the text lines up with the
    bars only when the plot drew the rows in table order at consecutive integer
    y positions.
    """
    # Create a new inset axis for annotations
    inset_box = inset_axes(
        ax,
        width="145%",  # Relative to the 0.2-wide anchor box below
        height="100%",  # Same height as the plot
        bbox_to_anchor=(x_offset, 0, 0.2, 1),
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Strip the default frame and ticks; the border is drawn explicitly below
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    inset_box.set_title(box_title, fontsize=10, pad=10)

    # One text line per table row, horizontally centred in the box
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        # A p-value below 0.005 prints as "0.00" at two decimals, which reads
        # as an impossible p of zero; report it as a bound instead.
        p_text = "<0.01" if p_val < 0.005 else f"{p_val:.2f}"
        annotation = f"P:{p_text}, HR:{np.exp(coef):.2f}"  # HR is exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Share the plot's y-limits (including its inversion) so rows line up
    inset_box.set_ylim(ax.get_ylim())

    # Border around the whole annotation area, in the box's own axes fraction
    rect = plt.Rectangle(
        (0, 0), 1, 1,
        transform=inset_box.transAxes,
        edgecolor="black",
        facecolor="none",   # Transparent inside
        linewidth=1.5,
        zorder=10           # Keep the border above the text
    )
    inset_box.add_patch(rect)



 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws the PH pathway estimates as points with 95% CI error bars on `ax`.

    Parameters:
    - ax: The axis to draw on.
    - plot_data_ph: DataFrame indexed by "Pathway_<code> -> ..." labels with
      the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are grouped by the first condition in their label, coloured per
    group and stacked at consecutive integer y positions with no gap between
    groups. Labels starting with none of the six listed conditions are not
    drawn.
    """
    # (starting condition, marker colour) in drawing order, top to bottom.
    palette = (
        ('E11.9', '#228B22'),  # Forest Green
        ('E78.0', '#D2691E'),  # Chocolate
        ('I10', '#7D3C98'),  # Amethyst
        ('I25.1', '#FF69B4'),  # Hot Pink
        ('I48', '#4682B4'),  # Steel Blue
        ('J45.9', '#FF8C00'),  # Orange
    )

    # Slice each group up front, then plot them one after another.
    subsets = [
        (colour, plot_data_ph.loc[plot_data_ph.index.str.startswith("Pathway_" + code)])
        for code, colour in palette
    ]

    next_row = 0
    used_rows = []
    for colour, subset in subsets:
        rows = range(next_row, next_row + len(subset))
        centre = subset['coef']
        ax.errorbar(
            centre,
            rows,
            xerr=[centre - subset['coef lower 95%'], subset['coef upper 95%'] - centre],
            fmt='o',
            capsize=4.0,
            color=colour,
        )
        used_rows.extend(rows)
        next_row = rows.stop  # the following group starts right below this one

    # Half a row of padding above the first and below the last row.
    ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # first row at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) == 0 reference

    # Fixed tick positions paired with their labels.
    tick_marks = (
        (-2, "-2"), (-1.5, "-1.5"), (-1, "-1"), (-0.5, "-0.5"),
        (0, "0"), (0.5, "0.5"), (1, "1"),
    )
    ax.set_xticks([position for position, _ in tick_marks])
    ax.set_xticklabels([text for _, text in tick_marks], fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # No y ticks on this axis.
    ax.set_yticks([])




##############################################################################################
    
    
    


    
def create_hazard_ratio_plot_heart_failure(ax, plot_data_heart_failure):
    """
    Draws the heart-failure pathway estimates as points with 95% CI error
    bars on `ax`.

    Parameters:
    - ax: The axis to draw on.
    - plot_data_heart_failure: DataFrame indexed by "Pathway_<code> -> ..."
      labels with the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are grouped by the first condition in their label, coloured per
    group and stacked at consecutive integer y positions with no gap between
    groups. Labels starting with none of the six listed conditions are not
    drawn.
    """
    table = plot_data_heart_failure

    # Starting condition -> marker colour, in drawing order (top to bottom).
    colour_by_start = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate
        'I10': '#7D3C98',  # Amethyst
        'I25.1': '#FF69B4',  # Hot Pink
        'I48': '#4682B4',  # Steel Blue
        'J45.9': '#FF8C00',  # Orange
    }

    # Select the rows of every group before any drawing happens.
    grouped = [
        (colour, table.loc[table.index.str.startswith(f"Pathway_{start}")])
        for start, colour in colour_by_start.items()
    ]

    occupied = []  # y positions already taken; its length is the next free row
    for colour, block in grouped:
        first_free = len(occupied)
        rows = range(first_free, first_free + len(block))
        estimate = block['coef']
        ax.errorbar(
            estimate,
            rows,
            xerr=[
                estimate - block['coef lower 95%'],
                block['coef upper 95%'] - estimate,
            ],
            fmt='o',
            capsize=4.0,
            color=colour,
        )
        occupied.extend(rows)

    # Limits hug the rows with half a row of padding on each side.
    lowest, highest = min(occupied), max(occupied)
    ax.set_ylim(lowest - 0.5, highest + 0.5)

    ax.invert_yaxis()  # first row at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) == 0 reference

    # Fixed tick positions paired with their labels.
    tick_marks = (
        (-1, "-1"), (-0.5, "-0.5"), (0, "0"),
        (0.5, "0.5"), (1, "1"), (1.5, "1.5"),
    )
    ax.set_xticks([position for position, _ in tick_marks])
    ax.set_xticklabels([text for _, text in tick_marks], fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with HF Pathways", fontsize=13)

    # No y ticks on this axis.
    ax.set_yticks([])
    
    
    
########################################################################################################################    

    





# Disease pathways, one per plotted row. Each comorbidity chain below is
# completed with the shared end node "PH/HF"; the order matches the row order
# of the hazard tables indexed by "Pathway_<chain>".
pathways = [
    f"{comorbidity_chain} -> PH/HF"
    for comorbidity_chain in (
        # Chains starting with E11.9
        "E11.9 -> E78.0 -> I10 -> I25.1",
        "E11.9 -> E78.0 -> I10 -> I48",
        "E11.9 -> I10",
        "E11.9 -> I10 -> E78.0",
        "E11.9 -> I10 -> I25.1",
        "E11.9 -> I10 -> I25.1 -> E78.0",
        "E11.9 -> I10 -> I48",
        "E11.9 -> I10 -> J45.9",
        "E11.9 -> I25.1",
        # Chains starting with E78.0
        "E78.0 -> I10",
        "E78.0 -> I10 -> E11.9",
        "E78.0 -> I10 -> I25.1",
        "E78.0 -> I10 -> I25.1 -> I48",
        "E78.0 -> I10 -> I48",
        "E78.0 -> I10 -> J45.9",
        "E78.0 -> I25.1",
        # Chains starting with I10
        "I10 -> E11.9",
        "I10 -> E11.9 -> E78.0",
        "I10 -> E11.9 -> E78.0 -> I25.1",
        "I10 -> E78.0",
        "I10 -> E78.0 -> I25.1",
        "I10 -> E78.0 -> I48",
        "I10 -> I25.1",
        "I10 -> I25.1 -> E78.0",
        "I10 -> I25.1 -> E78.0 -> I48",
        "I10 -> I25.1 -> I48",
        "I10 -> I34.0",
        "I10 -> I48",
        "I10 -> I48 -> E11.9",
        "I10 -> I48 -> E78.0",
        "I10 -> I48 -> I34.0",
        "I10 -> I48 -> J45.9",
        "I10 -> J45.9",
        "I10 -> J45.9 -> E11.9",
        "I10 -> J45.9 -> E78.0",
        # Chains starting with I25.1
        "I25.1 -> E78.0 -> I10",
        "I25.1 -> I10",
        "I25.1 -> I48",
        # Chains starting with I48
        "I48 -> E78.0 -> I10",
        "I48 -> I10",
        "I48 -> J45.9",
        # Chains starting with J45.9
        "J45.9 -> E11.9 -> I10",
        "J45.9 -> E78.0 -> I10",
        "J45.9 -> I10",
    )
]


# Fill colour of the node box drawn for each condition code.
color_dict = {
    'PH/HF': '#b3b3b3',   # index conditions (end node of every pathway)
    'I10': '#b3a3cc',     # hypertension
    'I48': '#add8e6',     # atrial fibrillation and flutter
    'E78.0': '#ddc4a1',   # hyperlipidemia
    'I25.1': '#f4b0c8',   # chronic ischemic heart disease
    'E11.9': '#c4e3b3',   # type 2 diabetes mellitus
    'J45.9': '#F4A460',   # asthma
    'I34.0': '#FFECB3',   # mitral valve disorder
}




# Main figure: one row of coloured node boxes per pathway.
fig, ax = plt.subplots(figsize=(17, 13))
gap = -0.38  # added to the unit column step, so node columns sit 0.62 apart
row_gap = 1  # not referenced by the drawing code below


# Draw every pathway; the first pathway goes on the top row.
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    y_pos = len(pathways) - row + 1
    for col, condition in enumerate(conditions, start=1):
        x_pos = col + (col - 1) * gap

        # Node box, filled with the condition's colour (white if unknown)
        node_box = plt.Rectangle(
            (x_pos - 0.5, y_pos - 0.5),
            0.6,
            1.2,
            facecolor=color_dict.get(condition, 'white'),
            edgecolor='black',
        )
        ax.add_patch(node_box)

        # Condition code written inside the box
        ax.text(x_pos - 0.17, y_pos + 0.07, condition, ha='center', va='center', fontsize=9.60)


# Region reserved for the first hazard plot, in data coordinates.
# `conditions` still holds the nodes of the LAST pathway drawn above, so the
# horizontal position of this region depends on that pathway's length.
box_start_x = len(conditions) + 2.2
box_end_x = len(conditions) + 7.0
box_start_y = 0.45
box_end_y = len(pathways) + 0.5


# Inset axes that will hold the first hazard ratio plot.
inset = inset_axes(
    ax,
    width="60%",
    height="100.1%",
    bbox_to_anchor=(
        box_start_x + 5.0,           # left
        box_start_y + 0.1,           # bottom
        box_end_x - box_start_x,     # width
        box_end_y - box_start_y,     # height
    ),
    bbox_transform=ax.transData,
    loc='center',
)


# Hazard table drawn in the first inset under the title
# "Hazards Analysis of PH v/s HF" and listed in the annotation box.
# One row per pathway:
# label -> [coef, coef lower 95%, coef upper 95%, p].
plot_data = pd.DataFrame.from_dict(
    {
        # Pathways starting with E11.9
        "Pathway_E11.9 -> E78.0 -> I10 -> I25.1": [0.58, 0.17, 0.98, 0.01],
        "Pathway_E11.9 -> E78.0 -> I10 -> I48": [0.56, 0.05, 1.07, 0.03],
        "Pathway_E11.9 -> I10": [0.10, -0.23, 0.43, 0.55],
        "Pathway_E11.9 -> I10 -> E78.0": [0.55, 0.16, 0.94, 0.01],
        "Pathway_E11.9 -> I10 -> I25.1": [0.54, 0.15, 0.92, 0.01],
        "Pathway_E11.9 -> I10 -> I25.1 -> E78.0": [1.19, 0.74, 1.65, 0.001],
        "Pathway_E11.9 -> I10 -> I48": [0.82, 0.41, 1.23, 0.001],
        "Pathway_E11.9 -> I10 -> J45.9": [-0.05, -0.63, 0.52, 0.86],
        "Pathway_E11.9 -> I25.1": [0.20, -0.47, 0.88, 0.55],
        # Pathways starting with E78.0
        "Pathway_E78.0 -> I10": [0.16, -0.17, 0.49, 0.34],
        "Pathway_E78.0 -> I10 -> E11.9": [1.13, 0.59, 1.66, 0.001],
        "Pathway_E78.0 -> I10 -> I25.1": [0.33, -0.04, 0.69, 0.08],
        "Pathway_E78.0 -> I10 -> I25.1 -> I48": [0.56, 0.05, 1.08, 0.03],
        "Pathway_E78.0 -> I10 -> I48": [0.71, 0.32, 1.11, 0.001],
        "Pathway_E78.0 -> I10 -> J45.9": [1.07, 0.61, 1.54, 0.001],
        "Pathway_E78.0 -> I25.1": [-0.41, -0.97, 0.14, 0.15],
        # Pathways starting with I10
        "Pathway_I10 -> E11.9": [-0.29, -0.80, 0.22, 0.27],
        "Pathway_I10 -> E11.9 -> E78.0": [0.52, 0.08, 0.95, 0.02],
        "Pathway_I10 -> E11.9 -> E78.0 -> I25.1": [1.53, 1.05, 2.01, 0.001],
        "Pathway_I10 -> E78.0": [0.70, 0.36, 1.04, 0.001],
        "Pathway_I10 -> E78.0 -> I25.1": [0.55, 0.14, 0.96, 0.01],
        "Pathway_I10 -> E78.0 -> I48": [0.83, 0.43, 1.23, 0.001],
        "Pathway_I10 -> I25.1": [0.15, -0.24, 0.53, 0.45],
        "Pathway_I10 -> I25.1 -> E78.0": [0.62, 0.12, 1.13, 0.02],
        "Pathway_I10 -> I25.1 -> E78.0 -> I48": [1.22, 0.71, 1.73, 0.001],
        "Pathway_I10 -> I25.1 -> I48": [0.63, 0.16, 1.11, 0.01],
        "Pathway_I10 -> I34.0": [-0.23, -0.70, 0.24, 0.33],
        "Pathway_I10 -> I48": [0.66, 0.35, 0.96, 0.001],
        "Pathway_I10 -> I48 -> E11.9": [0.56, 0.10, 1.02, 0.02],
        "Pathway_I10 -> I48 -> E78.0": [0.71, 0.34, 1.09, 0.001],
        "Pathway_I10 -> I48 -> I34.0": [1.24, 0.79, 1.69, 0.001],
        "Pathway_I10 -> I48 -> J45.9": [0.60, 0.17, 1.03, 0.001],
        "Pathway_I10 -> J45.9": [0.38, 0.04, 0.72, 0.03],
        "Pathway_I10 -> J45.9 -> E11.9": [0.85, 0.34, 1.35, 0.001],
        "Pathway_I10 -> J45.9 -> E78.0": [0.24, -0.30, 0.77, 0.39],
        # Pathways starting with I25.1
        "Pathway_I25.1 -> E78.0 -> I10": [0.73, 0.30, 1.16, 0.001],
        "Pathway_I25.1 -> I10": [0.66, 0.22, 1.09, 0.001],
        "Pathway_I25.1 -> I48": [1.10, 0.61, 1.60, 0.001],
        # Pathways starting with I48
        "Pathway_I48 -> E78.0 -> I10": [0.98, 0.41, 1.56, 0.001],
        "Pathway_I48 -> I10": [0.91, 0.57, 1.25, 0.001],
        "Pathway_I48 -> J45.9": [-0.47, -1.15, 0.20, 0.17],
        # Pathways starting with J45.9
        "Pathway_J45.9 -> E11.9 -> I10": [0.15, -0.42, 0.72, 0.61],
        "Pathway_J45.9 -> E78.0 -> I10": [0.54, -0.03, 1.11, 0.06],
        "Pathway_J45.9 -> I10": [0.63, 0.25, 1.01, 0.001],
    },
    orient='index',
    columns=['coef', 'coef lower 95%', 'coef upper 95%', 'p'],
)

# Optional (disabled): mirror the inset horizontally by inverting its x-axis.
# The y-axis is inverted inside create_hazard_ratio_plot itself.
#inset.invert_xaxis()







# Draw the PH v/s HF hazard ratio plot inside the first inset
create_hazard_ratio_plot(inset, plot_data)
# Add the annotation box (one "P:..., HR:..." line per row) next to that inset;
# x_offset is expressed in the inset's axes-fraction coordinates
add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)







###################################################################################

plot_data_ph = pd.DataFrame({
    'coef': [0.27, -0.00, 0.16, 0.68, 0.00, 0.43, -0.11, 0.13, -0.72, 0.04, 
             -0.02, -0.33, 0.38, 0.00, 0.20, -0.41, 0.40, 0.72, -0.05, -0.14, 
             0.34, -0.56, -0.05, 0.31, 0.26, 0.30, -1.18, -0.24, 0.16, -0.69, 
             -0.22, 0.54, -0.13, 0.51, 0.20, 0.08, 0.39, 0.07, 0.46, 0.04, 
             -0.93, 0.29, 0.53, 0.20],
    'coef lower 95%': [-0.16, -0.55, -0.13, 0.34, -0.39, -0.12, -0.54, -0.62, -1.75, -0.26, 
                       -0.73, -0.73, -0.18, -0.39, -0.29, -1.20, -0.20, 0.28, -0.67, -0.49, 
                       -0.07, -1.03, -0.45, -0.24, -0.29, -0.20, -1.90, -0.49, -0.37, -1.17, 
                       -0.74, 0.12, -0.47, 0.00, -0.53, -0.44, -0.05, -0.47, -0.18, -0.27, 
                       -1.93, -0.46, -0.04, -0.21],
    'coef upper 95%': [0.70, 0.54, 0.45, 1.02, 0.39, 0.98, 0.33, 0.88, 0.31, 0.35, 
                       0.69, 0.08, 0.93, 0.40, 0.70, 0.39, 1.00, 1.16, 0.57, 0.21, 
                       0.74, -0.08, 0.35, 0.86, 0.81, 0.80, -0.46, 0.02, 0.70, -0.22, 
                       0.29, 0.95, 0.22, 1.02, 0.93, 0.61, 0.83, 0.61, 1.10, 0.34, 
                       0.07, 1.04, 1.10, 0.61],
    'p': [0.22, 0.99, 0.29, 0.001, 0.98, 0.12, 0.63, 0.74, 0.17, 0.78, 
          0.96, 0.11, 0.18, 0.98, 0.42, 0.32, 0.19, 0.001, 0.87, 0.43, 
          0.10, 0.02, 0.80, 0.27, 0.35, 0.23, 0.001, 0.07, 0.55, 0.001, 
          0.40, 0.01, 0.47, 0.05, 0.59, 0.75, 0.08, 0.81, 0.16, 0.82, 0.07, 0.45, 0.07, 0.34]  
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10"
])

# Position of the PH inset, measured from the right edge of the first inset's box
box_start_x_ph = box_end_x + 1.0
box_end_x_ph = box_start_x_ph + 4.8  # Width of the PH inset box

# Insert the PH hazard ratio plot (anchor box given in data coordinates of `ax`)
inset_ph = inset_axes(
    ax,
    width="60%",  # Width relative to the anchor box
    height="100.1%",  # Height relative to the anchor box
    bbox_to_anchor=(box_start_x_ph - 8.3, box_start_y + 0.1, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH mortality plot with its dedicated plotting function
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add the annotation box (p-value and HR per row) for the PH data.
# It is deliberately anchored to the FIRST inset (`inset`): x_offset is in that
# inset's axes-fraction coordinates, so a negative value places the box to its left.
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)



####################################################################################

plot_data_heart_failure = pd.DataFrame({
    'coef': [-0.08, 0.08, 0.29, 0.09, -0.10, -0.30, -0.07, 0.55, -0.51, -0.39, 
             0.59, -0.10, -0.62, 0.47, -0.09, -0.74, -0.11, -0.28, 0.08, 0.25, 
             -0.38, 0.41, -0.11, -0.47, 0.15, -0.30, 0.42, -0.27, 0.90, 0.26, 
             -0.25, -0.16, -0.22, -0.39, 0.39, -0.55, 0.15, -0.08, 0.36, -0.60, 
             -0.22, 0.27, -0.16, -0.30],
    'coef lower 95%': [-0.35, -0.30, 0.07, -0.22, -0.43, -0.70, -0.43, -0.02, -1.15, -0.64, 
                       0.12, -0.34, -1.01, 0.17, -0.81, -1.16, -0.41, -0.60, -0.45, -0.03, 
                       -0.78, 0.04, -0.40, -0.81, -0.33, -0.83, -0.10, -0.51, 0.42, -0.24, 
                       -0.86, -0.69, -0.55, -1.07, -0.01, -0.92, -0.23, -0.64, -0.19, -0.99, 
                       -0.82, -0.28, -0.78, -0.78],
    'coef upper 95%': [0.19, 0.45, 0.51, 0.41, 0.22, 0.10, 0.29, 1.12, 0.13, -0.15, 
                       1.06, 0.14, -0.23, 0.78, 0.63, -0.31, 0.20, 0.04, 0.62, 0.53, 
                       0.02, 0.78, 0.18, -0.13, 0.63, 0.22, 0.94, -0.03, 1.38, 0.76, 
                       0.36, 0.38, 0.11, 0.30, 0.80, -0.18, 0.54, 0.47, 0.92, -0.21, 
                       0.39, 0.83, 0.45, 0.19],
    'p': [0.56, 0.69, 0.01, 0.56, 0.54, 0.14, 0.69, 0.06, 0.12, 0.005,
    0.01, 0.4, 0.005, 0.005, 0.8, 0.005, 0.5, 0.09, 0.76, 0.08,
    0.06, 0.03, 0.46, 0.01, 0.54, 0.26, 0.12, 0.03, 0.005, 0.31,
    0.43, 0.57, 0.2, 0.27, 0.06, 0.005, 0.44, 0.77, 0.2, 0.005,
    0.48, 0.33, 0.61, 0.23]  # Add manually

}, index=[
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10"
])




# Position of the HF inset, measured from the right edge of the first inset's box
box_start_x_heart_failure = box_end_x + 1.0
box_end_x_heart_failure = box_start_x_heart_failure + 4.8  # Width of the HF inset box

# Insert the HF hazard ratio plot (anchor box given in data coordinates of `ax`)
inset_heart_failure = inset_axes(
    ax,
    width="60%",  # Width relative to the anchor box
    height="100.1%",  # Height relative to the anchor box
    bbox_to_anchor=(box_start_x_heart_failure - 4.55, box_start_y + 0.1, box_end_x_heart_failure - box_start_x_heart_failure, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the HF mortality plot with its dedicated plotting function
create_hazard_ratio_plot_heart_failure(inset_heart_failure, plot_data_heart_failure)

# Add the annotation box (p-value and HR per row) for the HF data.
# It is deliberately anchored to the FIRST inset (`inset`): x_offset is in that
# inset's axes-fraction coordinates, so a negative value places the box to its left.
add_annotations_box(inset, plot_data_heart_failure, box_title="", x_offset=-0.33)

#######################################################################################

# Number of PH and HF participants per pathway, listed in the same order as
# `pathways` (first entry = first pathway). Stored as plain integers; the
# two-digit zero padding is applied once, when the labels are formatted.
ph_participants = [10, 5, 50, 16, 16, 7, 13, 5, 5, 49,
                   6, 20, 5, 15, 9, 8, 10, 11, 6, 42,
                   13, 14, 25, 7, 5, 8, 13, 78, 9, 18,
                   10, 11, 42, 7, 6, 11, 16, 11, 5, 44,
                   5, 5, 5, 25]

hf_participants = [18, 9, 70, 19, 18, 6, 14, 6, 7, 61,
                   7, 43, 12, 22, 5, 17, 33, 20, 6, 42,
                   14, 10, 41, 21, 5, 7, 8, 67, 7, 7,
                   5, 17, 30, 6, 11, 17, 18, 6, 6, 27,
                   5, 6, 5, 15]


# One y-tick per pathway row. Pathway rows are drawn top-to-bottom
# (row r sits at y = len(pathways) - r + 1), so y = 1 is the LAST pathway.
# The counts are therefore reversed before being attached to y = 1, 2, ...
# so that every label sits next to its own pathway.
y_ticks = list(range(1, len(ph_participants) + 1))
y_tick_labels = [
    f"{ph:02d}, {hf:02d}"
    for ph, hf in zip(reversed(ph_participants), reversed(hf_participants))
]


# Set x-axis and y-axis limits of the main (pathway) axis
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
# Right limit = x position of the last node column plus 6.5 extra data units
# (presumably the room taken by the inset plots -- TODO confirm)
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)  # Adjust x-axis limits dynamically
ax.set_ylim(0.5, len(pathways) + 0.7)  # Set y-axis limits to fit all pathways

# Alternative (disabled): hide the y-ticks instead of labelling them
#ax.set_yticks([])  # Optionally, remove y-ticks
#ax.set_yticklabels([])  # Ensure no y-axis labels


# Explicitly set y-tick positions and labels
ax.set_yticks(y_ticks)  # Set y-tick positions
ax.set_yticklabels(y_tick_labels, fontsize=10)  # Display PH and HF participants as y-tick labels
# Add a title above the y-tick labels for clarification
ax.annotate(
    "No. of Participants\n (PH , HF)",  # Title text
    xy=(-0.02, 1.00),  # Just left of the axes' top-left corner (axes-fraction coordinates)
    xycoords='axes fraction',  # Coordinates relative to the axes
    fontsize=12,  # Font size
    ha='center',  # Horizontal alignment
    va='bottom',  # Vertical alignment
    rotation=0  # No rotation
)

# Manually set x-tick positions and labels for disease sequence steps.
# NOTE(review): the five positions are hard-coded; they look tuned to the
# node text positions for the current `gap` -- confirm if `gap` changes.
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Build the legend from the same colour table that is used to draw the pathway
# nodes (color_dict), so each legend patch matches the colour actually plotted.
# Each entry is (legend label, node code as it appears in `pathways`).
legend_entries = [
    ("PH and HF as Index Conditions", 'PH/HF'),
    ("Type 2 Diabetes Mellitus - E11.9", 'E11.9'),
    ("Hypertension - I10", 'I10'),
    ("Hyperlipidemia - E78", 'E78.0'),
    ("Chronic Ischemic Heart Disease - I25.1", 'I25.1'),
    ("Atrial Fibrillation and Flutter - I48", 'I48'),
    ("Mitral Valve Disorder - I34.0", 'I34.0'),
    ("Asthma - J45.9", 'J45.9'),
]
handles = [
    # Same lookup (and same 'white' fallback) as the node rectangles use
    mpatches.Patch(color=color_dict.get(code, 'white'), label=full_name)
    for full_name, code in legend_entries
]

# Position and style the legend (below the axes, four columns, no frame)
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.07), loc='upper center', ncol=4, fontsize=14, frameon=False)



# Add descriptive axis labels and the figure title
ax.set_xlabel("Disease Sequence Progression", fontsize=15, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/HF", fontsize=15, labelpad=15)
# x=0.8 moves the title to the right of the axes centre
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)

# Re-position the x-axis label in axes-fraction coordinates (left part of the axes)
ax.xaxis.set_label_coords(0.2, -0.03) 

# Clean up the plot aesthetics by removing unnecessary spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Ensure everything fits nicely into the figure
plt.tight_layout()
# Save the plot as PNG; must happen before plt.show()
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a grouped forest plot of hazard ratios (PH v/s HF) on the given axis.

    Rows of plot_data are grouped by the first condition in their index label
    ("Pathway_<code> ..."). Each row is drawn at its own integer y position as
    a point with a 95% CI bar, coloured by group, after exponentiating the
    coefficients into hazard ratios. Rows whose label starts with none of the
    six known prefixes are not drawn.

    Parameters:
    - ax: The axis to draw on. All drawing and labelling goes through this
      object, so it does not have to be pyplot's current axis.
    - plot_data: DataFrame indexed by pathway label with the columns
      'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    # Step 1: Extract and group the pathways by their starting conditions
    pathway_groups = {
        'E11.9': plot_data.loc[plot_data.index.str.startswith("Pathway_E11.9")],
        'E78.0': plot_data.loc[plot_data.index.str.startswith("Pathway_E78.0")],
        'I10': plot_data.loc[plot_data.index.str.startswith("Pathway_I10")],
        'I25.1': plot_data.loc[plot_data.index.str.startswith("Pathway_I25.1")],
        'I48': plot_data.loc[plot_data.index.str.startswith("Pathway_I48")],
        'J45.9': plot_data.loc[plot_data.index.str.startswith("Pathway_J45.9")],
    }

    # Define custom colors for each group
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'I10': '#7D3C98',  # Amethyst
        'E78.0': '#D2691E',  # Chocolate
        'I25.1': '#FF69B4',  # Hot Pink
        'I48': '#4682B4',  # Steel Blue
        'J45.9': '#FF8C00',  # Dark orange
    }

    # Step 2: Plot grouped pathways
    current_y = 0
    y_positions = []  # All y positions that received a row
    # int(0.8) == 0, so groups are stacked without a gap. add_annotations_box
    # places its rows at 0..len-1 and relies on this contiguous layout.
    spacing = 0.8
    offset = 0.05  # Small horizontal space between a CI end and its label

    for group, data in pathway_groups.items():
        group_y_ticks = range(current_y, current_y + len(data))
        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):

            # Convert log-hazard coefficient and CI bounds to hazard ratios
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # Plot the point estimate with its (asymmetric) CI bar
            ax.errorbar(
                hr,  # HR value
                y,   # Y position
                xerr=[[hr - lower_hr], [upper_hr - hr]],  # CI as 2D array
                fmt='o',
                capsize=4.0,
                color=group_colors[group]  # Use the color for the current group
            )

            # Print the CI bounds next to the left and right ends of the bar
            ax.text(lower_hr - offset, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + offset, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        current_y += len(data) + int(spacing)

    # Step 3: Fit the y-axis tightly around the drawn rows.
    # Skipped when nothing was drawn, so an empty table does not raise.
    if y_positions:
        buffer = 0.5  # Buffer space around the top and bottom
        ax.set_ylim(min(y_positions) - buffer, max(y_positions) + buffer)

    # Step 4: Customize plot
    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR=1

    # Add custom x-ticks
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]  # HR values
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Add labels and titles on the axis that was passed in (not on pyplot's
    # current axis, which may be a different one)
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s HF", fontsize=13)

    

def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a separate box for annotations (p-value and HR per row) next to a plot.

    One line of text is written per row of plot_data, at y = 0, 1, 2, ... in
    DataFrame order, and the box takes over the y-limits of `ax`. The text
    therefore lines up with the hazard ratio bars only when those were drawn
    at the same consecutive y positions and in the same row order.

    P-values that would be printed as "0.00" at two decimals are shown as
    "<0.01" instead, because a p-value is never exactly zero.

    Parameters:
    - ax: The axis the box is anchored to (position and y-limits are taken from it).
    - plot_data: DataFrame containing the columns 'p' and 'coef'.
    - box_title: Title of the annotation box.
    - x_offset: Horizontal position of the box's left anchor, in axes-fraction
      coordinates of `ax` (negative values place the box left of `ax`).
    """
    # Create a new inset axis for annotations
    inset_box = inset_axes(
        ax,
        width="145%",  # Width of the annotation box, relative to the anchor box
        height="100%",  # Height of the box matches the plot
        bbox_to_anchor=(x_offset, 0, 0.2, 1),  # Anchor box in axes fractions of `ax`
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Remove spines and ticks from the annotation box
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # Write one "P:..., HR:..." line per row, aligned with the hazard ratio bars
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        p_text = f"{p_val:.2f}"
        if p_val > 0 and p_text == "0.00":
            # Avoid reporting a non-zero p-value as exactly zero
            p_text = "<0.01"
        annotation = f"P:{p_text}, HR:{np.exp(coef):.2f}"  # HR is exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Share the y-range (including its inversion) with the anchor axis
    inset_box.set_ylim(ax.get_ylim())

    # Draw the box around the entire annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Full extent of the annotation axis (axes fractions)
        transform=inset_box.transAxes,
        edgecolor="black",  # Box color
        facecolor="none",   # Transparent inside
        linewidth=1.5,      # Thickness of the box border
        zorder=10           # Ensure the box is on top
    )
    inset_box.add_patch(rect)



 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws the grouped hazard ratio plot for mortality along PH pathways.

    Rows are grouped by the first condition in their index label, drawn in
    group order at consecutive integer y positions, and coloured by group.
    Coefficients and CI bounds are exponentiated into hazard ratios.

    Parameters:
    - ax: The axis to draw on.
    - plot_data_ph: DataFrame indexed by pathway label with the columns
      'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    # (group code, index prefix, marker colour) in drawing order
    group_specs = (
        ('E11.9', "Pathway_E11.9", '#228B22'),  # Forest Green
        ('E78.0', "Pathway_E78.0", '#D2691E'),  # Chocolate
        ('I10', "Pathway_I10", '#7D3C98'),  # Amethyst
        ('I25.1', "Pathway_I25.1", '#FF69B4'),  # Hot Pink
        ('I48', "Pathway_I48", '#4682B4'),  # Steel Blue
        ('J45.9', "Pathway_J45.9", '#FF8C00'),  # Orange
    )

    label_pad = 0.05  # Space between a CI end and its printed value
    group_gap = int(0.8)  # Truncates to 0: groups are stacked without a gap
    next_row = 0
    drawn_rows = []

    for _code, prefix, colour in group_specs:
        members = plot_data_ph.loc[plot_data_ph.index.str.startswith(prefix)]
        rows = range(next_row, next_row + len(members))

        for y, coef, lower, upper in zip(
                rows, members['coef'], members['coef lower 95%'], members['coef upper 95%']):
            # Log-hazard values -> hazard ratios
            hr, lower_hr, upper_hr = np.exp(coef), np.exp(lower), np.exp(upper)

            # Point estimate with asymmetric CI bar
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.0,
                color=colour
            )

            # CI bounds printed at both ends of the bar
            ax.text(lower_hr - label_pad, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + label_pad, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        drawn_rows.extend(rows)
        next_row += len(members) + group_gap

    # Fit the y-range around the drawn rows with half a row of padding
    padding = 0.5
    ax.set_ylim(min(drawn_rows) - padding, max(drawn_rows) + padding)

    # First row on top, dashed reference line at HR=1
    ax.invert_yaxis()
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed HR tick marks
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in hr_ticks], fontsize=10)

    # Axis label and title; y-ticks are hidden
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)
    ax.set_yticks([])




##############################################################################################
    
  
    
def create_hazard_ratio_plot_heart_failure(ax, plot_data_heart_failure):
    """
    Draws the grouped hazard ratio plot for mortality along HF pathways.

    Rows are grouped by the first condition in their index label, drawn in
    group order at consecutive integer y positions, and coloured by group.
    Coefficients and CI bounds are exponentiated into hazard ratios.

    Parameters:
    - ax: The axis to draw on.
    - plot_data_heart_failure: DataFrame indexed by pathway label with the
      columns 'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    frame = plot_data_heart_failure
    labels = frame.index.str

    # Index prefix -> group code; insertion order is the drawing order
    code_by_prefix = {
        "Pathway_E11.9": 'E11.9',
        "Pathway_E78.0": 'E78.0',
        "Pathway_I10": 'I10',
        "Pathway_I25.1": 'I25.1',
        "Pathway_I48": 'I48',
        "Pathway_J45.9": 'J45.9',
    }

    # Group code -> marker colour
    colour_by_code = {
        'E11.9': '#228B22',  # Forest Green
        'I10': '#7D3C98',  # Amethyst
        'E78.0': '#D2691E',  # Chocolate
        'I25.1': '#FF69B4',  # Hot Pink
        'I48': '#4682B4',  # Steel Blue
        'J45.9': '#FF8C00',  # Orange
    }

    text_gap = 0.05  # Space between a CI end and its printed value
    between_groups = int(0.8)  # Truncates to 0: no gap between groups
    row_start = 0
    occupied = []

    for prefix, code in code_by_prefix.items():
        block = frame.loc[labels.startswith(prefix)]
        row_stop = row_start + len(block)
        block_rows = range(row_start, row_stop)
        marker_colour = colour_by_code[code]

        columns = zip(block_rows, block['coef'], block['coef lower 95%'], block['coef upper 95%'])
        for y, coef, lower, upper in columns:
            # Log-hazard values -> hazard ratios
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # Point estimate with asymmetric CI bar
            left_arm = hr - lower_hr
            right_arm = upper_hr - hr
            ax.errorbar(hr, y, xerr=[[left_arm], [right_arm]], fmt='o', capsize=4.0, color=marker_colour)

            # CI bounds printed at both ends of the bar
            ax.text(lower_hr - text_gap, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + text_gap, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        occupied.extend(block_rows)
        row_start = row_stop + between_groups

    # Fit the y-range around the drawn rows with half a row of padding
    lowest, highest = min(occupied), max(occupied)
    ax.set_ylim(lowest - 0.5, highest + 0.5)

    # First row on top, dashed reference line at HR=1
    ax.invert_yaxis()
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed HR tick marks
    ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    tick_text = [f"{tick:.1f}" for tick in ticks]
    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_text, fontsize=10)

    # Axis label and title; y-ticks are hidden
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with HF Pathways", fontsize=13)
    ax.set_yticks([])

    
    
########################################################################################################################    

    





# Define the pathways and colors
pathways = [
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> E78.0 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> PH/HF",
    "E11.9 -> I10 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> J45.9 -> PH/HF",
    "E11.9 -> I25.1 -> PH/HF",
    
    "E78.0 -> I10 -> PH/HF",
    "E78.0 -> I10 -> E11.9 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> I48 -> PH/HF",
    "E78.0 -> I10 -> I48 -> PH/HF",
    "E78.0 -> I10 -> J45.9 -> PH/HF",
    "E78.0 -> I25.1 -> PH/HF",
    
    
    "I10 -> E11.9 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> PH/HF",
    "I10 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> I48 -> PH/HF",
    "I10 -> I34.0 -> PH/HF",
    "I10 -> I48 -> PH/HF",
    "I10 -> I48 -> E11.9 -> PH/HF",
    "I10 -> I48 -> E78.0 -> PH/HF",
    "I10 -> I48 -> I34.0 -> PH/HF",
    "I10 -> I48 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> E11.9 -> PH/HF",
    "I10 -> J45.9 -> E78.0 -> PH/HF",
    
    "I25.1 -> E78.0 -> I10 -> PH/HF",
    "I25.1 -> I10 -> PH/HF",
    "I25.1 -> I48 -> PH/HF",
    
    "I48 -> E78.0 -> I10 -> PH/HF",
    "I48 -> I10 -> PH/HF",
    "I48 -> J45.9 -> PH/HF",

    "J45.9 -> E11.9 -> I10 -> PH/HF",
    "J45.9 -> E78.0 -> I10 -> PH/HF",
    "J45.9 -> I10 -> PH/HF"   
]


color_dict = {
    'PH/HF': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3',
    'J45.9': '#F4A460',
    'I34.0':'#FFECB3'
}




# Create the plot
fig, ax = plt.subplots(figsize=(17, 13))
# Extra horizontal step between node columns; with -0.38 consecutive columns
# are 1 + gap = 0.62 data units apart
gap = -0.38
row_gap = 1  # Vertical gap between rows (not used by the layout below)


# Plot the pathways: one row per pathway, first pathway on the top row
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    y_pos = len(pathways) - row + 1  # Same for every node of this row
    for col, condition in enumerate(conditions, start=1):
        # Calculate node position
        x_pos = col + (col - 1) * gap

        # Draw rectangle for each node (white if the condition has no colour entry)
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.5), 0.6, 1.2,
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Add text to node
        ax.text(x_pos - 0.17, y_pos + 0.07, condition, ha='center', va='center', fontsize=9.60)


# Box that hosts the hazard ratio insets (data coordinates of `ax`).
# Its x position is derived from the node count of the LAST pathway in
# `pathways`; this is computed explicitly instead of reading the variable
# left over from the final loop iteration.
last_pathway_nodes = len(pathways[-1].split(" -> "))
box_start_x = last_pathway_nodes + 2.2  # Starting x position for the box
box_end_x = last_pathway_nodes + 7.0    # Ending x position for the box
box_start_y = 0.45                      # Starting y position for the box
box_end_y = len(pathways) + 0.5         # Ending y position for the box


# Insert the hazard ratio plot into the box area
inset = inset_axes(
    ax,
    width="60%",  # Width relative to the anchor box
    height="100.1%",  # Height relative to the anchor box
    bbox_to_anchor=(box_start_x + 5.0, box_start_y + 0.1, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
    bbox_transform=ax.transData,
    loc='center'
)


# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
plot_data = pd.DataFrame({
    'coef': [0.58, 0.56, 0.10, 0.55, 0.54, 1.19, 0.82, -0.05, 0.20, 0.16, 
             1.13, 0.33, 0.56, 0.71, 1.07, -0.41, -0.29, 0.52, 1.53, 0.70, 
             0.55, 0.83, 0.15, 0.62, 1.22, 0.63, -0.23, 0.66, 0.56, 0.71, 
             1.24, 0.60, 0.38, 0.85, 0.24, 0.73, 0.66, 1.10, 0.98, 0.91, 
             -0.47, 0.15, 0.54, 0.63],
    'coef lower 95%': [0.17, 0.05, -0.23, 0.16, 0.15, 0.74, 0.41, -0.63, -0.47, -0.17, 
                       0.59, -0.04, 0.05, 0.32, 0.61, -0.97, -0.80, 0.08, 1.05, 0.36, 
                       0.14, 0.43, -0.24, 0.12, 0.71, 0.16, -0.70, 0.35, 0.10, 0.34, 
                       0.79, 0.17, 0.04, 0.34, -0.30, 0.30, 0.22, 0.61, 0.41, 0.57, 
                       -1.15, -0.42, -0.03, 0.25],
    'coef upper 95%': [0.98, 1.07, 0.43, 0.94, 0.92, 1.65, 1.23, 0.52, 0.88, 0.49, 
                       1.66, 0.69, 1.08, 1.11, 1.54, 0.14, 0.22, 0.95, 2.01, 1.04, 
                       0.96, 1.23, 0.53, 1.13, 1.73, 1.11, 0.24, 0.96, 1.02, 1.09, 
                       1.69, 1.03, 0.72, 1.35, 0.77, 1.16, 1.09, 1.60, 1.56, 1.25, 
                       0.20, 0.72, 1.11, 1.01],
    'p': [0.01, 0.03, 0.55, 0.01, 0.01, 0.001, 0.001, 0.86, 0.55, 0.34,
          0.001, 0.08, 0.03, 0.001, 0.001, 0.15, 0.27, 0.02, 0.001, 0.001,
          0.01, 0.001, 0.45, 0.02, 0.001, 0.01, 0.33, 0.001, 0.02, 0.001,
          0.001, 0.001, 0.03, 0.001, 0.39, 0.001, 0.001, 0.001, 0.001,0.001, 0.17,
          0.61, 0.06, 0.001]
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10"
])

# Optional (disabled): mirror the inset horizontally by inverting its x-axis.
# The y-axis is inverted inside create_hazard_ratio_plot itself.
#inset.invert_xaxis()







# Draw the hazard-ratio plot for plot_data inside the first inset axes
create_hazard_ratio_plot(inset, plot_data)
# Attach the per-row annotation box to the same inset (the add_annotations_box
# defined in this file writes the p-value and HR for each row); x_offset=0.95
# is a hand-tuned horizontal position
add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)







###################################################################################

# Hard-coded model results for the PH pathways: 44 rows, one per pathway.
# 'coef' and the two 'coef ... 95%' columns are exponentiated by the plotting
# helper to obtain the hazard ratio and its interval; 'p' is only displayed.
# The index labels are grouped by starting condition (E11.9, E78.0, I10,
# I25.1, I48, J45.9), which is the order the plotting helper draws them in.
# NOTE(review): the repeated p-value 0.001 looks like a placeholder for
# "very small" -- confirm against the fitted model output.
plot_data_ph = pd.DataFrame({
    'coef': [0.27, -0.00, 0.16, 0.68, 0.00, 0.43, -0.11, 0.13, -0.72, 0.04, 
             -0.02, -0.33, 0.38, 0.00, 0.20, -0.41, 0.40, 0.72, -0.05, -0.14, 
             0.34, -0.56, -0.05, 0.31, 0.26, 0.30, -1.18, -0.24, 0.16, -0.69, 
             -0.22, 0.54, -0.13, 0.51, 0.20, 0.08, 0.39, 0.07, 0.46, 0.04, 
             -0.93, 0.29, 0.53, 0.20],
    'coef lower 95%': [-0.16, -0.55, -0.13, 0.34, -0.39, -0.12, -0.54, -0.62, -1.75, -0.26, 
                       -0.73, -0.73, -0.18, -0.39, -0.29, -1.20, -0.20, 0.28, -0.67, -0.49, 
                       -0.07, -1.03, -0.45, -0.24, -0.29, -0.20, -1.90, -0.49, -0.37, -1.17, 
                       -0.74, 0.12, -0.47, 0.00, -0.53, -0.44, -0.05, -0.47, -0.18, -0.27, 
                       -1.93, -0.46, -0.04, -0.21],
    'coef upper 95%': [0.70, 0.54, 0.45, 1.02, 0.39, 0.98, 0.33, 0.88, 0.31, 0.35, 
                       0.69, 0.08, 0.93, 0.40, 0.70, 0.39, 1.00, 1.16, 0.57, 0.21, 
                       0.74, -0.08, 0.35, 0.86, 0.81, 0.80, -0.46, 0.02, 0.70, -0.22, 
                       0.29, 0.95, 0.22, 1.02, 0.93, 0.61, 0.83, 0.61, 1.10, 0.34, 
                       0.07, 1.04, 1.10, 0.61],
    'p': [0.22, 0.99, 0.29, 0.001, 0.98, 0.12, 0.63, 0.74, 0.17, 0.78, 
          0.96, 0.11, 0.18, 0.98, 0.42, 0.32, 0.19, 0.001, 0.87, 0.43, 
          0.10, 0.02, 0.80, 0.27, 0.35, 0.23, 0.001, 0.07, 0.55, 0.001, 
          0.40, 0.01, 0.47, 0.05, 0.59, 0.75, 0.08, 0.81, 0.16, 0.82, 0.07, 0.45, 0.07, 0.34]  
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10"
])

# NOTE(review): plot_data_2 is a scaled copy left over from a demonstration;
# nothing in the visible code reads it afterwards -- candidate for removal.
plot_data_2 = plot_data_ph.copy()  # Copy, so the scaling below does not modify plot_data_ph
plot_data_2['coef'] = plot_data_2['coef'] * 1.1  # Scales only 'coef'; the CI columns are left as they are

# Anchor box for the PH inset, expressed in data coordinates of `ax`
box_start_x_ph = box_end_x + 1.0  # Left edge: 1.0 data unit to the right of box_end_x
box_end_x_ph = box_start_x_ph + 4.8  # Right edge: makes the anchor box 4.8 data units wide

# Create the inset axes that will hold the PH hazard-ratio plot
inset_ph = inset_axes(
    ax,
    width="60%",  # Percentage of the anchor box width
    height="100.1%",  # Percentage of the anchor box height
    bbox_to_anchor=(box_start_x_ph - 8.3, box_start_y + 0.1, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # (x, y, width, height); the -8.3 and +0.1 shifts are hand-tuned
    bbox_transform=ax.transData,  # Interpret the anchor box in data coordinates of `ax`
    loc='center'
)

# Draw the PH hazard-ratio plot with its dedicated helper
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add the annotation box for the PH rows.
# NOTE(review): the box is attached to `inset` (the first hazard-ratio inset),
# not to `inset_ph`, so x_offset=-1.63 is measured in that axes' coordinates
# and its y-limits are copied from `inset` -- confirm this is intended.
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)



####################################################################################

# Hard-coded model results for the HF pathways: 44 rows, one per pathway,
# with the same columns and the same index labels as plot_data_ph.
# 'coef' and the two 'coef ... 95%' columns are exponentiated by the plotting
# helper to obtain the hazard ratio and its interval; 'p' is only displayed.
# NOTE(review): the repeated p-value 0.005 looks like a placeholder for
# "very small" -- confirm against the fitted model output.
plot_data_heart_failure = pd.DataFrame({
    'coef': [-0.08, 0.08, 0.29, 0.09, -0.10, -0.30, -0.07, 0.55, -0.51, -0.39, 
             0.59, -0.10, -0.62, 0.47, -0.09, -0.74, -0.11, -0.28, 0.08, 0.25, 
             -0.38, 0.41, -0.11, -0.47, 0.15, -0.30, 0.42, -0.27, 0.90, 0.26, 
             -0.25, -0.16, -0.22, -0.39, 0.39, -0.55, 0.15, -0.08, 0.36, -0.60, 
             -0.22, 0.27, -0.16, -0.30],
    'coef lower 95%': [-0.35, -0.30, 0.07, -0.22, -0.43, -0.70, -0.43, -0.02, -1.15, -0.64, 
                       0.12, -0.34, -1.01, 0.17, -0.81, -1.16, -0.41, -0.60, -0.45, -0.03, 
                       -0.78, 0.04, -0.40, -0.81, -0.33, -0.83, -0.10, -0.51, 0.42, -0.24, 
                       -0.86, -0.69, -0.55, -1.07, -0.01, -0.92, -0.23, -0.64, -0.19, -0.99, 
                       -0.82, -0.28, -0.78, -0.78],
    'coef upper 95%': [0.19, 0.45, 0.51, 0.41, 0.22, 0.10, 0.29, 1.12, 0.13, -0.15, 
                       1.06, 0.14, -0.23, 0.78, 0.63, -0.31, 0.20, 0.04, 0.62, 0.53, 
                       0.02, 0.78, 0.18, -0.13, 0.63, 0.22, 0.94, -0.03, 1.38, 0.76, 
                       0.36, 0.38, 0.11, 0.30, 0.80, -0.18, 0.54, 0.47, 0.92, -0.21, 
                       0.39, 0.83, 0.45, 0.19],
    'p': [0.56, 0.69, 0.01, 0.56, 0.54, 0.14, 0.69, 0.06, 0.12, 0.005,
    0.01, 0.4, 0.005, 0.005, 0.8, 0.005, 0.5, 0.09, 0.76, 0.08,
    0.06, 0.03, 0.46, 0.01, 0.54, 0.26, 0.12, 0.03, 0.005, 0.31,
    0.43, 0.57, 0.2, 0.27, 0.06, 0.005, 0.44, 0.77, 0.2, 0.005,
    0.48, 0.33, 0.61, 0.23]  # p-values were entered manually

}, index=[
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10"
])




# NOTE(review): plot_data_3 is a scaled copy left over from a demonstration;
# nothing in the visible code reads it afterwards -- candidate for removal.
plot_data_3 = plot_data_heart_failure.copy()  # Copy, so the scaling below does not modify plot_data_heart_failure
plot_data_3['coef'] = plot_data_3['coef'] * 1.1  # Scales only 'coef'; the CI columns are left as they are

# Anchor box for the HF inset, expressed in data coordinates of `ax`
box_start_x_heart_failure = box_end_x + 1.0  # Left edge: 1.0 data unit to the right of box_end_x
box_end_x_heart_failure = box_start_x_heart_failure + 4.8  # Right edge: makes the anchor box 4.8 data units wide

# Create the inset axes that will hold the HF hazard-ratio plot
inset_heart_failure = inset_axes(
    ax,
    width="60%",  # Percentage of the anchor box width
    height="100.1%",  # Percentage of the anchor box height
    bbox_to_anchor=(box_start_x_heart_failure - 4.55, box_start_y + 0.1, box_end_x_heart_failure - box_start_x_heart_failure, box_end_y - box_start_y),  # (x, y, width, height); the -4.55 and +0.1 shifts are hand-tuned
    bbox_transform=ax.transData,  # Interpret the anchor box in data coordinates of `ax`
    loc='center'
)

# Draw the HF hazard-ratio plot with its dedicated helper
create_hazard_ratio_plot_heart_failure(inset_heart_failure, plot_data_heart_failure)

# Add the annotation box for the HF rows.
# NOTE(review): the box is attached to `inset` (the first hazard-ratio inset),
# not to `inset_heart_failure`, so x_offset=-0.33 is measured in that axes'
# coordinates and its y-limits are copied from `inset` -- confirm this is intended.
add_annotations_box(inset, plot_data_heart_failure, box_title="", x_offset=-0.33)

#######################################################################################

# Number of PH and HF participants per y-tick position (entry i belongs to
# y-tick i + 1). Counts are stored as plain integers; the two-digit zero
# padding shown on the axis is applied once, when the labels are built, so
# every label is padded the same way (previously padding was hand-typed as
# strings such as '05' and one single-digit count was left unpadded).
ph_participants = [10, 5, 50, 16, 16, 7, 13, 5, 5, 49,
                   6, 20, 5, 15, 9, 8, 10, 11, 6, 42,
                   13, 14, 25, 7, 5, 8, 13, 78, 9, 18,
                   10, 11, 42, 7, 6, 11, 16, 11, 5, 44,
                   5, 5, 5, 25]

hf_participants = [18, 9, 70, 19, 18, 6, 14, 6, 7, 61,
                   7, 43, 12, 22, 5, 17, 33, 20, 6, 42,
                   14, 10, 41, 21, 5, 7, 8, 67, 7, 7,
                   5, 17, 30, 6, 11, 17, 18, 6, 6, 27,
                   5, 6, 5, 15]


# One y-tick per entry, starting at 1, labelled "<PH count>, <HF count>"
y_ticks = list(range(1, len(ph_participants) + 1))
y_tick_labels = [f"{ph:02d}, {hf:02d}" for ph, hf in zip(ph_participants, hf_participants)]


# Set the limits of the main axes from the pathway list
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Largest number of nodes in any pathway
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)  # Right limit: x position of the last node column plus 6.5 data units of extra room
ax.set_ylim(0.5, len(pathways) + 0.7)  # One unit of height per pathway, with a little headroom at the top

# Alternative kept for reference: hide the y-ticks entirely
#ax.set_yticks([])  # Optionally, remove y-ticks
#ax.set_yticklabels([])  # Ensure no y-axis labels


# Use the participant counts as y-tick labels
ax.set_yticks(y_ticks)  # Tick positions 1..N
ax.set_yticklabels(y_tick_labels, fontsize=10)  # "<PH>, <HF>" participant counts
# Heading for the y-tick label column, placed just left of the axes at its top edge
ax.annotate(
    "No. of Participants\n (PH , HF)",  # Heading text
    xy=(-0.02, 1.00),  # Slightly left of the left edge, at the top of the axes
    xycoords='axes fraction',  # xy is given as a fraction of the axes size
    fontsize=12,  # Font size
    ha='center',  # Horizontal alignment
    va='bottom',  # Text sits above the anchor point
    rotation=0  # No rotation
)

# Hand-placed x-tick positions, numbered 1-5 as the steps of the disease sequence
# (presumably centred under the node columns -- verify if `gap` changes)
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Build one legend patch per condition (legend label -> swatch color).
# Labels follow the pattern "<condition name> - <code>"; the code is written
# exactly as it appears in the pathway labels (e.g. E78.0).
handles = [
    mpatches.Patch(color=color, label=full_name)
    for full_name, color in {
        
        "PH and HF as Index Conditions": '#b3b3b3',
        "Type 2 Diabetes Mellitus - E11.9": '#88cc88',
        "Hypertension - I10": '#a993cc',
        "Hyperlipidemia - E78.0": '#d2b48c',  # was "E78"; every pathway uses E78.0
        "Chronic Ischemic Heart Disease - I25.1": '#FFC0CB',
        "Atrial Fibrillation and Flutter - I48": '#add8e6',
        "Mitral Valve Disorder - I34.0": '#FFECB3',
        "Asthma - J45.9": '#ffa07a',  # removed a stray comma after "Asthma"
        
    }.items()
]

# Place the legend below the axes: four columns, no frame
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.07), loc='upper center', ncol=4, fontsize=14, frameon=False)



# Axis labels and figure title
ax.set_xlabel("Disease Sequence Progression", fontsize=15, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/HF", fontsize=15, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)  # x=0.8 shifts the title towards the right side of the axes

# Move the x-label to 20% of the axes width, just below the axes
ax.xaxis.set_label_coords(0.2, -0.03) 

# Clean up the plot aesthetics by removing unnecessary spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Let matplotlib adjust the layout before saving
# NOTE(review): tight_layout may not account for inset axes -- check the saved output
plt.tight_layout()
# Save the figure as PNG in the current working directory
# NOTE(review): dpi=1200 produces a very large image for a large figure -- confirm it is needed
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a forest plot of hazard ratios with their confidence intervals.

    Rows are grouped by the first condition of each pathway and drawn top to
    bottom in the order E11.9, E78.0, I10, I25.1, I48, J45.9. Everything is
    applied to the `ax` that is passed in, never to pyplot's "current" axes.

    Parameters:
    - ax: The matplotlib axes to draw on.
    - plot_data: DataFrame indexed by pathway name ("Pathway_<code> -> ...")
      with the columns 'coef', 'coef lower 95%' and 'coef upper 95%'. The
      values are exponentiated here to obtain the hazard ratio and its bounds.

    Raises:
    - ValueError: If no row of plot_data starts with one of the known
      prefixes (there is then no row to derive the y-limits from).
    """
    # Starting condition -> marker color, listed in drawing order
    group_colors = {
        'E11.9': '#228B22',  # Forest green
        'E78.0': '#D2691E',  # Chocolate
        'I10': '#7D3C98',    # Purple
        'I25.1': '#FF69B4',  # Hot pink
        'I48': '#4682B4',    # Steel blue
        'J45.9': '#FF8C00',  # Dark orange
    }

    # int() truncates 0.8 to 0, so groups occupy consecutive rows with no
    # blank row in between. add_annotations_box writes its text at rows
    # 0..len(plot_data)-1, so the two only line up while this stays 0.
    spacing = 0.8
    group_gap = int(spacing)

    ci_label_offset = 0.05  # Horizontal space between a whisker end and its label

    current_y = 0
    y_positions = []  # Every row position that has been used
    for group, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks = range(current_y, current_y + len(data))

        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):
            # Convert the coefficient and its bounds to the hazard-ratio scale
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # Marker at the HR with asymmetric whiskers reaching both bounds
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],  # [[left length], [right length]]
                fmt='o',
                capsize=4.0,
                color=color
            )

            # Print the numeric bounds next to the whisker ends
            ax.text(lower_hr - ci_label_offset, y, f"{lower_hr:.2f}",
                    va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + ci_label_offset, y, f"{upper_hr:.2f}",
                    va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        current_y += len(data) + group_gap

    # Fit the y-range tightly around the rows, then flip it so row 0 is on top
    buffer = 0.5
    ax.set_ylim(min(y_positions) - buffer, max(y_positions) + buffer)
    ax.invert_yaxis()

    # Reference line at HR = 1
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed x-ticks on the hazard-ratio scale; the labels carry fontsize 10
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Label this axes explicitly. plt.xlabel()/plt.xticks() would act on
    # whichever axes is current, which is not guaranteed to be `ax`.
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_yticks([])  # Rows are identified by the neighbouring pathway chart
    ax.set_title("Hazards Analysis of PH v/s HF", fontsize=13)

    

def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a bordered box next to `ax` listing the p-value and hazard ratio of every row.

    Row i of plot_data is written at y = i, and the box copies the y-limits of
    `ax`, so the text lines up with a hazard-ratio plot only if that plot puts
    the same rows at consecutive positions 0..len(plot_data)-1 in the same order.

    P-values that would round to 0.00 at two decimals are shown as "<0.01",
    because a p-value is never exactly zero.

    Parameters:
    - ax: The axes the box is anchored to; its y-limits are copied.
    - plot_data: DataFrame with the columns 'p' and 'coef'. The hazard ratio
      shown is exp(coef).
    - box_title: Title of the annotation box.
    - x_offset: Left edge of the box's anchor, as a fraction of the width of `ax`.
    """
    # Create a new inset axes for the annotations, anchored in the axes
    # coordinates of `ax`; width and height are relative to the
    # (x_offset, 0, 0.2, 1) anchor box
    inset_box = inset_axes(
        ax,
        width="145%",
        height="100%",
        bbox_to_anchor=(x_offset, 0, 0.2, 1),
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Hide the default frame and all ticks; the border is drawn explicitly below
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # One centred line of text per row
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        p_text = f"{p_val:.2f}"
        if p_text == "0.00":
            p_text = "<0.01"  # Avoid printing a p-value of exactly zero
        annotation = f"P:{p_text}, HR:{np.exp(coef):.2f}"  # HR is exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Use the same (possibly inverted) y-range as the anchoring axes
    inset_box.set_ylim(ax.get_ylim())

    # Draw the border around the whole annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Full extent of the box in its own axes coordinates
        transform=inset_box.transAxes,
        edgecolor="black",
        facecolor="none",   # Transparent inside
        linewidth=1.5,
        zorder=10           # Keep the border on top of the text
    )
    inset_box.add_patch(rect)



 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws the hazard-ratio forest plot for the PH pathways on `ax`.

    Parameters:
    - ax: The matplotlib axes to draw on.
    - plot_data_ph: DataFrame indexed by pathway name ("Pathway_<code> -> ...")
      with the columns 'coef', 'coef lower 95%' and 'coef upper 95%'; the
      values are exponentiated to obtain the hazard ratio and its bounds.

    Rows are grouped by the pathway's first condition and drawn top to bottom.
    """
    # (index prefix, marker color) for every starting condition, in drawing order
    group_specs = (
        ("Pathway_E11.9", '#228B22'),  # Forest green
        ("Pathway_E78.0", '#D2691E'),  # Chocolate
        ("Pathway_I10", '#7D3C98'),    # Purple
        ("Pathway_I25.1", '#FF69B4'),  # Hot pink
        ("Pathway_I48", '#4682B4'),    # Steel blue
        ("Pathway_J45.9", '#FF8C00'),  # Dark orange
    )

    blank_rows = int(0.8)  # Truncates to 0: groups sit on consecutive rows
    label_shift = 0.05     # Space between a whisker end and its numeric label

    next_row = 0
    used_rows = []
    for prefix, colour in group_specs:
        subset = plot_data_ph.loc[plot_data_ph.index.str.startswith(prefix)]
        rows = range(next_row, next_row + len(subset))
        estimates = zip(subset['coef'], subset['coef lower 95%'], subset['coef upper 95%'])

        for row, (beta, beta_low, beta_high) in zip(rows, estimates):
            # Move from the coefficient scale to the hazard-ratio scale
            point = np.exp(beta)
            left_end = np.exp(beta_low)
            right_end = np.exp(beta_high)

            # Marker with asymmetric whiskers reaching both bounds
            ax.errorbar(
                point,
                row,
                xerr=[[point - left_end], [right_end - point]],
                fmt='o',
                capsize=4.0,
                color=colour
            )

            # Numeric bounds beside the whisker ends
            ax.text(left_end - label_shift, row, f"{left_end:.2f}",
                    va='center', ha='right', fontsize=8, color='black')
            ax.text(right_end + label_shift, row, f"{right_end:.2f}",
                    va='center', ha='left', fontsize=8, color='black')

        used_rows.extend(rows)
        next_row += len(subset) + blank_rows

    # Half a row of padding above and below, then flip so row 0 is on top
    padding = 0.5
    ax.set_ylim(min(used_rows) - padding, max(used_rows) + padding)
    ax.invert_yaxis()

    # Reference line at HR = 1
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed ticks on the hazard-ratio scale
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in hr_ticks], fontsize=10)

    # Label and title
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # No y-tick labels on this panel
    ax.set_yticks([])




##############################################################################################
    
  
    
def create_hazard_ratio_plot_heart_failure(ax, plot_data_heart_failure):
    """
    Draws the hazard-ratio forest plot for the HF pathways on `ax`.

    Parameters:
    - ax: The matplotlib axes to draw on.
    - plot_data_heart_failure: DataFrame indexed by pathway name
      ("Pathway_<code> -> ...") with the columns 'coef', 'coef lower 95%' and
      'coef upper 95%'; the values are exponentiated to obtain the hazard
      ratio and its bounds.

    Rows are grouped by the pathway's first condition and drawn top to bottom.
    """
    frame = plot_data_heart_failure

    # Starting conditions in drawing order, with the marker color of each
    start_codes = ['E11.9', 'E78.0', 'I10', 'I25.1', 'I48', 'J45.9']
    marker_colours = dict(zip(
        start_codes,
        ['#228B22', '#D2691E', '#7D3C98', '#FF69B4', '#4682B4', '#FF8C00'],
    ))

    # Split the frame into one sub-frame per starting condition
    members = {
        code: frame.loc[frame.index.str.startswith(f"Pathway_{code}")]
        for code in start_codes
    }

    def draw_row(row, beta, beta_low, beta_high, colour):
        # Exponentiate to get the hazard ratio and its interval
        centre, low_end, high_end = np.exp(beta), np.exp(beta_low), np.exp(beta_high)
        ax.errorbar(
            centre,
            row,
            xerr=[[centre - low_end], [high_end - centre]],  # Asymmetric whiskers
            fmt='o',
            capsize=4.0,
            color=colour
        )
        # Numeric bounds, nudged 0.05 away from the whisker ends
        ax.text(low_end - 0.05, row, f"{low_end:.2f}", va='center', ha='right', fontsize=8, color='black')
        ax.text(high_end + 0.05, row, f"{high_end:.2f}", va='center', ha='left', fontsize=8, color='black')

    separator_rows = int(0.8)  # Truncates to 0: no blank row between groups
    rows_used = []
    first_free_row = 0
    for code, subset in members.items():
        block_rows = range(first_free_row, first_free_row + len(subset))
        for row, beta, beta_low, beta_high in zip(
                block_rows, subset['coef'], subset['coef lower 95%'], subset['coef upper 95%']):
            draw_row(row, beta, beta_low, beta_high, marker_colours[code])
        rows_used.extend(block_rows)
        first_free_row = block_rows.stop + separator_rows

    # Tight y-range around the rows, flipped so the first row is at the top
    top_row, bottom_row = min(rows_used), max(rows_used)
    ax.set_ylim(top_row - 0.5, bottom_row + 0.5)
    ax.invert_yaxis()

    # Reference line at HR = 1
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed ticks on the hazard-ratio scale
    tick_values = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(tick_values)
    ax.set_xticklabels([f"{tick:.1f}" for tick in tick_values], fontsize=10)

    # Label and title
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with HF Pathways", fontsize=13)

    # No y-tick labels on this panel
    ax.set_yticks([])

    
    
########################################################################################################################    

    





# Disease pathways drawn as rows of boxes in the main chart. Each string is a
# sequence of condition codes separated by " -> " and ends in the "PH/HF"
# node. The plotting loop draws the first entry at the top of the chart.
# The entries are grouped by starting condition and appear in the same order
# as the "Pathway_..." index labels of the hazard-ratio data frames.
pathways = [
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> E78.0 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> PH/HF",
    "E11.9 -> I10 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> PH/HF",
    "E11.9 -> I10 -> I25.1 -> E78.0 -> PH/HF",
    "E11.9 -> I10 -> I48 -> PH/HF",
    "E11.9 -> I10 -> J45.9 -> PH/HF",
    "E11.9 -> I25.1 -> PH/HF",
    
    "E78.0 -> I10 -> PH/HF",
    "E78.0 -> I10 -> E11.9 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> PH/HF",
    "E78.0 -> I10 -> I25.1 -> I48 -> PH/HF",
    "E78.0 -> I10 -> I48 -> PH/HF",
    "E78.0 -> I10 -> J45.9 -> PH/HF",
    "E78.0 -> I25.1 -> PH/HF",
    
    
    "I10 -> E11.9 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> PH/HF",
    "I10 -> E11.9 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> PH/HF",
    "I10 -> E78.0 -> I25.1 -> PH/HF",
    "I10 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> PH/HF",
    "I10 -> I25.1 -> E78.0 -> I48 -> PH/HF",
    "I10 -> I25.1 -> I48 -> PH/HF",
    "I10 -> I34.0 -> PH/HF",
    "I10 -> I48 -> PH/HF",
    "I10 -> I48 -> E11.9 -> PH/HF",
    "I10 -> I48 -> E78.0 -> PH/HF",
    "I10 -> I48 -> I34.0 -> PH/HF",
    "I10 -> I48 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> PH/HF",
    "I10 -> J45.9 -> E11.9 -> PH/HF",
    "I10 -> J45.9 -> E78.0 -> PH/HF",
    
    "I25.1 -> E78.0 -> I10 -> PH/HF",
    "I25.1 -> I10 -> PH/HF",
    "I25.1 -> I48 -> PH/HF",
    
    "I48 -> E78.0 -> I10 -> PH/HF",
    "I48 -> I10 -> PH/HF",
    "I48 -> J45.9 -> PH/HF",

    "J45.9 -> E11.9 -> I10 -> PH/HF",
    "J45.9 -> E78.0 -> I10 -> PH/HF",
    "J45.9 -> I10 -> PH/HF"   
]


# Fill color of the node box for each condition code. The plotting loop looks
# codes up with .get(..., 'white'), so a code missing here is drawn in white.
color_dict = {
    'PH/HF': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3',
    'J45.9': '#F4A460',
    'I34.0':'#FFECB3'
}




# Create the figure and the main axes that holds the pathway chart
fig, ax = plt.subplots(figsize=(17, 13))
gap = -0.38  # Added to the unit column spacing; negative, so node columns are 0.62 apart
row_gap = 1  # NOTE(review): not referenced by the visible plotting code



# Draw every pathway as one row of colored, labelled boxes
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")  # Also read after the loop, where it holds the LAST pathway's nodes
    for col, condition in enumerate(conditions, start=1):
        # Column position shrinks with `gap`; the first pathway gets the highest y
        x_pos = col + (col - 1) * gap
        y_pos = len(pathways) - row + 1

        # Box of width 0.6 and height 1.2; rows are 1.0 apart, so neighbouring boxes overlap vertically
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.5), 0.6, 1.2, 
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Condition code, positioned by hand inside the box
        ax.text(x_pos - 0.17, y_pos + 0.07, condition, ha='center', va='center', fontsize=9.60)

        # Add arrows between nodes
        #if col < len(conditions):
        #    next_x_pos = x_pos + 1 + gap
        #    ax.annotate(
        #        '',
        #        xy=(next_x_pos - 0.5, y_pos),  # End position of arrow
        #        xytext=(x_pos + 0.11, y_pos),  # Start position of arrow
        #        arrowprops=dict(
        #            arrowstyle="->,head_width=0.2,head_length=0.3",  # Adjust arrowhead size
        #            color='black',  # Arrow color
        #            lw=0.7,  # Line width (thicker arrow)
        #            shrinkA=0,  # Adjust start of the arrow (in points)
        #            shrinkB=0   # Adjust end of the arrow (in points)
        #        
        #    )
#    )



# Bounds of the area reserved for the inset plots, in data coordinates of `ax`.
# NOTE(review): `conditions` is whatever the pathway-drawing loop left behind,
# i.e. the node list of the LAST pathway in `pathways`, not the longest one.
# Reordering `pathways` would therefore shift these x positions -- confirm intended.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5     # Ending y position for the box

########################################################################################################################################

# Insert the hazard ratio plot into the box area
#inset = inset_axes(
#    ax,
#    width="60%",  # Adjust width of the inset
#    height="100.1%",  # Adjust height of the inset
#    bbox_to_anchor=(box_start_x + 5.0, box_start_y + 0.1, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
#    bbox_transform=ax.transData,
#    loc='center'
#)


# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
#plot_data = pd.DataFrame({
#    'coef': [0.58, 0.56, 0.10, 0.55, 0.54, 1.19, 0.82, -0.05, 0.20, 0.16, 
#             1.13, 0.33, 0.56, 0.71, 1.07, -0.41, -0.29, 0.52, 1.53, 0.70, 
#             0.55, 0.83, 0.15, 0.62, 1.22, 0.63, -0.23, 0.66, 0.56, 0.71, 
#             1.24, 0.60, 0.38, 0.85, 0.24, 0.73, 0.66, 1.10, 0.98, 0.91, 
#             -0.47, 0.15, 0.54, 0.63],
#    'coef lower 95%': [0.17, 0.05, -0.23, 0.16, 0.15, 0.74, 0.41, -0.63, -0.47, -0.17, 
#                       0.59, -0.04, 0.05, 0.32, 0.61, -0.97, -0.80, 0.08, 1.05, 0.36, 
#                       0.14, 0.43, -0.24, 0.12, 0.71, 0.16, -0.70, 0.35, 0.10, 0.34, 
#                       0.79, 0.17, 0.04, 0.34, -0.30, 0.30, 0.22, 0.61, 0.41, 0.57, 
#                       -1.15, -0.42, -0.03, 0.25],
#    'coef upper 95%': [0.98, 1.07, 0.43, 0.94, 0.92, 1.65, 1.23, 0.52, 0.88, 0.49, 
#                       1.66, 0.69, 1.08, 1.11, 1.54, 0.14, 0.22, 0.95, 2.01, 1.04, 
#                       0.96, 1.23, 0.53, 1.13, 1.73, 1.11, 0.24, 0.96, 1.02, 1.09, 
#                       1.69, 1.03, 0.72, 1.35, 0.77, 1.16, 1.09, 1.60, 1.56, 1.25, 
#                       0.20, 0.72, 1.11, 1.01],
#    'p': [0.01, 0.03, 0.55, 0.01, 0.01, 0.001, 0.001, 0.86, 0.55, 0.34,
#          0.001, 0.08, 0.03, 0.001, 0.001, 0.15, 0.27, 0.02, 0.001, 0.001,
#          0.01, 0.001, 0.45, 0.02, 0.001, 0.01, 0.33, 0.001, 0.02, 0.001,
#          0.001, 0.001, 0.03, 0.001, 0.39, 0.001, 0.001, 0.001, 0.001,0.001, 0.17,
#          0.61, 0.06, 0.001]
#}, index=[        
#    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
#    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
#    "Pathway_E11.9 -> I10",
#    "Pathway_E11.9 -> I10 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I48",
#    "Pathway_E11.9 -> I10 -> J45.9",
#    "Pathway_E11.9 -> I25.1",
#    
#    "Pathway_E78.0 -> I10",
#    "Pathway_E78.0 -> I10 -> E11.9",
#    "Pathway_E78.0 -> I10 -> I25.1",
#    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
#    "Pathway_E78.0 -> I10 -> I48",
#    "Pathway_E78.0 -> I10 -> J45.9",
#    "Pathway_E78.0 -> I25.1",
#    
#    "Pathway_I10 -> E11.9",
#    "Pathway_I10 -> E11.9 -> E78.0",
#    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
#    "Pathway_I10 -> E78.0",
#    "Pathway_I10 -> E78.0 -> I25.1",
#    "Pathway_I10 -> E78.0 -> I48",
#    "Pathway_I10 -> I25.1",
#    "Pathway_I10 -> I25.1 -> E78.0",
#    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
#    "Pathway_I10 -> I25.1 -> I48",
#    "Pathway_I10 -> I34.0",
#    "Pathway_I10 -> I48",
#    "Pathway_I10 -> I48 -> E11.9",
#    "Pathway_I10 -> I48 -> E78.0",
#    "Pathway_I10 -> I48 -> I34.0",
#    "Pathway_I10 -> I48 -> J45.9",
#    "Pathway_I10 -> J45.9",
#    "Pathway_I10 -> J45.9 -> E11.9",
#    "Pathway_I10 -> J45.9 -> E78.0",
#    
#    "Pathway_I25.1 -> E78.0 -> I10",
#    "Pathway_I25.1 -> I10",
#    "Pathway_I25.1 -> I48",
#    
#    "Pathway_I48 -> E78.0 -> I10",
#    "Pathway_I48 -> I10",
#    "Pathway_I48 -> J45.9",
#    
#    "Pathway_J45.9 -> E11.9 -> I10",
#    "Pathway_J45.9 -> E78.0 -> I10",
#    "Pathway_J45.9 -> I10"
#])#

# Flip the inset box upside down by inverting the y-axis
#inset.invert_xaxis()







# Add the annotations after creating each plot
#create_hazard_ratio_plot(inset, plot_data)
# Add a separate box listing each pathway's p-value and hazard ratio (HR)
#add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)







###################################################################################

# Estimates plotted in the "Mortality Risk with PH Pathways" panel, one row per pathway:
# (pathway label, coef, coef lower 95%, coef upper 95%, p).
# The coefficient columns are on the log scale; the plotting helpers exponentiate
# them to obtain the hazard ratio and its interval.
# NOTE(review): p-values of 0.001 look like placeholders for "p < 0.001" - confirm.
_PH_ESTIMATES = [
    # Pathways starting with E11.9
    ("Pathway_E11.9 -> E78.0 -> I10 -> I25.1", 0.27, -0.16, 0.70, 0.22),
    ("Pathway_E11.9 -> E78.0 -> I10 -> I48", -0.00, -0.55, 0.54, 0.99),
    ("Pathway_E11.9 -> I10", 0.16, -0.13, 0.45, 0.29),
    ("Pathway_E11.9 -> I10 -> E78.0", 0.68, 0.34, 1.02, 0.001),
    ("Pathway_E11.9 -> I10 -> I25.1", 0.00, -0.39, 0.39, 0.98),
    ("Pathway_E11.9 -> I10 -> I25.1 -> E78.0", 0.43, -0.12, 0.98, 0.12),
    ("Pathway_E11.9 -> I10 -> I48", -0.11, -0.54, 0.33, 0.63),
    ("Pathway_E11.9 -> I10 -> J45.9", 0.13, -0.62, 0.88, 0.74),
    ("Pathway_E11.9 -> I25.1", -0.72, -1.75, 0.31, 0.17),
    # Pathways starting with E78.0
    ("Pathway_E78.0 -> I10", 0.04, -0.26, 0.35, 0.78),
    ("Pathway_E78.0 -> I10 -> E11.9", -0.02, -0.73, 0.69, 0.96),
    ("Pathway_E78.0 -> I10 -> I25.1", -0.33, -0.73, 0.08, 0.11),
    ("Pathway_E78.0 -> I10 -> I25.1 -> I48", 0.38, -0.18, 0.93, 0.18),
    ("Pathway_E78.0 -> I10 -> I48", 0.00, -0.39, 0.40, 0.98),
    ("Pathway_E78.0 -> I10 -> J45.9", 0.20, -0.29, 0.70, 0.42),
    ("Pathway_E78.0 -> I25.1", -0.41, -1.20, 0.39, 0.32),
    # Pathways starting with I10
    ("Pathway_I10 -> E11.9", 0.40, -0.20, 1.00, 0.19),
    ("Pathway_I10 -> E11.9 -> E78.0", 0.72, 0.28, 1.16, 0.001),
    ("Pathway_I10 -> E11.9 -> E78.0 -> I25.1", -0.05, -0.67, 0.57, 0.87),
    ("Pathway_I10 -> E78.0", -0.14, -0.49, 0.21, 0.43),
    ("Pathway_I10 -> E78.0 -> I25.1", 0.34, -0.07, 0.74, 0.10),
    ("Pathway_I10 -> E78.0 -> I48", -0.56, -1.03, -0.08, 0.02),
    ("Pathway_I10 -> I25.1", -0.05, -0.45, 0.35, 0.80),
    ("Pathway_I10 -> I25.1 -> E78.0", 0.31, -0.24, 0.86, 0.27),
    ("Pathway_I10 -> I25.1 -> E78.0 -> I48", 0.26, -0.29, 0.81, 0.35),
    ("Pathway_I10 -> I25.1 -> I48", 0.30, -0.20, 0.80, 0.23),
    ("Pathway_I10 -> I34.0", -1.18, -1.90, -0.46, 0.001),
    ("Pathway_I10 -> I48", -0.24, -0.49, 0.02, 0.07),
    ("Pathway_I10 -> I48 -> E11.9", 0.16, -0.37, 0.70, 0.55),
    ("Pathway_I10 -> I48 -> E78.0", -0.69, -1.17, -0.22, 0.001),
    ("Pathway_I10 -> I48 -> I34.0", -0.22, -0.74, 0.29, 0.40),
    ("Pathway_I10 -> I48 -> J45.9", 0.54, 0.12, 0.95, 0.01),
    ("Pathway_I10 -> J45.9", -0.13, -0.47, 0.22, 0.47),
    ("Pathway_I10 -> J45.9 -> E11.9", 0.51, 0.00, 1.02, 0.05),
    ("Pathway_I10 -> J45.9 -> E78.0", 0.20, -0.53, 0.93, 0.59),
    # Pathways starting with I25.1
    ("Pathway_I25.1 -> E78.0 -> I10", 0.08, -0.44, 0.61, 0.75),
    ("Pathway_I25.1 -> I10", 0.39, -0.05, 0.83, 0.08),
    ("Pathway_I25.1 -> I48", 0.07, -0.47, 0.61, 0.81),
    # Pathways starting with I48
    ("Pathway_I48 -> E78.0 -> I10", 0.46, -0.18, 1.10, 0.16),
    ("Pathway_I48 -> I10", 0.04, -0.27, 0.34, 0.82),
    ("Pathway_I48 -> J45.9", -0.93, -1.93, 0.07, 0.07),
    # Pathways starting with J45.9
    ("Pathway_J45.9 -> E11.9 -> I10", 0.29, -0.46, 1.04, 0.45),
    ("Pathway_J45.9 -> E78.0 -> I10", 0.53, -0.04, 1.10, 0.07),
    ("Pathway_J45.9 -> I10", 0.20, -0.21, 0.61, 0.34),
]

# Split each record into its index label and its four numeric columns
plot_data_ph = pd.DataFrame(
    [estimates for _label, *estimates in _PH_ESTIMATES],
    index=[label for label, *_estimates in _PH_ESTIMATES],
    columns=['coef', 'coef lower 95%', 'coef upper 95%', 'p'],
)

# Scaled copy of the PH estimates (coefficients x 1.1). It is kept for
# experimentation only; the plotting calls below use plot_data_ph itself.
plot_data_2 = plot_data_ph.copy()
plot_data_2['coef'] = plot_data_2['coef'] * 1.1

# Horizontal extent (main-axes data units) of the anchor box for the PH panel
box_start_x_ph = box_end_x + 1.0
box_end_x_ph = box_start_x_ph + 4.8

# Inset axes for the PH panel: 60% of the anchor box wide, centred inside it
inset_ph = inset_axes(
    ax,
    width="60%",
    height="100.1%",
    bbox_to_anchor=(box_start_x_ph - 8.3, box_start_y + 0.1, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH mortality hazard ratios into the inset
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Attach the p-value / HR box to the PH panel itself. The call used to pass
# `inset`, but the statement creating that axes is commented out, so the name is
# either undefined or left over from an earlier figure. x_offset=0.97 keeps the
# box where x_offset=-1.63 placed it relative to that disabled inset (same width,
# anchored 7.5 data units further right): -1.63 + 7.5 / 2.88 ~= 0.97.
# NOTE(review): assumes the commented-out inset geometry (box_start_x + 5.0) - confirm.
add_annotations_box(inset_ph, plot_data_ph, box_title="", x_offset=0.97)



####################################################################################

# Estimates plotted in the "Mortality Risk with HF Pathways" panel, one row per pathway:
# (pathway label, coef, coef lower 95%, coef upper 95%, p).
# The coefficient columns are on the log scale; the plotting helpers exponentiate
# them to obtain the hazard ratio and its interval. The p-values were entered by hand.
_HEART_FAILURE_ESTIMATES = [
    # Pathways starting with E11.9
    ("Pathway_E11.9 -> E78.0 -> I10 -> I25.1", -0.08, -0.35, 0.19, 0.56),
    ("Pathway_E11.9 -> E78.0 -> I10 -> I48", 0.08, -0.30, 0.45, 0.69),
    ("Pathway_E11.9 -> I10", 0.29, 0.07, 0.51, 0.01),
    ("Pathway_E11.9 -> I10 -> E78.0", 0.09, -0.22, 0.41, 0.56),
    ("Pathway_E11.9 -> I10 -> I25.1", -0.10, -0.43, 0.22, 0.54),
    ("Pathway_E11.9 -> I10 -> I25.1 -> E78.0", -0.30, -0.70, 0.10, 0.14),
    ("Pathway_E11.9 -> I10 -> I48", -0.07, -0.43, 0.29, 0.69),
    ("Pathway_E11.9 -> I10 -> J45.9", 0.55, -0.02, 1.12, 0.06),
    ("Pathway_E11.9 -> I25.1", -0.51, -1.15, 0.13, 0.12),
    # Pathways starting with E78.0
    ("Pathway_E78.0 -> I10", -0.39, -0.64, -0.15, 0.005),
    ("Pathway_E78.0 -> I10 -> E11.9", 0.59, 0.12, 1.06, 0.01),
    ("Pathway_E78.0 -> I10 -> I25.1", -0.10, -0.34, 0.14, 0.4),
    ("Pathway_E78.0 -> I10 -> I25.1 -> I48", -0.62, -1.01, -0.23, 0.005),
    ("Pathway_E78.0 -> I10 -> I48", 0.47, 0.17, 0.78, 0.005),
    ("Pathway_E78.0 -> I10 -> J45.9", -0.09, -0.81, 0.63, 0.8),
    ("Pathway_E78.0 -> I25.1", -0.74, -1.16, -0.31, 0.005),
    # Pathways starting with I10
    ("Pathway_I10 -> E11.9", -0.11, -0.41, 0.20, 0.5),
    ("Pathway_I10 -> E11.9 -> E78.0", -0.28, -0.60, 0.04, 0.09),
    ("Pathway_I10 -> E11.9 -> E78.0 -> I25.1", 0.08, -0.45, 0.62, 0.76),
    ("Pathway_I10 -> E78.0", 0.25, -0.03, 0.53, 0.08),
    ("Pathway_I10 -> E78.0 -> I25.1", -0.38, -0.78, 0.02, 0.06),
    ("Pathway_I10 -> E78.0 -> I48", 0.41, 0.04, 0.78, 0.03),
    ("Pathway_I10 -> I25.1", -0.11, -0.40, 0.18, 0.46),
    ("Pathway_I10 -> I25.1 -> E78.0", -0.47, -0.81, -0.13, 0.01),
    ("Pathway_I10 -> I25.1 -> E78.0 -> I48", 0.15, -0.33, 0.63, 0.54),
    ("Pathway_I10 -> I25.1 -> I48", -0.30, -0.83, 0.22, 0.26),
    ("Pathway_I10 -> I34.0", 0.42, -0.10, 0.94, 0.12),
    ("Pathway_I10 -> I48", -0.27, -0.51, -0.03, 0.03),
    ("Pathway_I10 -> I48 -> E11.9", 0.90, 0.42, 1.38, 0.005),
    ("Pathway_I10 -> I48 -> E78.0", 0.26, -0.24, 0.76, 0.31),
    ("Pathway_I10 -> I48 -> I34.0", -0.25, -0.86, 0.36, 0.43),
    ("Pathway_I10 -> I48 -> J45.9", -0.16, -0.69, 0.38, 0.57),
    ("Pathway_I10 -> J45.9", -0.22, -0.55, 0.11, 0.2),
    ("Pathway_I10 -> J45.9 -> E11.9", -0.39, -1.07, 0.30, 0.27),
    ("Pathway_I10 -> J45.9 -> E78.0", 0.39, -0.01, 0.80, 0.06),
    # Pathways starting with I25.1
    ("Pathway_I25.1 -> E78.0 -> I10", -0.55, -0.92, -0.18, 0.005),
    ("Pathway_I25.1 -> I10", 0.15, -0.23, 0.54, 0.44),
    ("Pathway_I25.1 -> I48", -0.08, -0.64, 0.47, 0.77),
    # Pathways starting with I48
    ("Pathway_I48 -> E78.0 -> I10", 0.36, -0.19, 0.92, 0.2),
    ("Pathway_I48 -> I10", -0.60, -0.99, -0.21, 0.005),
    ("Pathway_I48 -> J45.9", -0.22, -0.82, 0.39, 0.48),
    # Pathways starting with J45.9
    ("Pathway_J45.9 -> E11.9 -> I10", 0.27, -0.28, 0.83, 0.33),
    ("Pathway_J45.9 -> E78.0 -> I10", -0.16, -0.78, 0.45, 0.61),
    ("Pathway_J45.9 -> I10", -0.30, -0.78, 0.19, 0.23),
]

# Split each record into its index label and its four numeric columns
plot_data_heart_failure = pd.DataFrame(
    [estimates for _label, *estimates in _HEART_FAILURE_ESTIMATES],
    index=[label for label, *_estimates in _HEART_FAILURE_ESTIMATES],
    columns=['coef', 'coef lower 95%', 'coef upper 95%', 'p'],
)




# Scaled copy of the HF estimates (coefficients x 1.1). It is kept for
# experimentation only; the plotting calls below use plot_data_heart_failure itself.
plot_data_3 = plot_data_heart_failure.copy()
plot_data_3['coef'] = plot_data_3['coef'] * 1.1

# Horizontal extent (main-axes data units) of the anchor box for the HF panel
box_start_x_heart_failure = box_end_x + 1.0
box_end_x_heart_failure = box_start_x_heart_failure + 4.8

# Inset axes for the HF panel: 60% of the anchor box wide, centred inside it
inset_heart_failure = inset_axes(
    ax,
    width="60%",
    height="100.1%",
    bbox_to_anchor=(box_start_x_heart_failure - 4.55, box_start_y + 0.1, box_end_x_heart_failure - box_start_x_heart_failure, box_end_y - box_start_y),
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the HF mortality hazard ratios into the inset
create_hazard_ratio_plot_heart_failure(inset_heart_failure, plot_data_heart_failure)

# Attach the p-value / HR box to the HF panel itself. The call used to pass
# `inset`, but the statement creating that axes is commented out, so the name is
# either undefined or left over from an earlier figure. x_offset=0.97 keeps the
# box where x_offset=-0.33 placed it relative to that disabled inset (same width,
# anchored 3.75 data units further right): -0.33 + 3.75 / 2.88 ~= 0.97.
# NOTE(review): assumes the commented-out inset geometry (box_start_x + 5.0) - confirm.
add_annotations_box(inset_heart_failure, plot_data_heart_failure, box_title="", x_offset=0.97)

#######################################################################################

# Number of PH and HF participants per pathway.
# NOTE(review): the counts appear to be listed in the same order as `pathways`
# and the rows of the estimate tables (the largest counts fall on the
# two-condition pathways, the smallest on the rows with the widest intervals) -
# confirm against the cohort tables.
# All entries are plain integers; zero-padding is applied when the labels are
# built, so every count is rendered with two digits.
ph_participants = [10, 5, 50, 16, 16, 7, 13, 5, 5, 49,
                   6, 20, 5, 15, 9, 8, 10, 11, 6, 42,
                   13, 14, 25, 7, 5, 8, 13, 78, 9, 18,
                   10, 11, 42, 7, 6, 11, 16, 11, 5, 44,
                   5, 5, 5, 25]

hf_participants = [18, 9, 70, 19, 18, 6, 14, 6, 7, 61,
                   7, 43, 12, 22, 5, 17, 33, 20, 6, 42,
                   14, 10, 41, 21, 5, 7, 8, 67, 7, 7,
                   5, 17, 30, 6, 11, 17, 18, 6, 6, 27,
                   5, 6, 5, 15]


# The pathway diagram draws the first pathway on the top row (y = number of
# pathways) and the last one at y = 1, so the tick positions run downwards to keep
# label i on the row of pathway i.
y_ticks = list(range(len(ph_participants), 0, -1))
y_tick_labels = [f"{ph:02d}, {hf:02d}" for ph, hf in zip(ph_participants, hf_participants)]


# Axis extents: the x-range covers the longest pathway plus a fixed 6.5-unit
# allowance to its right; the y-range covers one row per pathway.
node_columns = max(map(len, (pathway.split(" -> ") for pathway in pathways)))
x_axis_upper = node_columns + (node_columns - 1) * gap + 6.5
ax.set_xlim(0.5, x_axis_upper)
ax.set_ylim(0.5, len(pathways) + 0.7)

# Y-ticks carry the "PH, HF" participant counts prepared above
# (call ax.set_yticks([]) / ax.set_yticklabels([]) instead to hide them).
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_tick_labels, fontsize=10)

# Heading for the participant-count column, placed just above the y-axis
ax.annotate(
    "No. of Participants\n (PH , HF)",
    xy=(-0.02, 1.00),
    xycoords='axes fraction',  # position is relative to the axes, not the data
    fontsize=12,
    ha='center',
    va='bottom',
    rotation=0,
)

# X-ticks number the steps of the disease sequence; positions are set by hand
step_tick_positions = [0.8, 1.45, 2.05, 2.68, 3.33]
ax.set_xticks(step_tick_positions)
ax.set_xticklabels([str(step) for step in range(1, 6)], fontsize=13)

# Legend entries as (label, swatch colour) pairs, in display order.
# NOTE(review): several swatches (e.g. '#88cc88', '#a993cc', '#d2b48c') are not
# the same shades as the node fills in color_dict - confirm this is intentional.
legend_entries = [
    ("PH and HF as Index Conditions", '#b3b3b3'),
    ("Type 2 Diabetes Mellitus", '#88cc88'),
    ("Hypertension", '#a993cc'),
    ("Hyperlipidemia", '#d2b48c'),
    ("Chronic Ischemic Heart Disease", '#FFC0CB'),
    ("Atrial Fibrillation and Flutter", '#add8e6'),
    ("Mitral Valve Disorder", '#FFECB3'),
    ("Asthma", '#ffa07a'),
]
handles = [mpatches.Patch(color=swatch, label=title) for title, swatch in legend_entries]

# Frameless four-column legend below the axes
ax.legend(
    handles=handles,
    bbox_to_anchor=(0.6, -0.07),
    loc='upper center',
    ncol=4,
    fontsize=14,
    frameon=False,
)

# Axis labels and figure title
ax.set_xlabel("Disease Sequence Progression", fontsize=15, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/HF", fontsize=15, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)

# Move the x-label under the pathway diagram rather than the axes centre
ax.xaxis.set_label_coords(0.2, -0.03)

# Hide the top and right borders of the main axes
for spine in ('top', 'right'):
    ax.spines[spine].set_visible(False)

# Tighten the layout, write the PNG at 1200 dpi, then display the figure
plt.tight_layout()
plt.savefig("pathways_plot.png", dpi=1200)
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a forest plot of hazard ratios with their 95% intervals on `ax`.

    Rows are grouped by the first condition in their index label
    ("Pathway_<code> -> ...") and plotted one per y position, group after group,
    in the order E11.9, E78.0, I10, I25.1, I48, J45.9. Rows whose label starts
    with none of these codes are not plotted. Each interval end is also
    annotated with its numeric value.

    Parameters:
    - ax: Matplotlib axes to draw on. All labels and ticks are set on this axes
      (not on pyplot's current axes).
    - plot_data: DataFrame indexed by pathway label with the log-scale columns
      'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    # Marker colour per starting condition, listed in plotting order (top to bottom)
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate
        'I10': '#7D3C98',  # Amethyst
        'I25.1': '#FF69B4',  # Hot Pink
        'I48': '#4682B4',  # Steel Blue
        'J45.9': '#FF8C00',  # Orange
    }
    ci_label_offset = 0.05  # Gap between an interval end and its numeric label

    y_positions = []  # Every y position that received a marker
    current_y = 0
    for group, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks = range(current_y, current_y + len(data))
        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):
            # Exponentiate the log-scale estimate and bounds to get HR and its interval
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # errorbar expects distances from the point, not absolute bounds
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.0,
                color=color
            )

            # Numeric labels just outside both interval ends
            ax.text(lower_hr - ci_label_offset, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + ci_label_offset, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        # Groups are stacked without a gap, so the rows stay on consecutive
        # integers (add_annotations_box places its text on 0..len-1).
        current_y += len(data)

    # Fit the y-range to the plotted rows with half a row of padding;
    # nothing to fit when no row matched any group.
    if y_positions:
        ax.set_ylim(min(y_positions) - 0.5, max(y_positions) + 0.5)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR = 1

    # Fixed HR ticks
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Labels and title are applied to the axes that was passed in
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.tick_params(axis='x', labelsize=10)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s HF", fontsize=13)

    

def _format_p_value(p_val):
    """Formats a p-value with two decimals, adding precision when that would read 0.00."""
    text = f"{p_val:.2f}"
    if p_val > 0 and text == "0.00":
        # Small p-values (e.g. 0.001) must not be displayed as zero
        text = f"{p_val:.3f}" if p_val >= 0.0005 else "<0.001"
    return text


def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a framed box of per-row annotations (p-value and hazard ratio) next to a plot.

    One line "P:<p>, HR:<exp(coef)>" is written per row of `plot_data`, at
    y = 0, 1, 2, ... in row order, and the box copies the y-limits of `ax` so
    that line k sits level with whatever `ax` shows at y = k.

    Parameters:
    - ax: The axis where the hazard ratio bars are plotted; the box is anchored
      to it and shares its y-limits.
    - plot_data: DataFrame with the columns 'p' (p-value) and 'coef'
      (log-scale coefficient, exponentiated for display).
    - box_title: Title of the annotation box.
    - x_offset: Left edge of the box, in axes-fraction units of `ax`
      (1.0 is the right edge of `ax`).
    """
    # Create a new inset axis for annotations, anchored relative to `ax`
    inset_box = inset_axes(
        ax,
        width="145%",  # Relative to the 0.2-wide anchor box below
        height="100%",  # Same height as `ax`
        bbox_to_anchor=(x_offset, 0, 0.2, 1),
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # The box has no axis decoration of its own: hide spines and ticks
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # One annotation per row, at consecutive integer y positions
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        annotation = f"P:{_format_p_value(p_val)}, HR:{np.exp(coef):.2f}"  # HR is exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Share the y-range (including any inversion) with the hazard ratio plot
    inset_box.set_ylim(ax.get_ylim())

    # Draw the frame around the entire annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Full extent of the box in its own axes coordinates
        transform=inset_box.transAxes,
        edgecolor="black",
        facecolor="none",   # Transparent inside
        linewidth=1.5,
        zorder=10           # Keep the frame on top of the text
    )
    inset_box.add_patch(rect)



 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws the "Mortality Risk with PH Pathways" forest plot on `ax`.

    Rows are grouped by the first condition in their index label
    ("Pathway_<code> -> ...") and plotted on consecutive y positions, group
    after group, in the order E11.9, E78.0, I10, I25.1, I48, J45.9. Rows whose
    label starts with none of these codes are not plotted.

    Parameters:
    - ax: Matplotlib axes to draw on.
    - plot_data_ph: DataFrame indexed by pathway label with the log-scale columns
      'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    # (starting condition, marker colour) in the order the groups are stacked
    palette = (
        ('E11.9', '#228B22'),  # Forest Green
        ('E78.0', '#D2691E'),  # Chocolate
        ('I10', '#7D3C98'),  # Amethyst
        ('I25.1', '#FF69B4'),  # Hot Pink
        ('I48', '#4682B4'),  # Steel Blue
        ('J45.9', '#FF8C00'),  # Orange
    )

    # Select every group before drawing anything
    grouped = [
        (colour, plot_data_ph.loc[plot_data_ph.index.str.startswith("Pathway_" + code)])
        for code, colour in palette
    ]

    plotted_rows = []  # y positions that received a marker
    next_row = 0
    for colour, members in grouped:
        estimates = zip(members['coef'], members['coef lower 95%'], members['coef upper 95%'])
        for y, (coef, lower, upper) in enumerate(estimates, start=next_row):
            # Exponentiate the log-scale values to get HR and its interval
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # errorbar takes distances from the point rather than absolute bounds
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.0,
                color=colour,
            )

        plotted_rows.extend(range(next_row, next_row + len(members)))
        # No gap is left between groups: rows stay on consecutive integers
        next_row += len(members)

    # Fit the y-range to the plotted rows with half a row of padding
    ax.set_ylim(min(plotted_rows) - 0.5, max(plotted_rows) + 0.5)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR = 1

    # Fixed HR ticks with one-decimal labels
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels([format(tick, ".1f") for tick in hr_ticks], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # Rows are identified elsewhere in the figure, so no y-ticks here
    ax.set_yticks([])




##############################################################################################
    
  
    
def create_hazard_ratio_plot_heart_failure(ax, plot_data_heart_failure):
    """
    Draws the "Mortality Risk with HF Pathways" forest plot on `ax`.

    Rows are grouped by the first condition in their index label
    ("Pathway_<code> -> ...") and plotted on consecutive y positions, group
    after group, in the order E11.9, E78.0, I10, I25.1, I48, J45.9. Rows whose
    label starts with none of these codes are not plotted.

    Parameters:
    - ax: Matplotlib axes to draw on.
    - plot_data_heart_failure: DataFrame indexed by pathway label with the
      log-scale columns 'coef', 'coef lower 95%' and 'coef upper 95%'.
    """
    # Starting conditions in stacking order, and the marker colour of each
    group_order = ('E11.9', 'E78.0', 'I10', 'I25.1', 'I48', 'J45.9')
    marker_colours = dict(zip(
        group_order,
        ('#228B22', '#D2691E', '#7D3C98', '#FF69B4', '#4682B4', '#FF8C00'),
    ))

    # Select every group before drawing anything
    labels = plot_data_heart_failure.index
    members_by_group = {
        code: plot_data_heart_failure.loc[labels.str.startswith("Pathway_" + code)]
        for code in group_order
    }

    used_rows = []  # y positions that received a marker
    row_cursor = 0
    for code, members in members_by_group.items():
        group_rows = range(row_cursor, row_cursor + len(members))
        estimates = zip(
            group_rows,
            members['coef'],
            members['coef lower 95%'],
            members['coef upper 95%'],
        )
        for y, coef, lower, upper in estimates:
            # Exponentiate the log-scale values to get HR and its interval
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # errorbar takes distances from the point rather than absolute bounds
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.0,
                color=marker_colours[code],
            )

        used_rows.extend(group_rows)
        # No gap is left between groups: rows stay on consecutive integers
        row_cursor = row_cursor + len(members)

    # Fit the y-range to the plotted rows with half a row of padding
    lowest_row, highest_row = min(used_rows), max(used_rows)
    ax.set_ylim(lowest_row - 0.5, highest_row + 0.5)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR = 1

    # Fixed HR ticks with one-decimal labels
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels(["{:.1f}".format(tick) for tick in hr_ticks], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with HF Pathways", fontsize=13)

    # Rows are identified elsewhere in the figure, so no y-ticks here
    ax.set_yticks([])

    
    
########################################################################################################################    

    





# Disease sequences shown in the diagram, one per line, grouped by starting
# condition. Every sequence ends in the shared "PH/HF" outcome node, which is
# appended when the list is built.
_PATHWAY_SEQUENCES = """
E11.9 -> E78.0 -> I10 -> I25.1
E11.9 -> E78.0 -> I10 -> I48
E11.9 -> I10
E11.9 -> I10 -> E78.0
E11.9 -> I10 -> I25.1
E11.9 -> I10 -> I25.1 -> E78.0
E11.9 -> I10 -> I48
E11.9 -> I10 -> J45.9
E11.9 -> I25.1

E78.0 -> I10
E78.0 -> I10 -> E11.9
E78.0 -> I10 -> I25.1
E78.0 -> I10 -> I25.1 -> I48
E78.0 -> I10 -> I48
E78.0 -> I10 -> J45.9
E78.0 -> I25.1

I10 -> E11.9
I10 -> E11.9 -> E78.0
I10 -> E11.9 -> E78.0 -> I25.1
I10 -> E78.0
I10 -> E78.0 -> I25.1
I10 -> E78.0 -> I48
I10 -> I25.1
I10 -> I25.1 -> E78.0
I10 -> I25.1 -> E78.0 -> I48
I10 -> I25.1 -> I48
I10 -> I34.0
I10 -> I48
I10 -> I48 -> E11.9
I10 -> I48 -> E78.0
I10 -> I48 -> I34.0
I10 -> I48 -> J45.9
I10 -> J45.9
I10 -> J45.9 -> E11.9
I10 -> J45.9 -> E78.0

I25.1 -> E78.0 -> I10
I25.1 -> I10
I25.1 -> I48

I48 -> E78.0 -> I10
I48 -> I10
I48 -> J45.9

J45.9 -> E11.9 -> I10
J45.9 -> E78.0 -> I10
J45.9 -> I10
"""

# Blank separator lines are skipped; list order follows the text above
pathways = [
    f"{sequence.strip()} -> PH/HF"
    for sequence in _PATHWAY_SEQUENCES.splitlines()
    if sequence.strip()
]


# Fill colour of each node in the pathway diagram, keyed by the node's label
# (a condition code or the "PH/HF" outcome). Labels missing here are drawn white.
_NODE_FILLS = (
    ('PH/HF', '#b3b3b3'),
    ('I10', '#b3a3cc'),
    ('I48', '#add8e6'),
    ('E78.0', '#ddc4a1'),
    ('I25.1', '#f4b0c8'),
    ('E11.9', '#c4e3b3'),
    ('J45.9', '#F4A460'),
    ('I34.0', '#FFECB3'),
)
color_dict = dict(_NODE_FILLS)




# Main figure and axes for the pathway diagram
fig, ax = plt.subplots(figsize=(17, 13))
gap = -0.38  # Added to the unit column step; negative, so columns sit closer together
row_gap = 1  # Vertical gap between rows (not used by the drawing loop below)

# Draw one row of labelled boxes per pathway; the first pathway is on the top row.
# Connector arrows between consecutive boxes are currently not drawn.
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    y_pos = len(pathways) - row + 1  # Same for every node of this row
    for col, condition in enumerate(conditions, start=1):
        x_pos = col + (col - 1) * gap

        # Box for this node; labels without an entry in color_dict are filled white
        node_fill = color_dict.get(condition, 'white')
        node_box = plt.Rectangle(
            (x_pos - 0.5, y_pos - 0.5),
            0.6,
            1.2,
            facecolor=node_fill,
            edgecolor='black',
        )
        ax.add_patch(node_box)

        # Node label, nudged from (x_pos, y_pos) to sit inside the box
        ax.text(x_pos - 0.17, y_pos + 0.07, condition, ha='center', va='center', fontsize=9.60)


# Anchor box for the hazard-ratio panels, in main-axes data units.
# NOTE(review): `conditions` is whatever the loop left behind, i.e. the nodes of
# the LAST pathway, so the horizontal placement depends on that pathway's length -
# confirm this is intended rather than the longest pathway.
box_start_x = len(conditions) + 2.2
box_end_x = len(conditions) + 7.0
box_start_y = 0.45
box_end_y = len(pathways) + 0.5

########################################################################################################################################

# Insert the hazard ratio plot into the box area
#inset = inset_axes(
#    ax,
#    width="60%",  # Adjust width of the inset
#    height="100.1%",  # Adjust height of the inset
#    bbox_to_anchor=(box_start_x + 5.0, box_start_y + 0.1, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
#    bbox_transform=ax.transData,
#    loc='center'
#)


# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
#plot_data = pd.DataFrame({
#    'coef': [0.58, 0.56, 0.10, 0.55, 0.54, 1.19, 0.82, -0.05, 0.20, 0.16, 
#             1.13, 0.33, 0.56, 0.71, 1.07, -0.41, -0.29, 0.52, 1.53, 0.70, 
#             0.55, 0.83, 0.15, 0.62, 1.22, 0.63, -0.23, 0.66, 0.56, 0.71, 
#             1.24, 0.60, 0.38, 0.85, 0.24, 0.73, 0.66, 1.10, 0.98, 0.91, 
#             -0.47, 0.15, 0.54, 0.63],
#    'coef lower 95%': [0.17, 0.05, -0.23, 0.16, 0.15, 0.74, 0.41, -0.63, -0.47, -0.17, 
#                       0.59, -0.04, 0.05, 0.32, 0.61, -0.97, -0.80, 0.08, 1.05, 0.36, 
#                       0.14, 0.43, -0.24, 0.12, 0.71, 0.16, -0.70, 0.35, 0.10, 0.34, 
#                       0.79, 0.17, 0.04, 0.34, -0.30, 0.30, 0.22, 0.61, 0.41, 0.57, 
#                       -1.15, -0.42, -0.03, 0.25],
#    'coef upper 95%': [0.98, 1.07, 0.43, 0.94, 0.92, 1.65, 1.23, 0.52, 0.88, 0.49, 
#                       1.66, 0.69, 1.08, 1.11, 1.54, 0.14, 0.22, 0.95, 2.01, 1.04, 
#                       0.96, 1.23, 0.53, 1.13, 1.73, 1.11, 0.24, 0.96, 1.02, 1.09, 
#                       1.69, 1.03, 0.72, 1.35, 0.77, 1.16, 1.09, 1.60, 1.56, 1.25, 
#                       0.20, 0.72, 1.11, 1.01],
#    'p': [0.01, 0.03, 0.55, 0.01, 0.01, 0.001, 0.001, 0.86, 0.55, 0.34,
#          0.001, 0.08, 0.03, 0.001, 0.001, 0.15, 0.27, 0.02, 0.001, 0.001,
#          0.01, 0.001, 0.45, 0.02, 0.001, 0.01, 0.33, 0.001, 0.02, 0.001,
#          0.001, 0.001, 0.03, 0.001, 0.39, 0.001, 0.001, 0.001, 0.001,0.001, 0.17,
#          0.61, 0.06, 0.001]
#}, index=[        
#    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
#    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
#    "Pathway_E11.9 -> I10",
#    "Pathway_E11.9 -> I10 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I48",
#    "Pathway_E11.9 -> I10 -> J45.9",
#    "Pathway_E11.9 -> I25.1",
#    
#    "Pathway_E78.0 -> I10",
#    "Pathway_E78.0 -> I10 -> E11.9",
#    "Pathway_E78.0 -> I10 -> I25.1",
#    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
#    "Pathway_E78.0 -> I10 -> I48",
#    "Pathway_E78.0 -> I10 -> J45.9",
#    "Pathway_E78.0 -> I25.1",
#    
#    "Pathway_I10 -> E11.9",
#    "Pathway_I10 -> E11.9 -> E78.0",
#    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
#    "Pathway_I10 -> E78.0",
#    "Pathway_I10 -> E78.0 -> I25.1",
#    "Pathway_I10 -> E78.0 -> I48",
#    "Pathway_I10 -> I25.1",
#    "Pathway_I10 -> I25.1 -> E78.0",
#    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
#    "Pathway_I10 -> I25.1 -> I48",
#    "Pathway_I10 -> I34.0",
#    "Pathway_I10 -> I48",
#    "Pathway_I10 -> I48 -> E11.9",
#    "Pathway_I10 -> I48 -> E78.0",
#    "Pathway_I10 -> I48 -> I34.0",
#    "Pathway_I10 -> I48 -> J45.9",
#    "Pathway_I10 -> J45.9",
#    "Pathway_I10 -> J45.9 -> E11.9",
#    "Pathway_I10 -> J45.9 -> E78.0",
#    
#    "Pathway_I25.1 -> E78.0 -> I10",
#    "Pathway_I25.1 -> I10",
#    "Pathway_I25.1 -> I48",
#    
#    "Pathway_I48 -> E78.0 -> I10",
#    "Pathway_I48 -> I10",
#    "Pathway_I48 -> J45.9",
#    
#    "Pathway_J45.9 -> E11.9 -> I10",
#    "Pathway_J45.9 -> E78.0 -> I10",
#    "Pathway_J45.9 -> I10"
#])#

# Flip the inset box upside down by inverting the y-axis
#inset.invert_xaxis()







# Add the annotations after creating each plot
#create_hazard_ratio_plot(inset, plot_data)
# Add separate boxes for p-values and N
#add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)







###################################################################################

# Cox model summary for the PH cohort, one row per comorbidity pathway.
# Row labels, in the order used by every column list below.
_ph_pathway_labels = [
    # Pathways starting with E11.9
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    # Pathways starting with E78.0
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    # Pathways starting with I10
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    # Pathways starting with I25.1
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    # Pathways starting with I48
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    # Pathways starting with J45.9
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10",
]

# Hand-entered values; each list holds one number per label above.
_ph_cox_columns = {
    # Model coefficient per pathway (presumably the log hazard ratio).
    'coef': [
        0.27, -0.00, 0.16, 0.68, 0.00, 0.43, -0.11, 0.13, -0.72, 0.04,
        -0.02, -0.33, 0.38, 0.00, 0.20, -0.41, 0.40, 0.72, -0.05, -0.14,
        0.34, -0.56, -0.05, 0.31, 0.26, 0.30, -1.18, -0.24, 0.16, -0.69,
        -0.22, 0.54, -0.13, 0.51, 0.20, 0.08, 0.39, 0.07, 0.46, 0.04,
        -0.93, 0.29, 0.53, 0.20,
    ],
    # Lower bound of the 95% interval for the coefficient.
    'coef lower 95%': [
        -0.16, -0.55, -0.13, 0.34, -0.39, -0.12, -0.54, -0.62, -1.75, -0.26,
        -0.73, -0.73, -0.18, -0.39, -0.29, -1.20, -0.20, 0.28, -0.67, -0.49,
        -0.07, -1.03, -0.45, -0.24, -0.29, -0.20, -1.90, -0.49, -0.37, -1.17,
        -0.74, 0.12, -0.47, 0.00, -0.53, -0.44, -0.05, -0.47, -0.18, -0.27,
        -1.93, -0.46, -0.04, -0.21,
    ],
    # Upper bound of the 95% interval for the coefficient.
    'coef upper 95%': [
        0.70, 0.54, 0.45, 1.02, 0.39, 0.98, 0.33, 0.88, 0.31, 0.35,
        0.69, 0.08, 0.93, 0.40, 0.70, 0.39, 1.00, 1.16, 0.57, 0.21,
        0.74, -0.08, 0.35, 0.86, 0.81, 0.80, -0.46, 0.02, 0.70, -0.22,
        0.29, 0.95, 0.22, 1.02, 0.93, 0.61, 0.83, 0.61, 1.10, 0.34,
        0.07, 1.04, 1.10, 0.61,
    ],
    # p-value per pathway.
    'p': [
        0.22, 0.99, 0.29, 0.001, 0.98, 0.12, 0.63, 0.74, 0.17, 0.78,
        0.96, 0.11, 0.18, 0.98, 0.42, 0.32, 0.19, 0.001, 0.87, 0.43,
        0.10, 0.02, 0.80, 0.27, 0.35, 0.23, 0.001, 0.07, 0.55, 0.001,
        0.40, 0.01, 0.47, 0.05, 0.59, 0.75, 0.08, 0.81, 0.16, 0.82,
        0.07, 0.45, 0.07, 0.34,
    ],
}

plot_data_ph = pd.DataFrame(data=_ph_cox_columns, index=_ph_pathway_labels)

# Scaled copy of the PH table (coefficients multiplied by 1.1). Nothing in
# this part of the cell passes it to a plotting call.
plot_data_2 = plot_data_ph.copy()
plot_data_2['coef'] = plot_data_2['coef'].mul(1.1)

# Horizontal extent of the PH inset, derived from the end of the earlier box.
box_start_x_ph = box_end_x + 1.0
box_end_x_ph = box_start_x_ph + 4.8

# Anchor box as (x0, y0, width, height) in the data coordinates of `ax`;
# the constant offsets are manual placement tweaks.
_ph_inset_anchor = (
    box_start_x_ph - 8.20,
    box_start_y + 0.1,
    box_end_x_ph - box_start_x_ph,
    box_end_y - box_start_y,
)
inset_ph = inset_axes(
    ax,
    width="55%",
    height="100.1%",
    loc='center',
    bbox_to_anchor=_ph_inset_anchor,
    bbox_transform=ax.transData,
)

# Draw the PH hazard-ratio plot inside the new inset.
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Annotation boxes for the PH values.
# NOTE(review): these are attached to `inset`, not `inset_ph`, and positioned
# through `x_offset` — confirm that is intended.
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)



####################################################################################

# Cox model summary for the heart-failure cohort, one row per comorbidity
# pathway. Row labels, in the order used by every column list below.
_hf_pathway_labels = [
    # Pathways starting with E11.9
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    # Pathways starting with E78.0
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    # Pathways starting with I10
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    # Pathways starting with I25.1
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    # Pathways starting with I48
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    # Pathways starting with J45.9
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10",
]

# Hand-entered values; each list holds one number per label above.
_hf_cox_columns = {
    # Model coefficient per pathway (presumably the log hazard ratio).
    'coef': [
        -0.08, 0.08, 0.29, 0.09, -0.10, -0.30, -0.07, 0.55, -0.51, -0.39,
        0.59, -0.10, -0.62, 0.47, -0.09, -0.74, -0.11, -0.28, 0.08, 0.25,
        -0.38, 0.41, -0.11, -0.47, 0.15, -0.30, 0.42, -0.27, 0.90, 0.26,
        -0.25, -0.16, -0.22, -0.39, 0.39, -0.55, 0.15, -0.08, 0.36, -0.60,
        -0.22, 0.27, -0.16, -0.30,
    ],
    # Lower bound of the 95% interval for the coefficient.
    'coef lower 95%': [
        -0.35, -0.30, 0.07, -0.22, -0.43, -0.70, -0.43, -0.02, -1.15, -0.64,
        0.12, -0.34, -1.01, 0.17, -0.81, -1.16, -0.41, -0.60, -0.45, -0.03,
        -0.78, 0.04, -0.40, -0.81, -0.33, -0.83, -0.10, -0.51, 0.42, -0.24,
        -0.86, -0.69, -0.55, -1.07, -0.01, -0.92, -0.23, -0.64, -0.19, -0.99,
        -0.82, -0.28, -0.78, -0.78,
    ],
    # Upper bound of the 95% interval for the coefficient.
    'coef upper 95%': [
        0.19, 0.45, 0.51, 0.41, 0.22, 0.10, 0.29, 1.12, 0.13, -0.15,
        1.06, 0.14, -0.23, 0.78, 0.63, -0.31, 0.20, 0.04, 0.62, 0.53,
        0.02, 0.78, 0.18, -0.13, 0.63, 0.22, 0.94, -0.03, 1.38, 0.76,
        0.36, 0.38, 0.11, 0.30, 0.80, -0.18, 0.54, 0.47, 0.92, -0.21,
        0.39, 0.83, 0.45, 0.19,
    ],
    # p-value per pathway (entered manually).
    'p': [
        0.56, 0.69, 0.01, 0.56, 0.54, 0.14, 0.69, 0.06, 0.12, 0.005,
        0.01, 0.4, 0.005, 0.005, 0.8, 0.005, 0.5, 0.09, 0.76, 0.08,
        0.06, 0.03, 0.46, 0.01, 0.54, 0.26, 0.12, 0.03, 0.005, 0.31,
        0.43, 0.57, 0.2, 0.27, 0.06, 0.005, 0.44, 0.77, 0.2, 0.005,
        0.48, 0.33, 0.61, 0.23,
    ],
}

plot_data_heart_failure = pd.DataFrame(data=_hf_cox_columns, index=_hf_pathway_labels)




# Scaled copy of the heart-failure table (coefficients multiplied by 1.1).
# Nothing in this part of the cell passes it to a plotting call.
plot_data_3 = plot_data_heart_failure.copy()
plot_data_3['coef'] = plot_data_3['coef'].mul(1.1)

# Horizontal extent of the heart-failure inset, derived from the end of the
# earlier box.
box_start_x_heart_failure = box_end_x + 1.0
box_end_x_heart_failure = box_start_x_heart_failure + 4.8

# Anchor box as (x0, y0, width, height) in the data coordinates of `ax`;
# the constant offsets are manual placement tweaks.
_hf_inset_anchor = (
    box_start_x_heart_failure - 5.45,
    box_start_y + 0.1,
    box_end_x_heart_failure - box_start_x_heart_failure,
    box_end_y - box_start_y,
)
inset_heart_failure = inset_axes(
    ax,
    width="55%",
    height="100.1%",
    loc='center',
    bbox_to_anchor=_hf_inset_anchor,
    bbox_transform=ax.transData,
)

# Draw the heart-failure hazard-ratio plot inside the new inset.
create_hazard_ratio_plot_heart_failure(inset_heart_failure, plot_data_heart_failure)

# Annotation boxes for the heart-failure values.
# NOTE(review): these are attached to `inset`, not `inset_heart_failure`, and
# positioned through `x_offset` — confirm that is intended.
add_annotations_box(inset, plot_data_heart_failure, box_title="", x_offset=-0.33)

#######################################################################################

# Participant counts for the PH and HF cohorts, one entry per y-tick.
# The lists mix plain ints with zero-padded strings; the values are kept as
# entered and normalised only when the tick labels are built below.
ph_participants = [10, '05', 50, 16, 16, '07', 13, '05', '05', 49, 
                   '06', 20, '05', 15, '09', '08', 10, 11, '06', 42, 
                   13, 14, 25, '07', '05', '08', 13, 78, '09', 18, 
                   10, 11, 42, '07', '06', 11, 16, 11, '05', 44, 
                   '05', '05', '05', 25]

hf_participants = [18, '09', 70, 19, 18, 6, 14, '06', '07', 61, 
                   '07', 43, 12, 22, '05', 17, 33, 20, '06', 42, 
                   14, 10, 41, 21, '05', '07', '08', 67, '07', '07', 
                   '05', 17, 30, '06', 11, 17, 18, '06', '06', 27, 
                   '05', '06', '05', 15]

# zip() would silently truncate to the shorter list, leaving fewer labels
# than tick positions; fail loudly instead.
if len(ph_participants) != len(hf_participants):
    raise ValueError(
        "ph_participants and hf_participants must have the same length "
        f"({len(ph_participants)} != {len(hf_participants)})"
    )

# One tick per entry, numbered from 1.
y_ticks = list(range(1, len(ph_participants) + 1))

# Label format is "PH, HF". Every count is rendered with at least two digits,
# so an entry stored as the int 6 reads "06" like its string neighbours.
y_tick_labels = [
    f"{int(ph):02d}, {int(hf):02d}"
    for ph, hf in zip(ph_participants, hf_participants)
]


# Axis limits. The x-range scales with the longest pathway (number of
# " -> "-separated nodes) and the node gap; the constant 6.5 adds extra room
# on the right (presumably for the inset plots). The y-range leaves one row
# per pathway.
node_columns = max(len(label.split(" -> ")) for label in pathways)
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)
ax.set_ylim(0.5, len(pathways) + 0.7)

# To hide the y-ticks instead of labelling them:
#ax.set_yticks([])
#ax.set_yticklabels([])

# Label each y-tick with the "PH, HF" participant counts.
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_tick_labels, fontsize=10)

# Heading for the participant-count column, placed just above the axes at
# its left edge (coordinates are axes fractions).
ax.annotate(
    "No. of Participants\n (PH , HF)",
    xy=(-0.02, 1.00),
    xycoords='axes fraction',
    ha='center',
    va='bottom',
    rotation=0,
    fontsize=12,
)

# Hand-placed x-ticks numbering the steps of the disease sequence.
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Legend entries: condition name -> patch colour, in display order.
legend_colours = {
    "PH and HF as Index Conditions": '#b3b3b3',
    "Type 2 Diabetes Mellitus": '#88cc88',
    "Hypertension": '#a993cc',
    "Hyperlipidemia": '#d2b48c',
    "Chronic Ischemic Heart Disease": '#FFC0CB',
    "Atrial Fibrillation and Flutter": '#add8e6',
    "Mitral Valve Disorder": '#FFECB3',
    "Asthma": '#ffa07a',
}
handles = [
    mpatches.Patch(color=patch_colour, label=condition_name)
    for condition_name, patch_colour in legend_colours.items()
]

# Frameless four-column legend below the axes.
ax.legend(
    handles=handles,
    loc='upper center',
    bbox_to_anchor=(0.45, -0.07),
    ncol=4,
    fontsize=14,
    frameon=False,
)

# Axis labels and title.
ax.set_xlabel("Disease Sequence Progression", fontsize=15, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/HF", fontsize=15, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=15, pad=20, x=0.63)

# Move the x-label to a hand-picked position (axes-fraction coordinates).
ax.xaxis.set_label_coords(0.2, -0.03)

# Hide the top and right borders.
for spine in ('top', 'right'):
    ax.spines[spine].set_visible(False)

# Fit the layout, write the figure to disk at 1200 dpi, then show it.
plt.tight_layout()
plt.savefig("pathways_plot.png", dpi=1200)
plt.show()

In [None]:
# Tabulate the hazard ratios for the PH and HF pathways.
import pandas as pd

# Pathway labels shared by both tables, in row order.
_hr_table_pathways = [
    # Pathways starting with E11.9
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10 -> I25.1 -> E78.0",
    "Pathway_E11.9 -> I10 -> I48",
    "Pathway_E11.9 -> I10 -> J45.9",
    "Pathway_E11.9 -> I25.1",
    # Pathways starting with E78.0
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I25.1 -> I48",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I10 -> J45.9",
    "Pathway_E78.0 -> I25.1",
    # Pathways starting with I10
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E11.9 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I25.1 -> E78.0",
    "Pathway_I10 -> I25.1 -> E78.0 -> I48",
    "Pathway_I10 -> I25.1 -> I48",
    "Pathway_I10 -> I34.0",
    "Pathway_I10 -> I48",
    "Pathway_I10 -> I48 -> E11.9",
    "Pathway_I10 -> I48 -> E78.0",
    "Pathway_I10 -> I48 -> I34.0",
    "Pathway_I10 -> I48 -> J45.9",
    "Pathway_I10 -> J45.9",
    "Pathway_I10 -> J45.9 -> E11.9",
    "Pathway_I10 -> J45.9 -> E78.0",
    # Pathways starting with I25.1
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I25.1 -> I48",
    # Pathways starting with I48
    "Pathway_I48 -> E78.0 -> I10",
    "Pathway_I48 -> I10",
    "Pathway_I48 -> J45.9",
    # Pathways starting with J45.9
    "Pathway_J45.9 -> E11.9 -> I10",
    "Pathway_J45.9 -> E78.0 -> I10",
    "Pathway_J45.9 -> I10",
]

# PH pathways: hazard ratio, confidence bounds and p-value per pathway.
# Each dict gets its own copy of the label list.
ph_data = {
    'Pathway': list(_hr_table_pathways),
    'HR': [
        1.31, 1.00, 1.17, 1.97, 1.00, 1.54, 0.90, 1.14, 0.49, 1.04,
        0.98, 0.72, 1.46, 1.00, 1.22, 0.66, 1.49, 2.05, 0.95, 0.87,
        1.40, 0.57, 0.95, 1.36, 1.30, 1.35, 0.31, 0.78, 1.17, 0.50,
        0.80, 1.72, 0.88, 1.67, 1.22, 1.08, 1.48, 1.07, 1.58, 1.04,
        0.39, 1.34, 1.70, 1.22,
    ],
    'Lower CI': [
        0.85, 0.58, 0.88, 1.40, 0.68, 0.89, 0.58, 0.54, 0.17, 0.77,
        0.48, 0.48, 0.84, 0.68, 0.75, 0.30, 0.82, 1.32, 0.51, 0.61,
        0.93, 0.36, 0.64, 0.79, 0.75, 0.82, 0.15, 0.61, 0.69, 0.31,
        0.48, 1.13, 0.63, 1.00, 0.59, 0.64, 0.95, 0.63, 0.84, 0.76,
        0.14, 0.63, 0.96, 0.81,
    ],
    'Upper CI': [
        2.01, 1.72, 1.57, 2.77, 1.48, 2.66, 1.39, 2.41, 1.36, 1.42,
        1.99, 1.08, 2.53, 1.49, 2.01, 1.48, 2.72, 3.19, 1.77, 1.23,
        2.10, 0.92, 1.42, 2.37, 2.25, 2.23, 0.63, 1.02, 2.01, 0.80,
        1.34, 2.59, 1.25, 2.77, 2.53, 1.84, 2.30, 1.84, 3.00, 1.41,
        1.07, 2.83, 2.99, 1.84,
    ],
    'P-value': [
        0.22, 0.99, 0.29, 0.001, 0.98, 0.12, 0.63, 0.74, 0.17, 0.78,
        0.96, 0.11, 0.18, 0.98, 0.42, 0.32, 0.19, 0.001, 0.87, 0.43,
        0.10, 0.02, 0.80, 0.27, 0.35, 0.23, 0.001, 0.07, 0.55, 0.001,
        0.40, 0.01, 0.47, 0.05, 0.59, 0.75, 0.08, 0.81, 0.16, 0.82,
        0.07, 0.45, 0.07, 0.34,
    ],
}

# HF pathways: same layout as the PH table.
hf_data = {
    'Pathway': list(_hr_table_pathways),
    'HR': [
        0.92, 1.08, 1.34, 1.09, 0.90, 0.74, 0.93, 1.73, 0.60, 0.68,
        1.80, 0.90, 0.54, 1.60, 0.91, 0.48, 0.90, 0.76, 1.08, 1.28,
        0.68, 1.51, 0.90, 0.63, 1.16, 0.74, 1.52, 0.76, 2.46, 1.30,
        0.78, 0.85, 0.80, 0.68, 1.48, 0.58, 1.16, 0.92, 1.43, 0.55,
        0.80, 1.31, 0.85, 0.74,
    ],
    'Lower CI': [
        0.70, 0.74, 1.07, 0.80, 0.65, 0.50, 0.65, 0.98, 0.32, 0.53,
        1.13, 0.71, 0.36, 1.19, 0.44, 0.31, 0.66, 0.55, 0.64, 0.97,
        0.46, 1.04, 0.67, 0.44, 0.72, 0.44, 0.90, 0.60, 1.52, 0.79,
        0.42, 0.50, 0.58, 0.34, 0.99, 0.40, 0.79, 0.53, 0.83, 0.37,
        0.44, 0.76, 0.46, 0.46,
    ],
    'Upper CI': [
        1.21, 1.57, 1.67, 1.51, 1.25, 1.11, 1.34, 3.06, 1.14, 0.86,
        2.89, 1.15, 0.79, 2.18, 1.88, 0.73, 1.22, 1.04, 1.86, 1.70,
        1.02, 2.18, 1.20, 0.88, 1.88, 1.25, 2.56, 0.97, 3.97, 2.14,
        1.43, 1.46, 1.12, 1.35, 2.23, 0.83, 1.72, 1.60, 2.51, 0.81,
        1.48, 2.29, 1.57, 1.21,
    ],
    'P-value': [
        0.56, 0.69, 0.01, 0.56, 0.54, 0.14, 0.69, 0.06, 0.12, 0.005,
        0.01, 0.4, 0.005, 0.005, 0.8, 0.005, 0.5, 0.09, 0.76, 0.08,
        0.06, 0.03, 0.46, 0.01, 0.54, 0.26, 0.12, 0.03, 0.005, 0.31,
        0.43, 0.57, 0.20, 0.27, 0.06, 0.005, 0.44, 0.77, 0.20, 0.005,
        0.48, 0.33, 0.61, 0.23,
    ],
}

# One DataFrame per cohort, columns in the order given above.
ph_df = pd.DataFrame(data=ph_data)
hf_df = pd.DataFrame(data=hf_data)

# Show both tables as cell output: a heading line, then the frame, for PH
# first and HF second. `display` is not imported in this cell; it is
# presumably the notebook-provided IPython helper.
print("PH Pathways Data:")
display(ph_df)
# Blank line between the two tables (the HF heading also starts with "\n").
print()
print("\nHF Pathways Data:")
display(hf_df)


In [None]:
# Check lengths of all lists in ph_data
#for key, value in ph_data.items():
#    print(f"PH {key} length: {len(value)}")
#
# Check lengths of all lists in hf_data
#for key, value in hf_data.items():
#    print(f"HF {key} length: {len(value)}")
#

<div style="background-color: #90EE90 ; padding: 10px;">
    <h2><center>Cox Proportional Hazards Analysis (PH v/s COPD) </center></h2>
</div>

### <center> <span style="background-color:#90EE90; padding:5px;">Cox Proportional Hazards Analysis (PH v/s death)</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# Load the three COPD comorbidity extracts (paths are relative to the
# working directory; adjust as necessary).
Pre_COPD_dataframe = pd.read_csv('Pre COPD COMMON with comorbidities.csv')
Post_COPD_dataframe = pd.read_csv('Post COPD COMMON with comorbidities.csv')
Common_COPD_dataframe = pd.read_csv('Combined COPD Common with comorbidities.csv')

# Stack the extracts in pre / common / post order under a fresh index and
# drop rows that are exact duplicates.
combined_dataframe_COPD = pd.concat(
    [Pre_COPD_dataframe, Common_COPD_dataframe, Post_COPD_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the individual ICD-10 codes listed here into the single label
# "COPD".
# NOTE(review): 'J45.9' is relabelled here, yet the COPD condition filter
# later in this cell still lists 'J45.9' — confirm that these rows are meant
# to count as COPD.
COPD_conditions = ['J43.9', 'J45.9', 'J44.1', 'J44.0', 'J44.9', 'J47', 'J43.2', 'J44.8', 'J45.0', 'J40', 'J43.0', 'J43.8', 'J42']
combined_dataframe_COPD['Combined ICD10 Codes'] = (
    combined_dataframe_COPD['Combined ICD10 Codes']
    .replace(to_replace=COPD_conditions, value='COPD')
)
#combined_dataframe_COPD.head(3)


# Load the three PH extracts (paths are relative to the working directory;
# adjust as necessary).
Pre_PH_dataframe = pd.read_csv('Pre-PH Common PH Icd10 Codes.csv')
Post_PH_dataframe = pd.read_csv('Post-PH Common PH Icd10 Codes.csv')
Common_PH_dataframe = pd.read_csv('Common-PH Common PH Icd10 Codes.csv')

# Stack the extracts in pre / common / post order under a fresh index and
# drop rows that are exact duplicates.
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the individual PH ICD-10 codes into the single label "PH".
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
combined_dataframe_PH['ICD10 Codes'] = (
    combined_dataframe_PH['ICD10 Codes']
    .replace(to_replace=ph_conditions, value='PH')
)
#combined_dataframe_PH.head(2)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: parse the per-row diagnosis dates so rows can be ordered in time.
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_PH['Diagnosis Date']
)
combined_dataframe_COPD['Combined ICD10 Diagnosis Date'] = pd.to_datetime(
    combined_dataframe_COPD['Combined ICD10 Diagnosis Date']
)

# Step 2: conditions of interest, as description -> code. Both cohorts share
# the same seven comorbidities and add their own index condition last.
_shared_comorbidities = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}
ph_conditions = {**_shared_comorbidities, "PH": "PH"}
COPD_conditions = {**_shared_comorbidities, "COPD": "COPD"}
ph_condition_codes = list(ph_conditions.values())
COPD_condition_codes = list(COPD_conditions.values())

# Steps 2-3: keep only rows whose code is of interest, then order each
# cohort by participant and diagnosis date.
filtered_ph_dataframe = (
    combined_dataframe_PH[combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)]
    .sort_values(by=['Participant ID', 'Diagnosis Date'])
)
filtered_COPD_dataframe = (
    combined_dataframe_COPD[combined_dataframe_COPD['Combined ICD10 Codes'].isin(COPD_condition_codes)]
    .sort_values(by=['Participant ID', 'Combined ICD10 Diagnosis Date'])
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Parse the date columns used for the comparison below; values that cannot
# be parsed become NaT.
filtered_ph_dataframe['Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')

filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'] = pd.to_datetime(filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce')
filtered_COPD_dataframe['COPD Diagnosis Date'] = pd.to_datetime(filtered_COPD_dataframe['COPD Diagnosis Date'], errors='coerce')

# Keep only conditions diagnosed strictly before the cohort's index
# diagnosis. Comparisons involving NaT are False, so rows with a missing
# date on either side are dropped as well.
# .copy() makes each result an independent frame: later steps in this cell
# add and overwrite columns on it, which on a boolean-mask slice triggers
# pandas' chained-assignment warning.
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Diagnosis Date'] < filtered_ph_dataframe['PH Diagnosis Date']
].copy()

filtered_COPD_dataframe = filtered_COPD_dataframe[
    filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'] < filtered_COPD_dataframe['COPD Diagnosis Date']
].copy()

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis)
def _build_traces(frame, code_column):
    """Return a one-column DataFrame ('Traces') of per-participant code lists.

    Rows of *frame* are grouped by 'Participant ID'; each group's
    *code_column* values are collected into a list in the frame's current
    row order. Participants with fewer than two rows are left out. The
    result has a fresh 0..n-1 index.
    """
    # Selecting the column before apply() keeps the grouping column out of
    # the applied function (whole-frame GroupBy.apply is deprecated in recent
    # pandas) and yields a plain Series of lists.
    sequences = frame.groupby('Participant ID')[code_column].apply(list)
    # Keep only participants with at least 2 conditions.
    sequences = sequences[sequences.map(len) >= 2]
    # Name the column explicitly instead of relying on how
    # pd.DataFrame(series, columns=[...]) treats an unnamed Series.
    return sequences.reset_index(drop=True).to_frame(name='Traces')


# For PH cohort
ph_traces = _build_traces(filtered_ph_dataframe, 'ICD10 Codes')

# For COPD cohort
COPD_traces = _build_traces(filtered_COPD_dataframe, 'Combined ICD10 Codes')

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

from collections import Counter

# Step 6: Find common pathways between the two cohorts.
# Count each distinct trace once per cohort (as a tuple, so it is hashable)
# instead of re-scanning every trace for every pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
COPD_trace_counts = Counter(map(tuple, COPD_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
COPD_trace_set = set(COPD_trace_counts)

# Pathways observed in both cohorts.
common_pathways = ph_trace_set.intersection(COPD_trace_set)

# Set iteration order can differ between interpreter runs; sorting makes the
# table below reproducible. Later steps in this cell only test membership of
# 'Pathway' values, so the row order does not affect them.
ordered_common_pathways = sorted(common_pathways)

# Participants per common pathway, aligned with ordered_common_pathways.
ph_participant_counts = [ph_trace_counts[trace] for trace in ordered_common_pathways]
COPD_participant_counts = [COPD_trace_counts[trace] for trace in ordered_common_pathways]

# One row per common pathway: readable "A -> B" string plus both counts.
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(trace) for trace in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'COPD Participants': COPD_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and COPD Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold
min_deaths = 1        # Minimum number of death events required (not used in this step)

# Keep pathways that reach the participant threshold in both cohorts.
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['COPD Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Only the pathway strings are needed from here on.
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'COPD Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter

# Step 1: Ensure all date columns are in datetime format (unparseable values
# become NaT).
filtered_ph_dataframe['PH Diagnosis Date'] = pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce')
filtered_ph_dataframe['Date of Death'] = pd.to_datetime(filtered_ph_dataframe['Date of Death'], errors='coerce')

# Step 2: Set a global Censor Date for all alive participants
global_censor_date = pd.to_datetime("2021-09-29")  # Adjust this date as per your dataset or study

# Step 3: Compute Death_Followup_Time, in days from the PH diagnosis to
#   - the date of death, for rows marked 'Dead';
#   - the global censor date, for rows marked 'Alive'.
# Rows with any other status, or with a missing date, get NaN. Computing the
# column in one expression keeps it numeric rather than an object column
# seeded with None.
_is_dead = filtered_ph_dataframe['Alive / Dead'] == 'Dead'
_is_alive = filtered_ph_dataframe['Alive / Dead'] == 'Alive'
_followup_end = filtered_ph_dataframe['Date of Death'].where(_is_dead, global_censor_date)
_followup_end = _followup_end.where(_is_dead | _is_alive)
filtered_ph_dataframe['Death_Followup_Time'] = (
    _followup_end - filtered_ph_dataframe['PH Diagnosis Date']
).dt.days

# Step 4: Filter for valid pathways.
# Every row of a participant receives that participant's full pathway string
# (codes joined in current row order).
filtered_ph_dataframe['Pathway'] = (
    filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].transform(' -> '.join)
)

#display(filtered_ph_dataframe.head(5))
# Keep only rows whose pathway passed the participant-count filter. .copy()
# detaches the result so later column assignments do not write into a slice.
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Pathway'].isin(filtered_pathways['Pathway'].tolist())
].copy()

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: For every pathway, record whether any of its rows has a
# non-missing "Date of Death" (boolean Series indexed by pathway).
_has_death_date = filtered_ph_dataframe['Date of Death'].notna()
pathways_with_death = _has_death_date.groupby(filtered_ph_dataframe['Pathway']).any()

# Steps 2-3: Broadcast that flag back onto every row as 1/0.
filtered_ph_dataframe['Dead Check'] = np.where(
    filtered_ph_dataframe['Pathway'].map(pathways_with_death), 1, 0
)

# Step 4: Display the updated DataFrame
#display(filtered_ph_dataframe[['Pathway', 'Date of Death', 'Dead Check']])

# Rows belonging to a pathway with at least one death, and the distinct
# pathways among them.
pathways_with_dead = filtered_ph_dataframe.loc[filtered_ph_dataframe['Dead Check'].eq(1)]
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Print one pathway per line.
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# The same pathways as a one-column DataFrame.
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})
#len(unique_pathways_df)
#display(unique_pathways_df)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################



# Step 5: Remove rows with missing or non-positive follow-up times
filtered_ph_dataframe = filtered_ph_dataframe.dropna(subset=['Death_Followup_Time'])
filtered_ph_dataframe = filtered_ph_dataframe[
    filtered_ph_dataframe['Death_Followup_Time'] > 0
].copy()  # explicit copy so the column writes below do not target a slice

# Step 6: Encode categorical variables
filtered_ph_dataframe['Sex'] = filtered_ph_dataframe['Sex'].map({'Male': 1, 'Female': 0})  # Encode Sex as numeric
filtered_ph_dataframe['Alive / Dead'] = filtered_ph_dataframe['Alive / Dead'].map({'Dead': 1, 'Alive': 0})  # Encode Alive/Dead as binary

# Step 7: Build the model table with ONE row per participant.
# The frame holds one row per diagnosis record, and every record of a participant
# carries the same pathway string. Fitting on it would count each participant once
# per diagnosis, inflating the sample size and narrowing the confidence intervals.
cox_data = (
    filtered_ph_dataframe
    .drop_duplicates(subset='Participant ID')
    [['Death_Followup_Time', 'Alive / Dead', 'Age', 'Sex', 'Pathway']]
)
# One-hot encode the pathways; the first category is dropped and acts as reference.
cox_data = pd.get_dummies(cox_data, columns=['Pathway'], drop_first=True)
# Drop rows with missing covariates (e.g. a Sex value outside the mapping above).
cox_data = cox_data.dropna()
# Make sure the duration column is numeric even if it arrived as object dtype.
cox_data['Death_Followup_Time'] = cox_data['Death_Followup_Time'].astype(float)

# Step 8: Fit the Cox Proportional Hazards Model
cox_model = CoxPHFitter(penalizer=0.1)  # Adding penalizer to handle high-dimensional data
cox_model.fit(cox_data, duration_col='Death_Followup_Time', event_col='Alive / Dead')
cox_model.print_summary()  # Print detailed summary of the model

# Step 9: Plot the coefficients on an explicitly created, tall Axes
fig, ax = plt.subplots(figsize=(8, 20))
cox_model.plot(ax=ax)
ax.set_title("Cox Proportional Hazards Analysis: Mortality Risk by PH Pathways", fontsize=16)
ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
ax.set_ylabel("")  # Remove y-label for a cleaner presentation
ax.tick_params(axis='both', labelsize=10)
fig.tight_layout()
plt.show()


### <center> <span style="background-color:#90EE90; padding:5px;">Cox Proportional Hazards Analysis: Mortality Risk by COPD Pathways</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# ---- COPD cohort: load the three exports (adjust paths as necessary) ----
Pre_COPD_dataframe = pd.read_csv('Pre COPD COMMON with comorbidities.csv')
Post_COPD_dataframe = pd.read_csv('Post COPD COMMON with comorbidities.csv')
Common_COPD_dataframe = pd.read_csv('Combined COPD Common with comorbidities.csv')

# Stack pre / common / post and drop exact duplicate rows
combined_dataframe_COPD = pd.concat(
    [Pre_COPD_dataframe, Common_COPD_dataframe, Post_COPD_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed ICD-10 codes into the single label "COPD".
# NOTE(review): J45.9 is folded into "COPD" here, yet the condition filter later
# in this cell also lists J45.9 as a separate comorbidity. For the COPD cohort
# that entry can no longer match - confirm this is intended.
COPD_conditions = ['J43.9', 'J45.9', 'J44.1', 'J44.0', 'J44.9', 'J47', 'J43.2', 'J44.8', 'J45.0', 'J40', 'J43.0', 'J43.8', 'J42']
copd_code_column = combined_dataframe_COPD['Combined ICD10 Codes']
combined_dataframe_COPD['Combined ICD10 Codes'] = copd_code_column.mask(
    copd_code_column.isin(COPD_conditions), 'COPD'
)

# ---- PH cohort: load the three exports (adjust paths as necessary) ----
Pre_PH_dataframe = pd.read_csv('Pre-PH Common PH Icd10 Codes.csv')
Post_PH_dataframe = pd.read_csv('Post-PH Common PH Icd10 Codes.csv')
Common_PH_dataframe = pd.read_csv('Common-PH Common PH Icd10 Codes.csv')

# Stack pre / common / post and drop exact duplicate rows
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed ICD-10 codes into the single label "PH"
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
ph_code_column = combined_dataframe_PH['ICD10 Codes']
combined_dataframe_PH['ICD10 Codes'] = ph_code_column.mask(
    ph_code_column.isin(ph_conditions), 'PH'
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Convert the per-record diagnosis dates to datetime for proper sequencing
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(combined_dataframe_PH['Diagnosis Date'])
combined_dataframe_COPD['Combined ICD10 Diagnosis Date'] = pd.to_datetime(combined_dataframe_COPD['Combined ICD10 Diagnosis Date'])

# Step 2: Define relevant conditions and filter the data.
# The comorbidities are identical for both cohorts, so they are declared once;
# each cohort then adds its own index-disease label.
shared_comorbidity_codes = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}

# For PH cohort
ph_conditions = {**shared_comorbidity_codes, "PH": "PH"}  # "PH" is specific to the PH cohort
ph_condition_codes = list(ph_conditions.values())
filtered_ph_dataframe = combined_dataframe_PH[combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)]

# For COPD cohort
COPD_conditions = {**shared_comorbidity_codes, "COPD": "COPD"}  # "COPD" is specific to the COPD cohort
COPD_condition_codes = list(COPD_conditions.values())
filtered_COPD_dataframe = combined_dataframe_COPD[combined_dataframe_COPD['Combined ICD10 Codes'].isin(COPD_condition_codes)]

# Step 3: Sort by participant, then diagnosis date, then code.
# The code column is a tie-break: without it, diagnoses recorded on the same day
# keep whatever order the concatenated input files happened to have, so the same
# set of same-day diagnoses could yield different pathway strings in the two cohorts.
filtered_ph_dataframe = filtered_ph_dataframe.sort_values(
    by=['Participant ID', 'Diagnosis Date', 'ICD10 Codes']
)
filtered_COPD_dataframe = filtered_COPD_dataframe.sort_values(
    by=['Participant ID', 'Combined ICD10 Diagnosis Date', 'Combined ICD10 Codes']
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# PH cohort: coerce both date columns to datetime (unparseable values become NaT)
filtered_ph_dataframe = filtered_ph_dataframe.assign(**{
    'Diagnosis Date': pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce'),
    'PH Diagnosis Date': pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce'),
})

# COPD cohort: coerce both date columns to datetime (unparseable values become NaT)
filtered_COPD_dataframe = filtered_COPD_dataframe.assign(**{
    'Combined ICD10 Diagnosis Date': pd.to_datetime(filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce'),
    'COPD Diagnosis Date': pd.to_datetime(filtered_COPD_dataframe['COPD Diagnosis Date'], errors='coerce'),
})

# PH cohort: keep only records dated strictly before the PH diagnosis.
# Comparisons involving NaT are False, so rows with a missing date are dropped.
ph_precedes_index = filtered_ph_dataframe['Diagnosis Date'].lt(
    filtered_ph_dataframe['PH Diagnosis Date']
)
filtered_ph_dataframe = filtered_ph_dataframe[ph_precedes_index]

# COPD cohort: keep only records dated strictly before the COPD diagnosis.
copd_precedes_index = filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'].lt(
    filtered_COPD_dataframe['COPD Diagnosis Date']
)
filtered_COPD_dataframe = filtered_COPD_dataframe[copd_precedes_index]

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis).
# The code column is selected before aggregating, so only that column is passed
# to the aggregation. This replaces a whole-frame groupby.apply whose lambda
# returned a list or None.

# For PH cohort: one list of codes per participant, in current row order
ph_code_lists = filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].agg(list)
ph_traces = (
    ph_code_lists[ph_code_lists.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

# For COPD cohort: one list of codes per participant, in current row order
COPD_code_lists = filtered_COPD_dataframe.groupby('Participant ID')['Combined ICD10 Codes'].agg(list)
COPD_traces = (
    COPD_code_lists[COPD_code_lists.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 6: Find common pathways between the two cohorts
from collections import Counter

# Count every trace once per cohort (tuples are hashable, lists are not). The
# counters double as the per-pathway participant counts, so the trace tables are
# scanned once instead of once per common pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
COPD_trace_counts = Counter(map(tuple, COPD_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
COPD_trace_set = set(COPD_trace_counts)

# Find common pathways
common_pathways = ph_trace_set.intersection(COPD_trace_set)

# Sort so that the table rows come out in the same order on every run
# (iteration order of a set of string tuples is not stable between sessions).
ordered_common_pathways = sorted(common_pathways)

# Participant counts per pathway, aligned with ordered_common_pathways
ph_participant_counts = [ph_trace_counts[pathway] for pathway in ordered_common_pathways]
COPD_participant_counts = [COPD_trace_counts[pathway] for pathway in ordered_common_pathways]

# Common pathways in a readable string format plus the per-cohort counts
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(pathway) for pathway in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'COPD Participants': COPD_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and COPD Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold
min_deaths = 1        # Minimum number of death events required (NOTE(review): defined but not applied in this block)

# Step 1: Filter pathways with sufficient participants in both PH and COPD cohorts
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['COPD Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Keep only the pathway strings for the membership filters further down
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'COPD Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter

# Step 1: Ensure all date columns are in datetime format.
# Work on an explicit copy: the frame was produced by row filters, and writing
# columns into such a selection can trigger pandas' SettingWithCopyWarning.
filtered_COPD_dataframe = filtered_COPD_dataframe.copy()
filtered_COPD_dataframe['COPD Diagnosis Date'] = pd.to_datetime(filtered_COPD_dataframe['COPD Diagnosis Date'], errors='coerce')
filtered_COPD_dataframe['Date of Death'] = pd.to_datetime(filtered_COPD_dataframe['Date of Death'], errors='coerce')

# Step 2: Set a global Censor Date for all alive participants
global_censor_date = pd.to_datetime("2021-09-29")  # Adjust this date as per your dataset or study

# Step 3: Compute Death_Followup_Time = whole days from the COPD diagnosis to the
# end of follow-up (date of death for 'Dead', the global censor date for 'Alive').
vital_status = filtered_COPD_dataframe['Alive / Dead']

# End-of-follow-up date per row. Rows whose status is neither 'Dead' nor 'Alive'
# stay NaT, so their follow-up time becomes NaN.
followup_end = (
    filtered_COPD_dataframe['Date of Death']
    .where(vital_status == 'Dead')
    .mask(vital_status == 'Alive', global_censor_date)
)

# Vectorised date arithmetic gives a numeric column directly, instead of an
# object-dtype column pre-filled with None and patched through masked writes.
filtered_COPD_dataframe['Death_Followup_Time'] = (
    followup_end - filtered_COPD_dataframe['COPD Diagnosis Date']
).dt.days

# Step 4: Filter for valid pathways
# Every row of a participant receives that participant's codes joined in the
# current row order.
filtered_COPD_dataframe['Pathway'] = (
    filtered_COPD_dataframe
    .groupby('Participant ID')['Combined ICD10 Codes']
    .transform(lambda codes: ' -> '.join(codes))
)

# Keep only participants whose pathway is one of the retained common pathways.
filtered_COPD_dataframe = filtered_COPD_dataframe[
    filtered_COPD_dataframe['Pathway'].isin(filtered_pathways['Pathway'])
]

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: For every pathway, record whether at least one of its rows carries a
# non-NaN "Date of Death" (boolean Series indexed by pathway).
pathways_with_death = (
    filtered_COPD_dataframe['Date of Death']
    .notna()
    .groupby(filtered_COPD_dataframe['Pathway'])
    .any()
)

# Steps 2-3: Broadcast the per-pathway flag back onto the rows and store it as 1/0.
pathway_has_death = filtered_COPD_dataframe['Pathway'].map(pathways_with_death)
filtered_COPD_dataframe['Dead Check'] = np.where(pathway_has_death, 1, 0)

# Rows that belong to a pathway with at least one recorded death
pathways_with_dead = filtered_COPD_dataframe[filtered_COPD_dataframe['Dead Check'].eq(1)]

# Distinct pathways among those rows
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Print one pathway per line
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# The same list as a single-column DataFrame
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})
#len(unique_pathways_df)
#display(unique_pathways_df)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################



# Step 5: Remove rows with missing or non-positive follow-up times
filtered_COPD_dataframe = filtered_COPD_dataframe.dropna(subset=['Death_Followup_Time'])
filtered_COPD_dataframe = filtered_COPD_dataframe[
    filtered_COPD_dataframe['Death_Followup_Time'] > 0
].copy()  # explicit copy so the column writes below do not target a slice

# Step 6: Encode categorical variables
filtered_COPD_dataframe['Sex'] = filtered_COPD_dataframe['Sex'].map({'Male': 1, 'Female': 0})  # Encode Sex as numeric
filtered_COPD_dataframe['Alive / Dead'] = filtered_COPD_dataframe['Alive / Dead'].map({'Dead': 1, 'Alive': 0})  # Encode Alive/Dead as binary

# Step 7: Build the model table with ONE row per participant.
# The frame holds one row per diagnosis record, and every record of a participant
# carries the same pathway string. Fitting on it would count each participant once
# per diagnosis, inflating the sample size and narrowing the confidence intervals.
cox_data = (
    filtered_COPD_dataframe
    .drop_duplicates(subset='Participant ID')
    [['Death_Followup_Time', 'Alive / Dead', 'Individual Age', 'Sex', 'Pathway']]
)
# One-hot encode the pathways; the first category is dropped and acts as reference.
cox_data = pd.get_dummies(cox_data, columns=['Pathway'], drop_first=True)
# Drop rows with missing covariates (e.g. a Sex value outside the mapping above).
cox_data = cox_data.dropna()
# Make sure the duration column is numeric even if it arrived as object dtype.
cox_data['Death_Followup_Time'] = cox_data['Death_Followup_Time'].astype(float)

# Step 8: Fit the Cox Proportional Hazards Model
cox_model = CoxPHFitter(penalizer=0.1)  # Adding penalizer to handle high-dimensional data
cox_model.fit(cox_data, duration_col='Death_Followup_Time', event_col='Alive / Dead')
cox_model.print_summary()  # Print detailed summary of the model

# Step 9: Plot the coefficients on an explicitly created, tall Axes
fig, ax = plt.subplots(figsize=(8, 20))
cox_model.plot(ax=ax)
ax.set_title("Cox Proportional Hazards Analysis: Mortality Risk by COPD Pathways", fontsize=16)
ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
ax.set_ylabel("")  # Remove y-label for a cleaner presentation
ax.tick_params(axis='both', labelsize=10)
fig.tight_layout()
plt.show()

### <center> <span style="background-color:#90EE90; padding:5px;">Cox Proportional Hazards Analysis: PH vs COPD Pathways</span> </center>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches


# ---- COPD cohort: load the three exports (adjust paths as necessary) ----
Pre_COPD_dataframe = pd.read_csv('Pre COPD COMMON with comorbidities.csv')
Post_COPD_dataframe = pd.read_csv('Post COPD COMMON with comorbidities.csv')
Common_COPD_dataframe = pd.read_csv('Combined COPD Common with comorbidities.csv')

# Stack pre / common / post and drop exact duplicate rows
combined_dataframe_COPD = pd.concat(
    [Pre_COPD_dataframe, Common_COPD_dataframe, Post_COPD_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed ICD-10 codes into the single label "COPD".
# NOTE(review): J45.9 is folded into "COPD" here, yet the condition filter later
# in this cell also lists J45.9 as a separate comorbidity. For the COPD cohort
# that entry can no longer match - confirm this is intended.
COPD_conditions = ['J43.9', 'J45.9', 'J44.1', 'J44.0', 'J44.9', 'J47', 'J43.2', 'J44.8', 'J45.0', 'J40', 'J43.0', 'J43.8', 'J42']
copd_code_column = combined_dataframe_COPD['Combined ICD10 Codes']
combined_dataframe_COPD['Combined ICD10 Codes'] = copd_code_column.mask(
    copd_code_column.isin(COPD_conditions), 'COPD'
)

# ---- PH cohort: load the three exports (adjust paths as necessary) ----
Pre_PH_dataframe = pd.read_csv('Pre-PH Common PH Icd10 Codes.csv')
Post_PH_dataframe = pd.read_csv('Post-PH Common PH Icd10 Codes.csv')
Common_PH_dataframe = pd.read_csv('Common-PH Common PH Icd10 Codes.csv')

# Stack pre / common / post and drop exact duplicate rows
combined_dataframe_PH = pd.concat(
    [Pre_PH_dataframe, Common_PH_dataframe, Post_PH_dataframe],
    ignore_index=True,
).drop_duplicates()

# Collapse the listed ICD-10 codes into the single label "PH"
ph_conditions = ['I27.0', 'I27.2', 'I27.9']
ph_code_column = combined_dataframe_PH['ICD10 Codes']
combined_dataframe_PH['ICD10 Codes'] = ph_code_column.mask(
    ph_code_column.isin(ph_conditions), 'PH'
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: Convert the per-record diagnosis dates to datetime for proper sequencing
combined_dataframe_PH['Diagnosis Date'] = pd.to_datetime(combined_dataframe_PH['Diagnosis Date'])
combined_dataframe_COPD['Combined ICD10 Diagnosis Date'] = pd.to_datetime(combined_dataframe_COPD['Combined ICD10 Diagnosis Date'])

# Step 2: Define relevant conditions and filter the data.
# The comorbidities are identical for both cohorts, so they are declared once;
# each cohort then adds its own index-disease label.
shared_comorbidity_codes = {
    "Essential (primary) hypertension (I10)": "I10",
    "Atrial fibrillation and flutter (I48)": "I48",
    "Pure hypercholesterolemia (E78.0)": "E78.0",
    "Atherosclerotic heart disease (I25.1)": "I25.1",
    "Type 2 diabetes mellitus without complications (E11.9)": "E11.9",
    "Mitral (valve) insufficiency (I34.0)": "I34.0",
    "Asthma, unspecified (J45.9)": "J45.9",
}

# For PH cohort
ph_conditions = {**shared_comorbidity_codes, "PH": "PH"}  # "PH" is specific to the PH cohort
ph_condition_codes = list(ph_conditions.values())
filtered_ph_dataframe = combined_dataframe_PH[combined_dataframe_PH['ICD10 Codes'].isin(ph_condition_codes)]

# For COPD cohort
COPD_conditions = {**shared_comorbidity_codes, "COPD": "COPD"}  # "COPD" is specific to the COPD cohort
COPD_condition_codes = list(COPD_conditions.values())
filtered_COPD_dataframe = combined_dataframe_COPD[combined_dataframe_COPD['Combined ICD10 Codes'].isin(COPD_condition_codes)]

# Step 3: Sort by participant, then diagnosis date, then code.
# The code column is a tie-break: without it, diagnoses recorded on the same day
# keep whatever order the concatenated input files happened to have, so the same
# set of same-day diagnoses could yield different pathway strings in the two cohorts.
filtered_ph_dataframe = filtered_ph_dataframe.sort_values(
    by=['Participant ID', 'Diagnosis Date', 'ICD10 Codes']
)
filtered_COPD_dataframe = filtered_COPD_dataframe.sort_values(
    by=['Participant ID', 'Combined ICD10 Diagnosis Date', 'Combined ICD10 Codes']
)


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# PH cohort: coerce both date columns to datetime (unparseable values become NaT)
filtered_ph_dataframe = filtered_ph_dataframe.assign(**{
    'Diagnosis Date': pd.to_datetime(filtered_ph_dataframe['Diagnosis Date'], errors='coerce'),
    'PH Diagnosis Date': pd.to_datetime(filtered_ph_dataframe['PH Diagnosis Date'], errors='coerce'),
})

# COPD cohort: coerce both date columns to datetime (unparseable values become NaT)
filtered_COPD_dataframe = filtered_COPD_dataframe.assign(**{
    'Combined ICD10 Diagnosis Date': pd.to_datetime(filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'], errors='coerce'),
    'COPD Diagnosis Date': pd.to_datetime(filtered_COPD_dataframe['COPD Diagnosis Date'], errors='coerce'),
})

# PH cohort: keep only records dated strictly before the PH diagnosis.
# Comparisons involving NaT are False, so rows with a missing date are dropped.
ph_precedes_index = filtered_ph_dataframe['Diagnosis Date'].lt(
    filtered_ph_dataframe['PH Diagnosis Date']
)
filtered_ph_dataframe = filtered_ph_dataframe[ph_precedes_index]

# COPD cohort: keep only records dated strictly before the COPD diagnosis.
copd_precedes_index = filtered_COPD_dataframe['Combined ICD10 Diagnosis Date'].lt(
    filtered_COPD_dataframe['COPD Diagnosis Date']
)
filtered_COPD_dataframe = filtered_COPD_dataframe[copd_precedes_index]

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 5: Group by Participant ID to create disease sequences (before diagnosis).
# The code column is selected before aggregating, so only that column is passed
# to the aggregation. This replaces a whole-frame groupby.apply whose lambda
# returned a list or None.

# For PH cohort: one list of codes per participant, in current row order
ph_code_lists = filtered_ph_dataframe.groupby('Participant ID')['ICD10 Codes'].agg(list)
ph_traces = (
    ph_code_lists[ph_code_lists.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

# For COPD cohort: one list of codes per participant, in current row order
COPD_code_lists = filtered_COPD_dataframe.groupby('Participant ID')['Combined ICD10 Codes'].agg(list)
COPD_traces = (
    COPD_code_lists[COPD_code_lists.map(len) >= 2]  # Keep only participants with at least 2 conditions
    .reset_index(drop=True)
    .to_frame(name='Traces')
)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 6: Find common pathways between the two cohorts
from collections import Counter

# Count every trace once per cohort (tuples are hashable, lists are not). The
# counters double as the per-pathway participant counts, so the trace tables are
# scanned once instead of once per common pathway.
ph_trace_counts = Counter(map(tuple, ph_traces['Traces']))
COPD_trace_counts = Counter(map(tuple, COPD_traces['Traces']))

ph_trace_set = set(ph_trace_counts)
COPD_trace_set = set(COPD_trace_counts)

# Find common pathways
common_pathways = ph_trace_set.intersection(COPD_trace_set)

# Sort so that the table rows come out in the same order on every run
# (iteration order of a set of string tuples is not stable between sessions).
ordered_common_pathways = sorted(common_pathways)

# Participant counts per pathway, aligned with ordered_common_pathways
ph_participant_counts = [ph_trace_counts[pathway] for pathway in ordered_common_pathways]
COPD_participant_counts = [COPD_trace_counts[pathway] for pathway in ordered_common_pathways]

# Common pathways in a readable string format plus the per-cohort counts
common_pathways_df = pd.DataFrame({
    'Pathway': [' -> '.join(pathway) for pathway in ordered_common_pathways],
    'PH Participants': ph_participant_counts,
    'COPD Participants': COPD_participant_counts,
})

# Display the resulting DataFrame
print("Common Pathways Between PH and COPD Cohorts:")
display(common_pathways_df)
print()

# Define thresholds
min_participants = 5  # Minimum participants threshold
min_deaths = 1        # Minimum number of death events required (NOTE(review): defined but not applied in this block)

# Step 1: Filter pathways with sufficient participants in both PH and COPD cohorts
filtered_pathways = common_pathways_df[
    (common_pathways_df['PH Participants'] >= min_participants) &
    (common_pathways_df['COPD Participants'] >= min_participants)
]

print("Filtered Pathways with Sufficient Participants:")
display(filtered_pathways)
print()

# Keep only the pathway strings for the membership filter further down
filtered_pathways = filtered_pathways.drop(columns=['PH Participants', 'COPD Participants'])
display(filtered_pathways)

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from lifelines import CoxPHFitter




# Tag each cohort and give both frames the same column names so they can be stacked.
# assign/rename return new frames, so nothing is written into a filtered slice.
filtered_ph_dataframe = filtered_ph_dataframe.assign(Cohort='PH').rename(columns={
    'Diagnosis Date': 'Diagnosis_Date',
    'ICD10 Codes': 'Disease_Code',
    'PH followup Time': 'Followup_Time',
})
filtered_COPD_dataframe = filtered_COPD_dataframe.assign(Cohort='COPD').rename(columns={
    'Combined ICD10 Diagnosis Date': 'Diagnosis_Date',
    'Combined ICD10 Codes': 'Disease_Code',
    'COPD Matched followup Time': 'Followup_Time',
    'Individual Age': 'Age',
})

# Combine dataframes (PH rows first, then COPD rows; fresh 0..n-1 index)
combined_data = pd.concat([filtered_ph_dataframe, filtered_COPD_dataframe], ignore_index=True, sort=False)

# Ensure all diagnosis dates are in datetime format
combined_data['Diagnosis_Date'] = pd.to_datetime(combined_data['Diagnosis_Date'], errors='coerce')

# Build each participant's pathway string WITHIN their cohort.
# Grouping on Participant ID alone would merge the PH and COPD records of anyone
# present in both cohorts into one concatenated pathway, which matches none of
# the common pathways and silently drops that participant from both cohorts.
combined_data['Pathway'] = (
    combined_data
    .groupby(['Cohort', 'Participant ID'])['Disease_Code']
    .transform(lambda codes: ' -> '.join(codes))
)

# Filter for common pathways
combined_data = combined_data[combined_data['Pathway'].isin(filtered_pathways['Pathway'])]


#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################

# Step 1: For every pathway, record whether at least one of its rows carries a
# non-NaN "Date of Death" (boolean Series indexed by pathway).
pathways_with_death = (
    combined_data['Date of Death']
    .notna()
    .groupby(combined_data['Pathway'])
    .any()
)

# Steps 2-3: Broadcast the per-pathway flag back onto the rows and store it as 1/0.
pathway_has_death = combined_data['Pathway'].map(pathways_with_death)
combined_data['Dead Check'] = np.where(pathway_has_death, 1, 0)

# Rows that belong to a pathway with at least one recorded death
pathways_with_dead = combined_data[combined_data['Dead Check'].eq(1)]

# Distinct pathways among those rows
unique_pathways_with_dead = pathways_with_dead['Pathway'].unique()

# Print one pathway per line
print("Unique Pathways with at least one death (Dead Check = 1):")
for pathway in unique_pathways_with_dead:
    print(pathway)

# The same list as a single-column DataFrame
unique_pathways_df = pd.DataFrame({'Pathway': unique_pathways_with_dead})

#################################################################################################################################################
#################################################################################################################################################
#################################################################################################################################################


# Recode the categorical covariates as numbers. Labels that are not listed in
# a mapping become NaN and are removed by the dropna() further down.
combined_data['Sex'] = combined_data['Sex'].map({'Male': 1, 'Female': 0})  # Male -> 1, Female -> 0
combined_data['Cohort'] = combined_data['Cohort'].map({'PH': 1, 'COPD': 0})  # PH -> 1, COPD -> 0
display(combined_data[['Followup_Time', 'Dead Check', 'Cohort']].head())

# Model table: follow-up time, cohort, age, sex and one dummy column per
# pathway (drop_first=True leaves out the first pathway's dummy column).
# Rows with any missing value are discarded.
cox_data = pd.get_dummies(
    combined_data[['Followup_Time', 'Cohort', 'Age', 'Sex', 'Pathway']],
    columns=['Pathway'],
    drop_first=True,
).dropna()

# Fit the Cox proportional hazards model and print its summary table.
# NOTE(review): the event column is 'Cohort' (PH = 1, COPD = 0); 'Dead Check'
# is displayed above but is not part of cox_data. Confirm this is intended.
cox_model = CoxPHFitter()
cox_model.fit(cox_data, duration_col='Followup_Time', event_col='Cohort')
cox_model.print_summary()

# Coefficient plot on a tall figure so that every covariate label is readable.
plt.figure(figsize=(8, 15))
cox_model.plot()
plt.title("Cox Proportional Hazards Analysis: PH vs COPD Pathways", fontsize=16)
plt.xlabel("log(HR) (95% CI)", fontsize=12)
plt.ylabel("")  # no y-axis label
for resize_ticks in (plt.xticks, plt.yticks):
    resize_ticks(fontsize=10)
plt.tight_layout()
plt.show()


### <center> <span style="background-color:#90EE90; padding:5px;">Cox Proportional Hazards Analysis: Combined (PH vs COPD) Pathways</span></center>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a colour-grouped forest plot of log hazard ratios (PH v/s COPD) on `ax`.

    Parameters:
    - ax: Matplotlib axis to draw on. Every tick, label, limit and title is
      applied to this axis (not to pyplot's "current" axis).
    - plot_data: DataFrame with a string index of "Pathway_<first code> ..."
      labels and the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are bucketed by the first condition of their label (E11.9, E78.0, I10,
    I25.1, I48) and drawn in that group order on consecutive integer rows,
    top to bottom. Rows matching none of these prefixes are not drawn. If no
    row matches at all, the axis is still styled but its y-limits are left alone.
    """
    # Marker/whisker colour per starting condition; the dict order is the draw order.
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Groups are stacked without a gap, so the i-th drawn row sits at y == i.
    next_row = 0
    used_rows = []
    for code, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{code}")]
        if data.empty:
            continue
        rows = list(range(next_row, next_row + len(data)))
        ax.errorbar(
            data['coef'],
            rows,
            # Asymmetric whiskers: distance from the estimate to each CI bound.
            xerr=[
                data['coef'] - data['coef lower 95%'],
                data['coef upper 95%'] - data['coef']
            ],
            fmt='o',
            capsize=4.0,
            color=color
        )
        used_rows.extend(rows)
        next_row += len(data)

    # Half a row of padding above the first and below the last drawn row.
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # Flip the axis so that row 0 is drawn at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    # Fixed x ticks for log(HR)
    x_ticks = [-1, -0.5, 0, 0.5, 1, 1.5]
    x_labels = ["-1","-0.5", "0", "0.5", "1", "1.5"]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_labels, fontsize=10)

    # Label through the axis object so the text lands on `ax` even when
    # another axis has become pyplot's current one.
    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.tick_params(axis='x', labelsize=10)
    ax.set_yticks([])
    ax.set_title("Cox Proportional Hazards Analysis\n (PH v/s COPD)", fontsize=13)
    
 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws a colour-grouped forest plot of log hazard ratios for the PH pathways on `ax`.

    Parameters:
    - ax: Matplotlib axis to draw on.
    - plot_data_ph: DataFrame with a string index of "Pathway_<first code> ..."
      labels and the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are bucketed by the first condition of their label (E11.9, E78.0, I10,
    I25.1, I48) and drawn in that group order on consecutive integer rows,
    top to bottom. Rows matching none of these prefixes are not drawn. If no
    row matches at all, the axis is still styled but its y-limits are left alone.
    """
    # Marker/whisker colour per starting condition; the dict order is the draw order.
    group_colors_ph = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Groups are stacked without a gap, so the i-th drawn row sits at y == i.
    next_row = 0
    used_rows = []
    for code, color in group_colors_ph.items():
        data = plot_data_ph.loc[plot_data_ph.index.str.startswith(f"Pathway_{code}")]
        if data.empty:
            continue
        rows = list(range(next_row, next_row + len(data)))
        ax.errorbar(
            data['coef'],
            rows,
            # Asymmetric whiskers: distance from the estimate to each CI bound.
            xerr=[
                data['coef'] - data['coef lower 95%'],
                data['coef upper 95%'] - data['coef']
            ],
            fmt='o',
            capsize=4.0,
            color=color
        )
        used_rows.extend(rows)
        next_row += len(data)

    # Half a row of padding above the first and below the last drawn row.
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # Flip the axis so that row 0 is drawn at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    # Fixed x ticks for log(HR)
    x_ticks_ph = [-1.5, -1, -0.5, 0, 0.5, 1]
    x_labels_ph = ["-1.5", "-1", "-0.5", "0", "0.5","1"]
    ax.set_xticks(x_ticks_ph)
    ax.set_xticklabels(x_labels_ph, fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Cox Proportional Hazards Analysis\n (Mortality Risk with PH Pathways)", fontsize=13)

    # No y ticks: the rows are not labelled on this axis.
    ax.set_yticks([])




##############################################################################################
    
    
    
def create_hazard_ratio_plot_COPD(ax, plot_data_COPD):
    """
    Draws a colour-grouped forest plot of log hazard ratios for the COPD pathways on `ax`.

    Parameters:
    - ax: Matplotlib axis to draw on.
    - plot_data_COPD: DataFrame with a string index of "Pathway_<first code> ..."
      labels and the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are bucketed by the first condition of their label (E11.9, E78.0, I10,
    I25.1, I48) and drawn in that group order on consecutive integer rows,
    top to bottom. Rows matching none of these prefixes are not drawn. If no
    row matches at all, the axis is still styled but its y-limits are left alone.
    """
    # Marker/whisker colour per starting condition; the dict order is the draw order.
    group_colors_COPD = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Groups are stacked without a gap, so the i-th drawn row sits at y == i.
    next_row = 0
    used_rows = []
    for code, color in group_colors_COPD.items():
        data = plot_data_COPD.loc[plot_data_COPD.index.str.startswith(f"Pathway_{code}")]
        if data.empty:
            continue
        rows = list(range(next_row, next_row + len(data)))
        ax.errorbar(
            data['coef'],
            rows,
            # Asymmetric whiskers: distance from the estimate to each CI bound.
            xerr=[
                data['coef'] - data['coef lower 95%'],
                data['coef upper 95%'] - data['coef']
            ],
            fmt='o',
            capsize=4.0,
            color=color
        )
        used_rows.extend(rows)
        next_row += len(data)

    # Half a row of padding above the first and below the last drawn row.
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # Flip the axis so that row 0 is drawn at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    # Fixed x ticks for log(HR)
    x_ticks_COPD = [-1.5,-1, -0.5, 0, 0.5, 1, 1.5,2]
    x_labels_COPD = ["-1.5","-1", "-0.5", "0", "0.5", "1", "1.5","2"]
    ax.set_xticks(x_ticks_COPD)
    ax.set_xticklabels(x_labels_COPD, fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Cox Proportional Hazards Analysis\n (Mortality Risk with COPD Pathways)", fontsize=13)

    # No y ticks: the rows are not labelled on this axis.
    ax.set_yticks([])
    
    
    
########################################################################################################################    

  

# Disease pathways shown in the diagram, one per row from top to bottom.
# Each string lists the conditions in order, separated by " -> ".
pathways = [
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> E78.0 -> I10 -> I48 -> PH/COPD",
    "E11.9 -> I10 -> PH/COPD",
    "E11.9 -> I10 -> E78.0 -> PH/COPD",
    "E11.9 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I25.1 -> PH/COPD",

    "E78.0 -> I10 -> PH/COPD",
    "E78.0 -> I10 -> E11.9 -> PH/COPD",
    "E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E78.0 -> I10 -> I48 -> PH/COPD",
    "E78.0 -> I25.1 -> PH/COPD",

    "I10 -> E11.9 -> PH/COPD",
    "I10 -> E11.9 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> I25.1 -> PH/COPD",
    "I10 -> I25.1 -> PH/COPD",
    "I10 -> I48 -> PH/COPD",

    "I25.1 -> E78.0 -> I10 -> PH/COPD",
    "I25.1 -> I10 -> PH/COPD",

    "I48 -> I10 -> PH/COPD"
]

# Fill colour of each node, keyed by its condition label.
color_dict = {
    'PH/COPD': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3'
}

# Canvas for the diagram.
fig, ax = plt.subplots(figsize=(16, 8))
gap = 0.01  # Extra horizontal offset added per step
row_gap = 1  # Vertical gap between rows (not referenced by the drawing loop)

# One row per pathway, one box per condition, arrows between neighbouring boxes.
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    y_pos = len(pathways) - row + 1  # the first pathway gets the highest y
    for col, condition in enumerate(conditions, start=1):
        x_pos = col + (col - 1) * gap

        # Node box; conditions without an entry in color_dict are filled white.
        node = plt.Rectangle(
            (x_pos - 0.5, y_pos - 0.87),
            0.6,
            0.9,
            facecolor=color_dict.get(condition, 'white'),
            edgecolor='black',
        )
        ax.add_patch(node)
        ax.text(x_pos - 0.188, y_pos - 0.46, condition, ha='center', va='center', fontsize=9.6)

        # The last node of a pathway has no outgoing arrow.
        if col == len(conditions):
            continue

        next_x_pos = x_pos + 1 + gap
        ax.annotate(
            '',
            xy=(next_x_pos - 0.5, y_pos - 0.4),  # arrow head: left edge of the next box
            xytext=(x_pos + 0.11, y_pos - 0.4),  # arrow tail: just right of this box
            arrowprops={
                'arrowstyle': "->,head_width=0.2,head_length=0.3",
                'color': 'black',
                'lw': 0.7,
                'shrinkA': 0,  # no shortening at the tail
                'shrinkB': 0,  # no shortening at the head
            },
        )



# Reference box (in data coordinates of `ax`) used to place the forest-plot insets.
# NOTE: `conditions` is the variable left over from the pathway drawing loop,
# i.e. the split of the LAST entry of `pathways`; the x values therefore depend
# on how many conditions that last pathway has.
box_start_x = len(conditions) + 2.2  # Left edge of the reference box
box_end_x = len(conditions) + 7.0    # Right edge of the reference box
box_start_y = 0.45                   # Bottom edge of the reference box
box_end_y = len(pathways) + 0.5     # Top edge of the reference box

#####################################################################################################################


# First inset: forest plot of the PH v/s COPD model.
# bbox_to_anchor is (x, y, width, height) in data coordinates because
# bbox_transform is ax.transData; the percentage sizes are relative to that box.
inset = inset_axes(
    ax,
    width="60%",  # 60% of the anchor box width
    height="100.9%",  # slightly taller than the anchor box
    bbox_to_anchor=(box_start_x + 5.1, box_start_y - 0.3, box_end_x - box_start_x, box_end_y - box_start_y),  # box shifted right by 5.1 and down by 0.3
    bbox_transform=ax.transData,
    loc='center'
)


# Hard-coded log hazard ratios ('coef') with their lower/upper 95% bounds, one
# row per pathway; the three lists and the index are matched by position.
# NOTE(review): presumably transcribed from the fitted model's summary; confirm.
plot_data = pd.DataFrame({
    'coef': [-0.61, 0.38, -0.18, 0.23, 0.22, -0.26, -0.23, 0.65, 0.33, 0.34, -0.03, 
             -0.30, 0.23, 0.22, 0.19, -0.26, 0.15, 0.13, 0.16, 0.42],
    'coef lower 95%': [-1.29, -0.32, -0.83, -0.45, -0.46, -1.15, -0.88, -0.12, -0.34, 
                       -0.35, -0.82, -1.07, -0.48, -0.44, -0.51, -0.95, -0.49, -0.59, -0.55, -0.24],
    'coef upper 95%': [0.07, 1.07, 0.47, 0.92, 0.91, 0.62, 0.42, 1.43, 1.00, 1.03, 0.77, 
                       0.46, 0.94, 0.88, 0.89, 0.42, 0.80, 0.84, 0.87, 1.07]
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> E78.0 -> I10 -> I48",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])

# Disabled: this would mirror the inset horizontally (it inverts the x-axis).
#inset.invert_xaxis()

# Draw the PH v/s COPD forest plot into the first inset.
create_hazard_ratio_plot(inset, plot_data)

###################################################################################################################

# Hard-coded log hazard ratios ('coef') with their lower/upper 95% bounds for
# the PH pathways; the three lists and the index are matched by position.
# NOTE(review): the first two index labels ("E11.9 -> E78.0 -> I10" and
# "E11.9 -> E78.0 -> I10 -> I25.1") differ from the first two entries of
# `pathways`, while rows are drawn by position; confirm the row alignment.
plot_data_ph = pd.DataFrame({
    'coef': [-0.16, 0.22, 0.09, 0.63, -0.05, -0.79, -0.02, -0.08, -0.38, -0.06, 
             -0.44, 0.36, 0.63, -0.21, 0.26, -0.14, -0.30, 0.02, 0.33, -0.02],
    'coef lower 95%': [-0.57, -0.23, -0.23, 0.27, -0.46, -1.82, -0.34, -0.80, -0.80, -0.47, 
                       -1.24, -0.25, 0.18, -0.58, -0.16, -0.55, -0.58, -0.51, -0.13, -0.34],
    'coef upper 95%': [0.26, 0.66, 0.40, 0.99, 0.36, 0.24, 0.31, 0.64, 0.04, 0.35, 
                       0.36, 0.97, 1.09, 0.16, 0.68, 0.28, -0.01, 0.56, 0.78, 0.31]
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])


# Copy of the PH table with every 'coef' scaled by 1.1 (the bounds are unchanged).
# NOTE(review): plot_data_2 is not passed to any plotting call in this cell.
plot_data_2 = plot_data_ph.copy()
plot_data_2['coef'] = plot_data_2['coef'] * 1.1

# Anchor box of the PH inset, derived from the right edge of the reference box.
box_start_x_ph = box_end_x + 1.1  # left edge
box_end_x_ph = box_start_x_ph + 4.8  # right edge (box width 4.8)

# PH inset: bbox_to_anchor is (x, y, width, height) in data coordinates of `ax`.
inset_ph = inset_axes(
    ax,
    width="60%",  # 60% of the anchor box width
    height="101.0%",  # slightly taller than the anchor box
    bbox_to_anchor=(box_start_x_ph - 6.8, box_start_y - 0.3, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # box shifted left by 6.8 and down by 0.3
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH forest plot (unscaled plot_data_ph) into its inset.
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)





####################################################################################

# Hard-coded log hazard ratios ('coef') with their lower/upper 95% bounds for
# the COPD pathways; the three lists and the index are matched by position.
# NOTE(review): the first two index labels ("E11.9 -> E78.0 -> I10" and
# "E11.9 -> E78.0 -> I10 -> I25.1") differ from the first two entries of
# `pathways`, while rows are drawn by position; confirm the row alignment.
plot_data_COPD = pd.DataFrame({
    'coef': [-0.36, 0.69, -0.02, 0.31, 0.55, -0.01, -0.10, 0.11, 0.16, 0.92, 
             -0.91, 0.03, 1.30, 0.24, -0.05, -0.01, 0.13, -0.15, 0.82, 0.28],
    'coef lower 95%': [-0.89, 0.19, -0.40, -0.36, -0.05, -0.94, -0.47, -0.44, -0.27, 0.36, 
                       -1.54, -0.65, 0.73, -0.16, -0.63, -0.48, -0.37, -0.65, 0.20, -0.50],
    'coef upper 95%': [0.17, 1.18, 0.35, 0.97, 1.15, 0.92, 0.28, 0.67, 0.60, 1.48, 
                       -0.27, 0.71, 1.87, 0.63, 0.53, 0.45, 0.63, 0.36, 1.43, 1.06]
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])





# Copy of the COPD table with every 'coef' scaled by 1.1 (the bounds are unchanged).
# NOTE(review): plot_data_3 is not passed to any plotting call in this cell.
plot_data_3 = plot_data_COPD.copy()
plot_data_3['coef'] = plot_data_3['coef'] * 1.1

# Anchor box of the COPD inset, derived from the right edge of the reference box.
box_start_x_COPD = box_end_x + 1.1  # left edge
box_end_x_COPD = box_start_x_COPD + 4.8  # right edge (box width 4.8)

# COPD inset (the third one): bbox_to_anchor is (x, y, width, height) in data coordinates of `ax`.
inset_COPD = inset_axes(
    ax,
    width="60%",  # 60% of the anchor box width
    height="100.9%",  # slightly taller than the anchor box
    bbox_to_anchor=(box_start_x_COPD - 3.8, box_start_y - 0.3, box_end_x_COPD - box_start_x_COPD, box_end_y - box_start_y),  # box shifted left by 3.8 and down by 0.3
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the COPD forest plot (unscaled plot_data_COPD) into its inset.
create_hazard_ratio_plot_COPD(inset_COPD, plot_data_COPD)

#######################################################################################



# Axis limits: wide enough for the longest pathway plus 6.5 extra data units
# on the right, where the forest-plot insets are anchored.
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)
ax.set_ylim(0, len(pathways)+0.5)

# The pathway rows carry no y ticks or tick labels.
ax.set_yticks([])
ax.set_yticklabels([])

# Hand-placed x ticks numbering the steps of the disease sequence.
ax.set_xticks([0.8, 1.80, 2.80, 3.82, 4.8])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Legend entries: display name -> node label used in color_dict. The swatch
# colours are read from color_dict so the legend always shows exactly the
# colours the node boxes are filled with.
# NOTE(review): the hyperlipidemia entry is labelled "E78" while the nodes
# read "E78.0"; confirm which wording is intended.
legend_entries = {
    "PH and COPD as Index Conditions": 'PH/COPD',
    "Type 2 Diabetes Mellitus - E11.9": 'E11.9',
    "Hypertension - I10": 'I10',
    "Hyperlipidemia - E78": 'E78.0',
    "Chronic Ischemic Heart Disease - I25.1": 'I25.1',
    "Atrial Fibrillation and Flutter - I48": 'I48',
}
handles = [
    mpatches.Patch(color=color_dict[code], label=full_name)
    for full_name, code in legend_entries.items()
]

# Frameless legend below the axes, four entries per row.
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.09), loc='upper center', ncol=4, fontsize=14, frameon=False)

# Axis labels; the x label is moved left so it sits under the pathway diagram.
ax.set_xlabel("Disease Sequence Progression", fontsize=14, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/COPD", fontsize=14, labelpad=15)
ax.xaxis.set_label_coords(0.2, -0.05)

# Hide the top and right frame lines.
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

plt.tight_layout()
# Write the figure to disk before showing it.
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a colour-grouped forest plot of log hazard ratios (PH v/s COPD) on `ax`.

    Parameters:
    - ax: Matplotlib axis to draw on. Every tick, label, limit and title is
      applied to this axis (not to pyplot's "current" axis).
    - plot_data: DataFrame with a string index of "Pathway_<first code> ..."
      labels and the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are bucketed by the first condition of their label (E11.9, E78.0, I10,
    I25.1, I48) and drawn in that group order on consecutive integer rows,
    top to bottom. Rows matching none of these prefixes are not drawn. If no
    row matches at all, the axis is still styled but its y-limits are left alone.
    """
    # Marker/whisker colour per starting condition; the dict order is the draw order.
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Groups are stacked without a gap, so the i-th drawn row sits at y == i.
    next_row = 0
    used_rows = []
    for code, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{code}")]
        if data.empty:
            continue
        rows = list(range(next_row, next_row + len(data)))
        ax.errorbar(
            data['coef'],
            rows,
            # Asymmetric whiskers: distance from the estimate to each CI bound.
            xerr=[
                data['coef'] - data['coef lower 95%'],
                data['coef upper 95%'] - data['coef']
            ],
            fmt='o',
            capsize=4.0,
            color=color
        )
        used_rows.extend(rows)
        next_row += len(data)

    # Half a row of padding above the first and below the last drawn row.
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # Flip the axis so that row 0 is drawn at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    # Fixed x ticks for log(HR)
    x_ticks = [-1, -0.5, 0, 0.5, 1, 1.5]
    x_labels = ["-1","-0.5", "0", "0.5", "1", "1.5"]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_labels, fontsize=10)

    # Label through the axis object so the text lands on `ax` even when
    # another axis has become pyplot's current one.
    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.tick_params(axis='x', labelsize=10)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s COPD", fontsize=13)
    

    
    
    
def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Attaches a framed text panel to `ax` that lists "P:<p>, HR:<exp(coef)>" per row.

    Parameters:
    - ax: Axis holding the hazard ratio plot; the panel is anchored to it and
      copies its y-limits.
    - plot_data: DataFrame providing the 'p' and 'coef' columns. One line of
      text is written per row, at y = 0, 1, 2, ... in DataFrame row order.
    - box_title: Title shown above the panel.
    - x_offset: Horizontal anchor of the panel in axes-fraction units of `ax`.
    """
    # Panel anchored at (x_offset, 0) in axes coordinates of `ax`; the
    # percentage sizes are relative to the 0.2 x 1 anchor box.
    panel = inset_axes(
        ax,
        width="145%",
        height="100%",
        bbox_to_anchor=(x_offset, 0, 0.2, 1),
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Bare canvas: hide all four spines, all ticks and all tick labels.
    for side in ('top', 'bottom', 'left', 'right'):
        panel.spines[side].set_visible(False)
    panel.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    panel.set_title(box_title, fontsize=10, pad=10)

    # One centred line of text per row; the hazard ratio is exp(coef).
    for row, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        panel.text(
            0.5,
            row,
            f"P:{p_val:.2f}, HR:{np.exp(coef):.2f}",
            ha="center",
            va="center",
            fontsize=9,
        )

    # Reuse the y-limits of `ax` so the text rows use the same vertical scale.
    panel.set_ylim(ax.get_ylim())

    # Outline covering the whole panel, drawn above its other content.
    border = plt.Rectangle(
        (0, 0), 1, 1,
        transform=panel.transAxes,
        edgecolor="black",
        facecolor="none",
        linewidth=1.5,
        zorder=10
    )
    panel.add_patch(border)
    
 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws a colour-grouped forest plot of log hazard ratios for the PH pathways on `ax`.

    Parameters:
    - ax: Matplotlib axis to draw on.
    - plot_data_ph: DataFrame with a string index of "Pathway_<first code> ..."
      labels and the columns 'coef', 'coef lower 95%' and 'coef upper 95%'.

    Rows are bucketed by the first condition of their label (E11.9, E78.0, I10,
    I25.1, I48) and drawn in that group order on consecutive integer rows,
    top to bottom. Rows matching none of these prefixes are not drawn. If no
    row matches at all, the axis is still styled but its y-limits are left alone.
    """
    # Marker/whisker colour per starting condition; the dict order is the draw order.
    group_colors_ph = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Groups are stacked without a gap, so the i-th drawn row sits at y == i.
    next_row = 0
    used_rows = []
    for code, color in group_colors_ph.items():
        data = plot_data_ph.loc[plot_data_ph.index.str.startswith(f"Pathway_{code}")]
        if data.empty:
            continue
        rows = list(range(next_row, next_row + len(data)))
        ax.errorbar(
            data['coef'],
            rows,
            # Asymmetric whiskers: distance from the estimate to each CI bound.
            xerr=[
                data['coef'] - data['coef lower 95%'],
                data['coef upper 95%'] - data['coef']
            ],
            fmt='o',
            capsize=4.0,
            color=color
        )
        used_rows.extend(rows)
        next_row += len(data)

    # Half a row of padding above the first and below the last drawn row.
    if used_rows:
        ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # Flip the axis so that row 0 is drawn at the top
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)  # log(HR) = 0 reference line

    # Fixed x ticks for log(HR)
    x_ticks_ph = [-1.5, -1, -0.5, 0, 0.5, 1]
    x_labels_ph = ["-1.5", "-1", "-0.5", "0", "0.5","1"]
    ax.set_xticks(x_ticks_ph)
    ax.set_xticklabels(x_labels_ph, fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # No y ticks: the rows are not labelled on this axis.
    ax.set_yticks([])




##############################################################################################
    
    
    
def create_hazard_ratio_plot_COPD(ax, plot_data_COPD):
    """Draw the COPD forest plot of log hazard ratios with 95% CIs on ``ax``.

    Rows of ``plot_data_COPD`` are selected by the prefix of their index label
    and drawn group by group (E11.9, E78.0, I10, I25.1, I48), each group in
    its own colour and with a single ``errorbar`` call.

    Parameters:
    - ax: Matplotlib axes that receives the plot.
    - plot_data_COPD: DataFrame with a string index of the form
      "Pathway_<code> -> ..." and the columns 'coef', 'coef lower 95%' and
      'coef upper 95%'.  Values are plotted as given (log scale).
    """
    # (index prefix that selects the group, marker/whisker colour), in drawing order
    group_styles = (
        ("Pathway_E11.9", '#228B22'),  # Forest Green
        ("Pathway_E78.0", '#D2691E'),  # Chocolate - Rich tan
        ("Pathway_I10", '#7D3C98'),    # Amethyst - Deeper purple
        ("Pathway_I25.1", '#FF69B4'),  # Hot Pink - Vibrant pink
        ("Pathway_I48", '#4682B4'),    # Steel Blue - Deeper blue
    )
    # int() truncates the fractional spacing to 0, so groups follow each other
    # on consecutive integer rows without a gap.
    group_gap = int(0.8)

    labels = plot_data_COPD.index.str
    next_row = 0
    used_rows = []
    for prefix, colour in group_styles:
        subset = plot_data_COPD.loc[labels.startswith(prefix)]
        rows = range(next_row, next_row + len(subset))
        centre = subset['coef']
        # Asymmetric whiskers: distance from the estimate down to the lower
        # bound and up to the upper bound.
        whiskers = [
            centre - subset['coef lower 95%'],
            subset['coef upper 95%'] - centre,
        ]
        ax.errorbar(centre, rows, xerr=whiskers, fmt='o', capsize=4.0, color=colour)
        used_rows.extend(rows)
        next_row += len(subset) + group_gap

    # Fit the y-range to the occupied rows plus half a row of padding, then
    # flip it so the first row is drawn at the top.
    padding = 0.5
    ax.set_ylim(min(used_rows) - padding, max(used_rows) + padding)
    ax.invert_yaxis()

    # Reference line at log(HR) = 0, i.e. no effect.
    ax.axvline(x=0, color='black', linestyle='--', linewidth=0.7)

    # Fixed tick positions paired with their printed labels.
    tick_spec = [
        (-1.5, "-1.5"), (-1, "-1"), (-0.5, "-0.5"), (0, "0"),
        (0.5, "0.5"), (1, "1"), (1.5, "1.5"), (2, "2"),
    ]
    ax.set_xticks([position for position, _ in tick_spec])
    ax.set_xticklabels([text for _, text in tick_spec], fontsize=10)

    ax.set_xlabel("log(HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with COPD Pathways", fontsize=13)

    # Rows are identified by the surrounding figure, not by y-tick labels.
    ax.set_yticks([])
    
    
    
########################################################################################################################    

  

# Disease-progression pathways shown in the left-hand panel, one per row.
# Each string lists the node labels in order, separated by " -> " (the drawing
# loop below splits on exactly that separator), and ends with the "PH/COPD" node.
# Entries are grouped by their first code and follow the same order as the
# index of the hazard-ratio tables defined further down.
pathways = [    
    # Pathways starting with E11.9
    "E11.9 -> E78.0 -> I10 -> PH/COPD",
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I10 -> PH/COPD",
    "E11.9 -> I10 -> E78.0 -> PH/COPD",
    "E11.9 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I25.1 -> PH/COPD",
    
    # Pathways starting with E78.0
    "E78.0 -> I10 -> PH/COPD",
    "E78.0 -> I10 -> E11.9 -> PH/COPD",
    "E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E78.0 -> I10 -> I48 -> PH/COPD",
    "E78.0 -> I25.1 -> PH/COPD",
    
    # Pathways starting with I10
    "I10 -> E11.9 -> PH/COPD",
    "I10 -> E11.9 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> I25.1 -> PH/COPD",
    "I10 -> I25.1 -> PH/COPD",
    "I10 -> I48 -> PH/COPD",
    
    # Pathways starting with I25.1
    "I25.1 -> E78.0 -> I10 -> PH/COPD",
    "I25.1 -> I10 -> PH/COPD",
    
    # Pathways starting with I48
    "I48 -> I10 -> PH/COPD"
]



# Fill colour of each node box, keyed by the node label as it appears in
# `pathways`. The drawing loop falls back to white for labels missing here.
color_dict = {
    'PH/COPD': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3'
}




# Create the main figure; `ax` hosts the pathway node grid and anchors the insets.
fig, ax = plt.subplots(figsize=(16, 8))
# Added to the unit column step: with -0.38 consecutive node columns sit
# 0.62 apart, just over the 0.6 box width used below.
gap = -0.38  # Horizontal gap between nodes
# NOTE(review): row_gap is not referenced anywhere in the visible part of this cell.
row_gap = 1  # Vertical gap between rows



# Plot the pathways: one row per pathway, one coloured box per node.
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    for col, condition in enumerate(conditions, start=1):
        # Column position; the first pathway gets the highest y (top row).
        x_pos = col + (col - 1) * gap
        y_pos = len(pathways) - row  + 1 

        # Draw rectangle for each node (white if the label has no entry in color_dict)
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.87), 0.6, 0.9, 
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Add text to node, centred inside the rectangle
        ax.text(x_pos - 0.188, y_pos - 0.46, condition, ha='center', va='center', fontsize=9.6)

        # Arrows between nodes are currently disabled; the original snippet is
        # kept below for reference (its parentheses are not balanced as written).
        # if col < len(conditions):
        #     next_x_pos = x_pos + 1 + gap
        #     ax.annotate(
        #         '',
        #         xy=(next_x_pos - 0.5, y_pos - 0.4),  # End position of arrow
        #         xytext=(x_pos + 0.11, y_pos - 0.4),  # Start position of arrow
        #         arrowprops=dict(
        #             arrowstyle="->,head_width=0.2,head_length=0.3",  # Adjust arrowhead size
        #             color='black',  # Arrow color
        #             lw=0.7,  # Line width (thicker arrow)
        #             shrinkA=0,  # Adjust start of the arrow (in points)
        #             shrinkB=0   # Adjust end of the arrow (in points)
        #
        #     )
        # )



# Reference rectangle (in `ax` data coordinates) used to size and place the insets.
# NOTE(review): `conditions` is the value left over from the LAST loop iteration,
# so both x values depend on the number of nodes in the final pathway only.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5     # Ending y position for the box

#####################################################################################################################


# Insert the first hazard ratio plot as an inset of `ax`.
# The anchor box is expressed in `ax` data coordinates (bbox_transform=ax.transData);
# width/height are percentages of that anchor box.
inset = inset_axes(
    ax,
    width="60%",  # Adjust width of the inset
    height="100.9%",  # Adjust height of the inset
    bbox_to_anchor=(box_start_x + 5.1, box_start_y - 0.3, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
    bbox_transform=ax.transData,
    loc='center'
)


# Hard-coded results table for the first inset: one row per pathway, in the
# same order as `pathways`, with the "Pathway_" prefix and without the final node.
# Columns: 'coef' with its lower/upper 95% bounds, and 'p'.
# (Template note from the original author: replace with the actual DataFrame.)
plot_data = pd.DataFrame({
    'coef': [-0.61, 0.38, -0.18, 0.23, 0.22, -0.26, -0.23, 0.65, 0.33, 0.34, -0.03, 
             -0.30, 0.23, 0.22, 0.19, -0.26, 0.15, 0.13, 0.16, 0.42],
    'coef lower 95%': [-1.29, -0.32, -0.83, -0.45, -0.46, -1.15, -0.88, -0.12, -0.34, 
                       -0.35, -0.82, -1.07, -0.48, -0.44, -0.51, -0.95, -0.49, -0.59, -0.55, -0.24],
    'coef upper 95%': [0.07, 1.07, 0.47, 0.92, 0.91, 0.62, 0.42, 1.43, 1.00, 1.03, 0.77, 
                       0.46, 0.94, 0.88, 0.89, 0.42, 0.80, 0.84, 0.87, 1.07],
    'p':[0.08, 0.29, 0.59, 0.50, 0.52, 0.56, 0.49, 0.10, 0.33, 0.33, 0.95, 0.44, 0.52, 0.51, 0.59, 0.45, 0.64, 0.73, 0.66, 0.21]
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])

# Disabled: this call would mirror the inset horizontally (it inverts the x-axis,
# not the y-axis).
#inset.invert_xaxis()

# Draw the forest plot into the first inset.
create_hazard_ratio_plot(inset, plot_data)

# Add the annotation box for this table, positioned relative to `inset`
# via x_offset.
add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)


###################################################################################################################

# Hard-coded results table for the PH inset: same row order and index labels as
# `plot_data`; columns are 'coef', its lower/upper 95% bounds, and 'p'.
plot_data_ph = pd.DataFrame({
    'coef': [-0.16, 0.22, 0.09, 0.63, -0.05, -0.79, -0.02, -0.08, -0.38, -0.06, 
             -0.44, 0.36, 0.63, -0.21, 0.26, -0.14, -0.30, 0.02, 0.33, -0.02],
    'coef lower 95%': [-0.57, -0.23, -0.23, 0.27, -0.46, -1.82, -0.34, -0.80, -0.80, -0.47, 
                       -1.24, -0.25, 0.18, -0.58, -0.16, -0.55, -0.58, -0.51, -0.13, -0.34],
    'coef upper 95%': [0.26, 0.66, 0.40, 0.99, 0.36, 0.24, 0.31, 0.64, 0.04, 0.35, 
                       0.36, 0.97, 1.09, 0.16, 0.68, 0.28, -0.01, 0.56, 0.78, 0.31],
    'p': [0.46, 0.34, 0.58, 0.001, 0.8, 0.13, 0.92, 0.83, 0.08, 0.78, 0.28, 0.25, 0.01, 0.27, 0.23, 0.52, 0.04, 0.93, 0.16, 0.93]
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])


# Copy of the PH table with 'coef' scaled by 1.1.
# NOTE(review): plot_data_2 is not referenced again in the visible part of this
# cell; it looks like leftover demonstration code.
plot_data_2 = plot_data_ph.copy()  # Example: Using the same data for demonstration
plot_data_2['coef'] = plot_data_2['coef'] * 1.1  # Slightly modify coefficients for differentiation

# Anchor box for the PH inset, in `ax` data coordinates. After the -8.3 shift
# applied below, its x origin lies 7.5 units to the left of the first inset's.
box_start_x_ph = box_end_x + 1.1  # Reference x before the shift applied in bbox_to_anchor
box_end_x_ph = box_start_x_ph + 4.8  # Anchor box width is 4.8 data units

# Insert the PH hazard ratio plot (second inset)
inset_ph = inset_axes(
    ax,
    width="60%",  # Adjust width of the second inset
    height="101.0%",  # Adjust height of the second inset
    bbox_to_anchor=(box_start_x_ph - 8.3, box_start_y - 0.3, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH forest plot with its dedicated function
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add the annotation box for the PH table.
# NOTE(review): the box is attached to `inset` (the first inset), not `inset_ph`;
# presumably the negative x_offset is what moves it next to the PH panel - confirm.
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)






####################################################################################

# Hard-coded results table for the COPD inset: same row order and index labels
# as `plot_data`; columns are 'coef', its lower/upper 95% bounds, and 'p'.
plot_data_COPD = pd.DataFrame({
    'coef': [-0.36, 0.69, -0.02, 0.31, 0.55, -0.01, -0.10, 0.11, 0.16, 0.92, 
             -0.91, 0.03, 1.30, 0.24, -0.05, -0.01, 0.13, -0.15, 0.82, 0.28],
    'coef lower 95%': [-0.89, 0.19, -0.40, -0.36, -0.05, -0.94, -0.47, -0.44, -0.27, 0.36, 
                       -1.54, -0.65, 0.73, -0.16, -0.63, -0.48, -0.37, -0.65, 0.20, -0.50],
    'coef upper 95%': [0.17, 1.18, 0.35, 0.97, 1.15, 0.92, 0.28, 0.67, 0.60, 1.48, 
                       -0.27, 0.71, 1.87, 0.63, 0.53, 0.45, 0.63, 0.36, 1.43, 1.06],
    'p':[0.18, 0.01, 0.90, 0.37, 0.07, 0.98, 0.62, 0.69, 0.46, 0.001, 0.001, 0.94,0.001, 0.24, 0.86, 0.96, 0.60, 0.57, 0.01,0.48] 
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])





# Copy of the COPD table with 'coef' scaled by 1.1.
# NOTE(review): plot_data_3 is not referenced again in the visible part of this
# cell; it looks like leftover demonstration code.
plot_data_3 = plot_data_COPD.copy()  # Example: Using the same data for demonstration
plot_data_3['coef'] = plot_data_3['coef'] * 1.1  # Slightly modify coefficients for differentiation

# Anchor box for the COPD inset, in `ax` data coordinates. After the -4.55 shift
# applied below, its x origin lies between the PH inset's and the first inset's.
box_start_x_COPD = box_end_x + 1.1  # Reference x before the shift applied in bbox_to_anchor
box_end_x_COPD = box_start_x_COPD + 4.8  # Anchor box width is 4.8 data units

# Insert the COPD hazard ratio plot (third inset)
inset_COPD = inset_axes(
    ax,
    width="60%",  # Adjust width of the third inset
    height="100.9%",  # Adjust height of the third inset
    bbox_to_anchor=(box_start_x_COPD - 4.55, box_start_y - 0.3, box_end_x_COPD - box_start_x_COPD, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the COPD forest plot with its dedicated function
create_hazard_ratio_plot_COPD(inset_COPD, plot_data_COPD)

# Add the annotation box for the COPD table.
# NOTE(review): the box is attached to `inset` (the first inset), not `inset_COPD`;
# presumably the negative x_offset is what moves it next to the COPD panel - confirm.
add_annotations_box(inset, plot_data_COPD, box_title="", x_offset=-0.33)

#######################################################################################
# Participant counts per pathway for PH and COPD, one entry per pathway row.
# Some entries are strings such as '08' so that they print with a leading zero
# in the y-tick labels built below.
ph_participants = [44, 16, 11, 78, 25, 13, 42, 11, 10, '08', 15, 20, '06', 49, '05', 16, 16, 50, 10, 19]

copd_participants = ['07', '06','08', 14, 17, '07', 31, '06', '08', 10, '06', 12, '05', 34, '05', '05', '05', 34, '07', 12]







# Build y-tick positions and "PH, COPD" labels, one per pathway row.
# The -0.4 shift moves each tick from the integer row value towards the vertical
# centre of the node boxes.
# NOTE(review): tick positions ascend from the bottom (0.6, 1.6, ...), while the
# node loop draws the FIRST pathway in the TOP row. Label i therefore lands on the
# (i+1)-th row from the bottom - confirm the two lists are ordered bottom-to-top.
#y_ticks = list(range(1, len(ph_participants) + 1))
y_ticks = [ytick - 0.4 for ytick in range(1, len(ph_participants) + 1)]
y_tick_labels = [f"{ph}, {hf}" for ph, hf in zip(ph_participants, copd_participants)]  # `hf` holds the COPD count

# Set x-axis and y-axis limits
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)  # Right limit: last node column plus 6.5 extra data units
#ax.set_ylim(0.5, len(pathways) + 0.5)  # Set y-axis limits to fit all pathways
ax.set_ylim(0, len(pathways)+0.5)  # Start the y-axis at 0 and end half a unit above the number of pathways

# Alternative kept for reference: hide the y-ticks entirely
#ax.set_yticks([])  # Optionally, remove y-ticks
#ax.set_yticklabels([])  # Ensure no y-axis labels


# Explicitly set y-tick positions and labels
ax.set_yticks(y_ticks)  # Set y-tick positions
ax.set_yticklabels(y_tick_labels, fontsize=10)  # Display PH and COPD participant counts as y-tick labels
# Add a title above the y-tick labels for clarification
ax.annotate(
    "No. of Participants\n (PH , COPD)",  # Title text
    xy=(-0.02, 1.00),  # Just left of the y-axis, at the top edge of the axes
    xycoords='axes fraction',  # Coordinates relative to the axes
    fontsize=12,  # Font size
    ha='center',  # Horizontal alignment
    va='bottom',  # Text sits above the anchor point
    rotation=0  # No rotation
)


# Manually set x-tick positions and labels for the disease sequence steps 1-5
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Legend: one swatch per node type, in display order.
# Each entry maps the legend text to the node label used in `pathways`; the swatch
# colour is looked up in `color_dict`, the same mapping that fills the node boxes.
# The previous hard-coded colours had drifted from `color_dict` for four of the
# six entries, so the legend no longer matched the boxes it describes.
legend_nodes = {
    "PH and COPD as Index Conditions": 'PH/COPD',
    "Type 2 Diabetes Mellitus - E11.9": 'E11.9',
    "Hypertension - I10": 'I10',
    "Hyperlipidemia - E78": 'E78.0',
    "Chronic Ischemic Heart Disease - I25.1": 'I25.1',
    "Atrial Fibrillation and Flutter - I48": 'I48',
}
handles = [
    mpatches.Patch(color=color_dict[node], label=full_name)
    for full_name, node in legend_nodes.items()
]

# Place the legend below the axes, without a frame, in four columns
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.09), loc='upper center', ncol=4, fontsize=14, frameon=False)



# Add descriptive axis labels and the figure title (title shifted right via x=0.8)
ax.set_xlabel("Disease Sequence Progression", fontsize=14, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/COPD", fontsize=14, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)

# Move the x-label under the node columns (position in axes-fraction coordinates)
ax.xaxis.set_label_coords(0.2, -0.05) 

# Clean up the plot aesthetics by hiding the top and right spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Let matplotlib adjust the layout before saving
plt.tight_layout()
# Save the plot as PNG.
# NOTE(review): a 16x8 inch figure at dpi=1200 produces a 19200x9600 pixel image.
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """Draw a forest plot of hazard ratios with 95% confidence intervals on ``ax``.

    Rows of ``plot_data`` are grouped by the starting condition encoded in
    their index label ("Pathway_<code> -> ...") and drawn top to bottom in the
    order E11.9, E78.0, I10, I25.1, I48, one colour per group.  The log-scale
    values are exponentiated, so the x-axis shows hazard ratios and the
    reference line sits at HR = 1.  Each whisker end is labelled with its value.

    Parameters:
    - ax: Matplotlib axes to draw on.  All styling is applied to this axes
      object itself, never to pyplot's "current axes".
    - plot_data: DataFrame with a string index and the columns 'coef',
      'coef lower 95%' and 'coef upper 95%' (log hazard ratios).
    """
    # Marker/whisker colour per starting condition; dict order is drawing order.
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # int() truncates this fractional spacing to 0, so groups occupy consecutive
    # integer rows.  add_annotations_box places its text at range(len(plot_data))
    # and therefore relies on the rows staying contiguous.
    spacing = 0.8
    offset = 0.05  # Horizontal gap between a whisker end and its label

    current_y = 0
    y_positions = []  # Every row that received a data point
    for group, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks = range(current_y, current_y + len(data))
        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):
            # Back-transform the estimate and its bounds from log(HR) to HR.
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],  # Asymmetric CI as a 2x1 array
                fmt='o',
                capsize=4.0,
                color=color
            )

            # Print the CI bounds just outside the whisker ends.
            ax.text(lower_hr - offset, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + offset, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        current_y += len(data) + int(spacing)

    # Fit the y-range to the occupied rows plus half a row of padding.  With no
    # matching rows there is nothing to fit, so the default limits are kept
    # instead of failing on min()/max() of an empty list.
    if y_positions:
        buffer = 0.5
        ax.set_ylim(min(y_positions) - buffer, max(y_positions) + buffer)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # HR = 1 reference line

    # Fixed hazard-ratio ticks, printed with one decimal at font size 10.
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Label through `ax` (not plt.xlabel / plt.xticks), so the text always lands
    # on the axes that was passed in, whatever pyplot's current axes is.
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s COPD", fontsize=13)


    
    
    
def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a separate box for annotations (p-value and HR per row) next to a plot.

    One line of text "P:<p>, HR:<exp(coef)>" is written per row of ``plot_data``
    at y = 0, 1, 2, ... in row order, and the box takes over the y-limits of
    ``ax`` so the text lines up with rows drawn at those integer positions.
    P-values that would print as "0.00" with two decimals are shown as "<0.01"
    instead, so a small p-value is never displayed as zero.

    Parameters:
    - ax: The axis the box is attached to; the box is positioned in this axis'
      axes-fraction coordinates and copies its y-limits.
    - plot_data: DataFrame containing the columns 'p' and 'coef'.
    - box_title: Title of the annotation box.
    - x_offset: Horizontal position of the box's anchor, in axes-fraction units
      of ``ax``.
    """
    # Create a new inset axis for annotations
    inset_box = inset_axes(
        ax,
        width="145%",  # Width relative to the 0.2-wide anchor box
        height="100%",  # Height of the box matches the plot
        bbox_to_anchor=(x_offset, 0, 0.2, 1),  # Adjust placement
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Remove spines and ticks from the annotation box
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # Align annotations with hazard ratio bars
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        p_text = f"{p_val:.2f}"
        if p_text == "0.00":
            # Two decimals cannot represent this value; report it as a bound.
            p_text = "<0.01"
        annotation = f"P:{p_text}, HR:{np.exp(coef):.2f}"  # Include HR as exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Use the same (possibly inverted) y-range as the plot the box belongs to
    inset_box.set_ylim(ax.get_ylim())

    # Draw the box around the entire annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Full extent of the box in its own axes coordinates
        transform=inset_box.transAxes,
        edgecolor="black",  # Box color
        facecolor="none",   # Transparent inside
        linewidth=1.5,      # Thickness of the box border
        zorder=10           # Ensure the box is on top
    )
    inset_box.add_patch(rect)
    
 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """Render the PH forest plot: hazard ratios with 95% CIs, one row per pathway.

    Rows are selected by the prefix of their index label and drawn group by
    group (E11.9, E78.0, I10, I25.1, I48), each group in its own colour.  The
    log-scale columns are exponentiated before plotting, and both whisker ends
    are labelled with their value.

    Parameters:
    - ax: Matplotlib axes that receives the plot.
    - plot_data_ph: DataFrame with a string index of the form
      "Pathway_<code> -> ..." and the columns 'coef', 'coef lower 95%' and
      'coef upper 95%'.
    """
    # (index prefix that selects the group, marker/whisker colour), in drawing order
    group_styles = (
        ("Pathway_E11.9", '#228B22'),  # Forest Green
        ("Pathway_E78.0", '#D2691E'),  # Chocolate - Rich tan
        ("Pathway_I10", '#7D3C98'),    # Amethyst - Deeper purple
        ("Pathway_I25.1", '#FF69B4'),  # Hot Pink - Vibrant pink
        ("Pathway_I48", '#4682B4'),    # Steel Blue - Deeper blue
    )
    # int() truncates the fractional spacing to 0: groups follow each other on
    # consecutive integer rows.
    group_gap = int(0.4)
    label_pad = 0.05  # Horizontal gap between a whisker end and its label

    labels = plot_data_ph.index.str
    next_row = 0
    used_rows = []
    for prefix, colour in group_styles:
        subset = plot_data_ph.loc[labels.startswith(prefix)]
        rows = range(next_row, next_row + len(subset))
        per_row = zip(rows, subset['coef'], subset['coef lower 95%'], subset['coef upper 95%'])
        for row, log_hr, log_low, log_high in per_row:
            # Back-transform from log(HR) to HR.
            point = np.exp(log_hr)
            low = np.exp(log_low)
            high = np.exp(log_high)

            ax.errorbar(
                point, row,
                xerr=[[point - low], [high - point]],  # Asymmetric CI as a 2x1 array
                fmt='o', capsize=4.0, color=colour,
            )

            # Print the CI bounds just outside the whisker ends.
            ax.text(low - label_pad, row, f"{low:.2f}",
                    va='center', ha='right', fontsize=8, color='black')
            ax.text(high + label_pad, row, f"{high:.2f}",
                    va='center', ha='left', fontsize=8, color='black')

        used_rows.extend(rows)
        next_row += len(subset) + group_gap

    # Fit the y-range to the occupied rows plus half a row of padding, then
    # flip it so the first row is drawn at the top.
    padding = 0.5
    ax.set_ylim(min(used_rows) - padding, max(used_rows) + padding)
    ax.invert_yaxis()

    # Reference line at HR = 1.
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)

    # Fixed hazard-ratio ticks, printed with one decimal.
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels([format(tick, ".1f") for tick in hr_ticks], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # Rows are identified by the surrounding figure, not by y-tick labels.
    ax.set_yticks([])




##############################################################################################
    
    
    
def create_hazard_ratio_plot_COPD(ax, plot_data_COPD):
    """Draw the COPD forest plot of hazard ratios with 95% CIs on ``ax``.

    Rows of ``plot_data_COPD`` are grouped by the starting condition encoded in
    their index label ("Pathway_<code> -> ...") and drawn top to bottom in the
    order E11.9, E78.0, I10, I25.1, I48, one colour per group.  The log-scale
    values are exponentiated before plotting, and each whisker end is labelled
    with its value, placed directly at the whisker end.

    Parameters:
    - ax: Matplotlib axes to draw on.
    - plot_data_COPD: DataFrame with a string index and the columns 'coef',
      'coef lower 95%' and 'coef upper 95%' (log hazard ratios).
    """
    # Marker/whisker colour per starting condition; dict order is drawing order.
    group_colors_COPD = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # int() truncates this fractional spacing to 0, so groups occupy consecutive
    # integer rows.
    spacing_COPD = 0.8

    current_y_COPD = 0
    y_positions_COPD = []  # Every row that received a data point
    for group, color in group_colors_COPD.items():
        data = plot_data_COPD.loc[plot_data_COPD.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks_COPD = range(current_y_COPD, current_y_COPD + len(data))
        for y, coef, lower, upper in zip(
                group_y_ticks_COPD, data['coef'], data['coef lower 95%'], data['coef upper 95%']):
            # Back-transform the estimate and its bounds from log(HR) to HR.
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],  # Asymmetric CI as a 2x1 array
                fmt='o',
                capsize=4.0,
                color=color
            )

            # Label both CI bounds exactly at the whisker ends.  (The original
            # defined an `offset` here but never applied it; it is removed.)
            ax.text(lower_hr, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions_COPD.extend(group_y_ticks_COPD)
        current_y_COPD += len(data) + int(spacing_COPD)

    # Fit the y-range to the occupied rows plus half a row of padding.  With no
    # matching rows there is nothing to fit, so the default limits are kept
    # instead of failing on min()/max() of an empty list.
    if y_positions_COPD:
        buffer_COPD = 0.5
        ax.set_ylim(min(y_positions_COPD) - buffer_COPD, max(y_positions_COPD) + buffer_COPD)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # HR = 1 reference line

    # Fixed hazard-ratio ticks, printed with one decimal.
    x_ticks_COPD = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
    ax.set_xticks(x_ticks_COPD)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks_COPD], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with COPD Pathways", fontsize=13)

    # Rows are identified by the surrounding figure, not by y-tick labels.
    ax.set_yticks([])

    
    
    
########################################################################################################################    

  

# Disease-progression pathways shown in the left-hand panel, one per row.
# Each string lists the node labels in order, separated by " -> " (the drawing
# loop below splits on exactly that separator), and ends with the "PH/COPD" node.
# Entries are grouped by their first code and follow the same order as the
# index of the hazard-ratio tables defined further down.
pathways = [    
    # Pathways starting with E11.9
    "E11.9 -> E78.0 -> I10 -> PH/COPD",
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I10 -> PH/COPD",
    "E11.9 -> I10 -> E78.0 -> PH/COPD",
    "E11.9 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I25.1 -> PH/COPD",
    
    # Pathways starting with E78.0
    "E78.0 -> I10 -> PH/COPD",
    "E78.0 -> I10 -> E11.9 -> PH/COPD",
    "E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E78.0 -> I10 -> I48 -> PH/COPD",
    "E78.0 -> I25.1 -> PH/COPD",
    
    # Pathways starting with I10
    "I10 -> E11.9 -> PH/COPD",
    "I10 -> E11.9 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> I25.1 -> PH/COPD",
    "I10 -> I25.1 -> PH/COPD",
    "I10 -> I48 -> PH/COPD",
    
    # Pathways starting with I25.1
    "I25.1 -> E78.0 -> I10 -> PH/COPD",
    "I25.1 -> I10 -> PH/COPD",
    
    # Pathways starting with I48
    "I48 -> I10 -> PH/COPD"
]



# Fill colour of each node box, keyed by the node label as it appears in
# `pathways`. The drawing loop falls back to white for labels missing here.
color_dict = {
    'PH/COPD': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3'
}




# Create the main figure; `ax` hosts the pathway node grid and anchors the insets.
fig, ax = plt.subplots(figsize=(16, 8))
# Added to the unit column step: with -0.38 consecutive node columns sit
# 0.62 apart, just over the 0.6 box width used below.
gap = -0.38  # Horizontal gap between nodes
# NOTE(review): row_gap is not referenced anywhere in the visible part of this cell.
row_gap = 1  # Vertical gap between rows



# Plot the pathways: one row per pathway, one coloured box per node.
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    for col, condition in enumerate(conditions, start=1):
        # Column position; the first pathway gets the highest y (top row).
        x_pos = col + (col - 1) * gap
        y_pos = len(pathways) - row  + 1 

        # Draw rectangle for each node (white if the label has no entry in color_dict)
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.87), 0.6, 0.9, 
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Add text to node, centred inside the rectangle
        ax.text(x_pos - 0.188, y_pos - 0.46, condition, ha='center', va='center', fontsize=9.6)

        # Arrows between nodes are currently disabled; the original snippet is
        # kept below for reference (its parentheses are not balanced as written).
        # if col < len(conditions):
        #     next_x_pos = x_pos + 1 + gap
        #     ax.annotate(
        #         '',
        #         xy=(next_x_pos - 0.5, y_pos - 0.4),  # End position of arrow
        #         xytext=(x_pos + 0.11, y_pos - 0.4),  # Start position of arrow
        #         arrowprops=dict(
        #             arrowstyle="->,head_width=0.2,head_length=0.3",  # Adjust arrowhead size
        #             color='black',  # Arrow color
        #             lw=0.7,  # Line width (thicker arrow)
        #             shrinkA=0,  # Adjust start of the arrow (in points)
        #             shrinkB=0   # Adjust end of the arrow (in points)
        #
        #     )
        # )



# Reference rectangle (in `ax` data coordinates) used to size and place the insets.
# NOTE(review): `conditions` is the value left over from the LAST loop iteration,
# so both x values depend on the number of nodes in the final pathway only.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5     # Ending y position for the box

#####################################################################################################################


# Insert the hazard ratio plot into the box area
inset = inset_axes(
    ax,
    width="60%",  # Adjust width of the inset
    height="100.9%",  # Adjust height of the inset
    bbox_to_anchor=(box_start_x + 5.1, box_start_y - 0.3, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
    bbox_transform=ax.transData,
    loc='center'
)


# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
plot_data = pd.DataFrame({
    'coef': [-0.61, 0.38, -0.18, 0.23, 0.22, -0.26, -0.23, 0.65, 0.33, 0.34, -0.03, 
             -0.30, 0.23, 0.22, 0.19, -0.26, 0.15, 0.13, 0.16, 0.42],
    'coef lower 95%': [-1.29, -0.32, -0.83, -0.45, -0.46, -1.15, -0.88, -0.12, -0.34, 
                       -0.35, -0.82, -1.07, -0.48, -0.44, -0.51, -0.95, -0.49, -0.59, -0.55, -0.24],
    'coef upper 95%': [0.07, 1.07, 0.47, 0.92, 0.91, 0.62, 0.42, 1.43, 1.00, 1.03, 0.77, 
                       0.46, 0.94, 0.88, 0.89, 0.42, 0.80, 0.84, 0.87, 1.07],
    'p':[0.08, 0.29, 0.59, 0.50, 0.52, 0.56, 0.49, 0.10, 0.33, 0.33, 0.95, 0.44, 0.52, 0.51, 0.59, 0.45, 0.64, 0.73, 0.66, 0.21]
}, index=[        
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])

# Flip the inset box upside down by inverting the y-axis
#inset.invert_xaxis()

create_hazard_ratio_plot(inset, plot_data)

# Add a separate annotation box listing each pathway's p-value and hazard ratio
add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)


###################################################################################################################

plot_data_ph = pd.DataFrame({
    'coef': [-0.16, 0.22, 0.09, 0.63, -0.05, -0.79, -0.02, -0.08, -0.38, -0.06, 
             -0.44, 0.36, 0.63, -0.21, 0.26, -0.14, -0.30, 0.02, 0.33, -0.02],
    'coef lower 95%': [-0.57, -0.23, -0.23, 0.27, -0.46, -1.82, -0.34, -0.80, -0.80, -0.47, 
                       -1.24, -0.25, 0.18, -0.58, -0.16, -0.55, -0.58, -0.51, -0.13, -0.34],
    'coef upper 95%': [0.26, 0.66, 0.40, 0.99, 0.36, 0.24, 0.31, 0.64, 0.04, 0.35, 
                       0.36, 0.97, 1.09, 0.16, 0.68, 0.28, -0.01, 0.56, 0.78, 0.31],
    'p': [0.46, 0.34, 0.58, 0.001, 0.8, 0.13, 0.92, 0.83, 0.08, 0.78, 0.28, 0.25, 0.01, 0.27, 0.23, 0.52, 0.04, 0.93, 0.16, 0.93]
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])


# Add the second hazard ratio plot
plot_data_2 = plot_data_ph.copy()  # Example: Using the same data for demonstration
plot_data_2['coef'] = plot_data_2['coef'] * 1.1  # Slightly modify coefficients for differentiation#

# Define the position for the second inset
box_start_x_ph = box_end_x + 1.1  # Start the second inset further to the right
box_end_x_ph = box_start_x_ph + 4.8  # Adjust width for the second inset

# Insert the second hazard ratio plot
inset_ph = inset_axes(
    ax,
    width="60%",  # Adjust width of the second inset
    height="101.0%",  # Adjust height of the second inset
    bbox_to_anchor=(box_start_x_ph - 8.3, box_start_y - 0.3, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Create the second hazard ratio plot using the same function
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add a separate annotation box listing each pathway's p-value and hazard ratio
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)






####################################################################################

plot_data_COPD = pd.DataFrame({
    'coef': [-0.36, 0.69, -0.02, 0.31, 0.55, -0.01, -0.10, 0.11, 0.16, 0.92, 
             -0.91, 0.03, 1.30, 0.24, -0.05, -0.01, 0.13, -0.15, 0.82, 0.28],
    'coef lower 95%': [-0.89, 0.19, -0.40, -0.36, -0.05, -0.94, -0.47, -0.44, -0.27, 0.36, 
                       -1.54, -0.65, 0.73, -0.16, -0.63, -0.48, -0.37, -0.65, 0.20, -0.50],
    'coef upper 95%': [0.17, 1.18, 0.35, 0.97, 1.15, 0.92, 0.28, 0.67, 0.60, 1.48, 
                       -0.27, 0.71, 1.87, 0.63, 0.53, 0.45, 0.63, 0.36, 1.43, 1.06],
    'p':[0.18, 0.01, 0.90, 0.37, 0.07, 0.98, 0.62, 0.69, 0.46, 0.001, 0.001, 0.94,0.001, 0.24, 0.86, 0.96, 0.60, 0.57, 0.01,0.48] 
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])





# Prepare the COPD hazard ratio plot (third inset); plot_data_3 is a scaled copy that is not used below
plot_data_3 = plot_data_COPD.copy()  # Example: Using the same data for demonstration
plot_data_3['coef'] = plot_data_3['coef'] * 1.1  # Slightly modify coefficients for differentiation

# Define the position for the second inset
box_start_x_COPD = box_end_x + 1.1  # Start the second inset further to the right
box_end_x_COPD = box_start_x_COPD + 4.8  # Adjust width for the second inset

# Insert the second hazard ratio plot
inset_COPD = inset_axes(
    ax,
    width="60%",  # Adjust width of the second inset
    height="100.9%",  # Adjust height of the second inset
    bbox_to_anchor=(box_start_x_COPD - 4.55, box_start_y - 0.3, box_end_x_COPD - box_start_x_COPD, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Create the second hazard ratio plot using the same function
create_hazard_ratio_plot_COPD(inset_COPD, plot_data_COPD)

# Add a separate annotation box listing each pathway's p-value and hazard ratio
add_annotations_box(inset, plot_data_COPD, box_title="", x_offset=-0.33)

#######################################################################################
# Data for PH and COPD participants in the desired order
ph_participants = [44, 16, 11, 78, 25, 13, 42, 11, 10, '08', 15, 20, '06', 49, '05', 16, 16, 50, 10, 19]

copd_participants = ['07', '06','08', 14, 17, '07', 31, '06', '08', 10, '06', 12, '05', 34, '05', '05', '05', 34, '07', 12]







# Build y-tick labels showing both PH and COPD participant counts
#y_ticks = list(range(1, len(ph_participants) + 1))
y_ticks = [ytick - 0.4 for ytick in range(1, len(ph_participants) + 1)]
y_tick_labels = [f"{ph}, {hf}" for ph, hf in zip(ph_participants, copd_participants)]

# Set x-axis and y-axis limits and labels
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)  # Adjust x-axis limits dynamically
#ax.set_ylim(0.5, len(pathways) + 0.5)  # Set y-axis limits to fit all pathways
ax.set_ylim(0, len(pathways)+0.5)  # Start the y-axis at 0 and end at the number of pathways

# Explicitly set y-tick positions if needed or remove them
#ax.set_yticks([])  # Optionally, remove y-ticks
#ax.set_yticklabels([])  # Ensure no y-axis labels


# Explicitly set y-tick positions and labels
ax.set_yticks(y_ticks)  # Set y-tick positions
ax.set_yticklabels(y_tick_labels, fontsize=10)  # Display PH and HF participants as y-tick labels
# Add a title above the y-tick labels for clarification
ax.annotate(
    "No. of Participants\n (PH , COPD)",  # Title text
    xy=(-0.02, 1.00),  # Position above the y-axis (relative to plot)
    xycoords='axes fraction',  # Coordinates relative to the axes
    fontsize=12,  # Font size
    ha='center',  # Horizontal alignment
    va='bottom',  # Vertical alignment
    rotation=0  # No rotation
)


# Manually set x-tick positions and labels for disease sequence steps
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Update legend with proper formatting and alignment
handles = [
    mpatches.Patch(color=color, label=full_name)
    for full_name, color in {
        
        "PH and COPD as Index Conditions": '#b3b3b3',
        "Type 2 Diabetes Mellitus - E11.9": '#88cc88',
        "Hypertension - I10": '#a993cc',
        "Hyperlipidemia - E78": '#d2b48c',
        "Chronic Ischemic Heart Disease - I25.1": '#FFC0CB',
        "Atrial Fibrillation and Flutter - I48": '#add8e6',
        
    }.items()
]

# Position and style the legend
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.09), loc='upper center', ncol=4, fontsize=14, frameon=False)



# Add descriptive axis labels
ax.set_xlabel("Disease Sequence Progression", fontsize=14, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/COPD", fontsize=14, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)

ax.xaxis.set_label_coords(0.2, -0.05) 

# Clean up the plot aesthetics by removing unnecessary spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Ensure everything fits nicely into the figure
plt.tight_layout()
# Save the plot as PNG
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draws a forest plot of hazard ratios with their 95% confidence intervals.

    Rows are grouped by the first condition of their pathway and drawn from top
    to bottom in the order E11.9, E78.0, I10, I25.1, I48, one colour per group.
    The coefficient columns are exponentiated, so the x-axis shows hazard
    ratios and the dashed line at x=1 marks "no effect".

    Parameters:
    - ax: Axes to draw on (in this notebook, an inset axes).
    - plot_data: DataFrame whose index holds labels of the form
      "Pathway_<code> -> ..." and whose columns include 'coef',
      'coef lower 95%' and 'coef upper 95%'. Rows whose label matches none of
      the five known prefixes are not drawn.
    """
    # Marker/CI colour per starting condition; dict order is the drawing order.
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    # Step 1: Split the rows by the starting condition of their pathway
    pathway_groups = {
        group: plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        for group in group_colors
    }

    # Step 2: Plot grouped pathways
    current_y = 0
    y_positions = []  # Every y position that received a row
    # int(0.8) truncates to 0, so no blank row is inserted between groups and
    # rows land on y = 0, 1, 2, ... This keeps them level with the rows written
    # by add_annotations_box, which also counts from 0 in steps of 1.
    spacing = 0.8
    offset = 0.05  # Horizontal space between a CI end and its numeric label

    for group, data in pathway_groups.items():
        group_y_ticks = range(current_y, current_y + len(data))
        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):

            # Convert the log-scale coefficient and its bounds to hazard ratios
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # errorbar expects distances from the point, not absolute bounds
            ax.errorbar(
                hr,  # HR value
                y,   # Y position
                xerr=[[hr - lower_hr], [upper_hr - hr]],  # CI as 2D array
                fmt='o',
                capsize=4.5,
                color=group_colors[group]  # Use the color for the current group
            )

            # Print the CI bounds just outside the two whisker ends
            ax.text(lower_hr - offset, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + offset, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        current_y += len(data) + int(spacing)

    # Step 3: Fit the y-axis tightly around the plotted rows. Skipped when no
    # row matched a known prefix, where min()/max() would raise ValueError.
    if y_positions:
        buffer = 0.5  # Half a row of padding above and below
        ax.set_ylim(min(y_positions) - buffer, max(y_positions) + buffer)

    # Step 4: Customize plot
    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR=1

    # Add custom x-ticks
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]  # HR values
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Label through `ax` itself: the pyplot helpers (plt.xlabel / plt.xticks)
    # act on whichever axes is current, which is not guaranteed to be `ax`.
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s COPD", fontsize=13)


    
    
    
def format_p_value(p_val):
    """
    Formats a p-value for display without ever printing it as zero.

    Two decimals are used by default. A value that would round to "0.00" is
    shown with three decimals instead, and one that still rounds to "0.000" is
    reported as "<0.001".

    Parameters:
    - p_val: The p-value as a number.

    Returns the formatted string.
    """
    text = f"{p_val:.2f}"
    if text != "0.00":
        return text
    text = f"{p_val:.3f}"
    return "<0.001" if text == "0.000" else text


def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a separate box for annotations (p-value and HR per row) next to the plot.

    One line of text is written per row of `plot_data`, at y = 0, 1, 2, ... in
    DataFrame order, and the box takes over the y-limits of `ax`. The text
    therefore lines up with the bars only when the hazard ratio plot placed its
    rows at the same consecutive positions in the same order.

    Parameters:
    - ax: The main axis where hazard ratio bars are plotted; the box is anchored
      to it and copies its y-limits.
    - plot_data: DataFrame containing the data; the 'p' and 'coef' columns are read.
    - box_title: Title of the annotation box.
    - x_offset: Horizontal position of the box's left edge, in axes-fraction
      coordinates of `ax` (1.0 is the right edge of `ax`).
    """
    # Create a new inset axis for annotations. The percentages are relative to
    # the anchor rectangle (0.2 wide, 1 high, in `ax` axes coordinates).
    inset_box = inset_axes(
        ax,
        width="145%",  # Width of the annotation box
        height="100%",  # Height of the box matches the plot
        bbox_to_anchor=(x_offset, 0, 0.2, 1),  # Adjust placement
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Remove spines and ticks from the annotation box
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # One text line per row; HR is exp(coef). p-values go through
    # format_p_value so that e.g. 0.001 is not printed as "0.00".
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        annotation = f"P:{format_p_value(p_val)}, HR:{np.exp(coef):.2f}"
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Copy the y-limits (including any inversion) so rows match the plot
    inset_box.set_ylim(ax.get_ylim())

    # Draw the box around the entire annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Starting coordinates and width/height of the rectangle
        transform=inset_box.transAxes,
        edgecolor="black",  # Box color
        facecolor="none",   # Transparent inside
        linewidth=1.5,      # Thickness of the box border
        zorder=10           # Ensure the box is on top
    )
    inset_box.add_patch(rect)
    
 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draws the PH forest plot: one hazard ratio marker with its 95% CI per pathway.

    Parameters:
    - ax: Axes to draw on.
    - plot_data_ph: DataFrame indexed by "Pathway_<code> -> ..." labels with the
      columns 'coef', 'coef lower 95%' and 'coef upper 95%'. Rows are grouped by
      their starting condition; labels outside the five known prefixes are ignored.
    """
    # Starting condition -> marker colour; insertion order is top-to-bottom order.
    palette = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }
    label_gap = 0.05  # Space between a CI end and its printed value

    # Select every group's rows up front, before anything is drawn.
    grouped = [
        (colour, plot_data_ph.loc[plot_data_ph.index.str.startswith("Pathway_" + code)])
        for code, colour in palette.items()
    ]

    row_cursor = 0  # y position of the next row to draw
    used_rows = []  # every y position that has been assigned
    for colour, rows in grouped:
        triples = zip(rows['coef'], rows['coef lower 95%'], rows['coef upper 95%'])
        for y, (coef, lower, upper) in enumerate(triples, start=row_cursor):
            # Exponentiate the log-scale values into hazard ratios
            hr, lower_hr, upper_hr = np.exp(coef), np.exp(lower), np.exp(upper)

            # Whisker lengths are distances from the marker to each CI bound
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.5,
                color=colour
            )

            # Numeric CI bounds just outside the whisker ends
            ax.text(lower_hr - label_gap, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + label_gap, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        used_rows.extend(range(row_cursor, row_cursor + len(rows)))
        # int(0.4) is 0, so consecutive groups are stacked without a gap
        row_cursor += len(rows) + int(0.4)

    # Half a row of padding above the first and below the last row
    ax.set_ylim(min(used_rows) - 0.5, max(used_rows) + 0.5)

    ax.invert_yaxis()  # first row on top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # HR = 1 reference

    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in hr_ticks], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # No y tick marks or labels on this axes
    ax.set_yticks([])




##############################################################################################
    
    
    
def create_hazard_ratio_plot_COPD(ax, plot_data_COPD):
    """
    Draws the COPD forest plot: hazard ratio and 95% CI for every pathway row.

    Parameters:
    - ax: Axes to draw on.
    - plot_data_COPD: DataFrame indexed by "Pathway_<code> -> ..." labels with
      the columns 'coef', 'coef lower 95%' and 'coef upper 95%'. Rows are drawn
      grouped by starting condition (E11.9, E78.0, I10, I25.1, I48).
    """
    # (starting condition, colour) pairs in drawing order
    group_styles = (
        ('E11.9', '#228B22'),  # Forest Green
        ('E78.0', '#D2691E'),  # Chocolate - Rich tan
        ('I10', '#7D3C98'),  # Amethyst - Deeper purple
        ('I25.1', '#FF69B4'),  # Hot Pink - Vibrant pink
        ('I48', '#4682B4'),  # Steel Blue - Deeper blue
    )
    ci_columns = ['coef', 'coef lower 95%', 'coef upper 95%']

    # Resolve each group's rows before drawing starts
    labels = plot_data_COPD.index
    subsets = [plot_data_COPD.loc[labels.str.startswith(f"Pathway_{code}")] for code, _ in group_styles]

    first_row = 0  # y position where the current group starts
    occupied = []  # all y positions handed out so far
    for (_, shade), subset in zip(group_styles, subsets):
        group_rows = range(first_row, first_row + len(subset))
        for y, coef, lower, upper in zip(group_rows, *(subset[name] for name in ci_columns)):
            # Hazard ratio and CI bounds from the log-scale coefficients
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            left_whisker = hr - lower_hr
            right_whisker = upper_hr - hr
            ax.errorbar(hr, y, xerr=[[left_whisker], [right_whisker]], fmt='o', capsize=4.5, color=shade)

            # CI values are printed exactly at the whisker ends (no extra offset)
            ax.text(lower_hr, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        occupied.extend(group_rows)
        # int(0.8) is 0: groups follow each other without an empty row
        first_row = first_row + len(subset) + int(0.8)

    # Tight y-limits with half a row of padding on both sides
    padding = 0.5
    lowest, highest = min(occupied), max(occupied)
    ax.set_ylim(lowest - padding, highest + padding)

    ax.invert_yaxis()  # top-to-bottom row order
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # reference at HR = 1

    tick_values = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5]
    ax.set_xticks(tick_values)
    ax.set_xticklabels([f"{tick:.1f}" for tick in tick_values], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with COPD Pathways", fontsize=13)

    # No y tick marks or labels on this axes
    ax.set_yticks([])

    
    
    
########################################################################################################################    

  

# Disease pathways shown as rows of the figure. Each string lists the conditions
# in sequence, separated by " -> ", and ends in the index condition "PH/COPD".
# The 20 entries are in the same order as the index of plot_data_ph /
# plot_data_COPD below (those labels add a "Pathway_" prefix and omit the
# trailing "PH/COPD").
pathways = [    
    "E11.9 -> E78.0 -> I10 -> PH/COPD",
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I10 -> PH/COPD",
    "E11.9 -> I10 -> E78.0 -> PH/COPD",
    "E11.9 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I25.1 -> PH/COPD",
    
    "E78.0 -> I10 -> PH/COPD",
    "E78.0 -> I10 -> E11.9 -> PH/COPD",
    "E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E78.0 -> I10 -> I48 -> PH/COPD",
    "E78.0 -> I25.1 -> PH/COPD",
    
    "I10 -> E11.9 -> PH/COPD",
    "I10 -> E11.9 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> I25.1 -> PH/COPD",
    "I10 -> I25.1 -> PH/COPD",
    "I10 -> I48 -> PH/COPD",
    
    "I25.1 -> E78.0 -> I10 -> PH/COPD",
    "I25.1 -> I10 -> PH/COPD",
    
    "I48 -> I10 -> PH/COPD"
]



# Fill colour of the node rectangle for each condition code; a code missing
# from this dict is drawn white (see color_dict.get below).
color_dict = {
    'PH/COPD': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3'
}




# Create the plot
fig, ax = plt.subplots(figsize=(16, 8))
# Negative gap: node columns end up 1 + gap = 0.62 apart, just wider than the
# 0.6-wide node rectangles drawn below.
gap = -0.38  # Horizontal gap between nodes
row_gap = 1  # Vertical gap between rows (not used in the code visible here)



# Plot the pathways: one row per pathway, one rectangle per condition
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    for col, condition in enumerate(conditions, start=1):
        # Calculate node position; the first pathway gets the highest y (top row)
        x_pos = col + (col - 1) * gap
        y_pos = len(pathways) - row  + 1 

        # Draw rectangle for each node (spans y_pos - 0.87 .. y_pos + 0.03)
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.87), 0.6, 0.9, 
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Add text to node, roughly centred in the rectangle
        ax.text(x_pos - 0.188, y_pos - 0.46, condition, ha='center', va='center', fontsize=9.6)

        # Arrows between nodes are currently disabled:
        # if col < len(conditions):
        #     next_x_pos = x_pos + 1 + gap
        #     ax.annotate(
        #         '',
        #         xy=(next_x_pos - 0.5, y_pos - 0.4),  # End position of arrow
        #         xytext=(x_pos + 0.11, y_pos - 0.4),  # Start position of arrow
        #         arrowprops=dict(
        #             arrowstyle="->,head_width=0.2,head_length=0.3",  # Adjust arrowhead size
        #             color='black',  # Arrow color
        #            lw=0.7,  # Line width (thicker arrow)
        #            shrinkA=0,  # Adjust start of the arrow (in points)
        #            shrinkB=0   # Adjust end of the arrow (in points)
        #        
        #    )
        # )



# Reference rectangle (in data coordinates of `ax`) used to place the inset plots.
# NOTE(review): `conditions` here is whatever the loop above left behind, i.e.
# the split of the LAST pathway ("I48 -> I10 -> PH/COPD", 3 entries), so the
# box position silently changes if the last pathway changes length.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5     # Ending y position for the box

#####################################################################################################################


# Insert the hazard ratio plot into the box area
#inset = inset_axes(
#    ax,
#    width="60%",  # Adjust width of the inset
#    height="100.9%",  # Adjust height of the inset
#    bbox_to_anchor=(box_start_x + 5.1, box_start_y - 0.3, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
#    bbox_transform=ax.transData,
#    loc='center'
#)
##
#
# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
#plot_data = pd.DataFrame({
#    'coef': [-0.61, 0.38, -0.18, 0.23, 0.22, -0.26, -0.23, 0.65, 0.33, 0.34, -0.03, 
#             -0.30, 0.23, 0.22, 0.19, -0.26, 0.15, 0.13, 0.16, 0.42],
#    'coef lower 95%': [-1.29, -0.32, -0.83, -0.45, -0.46, -1.15, -0.88, -0.12, -0.34, 
#                       -0.35, -0.82, -1.07, -0.48, -0.44, -0.51, -0.95, -0.49, -0.59, -0.55, -0.24],
#    'coef upper 95%': [0.07, 1.07, 0.47, 0.92, 0.91, 0.62, 0.42, 1.43, 1.00, 1.03, 0.77, 
#                       0.46, 0.94, 0.88, 0.89, 0.42, 0.80, 0.84, 0.87, 1.07],
#    'p':[0.08, 0.29, 0.59, 0.50, 0.52, 0.56, 0.49, 0.10, 0.33, 0.33, 0.95, 0.44, 0.52, 0.51, 0.59, 0.45, 0.64, 0.73, 0.66, 0.21]
#}, index=[        
#    "Pathway_E11.9 -> E78.0 -> I10",
#    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I10",
#    "Pathway_E11.9 -> I10 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I25.1",
#    
#    "Pathway_E78.0 -> I10",
#    "Pathway_E78.0 -> I10 -> E11.9",
#    "Pathway_E78.0 -> I10 -> I25.1",
#    "Pathway_E78.0 -> I10 -> I48",
#    "Pathway_E78.0 -> I25.1",
#    
#    "Pathway_I10 -> E11.9",
#    "Pathway_I10 -> E11.9 -> E78.0",
#    "Pathway_I10 -> E78.0",
#    "Pathway_I10 -> E78.0 -> I25.1",
#    "Pathway_I10 -> I25.1",
#    "Pathway_I10 -> I48",
#    
#    "Pathway_I25.1 -> E78.0 -> I10",
#    "Pathway_I25.1 -> I10",
#    
#    "Pathway_I48 -> I10"
#])#

# Flip the inset box upside down by inverting the y-axis
#inset.invert_xaxis()

#create_hazard_ratio_plot(inset, plot_data)

# Add separate boxes for p-values and N
#add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)


###################################################################################################################

# Hard-coded results table for the PH forest plot, one row per pathway.
# 'coef' and its bounds are exponentiated into hazard ratios by the plotting
# functions; 'p' is printed by add_annotations_box.
# NOTE(review): presumably copied from a fitted Cox model summary - confirm the
# values against the model output whenever the analysis is re-run.
plot_data_ph = pd.DataFrame({
    'coef': [-0.16, 0.22, 0.09, 0.63, -0.05, -0.79, -0.02, -0.08, -0.38, -0.06, 
             -0.44, 0.36, 0.63, -0.21, 0.26, -0.14, -0.30, 0.02, 0.33, -0.02],
    'coef lower 95%': [-0.57, -0.23, -0.23, 0.27, -0.46, -1.82, -0.34, -0.80, -0.80, -0.47, 
                       -1.24, -0.25, 0.18, -0.58, -0.16, -0.55, -0.58, -0.51, -0.13, -0.34],
    'coef upper 95%': [0.26, 0.66, 0.40, 0.99, 0.36, 0.24, 0.31, 0.64, 0.04, 0.35, 
                       0.36, 0.97, 1.09, 0.16, 0.68, 0.28, -0.01, 0.56, 0.78, 0.31],
    'p': [0.46, 0.34, 0.58, 0.001, 0.8, 0.13, 0.92, 0.83, 0.08, 0.78, 0.28, 0.25, 0.01, 0.27, 0.23, 0.52, 0.04, 0.93, 0.16, 0.93]
}, index=[
    # Labels must keep the "Pathway_<first code>" prefix: the plotting function
    # groups and colours rows by it.
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])


# Copy of the PH table with every coefficient scaled by 1.1.
# NOTE(review): plot_data_2 is not used in the code visible in this cell -
# looks like leftover demo code; confirm before relying on or removing it.
plot_data_2 = plot_data_ph.copy()  # Copy so plot_data_ph itself is not modified
plot_data_2['coef'] = plot_data_2['coef'] * 1.1  # Scale the copied coefficients

# Anchor rectangle for the PH inset, derived from the reference box
box_start_x_ph = box_end_x + 1.1  # Start of the anchor rectangle before the shift applied below
box_end_x_ph = box_start_x_ph + 4.8  # Anchor rectangle is 4.8 data units wide

# Insert the PH hazard ratio plot. Width/height percentages are relative to
# the anchor rectangle, which is given in data coordinates of `ax`.
inset_ph = inset_axes(
    ax,
    width="80%",  # Share of the anchor width taken by the inset
    height="101.0%",  # Slightly taller than the anchor rectangle
    bbox_to_anchor=(box_start_x_ph - 7.9, box_start_y - 0.3, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # Hand-tuned shift left and down
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH forest plot with its dedicated function
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add the annotation box listing p-value and HR for each PH pathway.
# NOTE(review): the box is anchored to `inset`, not `inset_ph`, and the code
# that creates `inset` is commented out in this cell. In a fresh kernel this
# line raises NameError; otherwise it attaches the box to the `inset` axes left
# over from an earlier cell. Decide which axes the box belongs to and re-tune
# x_offset accordingly.
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)






####################################################################################

# Hard-coded results table for the COPD forest plot, one row per pathway, with
# the same columns and row order as plot_data_ph.
# NOTE(review): presumably copied from a fitted Cox model summary - confirm the
# values against the model output whenever the analysis is re-run.
plot_data_COPD = pd.DataFrame({
    'coef': [-0.36, 0.69, -0.02, 0.31, 0.55, -0.01, -0.10, 0.11, 0.16, 0.92, 
             -0.91, 0.03, 1.30, 0.24, -0.05, -0.01, 0.13, -0.15, 0.82, 0.28],
    'coef lower 95%': [-0.89, 0.19, -0.40, -0.36, -0.05, -0.94, -0.47, -0.44, -0.27, 0.36, 
                       -1.54, -0.65, 0.73, -0.16, -0.63, -0.48, -0.37, -0.65, 0.20, -0.50],
    'coef upper 95%': [0.17, 1.18, 0.35, 0.97, 1.15, 0.92, 0.28, 0.67, 0.60, 1.48, 
                       -0.27, 0.71, 1.87, 0.63, 0.53, 0.45, 0.63, 0.36, 1.43, 1.06],
    'p':[0.18, 0.01, 0.90, 0.37, 0.07, 0.98, 0.62, 0.69, 0.46, 0.001, 0.001, 0.94,0.001, 0.24, 0.86, 0.96, 0.60, 0.57, 0.01,0.48] 
}, index=[
    # Labels must keep the "Pathway_<first code>" prefix: the plotting function
    # groups and colours rows by it.
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])





# Copy of the COPD table with every coefficient scaled by 1.1.
# NOTE(review): plot_data_3 is not used in the code visible in this cell -
# looks like leftover demo code; confirm before relying on or removing it.
plot_data_3 = plot_data_COPD.copy()  # Copy so plot_data_COPD itself is not modified
plot_data_3['coef'] = plot_data_3['coef'] * 1.1  # Scale the copied coefficients

# Anchor rectangle for the COPD inset, derived from the reference box
box_start_x_COPD = box_end_x + 1.1  # Start of the anchor rectangle before the shift applied below
box_end_x_COPD = box_start_x_COPD + 4.8  # Anchor rectangle is 4.8 data units wide

# Insert the COPD hazard ratio plot. Width/height percentages are relative to
# the anchor rectangle, which is given in data coordinates of `ax`.
inset_COPD = inset_axes(
    ax,
    width="80%",  # Share of the anchor width taken by the inset
    height="100.9%",  # Slightly taller than the anchor rectangle
    bbox_to_anchor=(box_start_x_COPD - 4.00, box_start_y - 0.3, box_end_x_COPD - box_start_x_COPD, box_end_y - box_start_y),  # Hand-tuned shift left and down
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the COPD forest plot with its dedicated function
create_hazard_ratio_plot_COPD(inset_COPD, plot_data_COPD)

# Add the annotation box listing p-value and HR for each COPD pathway.
# NOTE(review): the box is anchored to `inset`, not `inset_COPD`, and the code
# that creates `inset` is commented out in this cell. In a fresh kernel this
# line raises NameError; otherwise it attaches the box to the `inset` axes left
# over from an earlier cell. Decide which axes the box belongs to and re-tune
# x_offset accordingly.
add_annotations_box(inset, plot_data_COPD, box_title="", x_offset=-0.33)

#######################################################################################
# Participant counts per pathway for PH and COPD, used only as y-tick labels.
# Counts below 10 are written as zero-padded strings ('08'), presumably to keep
# the labels the same width.
# NOTE(review): label k is placed at y = k - 0.4, next to the pathway row drawn
# at y_pos == k. Because the first pathway is drawn at the top, the first entry
# of each list labels the LAST pathway ("I48 -> I10") - confirm both lists are
# given in that bottom-to-top order.
ph_participants = [44, 16, 11, 78, 25, 13, 42, 11, 10, '08', 15, 20, '06', 49, '05', 16, 16, 50, 10, 19]

copd_participants = ['07', '06','08', 14, 17, '07', 31, '06', '08', 10, '06', 12, '05', 34, '05', '05', '05', 34, '07', 12]







# Build y-tick positions and "PH, COPD" count labels, one per pathway row
#y_ticks = list(range(1, len(ph_participants) + 1))
# Ticks sit 0.4 below each integer row value, i.e. near the vertical centre of
# the node rectangles (which span y_pos - 0.87 .. y_pos + 0.03).
y_ticks = [ytick - 0.4 for ytick in range(1, len(ph_participants) + 1)]
# zip() stops at the shorter list, so both lists must have one entry per pathway
y_tick_labels = [f"{ph}, {hf}" for ph, hf in zip(ph_participants, copd_participants)]

# Set x-axis and y-axis limits and labels
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
# Right limit = x position of the last node column plus 6.5 units of room for the insets
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)  # Adjust x-axis limits dynamically
#ax.set_ylim(0.5, len(pathways) + 0.5)  # Set y-axis limits to fit all pathways
ax.set_ylim(0, len(pathways)+0.5)  # Start the y-axis at 0 and end at the number of pathways

# Explicitly set y-tick positions if needed or remove them
#ax.set_yticks([])  # Optionally, remove y-ticks
#ax.set_yticklabels([])  # Ensure no y-axis labels


# Explicitly set y-tick positions and labels
ax.set_yticks(y_ticks)  # Set y-tick positions
ax.set_yticklabels(y_tick_labels, fontsize=10)  # Display PH and COPD participant counts as y-tick labels
# Add a title above the y-tick labels for clarification
ax.annotate(
    "No. of Participants\n (PH , COPD)",  # Title text
    xy=(-0.02, 1.00),  # Just left of the y-axis, at the top of the axes
    xycoords='axes fraction',  # Coordinates relative to the axes
    fontsize=12,  # Font size
    ha='center',  # Horizontal alignment
    va='bottom',  # Text sits above the anchor point
    rotation=0  # No rotation
)


# Manually set x-tick positions and labels for disease sequence steps.
# The positions are hand-placed close to the node label centres
# (x_pos - 0.188 for columns 1..5).
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Update legend with proper formatting and alignment
handles = [
    mpatches.Patch(color=color, label=full_name)
    for full_name, color in {
        
        "PH and COPD as Index Conditions": '#b3b3b3',
        "Type 2 Diabetes Mellitus - E11.9": '#88cc88',
        "Hypertension - I10": '#a993cc',
        "Hyperlipidemia - E78": '#d2b48c',
        "Chronic Ischemic Heart Disease - I25.1": '#FFC0CB',
        "Atrial Fibrillation and Flutter - I48": '#add8e6',
        
    }.items()
]

# Position and style the legend
ax.legend(handles=handles , bbox_to_anchor=(0.6, -0.09), loc='upper center', ncol=3, fontsize=14, frameon=False)



# Add descriptive axis labels
ax.set_xlabel("Disease Sequence Progression", fontsize=14, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/COPD", fontsize=14, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=14, pad=20, x=0.8)

ax.xaxis.set_label_coords(0.2, -0.05) 

# Clean up the plot aesthetics by removing unnecessary spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Ensure everything fits nicely into the figure
plt.tight_layout()
# Save the plot as PNG
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.patches as mpatches



def create_hazard_ratio_plot(ax, plot_data):
    """
    Draw a forest plot of hazard ratios with 95% confidence intervals on `ax`.

    Rows whose index starts with "Pathway_<code>" are grouped by starting
    condition (E11.9, E78.0, I10, I25.1, I48, in that order). Each group has
    its own colour and occupies consecutive integer y positions. The lower and
    upper CI bounds are also written as text next to each error bar.

    Parameters:
    - ax: Axes to draw on. Every label, tick and title is applied to this axes
      (not to pyplot's "current" axes, which may be a different inset).
    - plot_data: DataFrame indexed by pathway label with the columns 'coef',
      'coef lower 95%' and 'coef upper 95%'; values are exponentiated with
      np.exp before plotting.
    """
    # Colour per starting-condition group; dict order is the plotting order
    group_colors = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    current_y = 0
    y_positions = []  # Every y position that received an error bar
    # int(spacing) truncates 0.8 to 0, so groups are stacked without a gap.
    # add_annotations_box places row k at y = k, so the rows stay aligned.
    spacing = 0.8
    offset = 0.05  # Horizontal gap between a CI end and its text label

    for group, color in group_colors.items():
        data = plot_data.loc[plot_data.index.str.startswith(f"Pathway_{group}")]
        group_y_ticks = range(current_y, current_y + len(data))

        for y, coef, lower, upper in zip(
                group_y_ticks, data['coef'], data['coef lower 95%'], data['coef upper 95%']):
            # Convert the coefficient and its bounds to the hazard ratio scale
            hr = np.exp(coef)
            lower_hr = np.exp(lower)
            upper_hr = np.exp(upper)

            # xerr expects distances from the point, as a 2 x 1 array
            ax.errorbar(
                hr,
                y,
                xerr=[[hr - lower_hr], [upper_hr - hr]],
                fmt='o',
                capsize=4.5,
                color=color
            )

            # Print the CI bounds just outside each end of the bar
            ax.text(lower_hr - offset, y, f"{lower_hr:.2f}", va='center', ha='right', fontsize=8, color='black')
            ax.text(upper_hr + offset, y, f"{upper_hr:.2f}", va='center', ha='left', fontsize=8, color='black')

        y_positions.extend(group_y_ticks)
        current_y += len(data) + int(spacing)

    # Fit the y-axis tightly around the plotted rows; skipped when no row
    # matched any group so an empty table does not raise on min()/max().
    if y_positions:
        buffer = 0.5
        ax.set_ylim(min(y_positions) - buffer, max(y_positions) + buffer)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # Reference line at HR = 1

    # Fixed HR tick positions with one-decimal labels
    x_ticks = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels([f"{tick:.1f}" for tick in x_ticks], fontsize=10)

    # Labels are set through `ax` so they land on this plot even when another
    # axes has been created since and is pyplot's current axes.
    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_yticks([])
    ax.set_title("Hazards Analysis of PH v/s COPD", fontsize=13)


    
    
    
def add_annotations_box(ax, plot_data, box_title, x_offset=1.0):
    """
    Adds a separate box with one "P:<p-value>, HR:<hazard ratio>" line per row
    next to the plot.

    Parameters:
    - ax: The axis where hazard ratio bars are plotted. The box is positioned
      in this axis' axes-fraction coordinates and copies its y-limits.
    - plot_data: DataFrame with the columns 'p' (p-value) and 'coef'
      (HR is shown as exp(coef)). Row k is written at y = k.
    - box_title: Title of the annotation box.
    - x_offset: Horizontal position of the box's anchor, in axes fractions of `ax`.
    """
    # Create a new inset axis for annotations
    inset_box = inset_axes(
        ax,
        width="145%",  # Width of the annotation box, relative to the 0.2-wide anchor box
        height="100%",  # Height of the box matches the plot
        bbox_to_anchor=(x_offset, 0, 0.2, 1),  # Adjust placement
        bbox_transform=ax.transAxes,
        loc="center left"
    )

    # Remove spines and ticks from the annotation box
    for side in ('top', 'bottom', 'left', 'right'):
        inset_box.spines[side].set_visible(False)
    inset_box.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)

    # Add title to the annotation box
    inset_box.set_title(box_title, fontsize=10, pad=10)

    # One text line per row, at the same integer y position as the row's bar
    for y_pos, (p_val, coef) in enumerate(zip(plot_data['p'], plot_data['coef'])):
        # Two decimals would print p = 0.001 as "0.00"; keep small p-values visible.
        if p_val < 0.001:
            p_text = "<0.001"
        elif p_val < 0.01:
            p_text = f"{p_val:.3f}"
        else:
            p_text = f"{p_val:.2f}"
        annotation = f"P:{p_text}, HR:{np.exp(coef):.2f}"  # Include HR as exp(coef)
        inset_box.text(0.5, y_pos, annotation, ha="center", va="center", fontsize=9)

    # Copy the plot's y-limits (including its inversion) so the rows line up
    inset_box.set_ylim(ax.get_ylim())

    # Draw the box around the entire annotation area
    rect = plt.Rectangle(
        (0, 0), 1, 1,  # Full extent of the box in its own axes coordinates
        transform=inset_box.transAxes,
        edgecolor="black",  # Box color
        facecolor="none",   # Transparent inside
        linewidth=1.5,      # Thickness of the box border
        zorder=10           # Ensure the box is on top
    )
    inset_box.add_patch(rect)
    
 #################################################################################################


def create_hazard_ratio_plot_ph(ax, plot_data_ph):
    """
    Draw the PH forest plot (hazard ratio with 95% CI error bars) on `ax`.

    Rows of `plot_data_ph` are selected by index prefix "Pathway_<code>" for
    the codes E11.9, E78.0, I10, I25.1 and I48 (in that order). Each group is
    drawn in its own colour on consecutive integer y positions. The columns
    'coef', 'coef lower 95%' and 'coef upper 95%' are exponentiated with
    np.exp before plotting.
    """
    # Colour per starting condition; the dict order is also the drawing order
    palette = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    group_gap = 0.4  # Truncated by int() below, i.e. no extra rows between groups
    next_row = 0
    occupied_rows = []

    for code, colour in palette.items():
        members = plot_data_ph.loc[plot_data_ph.index.str.startswith("Pathway_" + code)]
        rows = range(next_row, next_row + len(members))
        bounds = zip(members['coef'], members['coef lower 95%'], members['coef upper 95%'])

        for row, (beta, beta_low, beta_high) in zip(rows, bounds):
            # Move the estimate and its interval to the hazard ratio scale
            centre, low_end, high_end = (np.exp(value) for value in (beta, beta_low, beta_high))
            # xerr holds the distances from the point to each CI end
            ax.errorbar(
                centre,
                row,
                xerr=[[centre - low_end], [high_end - centre]],
                fmt='o',
                capsize=4.5,
                color=colour
            )

        occupied_rows.extend(rows)
        next_row += len(members) + int(group_gap)

    # Half a row of padding above the first and below the last plotted row
    pad = 0.5
    lowest_row = min(occupied_rows)
    highest_row = max(occupied_rows)
    ax.set_ylim(lowest_row - pad, highest_row + pad)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # HR = 1 reference

    # Fixed HR ticks, labelled with one decimal
    hr_ticks = [0.5, 1, 1.5, 2, 2.5, 3]
    ax.set_xticks(hr_ticks)
    ax.set_xticklabels(["{:.1f}".format(tick) for tick in hr_ticks], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with PH Pathways", fontsize=13)

    # No y tick marks on this plot
    ax.set_yticks([])




##############################################################################################
    
    
    
def create_hazard_ratio_plot_COPD(ax, plot_data_COPD):
    """
    Draw the COPD forest plot (hazard ratio with 95% CI error bars) on `ax`.

    Rows of `plot_data_COPD` are picked by index prefix "Pathway_<code>" for
    E11.9, E78.0, I10, I25.1 and I48, in that order. Every group uses its own
    colour and takes consecutive integer y positions. 'coef',
    'coef lower 95%' and 'coef upper 95%' are passed through np.exp to obtain
    the hazard ratio and its interval.
    """
    # (code, colour) pairs in drawing order
    colour_by_code = {
        'E11.9': '#228B22',  # Forest Green
        'E78.0': '#D2691E',  # Chocolate - Rich tan
        'I10': '#7D3C98',  # Amethyst - Deeper purple
        'I25.1': '#FF69B4',  # Hot Pink - Vibrant pink
        'I48': '#4682B4',  # Steel Blue - Deeper blue
    }

    inter_group_gap = 0.8  # int() below truncates this to 0 extra rows
    row_cursor = 0
    drawn_rows = []

    for code in colour_by_code:
        is_member = plot_data_COPD.index.str.startswith("Pathway_" + code)
        subset = plot_data_COPD.loc[is_member]
        subset_rows = range(row_cursor, row_cursor + len(subset))
        estimates = zip(subset['coef'], subset['coef lower 95%'], subset['coef upper 95%'])

        for row, (log_hr, log_low, log_high) in zip(subset_rows, estimates):
            # Hazard ratio scale
            ratio = np.exp(log_hr)
            ratio_low = np.exp(log_low)
            ratio_high = np.exp(log_high)

            # Asymmetric error bar: distance to the lower end, distance to the upper end
            ax.errorbar(
                ratio,
                row,
                xerr=[[ratio - ratio_low], [ratio_high - ratio]],
                fmt='o',
                capsize=4.5,
                color=colour_by_code[code]
            )

        drawn_rows.extend(subset_rows)
        row_cursor += len(subset) + int(inter_group_gap)

    # Tight y-limits: half a row beyond the first and last drawn row
    margin = 0.5
    first_row = min(drawn_rows)
    last_row = max(drawn_rows)
    ax.set_ylim(first_row - margin, last_row + margin)

    ax.invert_yaxis()  # First row at the top
    ax.axvline(x=1, color='black', linestyle='--', linewidth=0.7)  # HR = 1 reference

    # Fixed HR ticks, labelled with one decimal
    tick_values = [0.5, 1, 2, 3, 4, 5, 6]
    ax.set_xticks(tick_values)
    ax.set_xticklabels(["%.1f" % value for value in tick_values], fontsize=10)

    ax.set_xlabel("Hazard Ratio (HR) (95% CI)", fontsize=12)
    ax.set_title("Mortality Risk with COPD Pathways", fontsize=13)

    # No y tick marks on this plot
    ax.set_yticks([])

    
    
    
########################################################################################################################    

  

# Define the pathways and colors.
# Each pathway is a " -> "-separated sequence of condition codes ending in the
# index condition "PH/COPD"; one grid row is drawn per pathway.
pathways = [    
    "E11.9 -> E78.0 -> I10 -> PH/COPD",
    "E11.9 -> E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I10 -> PH/COPD",
    "E11.9 -> I10 -> E78.0 -> PH/COPD",
    "E11.9 -> I10 -> I25.1 -> PH/COPD",
    "E11.9 -> I25.1 -> PH/COPD",
    
    "E78.0 -> I10 -> PH/COPD",
    "E78.0 -> I10 -> E11.9 -> PH/COPD",
    "E78.0 -> I10 -> I25.1 -> PH/COPD",
    "E78.0 -> I10 -> I48 -> PH/COPD",
    "E78.0 -> I25.1 -> PH/COPD",
    
    "I10 -> E11.9 -> PH/COPD",
    "I10 -> E11.9 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> PH/COPD",
    "I10 -> E78.0 -> I25.1 -> PH/COPD",
    "I10 -> I25.1 -> PH/COPD",
    "I10 -> I48 -> PH/COPD",
    
    "I25.1 -> E78.0 -> I10 -> PH/COPD",
    "I25.1 -> I10 -> PH/COPD",
    
    "I48 -> I10 -> PH/COPD"
]



# Fill colour of the node box for each condition code
color_dict = {
    'PH/COPD': '#b3b3b3',
    'I10': '#b3a3cc',
    'I48': '#add8e6',
    'E78.0': '#ddc4a1',
    'I25.1': '#f4b0c8',
    'E11.9': '#c4e3b3'
}




# Create the plot
fig, ax = plt.subplots(figsize=(16, 8))
# Negative gap pulls the node columns together: column c sits at c + (c - 1) * gap
gap = -0.38  # Horizontal gap between nodes
# NOTE(review): row_gap is not referenced anywhere else in this cell.
row_gap = 1  # Vertical gap between rows



# Plot the pathways: one row per pathway, one box per condition in the pathway
for row, pathway in enumerate(pathways, start=1):
    conditions = pathway.split(" -> ")
    for col, condition in enumerate(conditions, start=1):
        # Calculate node position; the first pathway gets the largest y (top row)
        x_pos = col + (col - 1) * gap
        y_pos = len(pathways) - row  + 1 

        # Draw rectangle for each node (unknown codes fall back to a white fill)
        ax.add_patch(plt.Rectangle((x_pos - 0.5, y_pos - 0.87), 0.6, 0.9, 
                                    facecolor=color_dict.get(condition, 'white'), edgecolor='black'))
        # Add text to node
        ax.text(x_pos - 0.188, y_pos - 0.46, condition, ha='center', va='center', fontsize=9.6)

        # Add arrows between nodes (currently disabled)
       # if col < len(conditions):
       #     next_x_pos = x_pos + 1 + gap
       #     ax.annotate(
       #         '',
       #         xy=(next_x_pos - 0.5, y_pos - 0.4),  # End position of arrow
       #         xytext=(x_pos + 0.11, y_pos - 0.4),  # Start position of arrow
       #         arrowprops=dict(
       #             arrowstyle="->,head_width=0.2,head_length=0.3",  # Adjust arrowhead size
       #             color='black',  # Arrow color
        #            lw=0.7,  # Line width (thicker arrow)
        #            shrinkA=0,  # Adjust start of the arrow (in points)
        #            shrinkB=0   # Adjust end of the arrow (in points)
        #        
        #    )
   # )



# Reference rectangle (data coordinates of `ax`) used to place the inset plots.
# NOTE(review): `conditions` here is the value left over from the last loop
# iteration, i.e. the node list of the LAST pathway, so the x coordinates
# depend on the order of `pathways` — confirm this is intended.
box_start_x = len(conditions) + 2.2  # Starting x position for the box
box_end_x = len(conditions) + 7.0    # Ending x position for the box
box_start_y = 0.45                   # Starting y position for the box
box_end_y = len(pathways) + 0.5     # Ending y position for the box

#####################################################################################################################


# Insert the hazard ratio plot into the box area
#inset = inset_axes(
#    ax,
#    width="60%",  # Adjust width of the inset
#    height="100.9%",  # Adjust height of the inset
#    bbox_to_anchor=(box_start_x + 5.1, box_start_y - 0.3, box_end_x - box_start_x, box_end_y - box_start_y),  # Shift down
#    bbox_transform=ax.transData,
#    loc='center'
#)
##
#
# Use the function to create the hazard ratio plot in the inset
# Replace `plot_data` with your actual DataFrame containing the hazard ratio data
#plot_data = pd.DataFrame({
#    'coef': [-0.61, 0.38, -0.18, 0.23, 0.22, -0.26, -0.23, 0.65, 0.33, 0.34, -0.03, 
#             -0.30, 0.23, 0.22, 0.19, -0.26, 0.15, 0.13, 0.16, 0.42],
#    'coef lower 95%': [-1.29, -0.32, -0.83, -0.45, -0.46, -1.15, -0.88, -0.12, -0.34, 
#                       -0.35, -0.82, -1.07, -0.48, -0.44, -0.51, -0.95, -0.49, -0.59, -0.55, -0.24],
#    'coef upper 95%': [0.07, 1.07, 0.47, 0.92, 0.91, 0.62, 0.42, 1.43, 1.00, 1.03, 0.77, 
#                       0.46, 0.94, 0.88, 0.89, 0.42, 0.80, 0.84, 0.87, 1.07],
#    'p':[0.08, 0.29, 0.59, 0.50, 0.52, 0.56, 0.49, 0.10, 0.33, 0.33, 0.95, 0.44, 0.52, 0.51, 0.59, 0.45, 0.64, 0.73, 0.66, 0.21]
#}, index=[        
#    "Pathway_E11.9 -> E78.0 -> I10",
#    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I10",
#    "Pathway_E11.9 -> I10 -> E78.0",
#    "Pathway_E11.9 -> I10 -> I25.1",
#    "Pathway_E11.9 -> I25.1",
#    
#    "Pathway_E78.0 -> I10",
#    "Pathway_E78.0 -> I10 -> E11.9",
#    "Pathway_E78.0 -> I10 -> I25.1",
#    "Pathway_E78.0 -> I10 -> I48",
#    "Pathway_E78.0 -> I25.1",
#    
#    "Pathway_I10 -> E11.9",
#    "Pathway_I10 -> E11.9 -> E78.0",
#    "Pathway_I10 -> E78.0",
#    "Pathway_I10 -> E78.0 -> I25.1",
#    "Pathway_I10 -> I25.1",
#    "Pathway_I10 -> I48",
#    
#    "Pathway_I25.1 -> E78.0 -> I10",
#    "Pathway_I25.1 -> I10",
#    
#    "Pathway_I48 -> I10"
#])#

# Flip the inset box upside down by inverting the y-axis
#inset.invert_xaxis()

#create_hazard_ratio_plot(inset, plot_data)

# Add separate boxes for p-values and N
#add_annotations_box(inset, plot_data, box_title="", x_offset=0.95)


###################################################################################################################

# Cox model results for the PH pathways, one row per pathway.
# 'coef' and its 95% bounds are exponentiated by the plotting functions to get
# the hazard ratio and its interval; 'p' is the p-value shown in the annotation box.
plot_data_ph = pd.DataFrame({
    'coef': [-0.16, 0.22, 0.09, 0.63, -0.05, -0.79, -0.02, -0.08, -0.38, -0.06, 
             -0.44, 0.36, 0.63, -0.21, 0.26, -0.14, -0.30, 0.02, 0.33, -0.02],
    'coef lower 95%': [-0.57, -0.23, -0.23, 0.27, -0.46, -1.82, -0.34, -0.80, -0.80, -0.47, 
                       -1.24, -0.25, 0.18, -0.58, -0.16, -0.55, -0.58, -0.51, -0.13, -0.34],
    'coef upper 95%': [0.26, 0.66, 0.40, 0.99, 0.36, 0.24, 0.31, 0.64, 0.04, 0.35, 
                       0.36, 0.97, 1.09, 0.16, 0.68, 0.28, -0.01, 0.56, 0.78, 0.31],
    'p': [0.46, 0.34, 0.58, 0.001, 0.8, 0.13, 0.92, 0.83, 0.08, 0.78, 0.28, 0.25, 0.01, 0.27, 0.23, 0.52, 0.04, 0.93, 0.16, 0.93]
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])


# Copy of the PH table with coefficients scaled by 1.1.
# NOTE(review): plot_data_2 is never read in this cell (the inset below plots
# plot_data_ph) — looks like leftover demo code; confirm before removing.
plot_data_2 = plot_data_ph.copy()  # Example: Using the same data for demonstration
plot_data_2['coef'] = plot_data_2['coef'] * 1.1  # Slightly modify coefficients for differentiation#

# Define the position for the PH inset (data coordinates of `ax`)
box_start_x_ph = box_end_x + 1.1  # Start the second inset further to the right
box_end_x_ph = box_start_x_ph + 4.8  # Adjust width for the second inset

# Insert the PH hazard ratio plot.
# width/height are percentages of the bounding box given in bbox_to_anchor.
inset_ph = inset_axes(
    ax,
    width="55%",  # Adjust width of the second inset
    height="101.0%",  # Adjust height of the second inset
    bbox_to_anchor=(box_start_x_ph - 8.33, box_start_y - 0.3, box_end_x_ph - box_start_x_ph, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the PH forest plot inside the inset
create_hazard_ratio_plot_ph(inset_ph, plot_data_ph)

# Add the box listing p-value and HR for each PH row.
# NOTE(review): the box is anchored to `inset`, whose creation is commented out
# in this cell. On a fresh kernel this raises NameError; otherwise it attaches
# to whatever axes an earlier cell left in `inset` — confirm the intended parent
# (possibly inset_ph, with x_offset re-tuned).
add_annotations_box(inset, plot_data_ph, box_title="", x_offset=-1.63)






####################################################################################

# Cox model results for the COPD pathways, one row per pathway.
# 'coef' and its 95% bounds are exponentiated by the plotting functions to get
# the hazard ratio and its interval; 'p' is the p-value shown in the annotation box.
plot_data_COPD = pd.DataFrame({
    'coef': [-0.36, 0.69, -0.02, 0.31, 0.55, -0.01, -0.10, 0.11, 0.16, 0.92, 
             -0.91, 0.03, 1.30, 0.24, -0.05, -0.01, 0.13, -0.15, 0.82, 0.28],
    'coef lower 95%': [-0.89, 0.19, -0.40, -0.36, -0.05, -0.94, -0.47, -0.44, -0.27, 0.36, 
                       -1.54, -0.65, 0.73, -0.16, -0.63, -0.48, -0.37, -0.65, 0.20, -0.50],
    'coef upper 95%': [0.17, 1.18, 0.35, 0.97, 1.15, 0.92, 0.28, 0.67, 0.60, 1.48, 
                       -0.27, 0.71, 1.87, 0.63, 0.53, 0.45, 0.63, 0.36, 1.43, 1.06],
    'p':[0.18, 0.01, 0.90, 0.37, 0.07, 0.98, 0.62, 0.69, 0.46, 0.001, 0.001, 0.94,0.001, 0.24, 0.86, 0.96, 0.60, 0.57, 0.01,0.48] 
}, index=[
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    
    "Pathway_I48 -> I10"
])





# Copy of the COPD table with coefficients scaled by 1.1.
# NOTE(review): plot_data_3 is never read in this cell (the inset below plots
# plot_data_COPD) — looks like leftover demo code; confirm before removing.
plot_data_3 = plot_data_COPD.copy()  # Example: Using the same data for demonstration
plot_data_3['coef'] = plot_data_3['coef'] * 1.1  # Slightly modify coefficients for differentiation

# Define the position for the COPD inset (data coordinates of `ax`)
box_start_x_COPD = box_end_x + 1.1  # Start the second inset further to the right
box_end_x_COPD = box_start_x_COPD + 4.8  # Adjust width for the second inset

# Insert the COPD hazard ratio plot.
# width/height are percentages of the bounding box given in bbox_to_anchor.
inset_COPD = inset_axes(
    ax,
    width="55%",  # Adjust width of the second inset
    height="100.9%",  # Adjust height of the second inset
    bbox_to_anchor=(box_start_x_COPD - 5.50, box_start_y - 0.3, box_end_x_COPD - box_start_x_COPD, box_end_y - box_start_y),  # Adjust placement
    bbox_transform=ax.transData,
    loc='center'
)

# Draw the COPD forest plot inside the inset
create_hazard_ratio_plot_COPD(inset_COPD, plot_data_COPD)

# Add the box listing p-value and HR for each COPD row.
# NOTE(review): the box is anchored to `inset`, whose creation is commented out
# in this cell. On a fresh kernel this raises NameError; otherwise it attaches
# to whatever axes an earlier cell left in `inset` — confirm the intended parent
# (possibly inset_COPD, with x_offset re-tuned).
add_annotations_box(inset, plot_data_COPD, box_title="", x_offset=-0.33)

#######################################################################################
# Number of PH and COPD participants per pathway row.
# Counts are stored as plain integers; zero-padding to two digits is applied
# only when the tick labels are formatted below.
# NOTE(review): labels are assigned bottom-to-top (the first entry gets the
# lowest y tick, which is the row of the LAST pathway drawn) — verify the
# lists are ordered accordingly.
ph_participants = [44, 16, 11, 78, 25, 13, 42, 11, 10, 8, 15, 20, 6, 49, 5, 16, 16, 50, 10, 19]

copd_participants = [7, 6, 8, 14, 17, 7, 31, 6, 8, 10, 6, 12, 5, 34, 5, 5, 5, 34, 7, 12]

# zip() would silently drop rows if the two lists ever got out of sync
if len(ph_participants) != len(copd_participants):
    raise ValueError("ph_participants and copd_participants must have the same length")

# One y tick per row, shifted down by 0.4
y_ticks = [ytick - 0.4 for ytick in range(1, len(ph_participants) + 1)]
# Label format: "<PH count>, <COPD count>", each zero-padded to two digits
y_tick_labels = [f"{ph:02d}, {copd:02d}" for ph, copd in zip(ph_participants, copd_participants)]

# Set x-axis and y-axis limits
node_columns = max(len(pathway.split(" -> ")) for pathway in pathways)  # Max nodes in a pathway
# Right limit = x position of the last node column plus a fixed 6.5-unit margin
ax.set_xlim(0.5, node_columns + (node_columns - 1) * gap + 6.5)
ax.set_ylim(0, len(pathways) + 0.5)  # Start the y-axis at 0 and end just above the top row

# y ticks carry the "PH, COPD" participant counts of each row
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_tick_labels, fontsize=10)
# Heading above the y-tick labels explaining what the two numbers are
ax.annotate(
    "No. of Participants\n (PH , COPD)",  # Title text
    xy=(-0.02, 1.00),  # Position above the y-axis (relative to plot)
    xycoords='axes fraction',  # Coordinates relative to the axes
    fontsize=12,
    ha='center',
    va='bottom',
    rotation=0
)

# Manually set x-tick positions and labels for disease sequence steps
# (positions are hand-tuned constants, one per node column)
ax.set_xticks([0.8, 1.45, 2.05, 2.68, 3.33])
ax.set_xticklabels(['1', '2', '3', '4', '5'], fontsize=13)

# Legend: one patch per condition, coloured from color_dict so every swatch
# is exactly the fill colour used for that condition's node boxes.
legend_codes = {
    "PH and COPD as Index Conditions": 'PH/COPD',
    "Type 2 Diabetes Mellitus": 'E11.9',
    "Hypertension": 'I10',
    "Hyperlipidemia": 'E78.0',
    "Chronic Ischemic Heart Disease": 'I25.1',
    "Atrial Fibrillation and Flutter": 'I48',
}
handles = [
    mpatches.Patch(color=color_dict[code], label=full_name)
    for full_name, code in legend_codes.items()
]

# Position and style the legend (below the axes, three columns, no frame)
ax.legend(handles=handles, bbox_to_anchor=(0.5, -0.09), loc='upper center', ncol=3, fontsize=14, frameon=False)

# Add descriptive axis labels
ax.set_xlabel("Disease Sequence Progression", fontsize=14, labelpad=15)
ax.set_ylabel("Pathways Leading to PH/COPD", fontsize=14, labelpad=15)
ax.set_title("Cox Proportional Hazards Analysis", fontsize=15, pad=20, x=0.65)

# Move the x label under the pathway grid (axes-fraction coordinates)
ax.xaxis.set_label_coords(0.2, -0.05)

# Clean up the plot aesthetics by removing unnecessary spines
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# Ensure everything fits nicely into the figure
plt.tight_layout()
# Save the plot as PNG
# NOTE(review): the previous cell saves to the same file name, so whichever
# cell runs last overwrites "pathways_plot.png" — confirm that is intended.
plt.savefig("pathways_plot.png", dpi=1200)  # Save as PNG with high resolution
plt.show()

In [None]:
# Display PH and COPD dataframes directly
import pandas as pd

# Summary tables of hazard ratio, 95% CI bounds and p-value per pathway,
# one table for PH and one for COPD. Both tables list the same pathways in
# the same order, so the labels are written once and copied into each table.
#
# NOTE(review): the values below are typed in by hand and some bounds do not
# equal exp() of the coefficient tables used for the plots, e.g. PH 'Upper CI'
# 1.20 for "Pathway_E78.0 -> I10 -> I25.1" (plot_data_ph upper coef 0.04,
# exp ~ 1.04) and COPD 'Upper CI' 2.43 for
# "Pathway_E11.9 -> E78.0 -> I10 -> I25.1" (plot_data_COPD upper coef 1.18,
# exp ~ 3.25). One of the two sources is likely mistyped — verify against the
# model output.
pathway_labels = [
    "Pathway_E11.9 -> E78.0 -> I10",
    "Pathway_E11.9 -> E78.0 -> I10 -> I25.1",
    "Pathway_E11.9 -> I10",
    "Pathway_E11.9 -> I10 -> E78.0",
    "Pathway_E11.9 -> I10 -> I25.1",
    "Pathway_E11.9 -> I25.1",
    "Pathway_E78.0 -> I10",
    "Pathway_E78.0 -> I10 -> E11.9",
    "Pathway_E78.0 -> I10 -> I25.1",
    "Pathway_E78.0 -> I10 -> I48",
    "Pathway_E78.0 -> I25.1",
    "Pathway_I10 -> E11.9",
    "Pathway_I10 -> E11.9 -> E78.0",
    "Pathway_I10 -> E78.0",
    "Pathway_I10 -> E78.0 -> I25.1",
    "Pathway_I10 -> I25.1",
    "Pathway_I10 -> I48",
    "Pathway_I25.1 -> E78.0 -> I10",
    "Pathway_I25.1 -> I10",
    "Pathway_I48 -> I10",
]

# PH pathways
ph_data = dict([
    ('Pathway', list(pathway_labels)),
    ('HR', [0.85, 1.25, 1.09, 1.88, 0.95, 0.45, 0.98, 0.92, 0.68, 0.94, 0.64, 1.43, 1.88, 0.81, 1.30, 0.87, 0.74, 1.02, 1.39, 0.98]),
    ('Lower CI', [0.57, 0.79, 0.79, 1.31, 0.63, 0.16, 0.71, 0.45, 0.45, 0.62, 0.29, 0.77, 1.20, 0.56, 0.85, 0.58, 0.55, 0.60, 0.88, 0.71]),
    ('Upper CI', [1.26, 1.93, 1.50, 2.70, 1.43, 1.27, 1.34, 1.86, 1.20, 1.47, 1.41, 2.65, 2.97, 1.18, 2.00, 1.30, 1.00, 1.72, 2.20, 1.35]),
    ('P-value', [0.46, 0.34, 0.58, 0.001, 0.8, 0.13, 0.92, 0.83, 0.08, 0.78, 0.28, 0.25, 0.01, 0.27, 0.23, 0.52, 0.04, 0.93, 0.16, 0.93]),
])

# COPD pathways
copd_data = dict([
    ('Pathway', list(pathway_labels)),
    ('HR', [0.7, 1.99, 0.98, 1.36, 1.73, 0.99, 0.90, 1.11, 1.17, 2.51, 0.4, 1.03, 3.67, 1.27, 0.95, 0.99, 1.14, 0.86, 2.27, 1.32]),
    ('Lower CI', [0.41, 1.21, 0.67, 0.7, 0.95, 0.39, 0.62, 0.64, 0.76, 1.43, 0.21, 0.52, 2.08, 0.85, 0.53, 0.61, 0.69, 0.52, 1.22, 0.61]),
    ('Upper CI', [1.19, 2.43, 1.42, 2.43, 3.15, 2.51, 1.51, 1.87, 1.8, 4.39, 0.76, 2.02, 6.63, 1.91, 1.67, 1.58, 1.88, 1.42, 4.06, 2.92]),
    ('P-value', [0.18, 0.01, 0.90, 0.37, 0.07, 0.98, 0.62, 0.69, 0.46, 0.001, 0.001, 0.94, 0.001, 0.24, 0.86, 0.96, 0.60, 0.57, 0.01, 0.48]),
])

# Turn both dictionaries into DataFrames (one column per key)
ph_df, copd_df = pd.DataFrame(ph_data), pd.DataFrame(copd_data)

# Show the PH table, then the COPD table, each under its heading
print("PH Pathways Data:")
display(ph_df)
print()
print("\nCOPD Pathways Data:")
display(copd_df)
