# 📘 Top Genes Correlated with Clinical Outcome

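This notebook identifies the 10 genes whose expression is most strongly correlated — positively or negatively, ranked by absolute correlation — with a clinical outcome (here, survival duration in months). It then plots the pairwise correlations among those genes as a heatmap, giving a focused look at genes that may serve as prognostic markers.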

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load data; the first CSV column is used as the row index.
df = pd.read_csv('merged_expression_clinical.csv', index_col=0)

# Define target variable and how many top-ranked genes to display
target = 'Survival_Months'
top_n = 10

# Keep only numeric columns: correlation is undefined for string/categorical
# fields, and pandas >= 2.0 raises if they are passed to corr().
numeric_df = df.select_dtypes(include='number')
if target not in numeric_df.columns:
    raise KeyError(f"Target column '{target}' is missing or not numeric")

# Correlate every other numeric column with the target directly. This yields
# the same values as df.corr()[target] without building the full
# column-by-column matrix, which is quadratic in the number of columns.
correlations = numeric_df.drop(columns=target).corrwith(numeric_df[target])

# Rank by absolute correlation so that strong negative associations are kept
# alongside strong positive ones. Columns with undefined correlation
# (e.g. constant columns) are dropped so they cannot pad the selection.
top_genes = (
    correlations.abs()
    .dropna()
    .sort_values(ascending=False)
    .head(top_n)
    .index
)

# Pairwise correlation matrix among the selected columns only
heatmap_data = numeric_df[top_genes].corr()

# Plot heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(heatmap_data, annot=True, cmap='coolwarm')
plt.title(f'Top Genes Correlated with {target}')
plt.show()