# 📘 Correlation Between Gene Expression and Clinical Features

This notebook explores the relationships between gene expression and clinical variables such as patient age, tumor size, and survival duration. It is useful for identifying clinically relevant biomarkers.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load expression and clinical data; the first CSV column is used as the row
# index of each frame, and that index is the key the two frames are joined on.
expression_df = pd.read_csv('your_expression_data.csv', index_col=0)
clinical_df = pd.read_csv('your_clinical_data.csv', index_col=0)

# Merge both on patient ID (index-to-index; pd.merge defaults to an inner
# join, so only rows whose index appears in both frames are kept).
merged_df = pd.merge(expression_df, clinical_df, left_index=True, right_index=True)

# Select relevant columns
gene_cols = expression_df.columns[:50]  # or pick genes manually
clinical_cols = ['Age', 'Tumor_Size', 'Survival_Months']

# Build the combined column selection as a plain list of labels.
# NOTE: `gene_cols` is a pandas Index, and `Index + list` performs element-wise
# addition (string concatenation per element) rather than list concatenation,
# so it must be converted to a list before the clinical names are appended.
selected_cols = list(gene_cols) + clinical_cols

# Calculate correlation matrix (pandas default method: Pearson)
corr = merged_df[selected_cols].corr()

# Focus on correlations between clinical and gene variables:
# rows = clinical variables, columns = genes.
heatmap_data = corr.loc[clinical_cols, gene_cols]

# Plot heatmap
plt.figure(figsize=(16, 5))
sns.heatmap(heatmap_data, cmap='coolwarm', annot=False)
plt.title('Correlation Between Clinical Features and Gene Expression')
plt.xlabel("Genes")
plt.ylabel("Clinical Variables")
plt.show()