###  STEP 1: Setting up the Visualization Environment

The goal of this notebook is to generate a publication-quality figure (a "Hit Map" or "Tile Plot") based on the final candidate list (`de_novo_candidates.csv`) produced by Notebook 05.

This visualization shows exactly *which* gene mutated, in *which* lineage (DAP or VAN), and at *which* time point (P5, P20, or Final).

In [None]:
# ---
# Major Step 1.1: Install Visualization Libraries
# ---
print("Installing plotting libraries (matplotlib and seaborn)...")
!conda install -c conda-forge matplotlib seaborn -y
print("Installation complete.")

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Step 1.2: read the final candidate table (one row per candidate) into a
# DataFrame. The path is relative to this notebook's working directory.
print("--- 1.2 Loading Final Candidate List ---")
DATA_FILE = "../results/final_analysis_data/de_novo_candidates.csv"
plot_data_df = pd.read_csv(DATA_FILE)

# Report the number of rows read as a quick sanity check.
print(f"Successfully loaded {len(plot_data_df)} candidate rows.")

# --------------------------------------------------------------------------
# Major Step 2.1: Preparing Data Matrix for Plotting
# --------------------------------------------------------------------------
# Builds `plot_matrix`: rows are gene names, columns are the six sample groups
# in a fixed order, and each cell is 1 if the gene has at least one candidate
# row in that group, else 0.
print("--- 2.1 Preparing Data Matrix for Plotting ---")

# Column order used on the x-axis of the figure.
sample_order = [
    'DAP_P5', 'DAP_P20', 'DAP_Final',
    'VAN_P5', 'VAN_P20', 'VAN_Final'
]

# 1. Count how many times each gene appears in each group.
hit_counts = pd.crosstab(plot_data_df['GeneName'], plot_data_df['Group'])

# The reindex below keeps ONLY the columns listed in sample_order, so any
# other Group label (typo, new sample) would silently disappear from the
# figure. Surface that explicitly instead of losing data without notice.
unexpected_groups = [grp for grp in hit_counts.columns if grp not in sample_order]
if unexpected_groups:
    print(
        "WARNING: candidates in unexpected Group value(s) will be omitted "
        f"from the plot: {unexpected_groups}"
    )

# 2. Convert counts to binary (1 = Hit, 0 = No Hit).
# 3. Force the fixed column order; groups with no candidates at all are
#    added as all-zero columns so every sample appears in the figure.
plot_matrix = (
    (hit_counts > 0)
    .astype(int)
    .reindex(columns=sample_order, fill_value=0)
)

print("Data Matrix is ready for plotting:")
print(plot_matrix.head())

# --------------------------------------------------------------------------
# Major Step 2.2: Generating the Heatmap (Hit Map)
# --------------------------------------------------------------------------
# Draws `plot_matrix` as an annotated tile plot, saves it as a 300-dpi PNG,
# then displays it inline.
print("\n--- 2.2 Generating the Heatmap ---")

# Output location for the figure. The directory is created with os.makedirs
# instead of a `!mkdir -p` shell escape: it works without a POSIX shell and
# avoids passing an interpolated path through the shell.
FIGURE_DIR = "../results/figures/"
os.makedirs(FIGURE_DIR, exist_ok=True)
FIGURE_PATH = os.path.join(FIGURE_DIR, "de_novo_mutations_heatmap.png")

# Explicit figure/axes handles so styling and saving act on this figure
# rather than on whatever pyplot currently considers "current".
fig, ax = plt.subplots(figsize=(10, 12))
sns.heatmap(
    plot_matrix,
    annot=True,          # print the 0/1 value inside each tile
    cmap="Reds",
    linewidths=0.5,
    linecolor='black',
    cbar=False,          # no colour bar
    ax=ax,
)

ax.set_title('De Novo Mutations (HIGH/MODERATE Impact) by Treatment', fontsize=16)
ax.set_xlabel('Treatment Group (Time Course)', fontsize=12)
ax.set_ylabel('Gene Name (Prime Suspects)', fontsize=12)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()  # settle the layout before saving

# Save before showing: with inline backends the figure may be closed once it
# has been displayed, which would leave nothing to write to disk.
fig.savefig(FIGURE_PATH, dpi=300, bbox_inches='tight')
print(f"Heatmap saved successfully to: {FIGURE_PATH}")

# Now show the plot in the notebook.
plt.show()