# Correlation Analysis

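Computes the pairwise correlation matrix of the numeric variables in the panel dataset and saves it as a heatmap figure and a LaTeX table, together with a provenance record.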

In [None]:
# Input dataset read by the analysis.
data_file = "data/panel_data.csv"

# Output locations: heatmap figure, LaTeX table, provenance metadata.
out_fig = "output/figures/correlation.pdf"
out_table = "output/tables/correlation.tex"
out_meta = "output/provenance/correlation.yml"

# Identifier passed as the artifact name when the provenance record is written.
study = "correlation"

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import seaborn as sns
from repro_tools import auto_build_record

# Turn the string parameters into Path objects for the cells that follow.
data_file, out_fig, out_table, out_meta = map(
    Path, (data_file, out_fig, out_table, out_meta)
)

# Create the parent directory of every output file before anything is written.
for output_path in (out_fig, out_table, out_meta):
    output_path.parent.mkdir(parents=True, exist_ok=True)

In [None]:
# Read the dataset, keep only its numeric columns, and compute their
# pairwise correlation matrix.
df = pd.read_csv(data_file)
numeric_df = df.select_dtypes(include=["number"])
numeric_cols = numeric_df.columns.tolist()
corr = numeric_df.corr()
print(f"Loaded {len(df)} rows, {len(numeric_cols)} numeric columns")

In [None]:
# Draw the annotated correlation heatmap and save it to out_fig.
heatmap_style = {"annot": True, "fmt": ".2f", "cmap": "coolwarm", "center": 0}

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, ax=ax, **heatmap_style)
ax.set_title("Correlation Matrix")

fig.savefig(out_fig, dpi=300, bbox_inches="tight")
plt.close(fig)  # the figure is no longer needed once it is on disk
print(f"Saved {out_fig}")

In [None]:
# Export the correlation matrix as a LaTeX table with two-decimal entries.
# The encoding is pinned to UTF-8 so that non-ASCII column names are written
# the same way regardless of the locale's default encoding.
with open(out_table, "w", encoding="utf-8") as f:
    f.write(corr.to_latex(float_format="%.2f"))
print(f"Saved {out_table}")

In [None]:
# Generate provenance
from repro_tools import write_build_record
from pathlib import Path

# Write the provenance record for this run. write_build_record comes from
# repro_tools; it is handed the artifact name, the command, the resolved
# repository root, and the input and output paths.
write_build_record(
    out_meta=out_meta,
    artifact_name=study,
    command=["papermill", "notebooks/correlation_analysis.ipynb"],
    repo_root=Path(".").resolve(),
    inputs=[data_file],
    outputs=[out_fig, out_table],
)
# The status glyphs were stored as mis-decoded bytes and printed as garbage.
# They are written as escapes (U+2713 check mark, U+1F389 party popper) so a
# file-encoding mix-up cannot corrupt them again.
print(f"\u2713 Saved provenance to {out_meta}")
print("\U0001F389 Complete!")