# Spending vs mCPWAD 26w Plot

This notebook creates a clean plot of spending (restricted to the 1M–8M range) against mCPWAD 26w, with markers placed along the curve at 500-unit increments of mCPD.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.patheffects as pe
import numpy as np


In [None]:
def _format_mcpd_level(level):
    """Return an mCPD level expressed in thousands, e.g. 1500 -> "1.5k"."""
    # "+ 0.0" turns a possible -0.0 into 0.0 so it never renders as "-0k".
    # The "g" format keeps the half step ("1.5k") instead of truncating it.
    return f"{level / 1000 + 0.0:g}k"


def plot_spending_mcpwad_curve(df, mcpd_step=500):
    """
    Plot spending vs mCPWAD 26w with markers at regular mCPD levels.

    The data is restricted to spending between 1,000,000 and 8,000,000
    (inclusive) and sorted by spending. For every multiple of ``mcpd_step``
    that some row's ``pred_mcpd`` rounds to, one marker is drawn on the curve
    at the row whose ``pred_mcpd`` is closest to that level, and annotated
    with the level in thousands (e.g. "1.5k").

    Note: this function updates the global ``plt.rcParams`` (dpi, fonts,
    grid), so the styling persists for later figures in the same session.

    Parameters:
    df: DataFrame with columns ['spending', 'pred_mcpd', 'pred_mcpwad_26w']
    mcpd_step: positive spacing between marked mCPD levels (default 500)

    Returns:
    (fig, ax): the matplotlib Figure and Axes that were drawn.

    Raises:
    KeyError: if a required column is missing from df.
    ValueError: if mcpd_step is not a positive number.
    """
    required = ("spending", "pred_mcpd", "pred_mcpwad_26w")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"df is missing required column(s): {missing}")
    if not mcpd_step > 0:
        raise ValueError("mcpd_step must be a positive number")

    # Set up plot styling (global: affects subsequent figures too)
    plt.rcParams.update({
        "figure.dpi": 150,
        "savefig.dpi": 300,
        "font.family": ["Proxima Nova", "Helvetica Neue", "Arial"],
        "axes.grid": True,
        "grid.alpha": 0.25,
        "grid.linestyle": "--",
        "axes.linewidth": 0.8,
    })

    # Define colors
    colors = {"mcpwad": "#d19017", "mcpd_markers": "#0778b5"}

    # Filter data to spending range 1M-8M. The index is reset so that the
    # label-based lookups below are unambiguous even if the caller's frame
    # has duplicate index labels.
    df_filtered = df[(df['spending'] >= 1_000_000) & (df['spending'] <= 8_000_000)]
    df_filtered = df_filtered.sort_values('spending').reset_index(drop=True)

    # Create figure
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot main line: spending vs mcpwad_26w
    line1, = ax.plot(df_filtered["spending"], df_filtered["pred_mcpwad_26w"],
                     label="mCPWAD (26wks)", color=colors["mcpwad"],
                     linewidth=2, zorder=2)

    # Only rows with a finite pred_mcpd can be assigned to a level; NaN/inf
    # would otherwise produce NaN/inf "levels" that cannot be labelled.
    mcpd = df_filtered['pred_mcpd'].astype(float)
    mcpd = mcpd[np.isfinite(mcpd)]

    # Nearest multiple of mcpd_step for each row, and how far the row is from it
    nearest_level = (mcpd / mcpd_step).round() * mcpd_step
    distance = (mcpd - nearest_level).abs()

    spending_marks = []
    mcpwad_marks = []
    mcpd_marks = []

    # groupby iterates levels in ascending order; within each level pick the
    # row closest to the exact level (ties go to the lowest spending because
    # the frame is sorted by spending and idxmin returns the first minimum).
    for level, level_distance in distance.groupby(nearest_level):
        idx = level_distance.idxmin()
        spending_marks.append(df_filtered.loc[idx, 'spending'])
        mcpwad_marks.append(df_filtered.loc[idx, 'pred_mcpwad_26w'])
        mcpd_marks.append(level)

    # Plot markers at mcpd incremental levels
    ax.scatter(spending_marks, mcpwad_marks, marker='o', s=30,
               color=colors["mcpd_markers"], zorder=3, alpha=0.8)

    # Add annotations for mcpd levels; the white halo keeps the text legible
    # where it overlaps the curve or the grid.
    halo = [pe.withStroke(linewidth=2, foreground="white")]
    for s, y, lvl in zip(spending_marks, mcpwad_marks, mcpd_marks):
        ax.annotate(_format_mcpd_level(lvl), (s, y), xytext=(0, 10),
                    textcoords="offset points", ha="center", fontsize=8,
                    color=colors["mcpd_markers"], path_effects=halo)

    # Styling
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(0.8)
        spine.set_color("black")

    # Labels and title
    ax.set_xlabel("Spending", fontsize=12)
    ax.set_ylabel("mCPWAD (26wks)", fontsize=12)
    ax.set_title("Spending vs mCPWAD 26w with mCPD Markers",
                 fontfamily="Arial", fontweight="bold", fontsize=14)

    # Format x-axis with commas
    ax.xaxis.set_major_formatter(mtick.StrMethodFormatter("{x:,.0f}"))

    # Create legend; the empty scatter exists only to provide a legend handle
    mcpd_handle = ax.scatter([], [], marker='o', s=30, color=colors["mcpd_markers"],
                             label=f"mCPD levels ({mcpd_step:g} increments)")
    ax.legend(handles=[line1, mcpd_handle], fontsize=10, framealpha=0.9, loc="best")

    # Set x-axis limits to focus on 1M-8M range
    ax.set_xlim(1_000_000, 8_000_000)

    fig.tight_layout()
    plt.show()

    # Print summary of markers (mcpd_marks is already unique and ascending)
    print("\nMarkers plotted for mCPD levels (in thousands):")
    for level in mcpd_marks:
        print(f"  {_format_mcpd_level(level)}")

    return fig, ax


In [None]:
# Example usage:
# Assuming your dataframe is called 'df'
# fig, ax = plot_spending_mcpwad_curve(df)

# If you want to save the plot:
# fig.savefig('spending_mcpwad_plot.png', dpi=300, bbox_inches='tight')


## Key Changes from Original Code:

1. **Simplified focus**: Only plots spending vs mCPWAD 26w (single line)
2. **Data filtering**: Automatically filters to 1M-8M spending range
3. **Smart mCPD markers**: Marks one point on the curve for each 500-unit level of pred_mcpd
4. **Clean annotations**: Shows mCPD levels in thousands (e.g., "2k" for 2000)
5. **Improved styling**: Larger figure, better colors, cleaner legend
6. **Automatic marker detection**: No need to manually specify marker positions

## Usage:
```python
# Simply call with your dataframe
fig, ax = plot_spending_mcpwad_curve(df)
```

The function will automatically:
- Filter your data to the 1M-8M spending range
- Find points where mCPD is at 500-unit increments
- Plot markers and annotations at those points
- Display a clean, professional-looking chart
