In [5]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages as pdf
import seaborn as sns
import pandas as pd
import os

# Read the season-level table from disk
df = pd.read_csv('mlb_1998_2024.csv')

# Map former franchise names onto the current name so every club appears
# under a single label across all seasons
team_rename_dict = {
    "Tampa Bay Devil Rays": "Tampa Bay Rays",
    "Florida Marlins": "Miami Marlins",
    "Anaheim Angels": "Los Angeles Angels",
    "Montreal Expos": "Washington Nationals",
    "Cleveland Indians": "Cleveland Guardians",
}
df["team_name"] = df["team_name"].replace(to_replace=team_rename_dict)

# Keep only the columns the reports use, then derive the two extra metrics
feature_cols = ["team_name", "year", "wins", "losses", "runs_scored", "runs_allowed"]
data = df.loc[:, feature_cols].copy()
games_decided = data["wins"] + data["losses"]
data["win_pct"] = data["wins"].div(games_decided)
data["run_diff"] = data["runs_scored"].sub(data["runs_allowed"])

# Exclude the 2020 (COVID) season from every downstream figure
data = data.loc[data["year"].ne(2020)]

# Directory that receives the generated images; created if missing
output_dir = "team_reports"
os.makedirs(output_dir, exist_ok=True)

# League-wide reference values:
#   league_avg     -> mean wins per season (columns: year, wins)
#   league_win_avg -> single mean win percentage over all remaining rows
league_avg = data.groupby("year", as_index=False)["wins"].mean()
league_win_avg = data["win_pct"].mean()

# === Report Generator ===
def _save_correlation_heatmap(team, team_data):
    """Save a heatmap of pairwise correlations between the team's metrics.

    The image is written to ``output_dir`` as ``<team without spaces>_corr.png``.
    """
    metric_cols = ['wins', 'losses', 'win_pct', 'runs_scored', 'runs_allowed', 'run_diff']
    corr_matrix = team_data[metric_cols].corr()

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(corr_matrix, annot=True, cmap='viridis', fmt=".2f", ax=ax)
        ax.set_title(f"{team} - Correlation Between Performance Metrics")
        fig.tight_layout()
        fig.savefig(os.path.join(output_dir, f"{team.replace(' ', '')}_corr.png"), dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed,
        # so figures do not pile up across the per-team loop.
        plt.close(fig)


def _save_performance_overview(team, team_data):
    """Save the 2x2 season-by-season overview figure and return its path.

    Panels: wins, run differential, win percentage against the overall
    league mean, and wins against the per-season league mean.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        fig.suptitle(f"{team} Performance Overview (1998–2024)", fontsize=18, y=0.95)

        # Top-left: raw win totals.
        wins_ax = axes[0, 0]
        sns.lineplot(data=team_data, x="year", y="wins", marker="o", ax=wins_ax, color="navy")
        wins_ax.set_title("Wins by Season")
        wins_ax.set_ylabel("Wins")
        wins_ax.grid(True)

        # Top-right: run differential, with a zero line as the break-even mark.
        diff_ax = axes[0, 1]
        sns.lineplot(data=team_data, x="year", y="run_diff", marker="o", ax=diff_ax, color="darkred")
        diff_ax.axhline(0, linestyle="--", color="gray")
        diff_ax.set_title("Run Differential by Season")
        diff_ax.set_ylabel("Run Differential")
        diff_ax.grid(True)

        # Bottom-left: win percentage against the overall league mean.
        pct_ax = axes[1, 0]
        sns.lineplot(data=team_data, x="year", y="win_pct", marker="o", ax=pct_ax, label=team, color="teal")
        pct_ax.axhline(league_win_avg, color="gray", linestyle="--", label="League Avg")
        # Keep the shared 0.3 floor so panels are comparable between teams,
        # but lower it when a season falls below it; with a fixed floor such
        # a point would be drawn outside the visible axes. The comparison is
        # False for NaN, which leaves the default floor in place.
        y_floor = 0.3
        lowest_pct = team_data["win_pct"].min()
        if lowest_pct < y_floor:
            y_floor = max(0.0, lowest_pct - 0.02)
        pct_ax.set_ylim(y_floor, 1.0)
        pct_ax.set_title("Win Percentage by Season")
        pct_ax.set_ylabel("Win %")
        pct_ax.legend()
        pct_ax.grid(True)

        # Bottom-right: wins against the per-season league mean.
        cmp_ax = axes[1, 1]
        sns.lineplot(data=team_data, x="year", y="wins", marker="o", ax=cmp_ax, label=team, color="blue")
        sns.lineplot(data=league_avg, x="year", y="wins", ax=cmp_ax, label="League Avg", color="gray", linestyle="--")
        cmp_ax.set_title("Wins vs. League Average")
        cmp_ax.legend()
        cmp_ax.grid(True)

        # Leave the top 5% of the canvas free for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        output_path = os.path.join(output_dir, f"{team.replace(' ', '')}.png")
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return output_path


def generate_report(team):
    """Generate and save the two report images for one team.

    Filters the module-level ``data`` frame to ``team``, ordered by year,
    and writes two PNG files into ``output_dir``: a correlation heatmap
    (``<Team>_corr.png``) and a four-panel performance overview
    (``<Team>.png``), where ``<Team>`` is the team name with spaces removed.

    Args:
        team: Team name exactly as it appears in ``data["team_name"]``.

    Returns:
        None. If ``data`` has no rows for ``team``, a message is printed and
        no files are written.
    """
    team_data = data[data["team_name"] == team].sort_values("year")

    if team_data.empty:
        print(f"No data found for {team}")
        return

    _save_correlation_heatmap(team, team_data)
    output_path = _save_performance_overview(team, team_data)
    print(f"Saved: {output_path}")

# === Build one report per team, visiting teams in alphabetical order ===
all_teams = data["team_name"].unique()
ordered_teams = sorted(all_teams)
for team in ordered_teams:
    generate_report(team)


✅ Saved: team_reports/ArizonaDiamondbacks.png
✅ Saved: team_reports/AtlantaBraves.png
✅ Saved: team_reports/BaltimoreOrioles.png
✅ Saved: team_reports/BostonRedSox.png
✅ Saved: team_reports/ChicagoCubs.png
✅ Saved: team_reports/ChicagoWhiteSox.png
✅ Saved: team_reports/CincinnatiReds.png
✅ Saved: team_reports/ClevelandGuardians.png
✅ Saved: team_reports/ColoradoRockies.png
✅ Saved: team_reports/DetroitTigers.png
✅ Saved: team_reports/HoustonAstros.png
✅ Saved: team_reports/KansasCityRoyals.png
✅ Saved: team_reports/LosAngelesAngels.png
✅ Saved: team_reports/LosAngelesDodgers.png
✅ Saved: team_reports/MiamiMarlins.png
✅ Saved: team_reports/MilwaukeeBrewers.png
✅ Saved: team_reports/MinnesotaTwins.png
✅ Saved: team_reports/NewYorkMets.png
✅ Saved: team_reports/NewYorkYankees.png
✅ Saved: team_reports/OaklandAthletics.png
✅ Saved: team_reports/PhiladelphiaPhillies.png
✅ Saved: team_reports/PittsburghPirates.png
✅ Saved: team_reports/SanDiegoPadres.png
✅ Saved: team_reports/SanFranciscoGia