In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json, os
from datetime import datetime
from matplotlib.backends.backend_pdf import PdfPages

# --- CONFIG ---
sns.set(style="whitegrid", palette="muted")
os.makedirs("figures", exist_ok=True)

# --- LOAD DATA ---
with open("data/pokemons.json", "r", encoding="utf-8") as f:
    pokemons = json.load(f)
df = pd.DataFrame(pokemons)

print(f"✅ Data loaded successfully with {len(df)} Pokémon.")

# --- FIX MISSING COLUMNS ---
stats = ["hp", "attack", "defense", "speed"]
for s in stats:
    if s not in df.columns:
        df[s] = 0

# --- NORMALIZE TYPE ---
if "type_1" in df.columns:
    df["main_type"] = df["type_1"]
elif "type" in df.columns:
    df["main_type"] = df["type"]
elif "types" in df.columns:
    df["main_type"] = df["types"].apply(lambda x: x[0] if isinstance(x, list) and len(x)>0 else "Unknown")
else:
    df["main_type"] = "Unknown"

df["main_type"] = df["main_type"].fillna("Unknown")

# --- ADD POWER SCORE ---
df["power_score"] = df["hp"] + df["attack"] + df["defense"]

# --- SUMMARY TABLE ---
summary = (
    df.groupby("main_type")[["hp", "attack", "defense", "speed", "power_score"]]
    .mean()
    .round(1)
    .sort_values("power_score", ascending=False)
)

# --- STRONGEST POKEMON ---
strongest = df.loc[df["power_score"].idxmax()]

# --- CREATE PDF ---
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
pdf_path = f"figures/Pokemon_Report_{timestamp}.pdf"

with PdfPages(pdf_path) as pdf:

    # --- COVER PAGE ---
    plt.figure(figsize=(8.5, 11))
    plt.axis("off")
    plt.text(0.1, 0.8, "POKÉMON DATA ANALYSIS REPORT (2025)", fontsize=22, weight="bold", color="#ff5733")
    plt.text(0.1, 0.7, "Generated by Melike’s Data Pipeline", fontsize=14, color="#333")
    plt.text(0.1, 0.65, f"Total Pokémon analyzed: {len(df)}", fontsize=12)
    plt.text(0.1, 0.6, f"Strongest Pokémon: {strongest['name'].capitalize()} ({strongest['main_type']})", fontsize=12)
    plt.text(0.1, 0.5, "This report explores the key statistics of Pokémon collected\nfrom PokeAPI 2025 and identifies dominant types and strengths.", fontsize=11)
    plt.text(0.1, 0.2, f"Generated on: {datetime.now().strftime('%B %d, %Y - %H:%M')}", fontsize=10, color="#555")
    pdf.savefig()
    plt.close()

    # --- FIGURE 1: AVERAGE POWER ---
    plt.figure(figsize=(10,6))
    sns.barplot(x=summary["power_score"], y=summary.index, palette="plasma")
    plt.title("Average Power by Pokémon Type (2025 Data)", fontsize=14)
    plt.xlabel("Average Power Score")
    plt.ylabel("Pokémon Type")
    plt.tight_layout()
    pdf.savefig()
    plt.close()

    # --- FIGURE 2: CORRELATION HEATMAP ---
    plt.figure(figsize=(8,6))
    sns.heatmap(df[["hp","attack","defense","speed","power_score"]].corr(), annot=True, cmap="coolwarm", fmt=".2f")
    plt.title("Correlation Between Pokémon Stats")
    plt.tight_layout()
    pdf.savefig()
    plt.close()

    # --- FIGURE 3: SCATTERPLOT ---
    plt.figure(figsize=(8,6))
    sns.scatterplot(data=df, x="speed", y="power_score", hue="main_type", alpha=0.8)
    plt.title("Power vs Speed by Pokémon Type")
    plt.xlabel("Speed")
    plt.ylabel("Power Score")
    plt.legend(bbox_to_anchor=(1.05,1), loc="upper left")
    plt.tight_layout()
    pdf.savefig()
    plt.close()

    # --- FIGURE 4: TOP 10 TABLE ---
    top10 = df.sort_values("power_score", ascending=False).head(10)
    fig, ax = plt.subplots(figsize=(8.5,4))
    ax.axis("off")
    table = ax.table(
        cellText=top10[["name","main_type","hp","attack","defense","speed","power_score"]].values,
        colLabels=["Name","Type","HP","Attack","Defense","Speed","Power"],
        loc="center",
        cellLoc="center",
    )
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    plt.title("Top 10 Strongest Pokémon", fontsize=14)
    pdf.savefig()
    plt.close()

    # --- FIGURE 5: TEXT SUMMARY ---
    report_text = f"""
POKÉMON ANALYSIS REPORT (2025)
==============================

This analysis examines Pokémon data from the 2025 PokeAPI dataset.
It focuses on identifying statistical trends, type strengths,
and relationships between core attributes such as HP, Attack,
Defense, and Speed.

A total of {len(df)} Pokémon were processed, spanning {df['main_type'].nunique()} distinct types.
The strongest Pokémon in this dataset is {strongest['name'].capitalize()} ({strongest['main_type']}-type),
with a total Power Score of {strongest['power_score']} (HP: {strongest['hp']}, Attack: {strongest['attack']}, Defense: {strongest['defense']}).

The visualization of average power per type reveals that high-power Pokémon
types such as Dragon and Rock consistently outperform lighter types like Fairy or Grass.
The correlation heatmap suggests that Attack and Defense maintain a moderate
positive relationship, indicating balanced design among many powerful Pokémon.

The scatterplot highlights the trade-off between Power and Speed — fast Pokémon
tend to have lower combined power, whereas slower Pokémon exhibit greater raw strength.

In summary, this project demonstrates how Python data tools can transform
API data into insightful reports and visual summaries for analytical storytelling.
"""
    fig, ax = plt.subplots(figsize=(8.5,11))
    ax.axis("off")
    wrapped_text = "\n".join(report_text.splitlines())
    plt.text(0, 1, wrapped_text, fontsize=10, va="top")
    pdf.savefig()
    plt.close()

print(f"\n✅ PDF report successfully created: {pdf_path}")

✅ Data loaded successfully with 20 Pokémon.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=summary["power_score"], y=summary.index, palette="plasma")



✅ PDF report successfully created: figures/Pokemon_Report_20251024_1357.pdf
