In [1]:
# ----------------------------
# NBA Top 50 Player Network (Interactive PyVis)
# - Title + subtitle + community legend injected into HTML
# - Nodes: colored by Louvain community, sized by Avg PTS/36
# - Edges: weighted by shared seasons on same team
# ----------------------------

import importlib
import os
import re
import subprocess
import sys
import webbrowser
from pathlib import Path

def ensure(pkg, import_name=None):
    """Import a module, installing its distribution with pip if it is missing.

    Args:
        pkg: Name of the distribution handed to ``pip install``.
        import_name: Module name to import when it differs from ``pkg``
            (for example ``"community"`` for ``python-louvain``). Defaults
            to ``pkg``.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
    """
    module_name = import_name or pkg
    try:
        importlib.import_module(module_name)
    except ModuleNotFoundError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
        # The import system caches directory listings; without this a package
        # installed during the current session may still appear to be missing
        # when it is imported right afterwards.
        importlib.invalidate_caches()

# Install-on-demand for every third-party dependency used below.
# Each entry is (pip distribution name, importable module name or None when
# both names are the same).
for _dist, _module in (
    ("pandas", None),
    ("networkx", None),
    ("pyvis", None),
    ("matplotlib", None),
    ("python-louvain", "community"),
):
    ensure(_dist, _module)

import pandas as pd
from itertools import combinations
from pyvis.network import Network
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import networkx as nx
import community as community_louvain  # from python-louvain


# ----------------------------
# Helper: inject title/subtitle/legend into saved HTML
# ----------------------------
def inject_header_and_legend(html_file, title_text, subtitle_text, node_colors, partition):
    """Insert a title, subtitle and community colour legend into a saved HTML page.

    The file is read, a ``<style>`` block plus a header ``<div>`` are placed
    directly after the opening ``<body ...>`` tag, and the file is rewritten
    in place. If no body tag is found the header is prepended to the document.

    Args:
        html_file: Path of the HTML file to modify in place (read/written as UTF-8).
        title_text: Text for the title line; inserted verbatim, not escaped.
        subtitle_text: Text for the subtitle line; inserted verbatim, not escaped.
        node_colors: Mapping of node -> CSS colour string.
        partition: Mapping of node -> community id. Ids must be sortable and
            support ``+ 1`` because the legend numbers communities from 1.
    """
    # One pass over the partition: remember the first node seen for each
    # community; that node's colour represents the community in the legend.
    first_member = {}
    for node, cid in partition.items():
        first_member.setdefault(cid, node)

    # Build legend items sorted by community id
    legend_parts = []
    for cid in sorted(first_member):
        color = node_colors.get(first_member[cid], "rgb(200,200,200)")
        legend_parts.append(f"""
        <span class="legend-item">
            <span class="swatch" style="background:{color};"></span>
            Community {cid + 1}
        </span>
        """)
    legend_items = "".join(legend_parts)

    with open(html_file, "r", encoding="utf-8") as f:
        page = f.read()

    header = f"""
    <style>
      body {{
        background-color: #1b1b1b;
        margin: 0;
        font-family: Arial, Helvetica, sans-serif;
      }}

      .header {{
        position: relative;
        text-align: center;
        padding: 14px 10px 6px 10px;
      }}

      .title {{
        color: white;
        font-size: 30px;
        font-weight: 700;
        margin-bottom: 4px;
      }}

      .subtitle {{
        color: #cccccc;
        font-size: 13px;
        margin-bottom: 6px;
      }}

      .legend {{
        display: flex;
        flex-wrap: wrap;
        justify-content: center;
        gap: 10px;
        font-size: 12px;
        color: #eaeaea;
        padding-bottom: 6px;
      }}

      .legend-item {{
        display: inline-flex;
        align-items: center;
        gap: 6px;
        opacity: 0.95;
      }}

      .swatch {{
        width: 10px;
        height: 10px;
        border-radius: 2px;
        display: inline-block;
      }}

      /* push network down so header is always visible */
      #mynetwork {{
        margin-top: 8px;
      }}
    </style>

    <div class="header">
        <div class="title">{title_text}</div>
        <div class="subtitle">{subtitle_text}</div>
        <div class="legend">{legend_items}</div>
    </div>
    """

    # Match the opening body tag even when it carries attributes or uses a
    # different case; an exact "<body>" comparison would miss those and push
    # the header in front of <html>, producing a malformed document.
    body_tag = re.search(r"<body\b[^>]*>", page, flags=re.IGNORECASE)
    if body_tag:
        # Splice by position (not re.sub) so backslashes in the header are
        # never interpreted as replacement escapes.
        cut = body_tag.end()
        page = page[:cut] + header + page[cut:]
    else:
        # fallback
        page = header + page

    with open(html_file, "w", encoding="utf-8") as f:
        f.write(page)


# ----------------------------
# 1) Load data
# ----------------------------
csv_path = "Per 36 Minutes.csv"
# Fail early and report the absolute path so a wrong working directory is
# obvious from the message.
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"CSV not found at: {os.path.abspath(csv_path)}")

df = pd.read_csv(csv_path)
# Strip surrounding whitespace from header cells before validating them.
df.columns = list(map(str.strip, df.columns))

# Columns the rest of the script reads; abort with the full column list if
# any of them is absent.
required = {"player", "season", "team", "g", "pts_per_36_min"}
missing = required - set(df.columns)
if missing:
    raise KeyError(f"Missing columns: {missing}\nAvailable columns: {list(df.columns)}")

# Coerce the numeric columns; unparseable cells become NaN and are dropped
# together with rows lacking a player or team.
for numeric_col in ("season", "g", "pts_per_36_min"):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")
df = df.dropna(subset=["player", "season", "team", "g", "pts_per_36_min"]).copy()

# Select seasons
seasons_selected = [2014, 2015, 2016, 2017, 2018]
in_selected_seasons = df["season"].isin(seasons_selected)
df = df.loc[in_selected_seasons].copy()

# ----------------------------
# 2) Select top 50 players by mean PTS/36 over selected seasons
# ----------------------------
# The ranking is the plain mean over every remaining row of a player; no
# filter on games played ("g") is applied before ranking.
# NOTE(review): if the CSV contains combined-team rows for players who changed
# teams mid-season, they are averaged in like any other row - confirm against
# the data.
mean_pts36_by_player = df.groupby("player")["pts_per_36_min"].mean()
top_players = mean_pts36_by_player.nlargest(50).index
df_top = df.loc[df["player"].isin(top_players)].copy()

if df_top.empty:
    raise ValueError("No data after filtering. Check seasons_selected and your CSV values.")

# ----------------------------
# 3) Node table
# ----------------------------
def _most_frequent_team(teams):
    """Return the team label that occurs most often in *teams*."""
    return teams.value_counts().idxmax()


# One row per player: summed games, mean PTS/36, number of distinct seasons
# and the team the player appears with most often.
rows_by_player = df_top.groupby("player")
node_table = rows_by_player.agg(
    total_games=("g", "sum"),
    avg_pts36=("pts_per_36_min", "mean"),
    seasons_played=("season", "nunique"),
    main_team=("team", _most_frequent_team),
).reset_index()

# ----------------------------
# 4) Link table (edges): count shared seasons on same team
# ----------------------------
# One (player, player, season) record for every pair of top players that
# share a team label within a season. Names are sorted first so a given pair
# is always emitted in the same order and groups correctly below.
links = [
    (first, second, season)
    for season, season_rows in df_top.groupby("season")
    for _team, roster in season_rows.groupby("team")
    for first, second in combinations(sorted(roster["player"].unique().tolist()), 2)
]

link_df = pd.DataFrame(links, columns=["P1", "P2", "season"])
if not link_df.empty:
    # weight = number of pair records, i.e. seasons spent on the same team
    pair_counts = link_df.groupby(["P1", "P2"]).size()
    link_table = pair_counts.reset_index(name="weight")
else:
    # Keep the expected columns so later loops simply iterate zero rows.
    link_table = pd.DataFrame(columns=["P1", "P2", "weight"])

# ----------------------------
# 5) Build NetworkX graph
# ----------------------------
G = nx.Graph()

# Nodes carry the per-player summary as attributes, cast to plain Python types.
G.add_nodes_from(
    (
        rec["player"],
        {
            "avg_pts36": float(rec["avg_pts36"]),
            "total_games": float(rec["total_games"]),
            "seasons_played": int(rec["seasons_played"]),
            "main_team": str(rec["main_team"]),
        },
    )
    for _, rec in node_table.iterrows()
)

# Edges are stored with a "weight" attribute equal to the shared-season count.
G.add_weighted_edges_from(
    (rec["P1"], rec["P2"], int(rec["weight"]))
    for _, rec in link_table.iterrows()
)

# ----------------------------
# 6) Louvain community detection
# ----------------------------
# With no edges at all, skip Louvain and place every node in community 0.
# NOTE(review): best_partition is called without a random_state, so community
# ids may differ between runs - confirm whether reproducibility matters.
partition = (
    community_louvain.best_partition(G, weight="weight")
    if G.number_of_edges() > 0
    else {n: 0 for n in G.nodes()}
)

# ----------------------------
# 7) Assign colors per community
# ----------------------------
community_colors = plt.cm.tab20.colors  # 20 distinct colors

def rgb_from_tuple(t):
    """Convert a colour tuple of 0-1 floats into a CSS ``rgb(r,g,b)`` string.

    Only the first three components of ``t`` are read; each is multiplied by
    255 and truncated with ``int``.
    """
    red, green, blue = (int(t[idx] * 255) for idx in range(3))
    return "rgb({},{},{})".format(red, green, blue)

# Map every node to the CSS colour of its community. The palette index wraps
# at 20, so community ids beyond that reuse colours.
node_colors = {}
for node, comm_id in partition.items():
    node_colors[node] = rgb_from_tuple(community_colors[comm_id % 20])

# ----------------------------
# 8) Create PyVis network
# ----------------------------
# height increased so title/legend fit above without pushing offscreen
net = Network(
    height="820px",
    width="100%",
    bgcolor="#1b1b1b",
    font_color="white",
)
net.barnes_hut()

# ----------------------------
# 9) Scale node size by avg_pts36
# ----------------------------
# Normalise against the lowest and highest average among the selected players.
pts_values = node_table["avg_pts36"].tolist()
lowest_pts, highest_pts = min(pts_values), max(pts_values)
norm = mcolors.Normalize(vmin=lowest_pts, vmax=highest_pts)

# Bounds of the size range that node_size() interpolates within.
min_size = 15
max_size = 50
def node_size(val):
    """Map an average PTS/36 value to an integer node size.

    ``norm(val)`` is used as the interpolation fraction between the
    module-level ``min_size`` and ``max_size``; the result is truncated
    with ``int``.
    """
    size_span = max_size - min_size
    fraction = norm(val)
    return int(min_size + size_span * fraction)

# ----------------------------
# 10) Add nodes
# ----------------------------
for _, row in node_table.iterrows():
    player = row["player"]

    # Tooltip content: one entry per line, joined with <br>.
    hover_lines = [
        f"<b>{player}</b>",
        f"Total Games: {int(row['total_games'])}",
        f"Avg PTS/36: {row['avg_pts36']:.1f}",
        f"Seasons Played: {int(row['seasons_played'])}",
        f"Main Team: {row['main_team']}",
        # Community ids are shown 1-based; unknown players fall back to 0 -> 1.
        f"Community: {partition.get(player, 0) + 1}",
    ]
    hover_html = "<br>".join(hover_lines)

    # NOTE(review): the size is passed as `value`, which vis.js presumably
    # rescales through its own scaling options rather than using it as a
    # pixel size - confirm the 15-50 range has the intended effect.
    # NOTE(review): `borderColor` is forwarded as-is; vis.js documents the
    # border colour under `color.border` - confirm this option is honoured.
    node_options = {
        "label": player,
        "title": hover_html,
        "value": node_size(row["avg_pts36"]),
        "color": node_colors.get(player, "rgb(200,200,200)"),
        "font": {"size": 14, "color": "white"},
        "borderWidth": 2,
        "borderColor": "white",
        "shadow": True,
    }
    net.add_node(player, **node_options)

# ----------------------------
# 11) Add edges
# ----------------------------
# Walk the three link columns in lockstep; the shared-season count drives
# both the edge `value` and its tooltip.
for source, target, raw_weight in zip(
    link_table["P1"], link_table["P2"], link_table["weight"]
):
    shared_seasons = int(raw_weight)
    net.add_edge(
        source,
        target,
        value=shared_seasons,
        title=f"Shared Seasons: {shared_seasons}",
        color="rgba(255,215,0,0.6)",
    )

# ----------------------------
# 12) ForceAtlas2-like layout
# ----------------------------
# NOTE(review): barnes_hut() was already called when the network was created;
# this later call presumably replaces that physics configuration - confirm.
net.force_atlas_2based(
    gravity=-50,
    central_gravity=0.01,
    spring_length=120,
    spring_strength=0.08,
)

# ----------------------------
# 13) Save + inject title/legend + open
# ----------------------------
html_file = "nba_network_interactive.html"
net.write_html(html_file)

# Post-process the saved page: add the title, subtitle and colour legend.
inject_header_and_legend(
    html_file,
    title_text="NBA Top 50 player network",
    subtitle_text="Nodes colored by Louvain communities, size by Avg PTS/36, edges weighted by shared seasons",
    node_colors=node_colors,
    partition=partition
)

html_path = os.path.abspath(html_file)
print(f"✅ Saved: {html_path}")
# Path.as_uri() yields a well-formed file:// URL (forward slashes, drive
# letters, percent-encoded spaces); concatenating "file://" with a raw path
# does not.
webbrowser.open(Path(html_path).as_uri())

FileNotFoundError: CSV not found at: /content/Per 36 Minutes.csv