In [2]:
# ===============================================================
# 01_b_processing_v19_test.py
# ===============================================================
# Constr√≥i dataset TEST v19 (para previs√£o final)
# Usa APENAS dados permitidos pelo professor (coaches, players_teams, teams)
# NUNCA usa rank, vit√≥rias, pontos ou estat√≠sticas do ano atual.
# ===============================================================

import pandas as pd
import numpy as np
import os

print(">>> [v19 TEST] A processar test set...")

# ===============================================================
# 1Ô∏è‚É£ Ler dados DO TEST SET (dados do professor)
# ===============================================================
players_teams_test = pd.read_csv("Season_11/players_teams.csv")
teams_test = pd.read_csv("Season_11/teams.csv")
coaches_test = pd.read_csv("Season_11/coaches.csv")   # usado s√≥ se precisares mais tarde

# ===============================================================
# 2Ô∏è‚É£ Ler dados hist√≥ricos (para obter T‚àí1)
# ===============================================================
players_teams_hist = pd.read_csv("data/players_teams.csv")
teams_hist = pd.read_csv("data/teams.csv")
awards_hist = pd.read_csv("data/awards_players.csv")

# Garantir ano num√©rico
for df in (players_teams_hist, players_teams_test, teams_hist, teams_test, awards_hist):
    if "year" in df.columns:
        df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
        df.dropna(subset=["year"], inplace=True)
        df["year"] = df["year"].astype(int)

test_year = teams_test["year"].iloc[0]
prev_year = test_year - 1

print(f"Preparar previs√£o para o ano {test_year} (usando T-1 = {prev_year})")

# ===============================================================
# 3Ô∏è‚É£ Calcular player_rating_prev usando hist√≥rico
# ===============================================================
cols_needed = ["playerID","year","points","rebounds","assists","steals","blocks",
               "turnovers","minutes","threeMade","threeAttempted","fgMade",
               "fgAttempted","PF","GP"]
for c in cols_needed:
    if c not in players_teams_hist.columns:
        players_teams_hist[c] = 0

agg = (
    players_teams_hist.groupby(["playerID", "year"], as_index=False)
    .agg(
        minutes=("minutes","sum"),
        points=("points","sum"),
        rebounds=("rebounds","sum"),
        assists=("assists","sum"),
        steals=("steals","sum"),
        blocks=("blocks","sum"),
        turnovers=("turnovers","sum"),
        threeMade=("threeMade","sum"),
        threeAttempted=("threeAttempted","sum"),
        fgMade=("fgMade","sum"),
        fgAttempted=("fgAttempted","sum"),
        PF=("PF","sum"),
        GP=("GP","sum"),
    )
)

# rating
agg["player_rating"] = (
      agg["points"]
    + agg["rebounds"] * 1.2
    + agg["assists"] * 1.5
    + agg["steals"] * 3
    + agg["blocks"] * 3
    - agg["turnovers"] * 2
    - agg["PF"] * 0.5
) / (agg["minutes"] / 36 + 1)

# filtrar players com minutos
agg = agg[agg["minutes"] > 50]

# criar T‚àí1 map
prev_map = agg[agg["year"] == prev_year].copy()
prev_map = prev_map.rename(columns={"player_rating": "player_rating_prev"})
prev_map = prev_map[["playerID", "player_rating_prev", "minutes"]]
prev_map = prev_map.rename(columns={"minutes": "minutes_prev"})

# ===============================================================
# 4Ô∏è‚É£ Integrar pr√©mios T‚àí1
# ===============================================================
awards_prev = awards_hist[awards_hist["year"] == prev_year]
awards_prev = awards_prev[awards_prev["award"] != "Coach of the Year"]

award_weights = {
    "Most Valuable Player": 5,
    "WNBA Finals Most Valuable Player": 4,
    "Defensive Player of the Year": 3,
    "Rookie of the Year": 2,
}

awards_prev["award_weight"] = awards_prev["award"].map(award_weights).fillna(1.0)

award_counts = (
    awards_prev.groupby("playerID")["award_weight"]
    .sum().reset_index()
    .rename(columns={"award_weight":"num_awards"})
)

prev_map = prev_map.merge(award_counts, on="playerID", how="left")
prev_map["num_awards"] = prev_map["num_awards"].fillna(0)
prev_map["player_rating_prev"] *= (1 + 0.3 * prev_map["num_awards"])

# ===============================================================
# 5Ô∏è‚É£ Roster T‚àí1 baseado no test/players_teams
# ===============================================================
roster_test = players_teams_test[["playerID","tmID","year"]].drop_duplicates()
roster_test = roster_test.merge(prev_map, on="playerID", how="left")

mean_rating_prev = prev_map["player_rating_prev"].mean()
roster_test["player_rating_prev"].fillna(mean_rating_prev, inplace=True)
roster_test["minutes_prev"].fillna(0, inplace=True)
roster_test["num_awards"].fillna(0, inplace=True)

# ===============================================================
# 6Ô∏è‚É£ Construir features por equipa (somente as 4)
# ===============================================================
def build_team_features(df):
    w = df["minutes_prev"].replace(0, np.nan).fillna(1.0)
    return pd.Series({
        "avg_player_rating": np.average(df["player_rating_prev"], weights=w),
        "team_total_awards": df["num_awards"].sum(),
    })

team_stats = (
    roster_test.groupby(["tmID","year"], as_index=False)
    .apply(build_team_features)
    .reset_index()
    .drop(columns=["level_2"], errors="ignore")
)

# ELITE RATIO T‚àí1
elite_cutoff = prev_map["player_rating_prev"].quantile(0.90)
prev_map["is_elite"] = (prev_map["player_rating_prev"] >= elite_cutoff).astype(int)

elite_ratio = (
    roster_test.merge(prev_map[["playerID","is_elite"]], on="playerID", how="left")
    .groupby(["tmID","year"], as_index=False)["is_elite"]
    .mean()
    .rename(columns={"is_elite":"elite_ratio_prev"})
)

team_stats = team_stats.merge(elite_ratio, on=["tmID","year"], how="left")
team_stats["elite_ratio_prev"].fillna(0, inplace=True)

# ===============================================================
# 7Ô∏è‚É£ margin_prev usando hist√≥rico (teams_hist)
# ===============================================================
teams_hist = teams_hist.sort_values(["tmID", "year"])

teams_hist["o_pts_prev"] = teams_hist.groupby("tmID")["o_pts"].shift(1)
teams_hist["d_pts_prev"] = teams_hist.groupby("tmID")["d_pts"].shift(1)
teams_hist["GP_prev"] = teams_hist.groupby("tmID")["GP"].shift(1)

teams_hist["margin_prev"] = (
    teams_hist["o_pts_prev"] - teams_hist["d_pts_prev"]
) / teams_hist["GP_prev"].replace(0, np.nan)

teams_prev_year = teams_hist[teams_hist["year"] == prev_year][["tmID","margin_prev"]]

# ===============================================================
# 8Ô∏è‚É£ Merge final
# ===============================================================
test_ready = teams_test.merge(team_stats, on=["tmID","year"], how="left")
test_ready = test_ready.merge(teams_prev_year, on="tmID", how="left")

test_ready.fillna(0, inplace=True)

# ===============================================================
# 9Ô∏è‚É£ Guardar TEST FINAL
# ===============================================================
final_cols = [
    "tmID","year","confID",
    "margin_prev","avg_player_rating","team_total_awards","elite_ratio_prev"
]

final_test = test_ready[final_cols].copy()

os.makedirs("a", exist_ok=True)
out_path = "a/teams_test_v19.csv"
final_test.to_csv(out_path, index=False)

print(f"\n‚úÖ TEST v19 criado com sucesso ‚Üí {out_path}")


>>> [v19 TEST] A processar test set...
Preparar previs√£o para o ano 11 (usando T-1 = 10)

‚úÖ TEST v19 criado com sucesso ‚Üí a/teams_test_v19.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  roster_test["player_rating_prev"].fillna(mean_rating_prev, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  roster_test["minutes_prev"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object

In [3]:
# ===============================================================
# 04_a_predict_v19.py  (VERS√ÉO CORRETA)
# ===============================================================

import pandas as pd
import numpy as np
import pickle
import os

print("\n>>> [v19 TEST] Prever ranking do test set...\n")

# ---------------------------------------------------------------
# 1) Carregar modelo treinado
# ---------------------------------------------------------------
MODEL_PATH = "a/model_v19.pkl"
with open(MODEL_PATH, "rb") as f:
    model = pickle.load(f)

print("‚úî Modelo carregado.")

# ---------------------------------------------------------------
# 2) Carregar test set j√° processado (feito no script 02)
# ---------------------------------------------------------------
TEST_PATH = "a/teams_test_v19.csv"

if not os.path.exists(TEST_PATH):
    raise FileNotFoundError("‚ùå ERRO: 'teams_test_v19.csv' n√£o existe.\nCorre o script 02 primeiro.")

test = pd.read_csv(TEST_PATH)

print("‚úî Test set carregado.")
print(test.head())

# ---------------------------------------------------------------
# 3) Features
# ---------------------------------------------------------------
X_COLS = ["margin_prev", "avg_player_rating", "team_total_awards", "elite_ratio_prev"]
X = test[X_COLS]

# ---------------------------------------------------------------
# 4) Previs√£o
# ---------------------------------------------------------------
test["pred_raw"] = model.predict(X)

# Ranking por confer√™ncia
test["rank_pred"] = test.groupby("confID")["pred_raw"].rank(ascending=True, method="min")

# ---------------------------------------------------------------
# 5) Guardar
# ---------------------------------------------------------------
OUTPUT = test[["tmID", "confID", "year", "rank_pred"]].sort_values(["confID", "rank_pred"])

OUTPUT_PATH = "a/predictions_test_v19.csv"
OUTPUT.to_csv(OUTPUT_PATH, index=False)

print("\n‚úî Previs√µes criadas com sucesso!")
print("Guardado em:", OUTPUT_PATH)
print("\nPreview:")
print(OUTPUT)



>>> [v19 TEST] Prever ranking do test set...

‚úî Modelo carregado.
‚úî Test set carregado.
  tmID  year confID  margin_prev  avg_player_rating  team_total_awards  \
0  ATL    11     EA   -10.147059          26.837347                2.0   
1  CHI    11     EA    -1.117647          21.072279                0.0   
2  CON    11     EA     4.382353          22.162138                1.0   
3  IND    11     EA     0.470588          30.870707                3.0   
4  LAS    11     WE     2.205882          22.974650                0.0   

   elite_ratio_prev  
0             0.300  
1             0.000  
2             0.000  
3             0.100  
4             0.125  

‚úî Previs√µes criadas com sucesso!
Guardado em: a/predictions_test_v19.csv

Preview:
   tmID confID  year  rank_pred
3   IND     EA    11        1.0
2   CON     EA    11        2.0
6   NYL     EA    11        3.0
0   ATL     EA    11        4.0
11  WAS     EA    11        5.0
1   CHI     EA    11        6.0
7   PHO     WE    1

In [2]:
# ===============================================================
# ü§ñ 04_a_predict_v19_by_conf.py
# ===============================================================
# Usa os modelos treinados por confer√™ncia:
# - Western Conference (WE): Linear Regression
# - Eastern Conference (EA): Random Forest
# ===============================================================

import pandas as pd
import numpy as np
import pickle
import os

print("\n>>> [v19 TEST] Prever ranking do test set com modelos por confer√™ncia...\n")

# ---------------------------------------------------------------
# 1) Carregar modelos treinados
# ---------------------------------------------------------------
MODEL_EA_PATH = "a/model_EA_v19.pkl"
MODEL_WE_PATH = "a/model_WE_v19.pkl"

with open(MODEL_EA_PATH, "rb") as f:
    model_EA = pickle.load(f)

with open(MODEL_WE_PATH, "rb") as f:
    model_WE = pickle.load(f)

print("‚úî Modelos carregados (EA = RF | WE = LR).")

# ---------------------------------------------------------------
# 2) Carregar test set j√° processado
# ---------------------------------------------------------------
TEST_PATH = "a/teams_test_v19.csv"

if not os.path.exists(TEST_PATH):
    raise FileNotFoundError("‚ùå ERRO: 'teams_test_v19.csv' n√£o existe.\nCorre o script 02 primeiro.")

test = pd.read_csv(TEST_PATH)

print("‚úî Test set carregado.")
print(test.head())

# ---------------------------------------------------------------
# 3) Features
# ---------------------------------------------------------------
X_COLS = ["margin_prev", "avg_player_rating", "team_total_awards", "elite_ratio_prev"]

# Criar coluna para previs√µes vazias
test["pred_raw"] = np.nan

# ---------------------------------------------------------------
# 4) Previs√£o por confer√™ncia
# ---------------------------------------------------------------
for conf, model in [("EA", model_EA), ("WE", model_WE)]:
    subset = test["confID"] == conf
    X = test.loc[subset, X_COLS]
    test.loc[subset, "pred_raw"] = model.predict(X)

# Ranking final por confer√™ncia
test["rank_pred"] = test.groupby("confID")["pred_raw"].rank(ascending=True, method="min")

# ---------------------------------------------------------------
# 5) Guardar resultados
# ---------------------------------------------------------------
OUTPUT = test[["tmID", "confID", "year", "rank_pred"]].sort_values(["confID", "rank_pred"])

OUTPUT_PATH = "a/predictions_test_v19_by_conf.csv"
OUTPUT.to_csv(OUTPUT_PATH, index=False)

print("\n‚úî Previs√µes criadas com sucesso!")
print("Guardado em:", OUTPUT_PATH)
print("\nPreview:")
print(OUTPUT)


>>> [v19 TEST] Prever ranking do test set com modelos por confer√™ncia...

‚úî Modelos carregados (EA = RF | WE = LR).
‚úî Test set carregado.
  tmID  year confID  margin_prev  avg_player_rating  team_total_awards  \
0  ATL    11     EA   -10.147059          26.837347                2.0   
1  CHI    11     EA    -1.117647          21.072279                0.0   
2  CON    11     EA     4.382353          22.162138                1.0   
3  IND    11     EA     0.470588          30.870707                3.0   
4  LAS    11     WE     2.205882          22.974650                0.0   

   elite_ratio_prev  
0             0.300  
1             0.000  
2             0.000  
3             0.100  
4             0.125  

‚úî Previs√µes criadas com sucesso!
Guardado em: a/predictions_test_v19_by_conf.csv

Preview:
   tmID confID  year  rank_pred
0   ATL     EA    11        1.0
11  WAS     EA    11        2.0
3   IND     EA    11        3.0
2   CON     EA    11        4.0
1   CHI     EA    11    