In [3]:
# Usando dados como pontos marcados, sofridos, eficiência ofensiva/defensiva e rebotes, prever quantas vitórias uma equipa terá numa temporada

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split


# 1. Load the raw team-season table. `teams_df` is kept untouched so later
#    cells can still rely on the original data.
teams_df = pd.read_csv("data/teams.csv")

# 2. Build the modelling table on a separate frame (`assign` returns a new
#    DataFrame, so `teams_df` is not mutated) and add the derived features:
#      point_diff     -> o_pts - d_pts
#      off_efficiency -> o_pts per o_fga
#      def_efficiency -> d_pts per d_fga
#      rebalance      -> o_reb - d_reb
#    NOTE(review): presumably the o_* columns are the team's own totals and
#    the d_* columns are what it allowed to opponents - confirm against the
#    dataset description.
# 3. Drop rows where the target ("won") or the core scoring columns are
#    missing; such rows cannot be used to fit or evaluate the model.
teams_model = teams_df.assign(
    point_diff=teams_df["o_pts"] - teams_df["d_pts"],
    off_efficiency=teams_df["o_pts"] / teams_df["o_fga"],
    def_efficiency=teams_df["d_pts"] / teams_df["d_fga"],
    rebalance=teams_df["o_reb"] - teams_df["d_reb"],
).dropna(subset=["won", "o_pts", "d_pts"])

# 4. Persist the modelling table. The output directory is created here so
#    this cell also works on a fresh checkout (Restart & Run All) instead of
#    depending on a later cell having created it.
os.makedirs("data_models", exist_ok=True)
teams_model.to_csv("data_models/teams_model.csv", index=False)

In [4]:
import pandas as pd
import numpy as np

import os

# 1. Load the per-player, per-team, per-season stat lines.
player_stints_raw = pd.read_csv("data/players_teams.csv")

# 2. Collapse every (player, season) pair into a single row: counting stats
#    are summed, and the team is taken from the last row of the group.
#    NOTE(review): "last" follows the row order of the CSV - presumably that
#    is the player's final team of the season; confirm the file is ordered
#    that way.
season_aggregations = {
    stat: "sum"
    for stat in ["points", "rebounds", "assists", "steals", "blocks", "turnovers"]
}
season_aggregations["tmID"] = "last"
season_totals = player_stints_raw.groupby(
    ["playerID", "year"], as_index=False
).agg(season_aggregations)

#    Individual rating: a hand-weighted linear combination of the season
#    totals, with turnovers counted as a penalty.
season_rating = (
    season_totals["points"].mul(1)
    .add(season_totals["rebounds"].mul(1.2))
    .add(season_totals["assists"].mul(1.5))
    .add(season_totals["steals"].mul(3))
    .add(season_totals["blocks"].mul(3))
    .sub(season_totals["turnovers"].mul(2))
)

# 3-4. Keep only the columns needed downstream, order each player's seasons
#      chronologically, and attach the rating from the player's previous row.
#      NOTE(review): shift(1) picks the previous *recorded* season of the
#      player, which is not necessarily `year - 1` if a season was skipped.
players_teams = (
    season_totals.assign(player_rating=season_rating)
    .loc[:, ["playerID", "year", "tmID", "player_rating"]]
    .sort_values(["playerID", "year"])
    .assign(
        player_rating_prev=lambda frame: frame.groupby("playerID")["player_rating"].shift(1)
    )
)

# 5. Team strength per (season, team): the MEAN of the previous-season
#    ratings of the players on that roster (players without a previous
#    rating are skipped by the mean).
#    NOTE(review): the output column is called "team_sum_player_prev_rating"
#    although it holds a mean; the name is kept because downstream code may
#    read the CSV by that column name.
team_prev_ratings = (
    players_teams.groupby(["year", "tmID"])["player_rating_prev"]
    .mean()
    .rename("team_sum_player_prev_rating")
    .reset_index()
)

# 6. Rows without any previous-season data (e.g. the first season) are
#    intentionally kept and stay NaN; they are not dropped here.

# 7. Persist the table for later analysis.
os.makedirs("data_models", exist_ok=True)
team_prev_ratings.to_csv("data_models/team_prev_ratings.csv", index=False)

# Sanity check: first-season players of two teams (no previous rating expected).
preview_columns = ["playerID", "year", "tmID", "player_rating", "player_rating_prev"]
print(players_teams.query("year == 1 and tmID in ['SEA', 'UTA']")[preview_columns].head(10))


print("\n✅ Criada tabela com força da equipa (baseada no ano anterior):")
print(team_prev_ratings.head(10))


       playerID  year tmID  player_rating  player_rating_prev
53   aycocan01w     1  SEA           12.0                 NaN
55    azzije01w     1  UTA          326.2                 NaN
81   barnequ01w     1  SEA          391.3                 NaN
230  campbed01w     1  SEA          307.3                 NaN
408  dydekma01w     1  UTA          758.5                 NaN
417  edwarmi01w     1  SEA          216.7                 NaN
419  edwarsi01w     1  SEA          365.4                 NaN
502  fresest01w     1  UTA           93.1                 NaN
515  gaithka01w     1  UTA           56.5                 NaN
522  garnean01w     1  SEA          281.1                 NaN

✅ Criada tabela com força da equipa (baseada no ano anterior):
   year tmID  team_sum_player_prev_rating
0     1  CHA                          NaN
1     1  CLE                          NaN
2     1  DET                          NaN
3     1  HOU                          NaN
4     1  IND                          NaN
5 