In [None]:
# Imports
import pandas as pd
import numpy as np

# Load the processed players, teams and awards CSV files.
df_players = pd.read_csv('dataset/processed/players_teams_processed.csv')
df_teams = pd.read_csv('dataset/processed/teams_processed.csv')
df_awards = pd.read_csv('dataset/processed/awards_processed.csv')

# League-wide totals per season: each team-level column is summed over all
# teams sharing the same 'year' and stored under its 'lg_*' name.
league_totals_by_team_col = {
    'o_asts': 'lg_AST',
    'o_fgm': 'lg_FG',
    'o_ftm': 'lg_FT',
    'o_pts': 'lg_PTS',
    'o_fga': 'lg_FGA',
    'o_oreb': 'lg_ORB',
    'o_to': 'lg_TOV',
    'o_fta': 'lg_FTA',
    'o_reb': 'lg_TRB',
    'o_pf': 'lg_PF',
}

# The year labels are dropped on purpose: the result is indexed 0..n-1 in
# ascending year order (groupby sorts its keys), and is consumed by position.
league_df_stats = (
    df_teams.groupby('year')[list(league_totals_by_team_col)]
    .sum()
    .rename(columns=league_totals_by_team_col)
    .reset_index(drop=True)
)

def _zero_if_undefined(values):
    """Replace inf, -inf and NaN (e.g. from a zero denominator) with 0."""
    return values.replace([np.inf, -np.inf, np.nan], 0)


def _unadjusted_per(rows, lg, factor, vop, drb):
    """Return the per-minute rating for the player rows of a single season.

    The expression has the shape of the unadjusted PER (uPER) formula
    published by Basketball-Reference, with regular-season and playoff
    ("Post*") counting stats added together.

    Args:
        rows: player rows (already merged with their team's totals) that all
            belong to the same season.
        lg: row of ``league_df_stats`` holding that season's league totals.
        factor, vop, drb: the season's league-level scalars.

    Returns:
        A Series indexed like ``rows``. Rows with zero or missing total
        minutes get 0, because 1/minutes is replaced by 0 for them.
    """
    per_minute = _zero_if_undefined(1 / (rows['minutes'] + rows['PostMinutes']))
    # Share of the team's field goals that were assisted.
    team_ast_rate = _zero_if_undefined(rows['o_asts'] / rows['o_fgm'])

    threes = rows['threeMade'] + rows['PostthreeMade']
    assists = rows['assists'] + rows['PostAssists']
    fg_made = rows['fgMade'] + rows['PostfgMade']
    fg_att = rows['fgAttempted'] + rows['PostfgAttempted']
    ft_made = rows['ftMade'] + rows['PostftMade']
    ft_att = rows['ftAttempted'] + rows['PostftAttempted']
    turnovers = rows['turnovers'] + rows['PostTurnovers']
    # Steals and blocks are summed like every other counting stat; the
    # previous code multiplied regular-season by playoff values, which zeroed
    # the term for any player without playoff steals/blocks.
    steals = rows['steals'] + rows['PostSteals']
    blocks = rows['blocks'] + rows['PostBlocks']
    fouls = rows['PF'] + rows['PostPF']
    # NOTE(review): rebounds use the regular-season columns only, unlike the
    # other stats -- confirm whether playoff rebounds should be added too.
    off_reb = rows['oRebounds']
    tot_reb = rows['rebounds']

    return per_minute * (
        threes
        + (2 / 3) * assists
        + (2 - factor * team_ast_rate) * fg_made
        + ft_made * 0.5 * (1 + (1 - team_ast_rate) + (2 / 3) * team_ast_rate)
        - vop * turnovers
        - vop * drb * (fg_att - fg_made)
        - vop * 0.44 * (0.44 + (0.56 * drb)) * (ft_att - ft_made)
        + vop * (1 - drb) * (tot_reb - off_reb)
        + vop * drb * off_reb
        + vop * steals
        + vop * drb * blocks
        - fouls * ((lg['lg_FT'] / lg['lg_PF']) - 0.44 * (lg['lg_FTA'] / lg['lg_PF']) * vop)
    )


# Attach each player's team totals (o_asts, o_fgm, ...) for the same season.
stats_df_players = pd.merge(df_players, df_teams, on=['tmID', 'year'], how='left')

# league_df_stats is positional: row k holds the totals of the k-th season in
# ascending order, which is the order groupby('year') produced. Recover the
# season labels the same way instead of assuming they are exactly 1..10.
seasons = np.sort(df_teams['year'].dropna().unique())

for position, season in enumerate(seasons):
    lg = league_df_stats.iloc[position]
    in_season = stats_df_players['year'] == season

    # League-level scalars for this season.
    factor = (2 / 3) - (0.5 * (lg['lg_AST'] / lg['lg_FG'])) / (2 * (lg['lg_FG'] / lg['lg_FT']))
    vop = lg['lg_PTS'] / (lg['lg_FGA'] - lg['lg_ORB'] + lg['lg_TOV'] + 0.44 * lg['lg_FTA'])
    drb = (lg['lg_TRB'] - lg['lg_ORB']) / lg['lg_TRB']

    stats_df_players.loc[in_season, 'factor'] = factor
    stats_df_players.loc[in_season, 'vop'] = vop
    stats_df_players.loc[in_season, 'drb'] = drb

    # Only this season's rows are evaluated, and every term is read from the
    # merged frame, so nothing relies on df_players sharing its index.
    stats_df_players.loc[in_season, 'PER'] = _unadjusted_per(
        stats_df_players.loc[in_season], lg, factor, vop, drb
    )
# Shift every rating by the same offset so the lowest computed rating becomes 1.
minimumPer = -stats_df_players['PER'].min()
stats_df_players['PER'] = stats_df_players['PER'] + minimumPer + 1

# Replace each row's rating with the one from the same player's preceding row
# (the first row of every player becomes NaN).
# NOTE(review): "preceding" means the current row order of the frame; this is
# only the previous season if rows are ordered by year within each player --
# confirm against the input file.
stats_df_players['PER'] = stats_df_players.groupby('playerID', group_keys=False)['PER'].shift(1)

# Zero the rating of rows with fewer than 100 combined regular-season and
# playoff minutes. This must be a single .loc assignment: the previous chained
# form `df[mask]['PER'] = 0` wrote to a temporary copy and changed nothing
# (pandas raised SettingWithCopyWarning for it).
low_minutes = (stats_df_players['minutes'] + stats_df_players['PostMinutes']) < 100
stats_df_players.loc[low_minutes, 'PER'] = 0

# Order rows so that, inside every (team, year) group, the highest PER comes
# first; NaN ratings are placed last by sort_values.
ranked_players = stats_df_players.sort_values(
    by=['tmID', 'year', 'PER'],
    ascending=[True, True, False],
)

# Keep at most the five best-rated rows of each team-season and average them
# (the mean ignores NaN ratings).
top_five = ranked_players.groupby(['tmID', 'year']).head(5)
team_stats = top_five.groupby(['tmID', 'year'])['PER'].mean().reset_index()

# Within each team, move every value one row up, so a row carries the value
# computed for that team's following row; the team's last row becomes NaN.
shifted_per = team_stats.groupby('tmID', group_keys=False)['PER'].shift(-1)

# Gaps left by the shift (or by groups with no rating at all) are filled with
# the overall mean of the shifted values.
team_stats['PER'] = shifted_per
team_stats['PER'] = team_stats['PER'].fillna(team_stats['PER'].mean())
print(team_stats['PER'].mean())

# Add the team-level PER column to the teams table and write it out.
df_teams = df_teams.merge(team_stats, on=['tmID', 'year'], how='left')
df_teams.to_csv('dataset/processed/teams_test.csv', index=False)


# Display the sorted DataFrame



3.265326317555512


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stats_df_players[stats_df_players['minutes'] + stats_df_players['PostMinutes'] < 100]['PER'] = 0
