In [None]:
import pandas as pd
import numpy as np

# Load the three pre-processed tables used by this cell.
df_players = pd.read_csv('dataset/processed/players_teams_processed.csv')
df_teams = pd.read_csv('dataset/processed/teams_processed.csv')
df_awards = pd.read_csv('dataset/processed/awards_processed.csv')

# Team column -> league-total column. The dict order fixes the column order of league_df_stats.
league_total_columns = {
    'o_asts': 'lg_AST',
    'o_fgm': 'lg_FG',
    'o_ftm': 'lg_FT',
    'o_pts': 'lg_PTS',
    'o_fga': 'lg_FGA',
    'o_oreb': 'lg_ORB',
    'o_to': 'lg_TOV',
    'o_fta': 'lg_FTA',
    'o_reb': 'lg_TRB',
    'o_pf': 'lg_PF',
}

# One row per season holding the sum of every team's totals for that season.
# groupby('year') sorts its keys, and reset_index(drop=True) discards them, so the rows are
# positional: row 0 is the smallest year present in df_teams, row 1 the next one, and so on.
league_df_stats = (
    df_teams
    .groupby('year')[list(league_total_columns)]
    .sum()
    .rename(columns=league_total_columns)
    .reset_index(drop=True)
)

# Attach the team's season totals (o_asts, o_fgm, ...) to every player-season row.
stats_df_players = pd.merge(df_players, df_teams, on=['tmID', 'year'], how='left')

# league_df_stats is positional: it was built with groupby('year') (keys sorted ascending)
# followed by reset_index(drop=True), so row i belongs to the i-th smallest year in df_teams.
# Re-attach those year labels instead of assuming the seasons are exactly 1..10.
league_by_year = league_df_stats.copy()
league_by_year.index = np.sort(df_teams['year'].dropna().unique())

# Broadcast the league totals of each row's season onto that row (NaN for unknown seasons).
lg = league_by_year.reindex(index=stats_df_players['year'].to_numpy())
lg.index = stats_df_players.index

# Season-level league constants used by the rating formula below.
stats_df_players['factor'] = (2 / 3) - (0.5 * (lg['lg_AST'] / lg['lg_FG'])) / (2 * (lg['lg_FG'] / lg['lg_FT']))
stats_df_players['vop'] = lg['lg_PTS'] / (lg['lg_FGA'] - lg['lg_ORB'] + lg['lg_TOV'] + 0.44 * lg['lg_FTA'])
stats_df_players['drb'] = (lg['lg_TRB'] - lg['lg_ORB']) / lg['lg_TRB']

factor = stats_df_players['factor']
vop = stats_df_players['vop']
drb = stats_df_players['drb']

# Every player stat is read from the merged frame (not df_players) so that the rows are the
# same rows as the team columns even if the merge ever changes the row count or order.
# Regular-season and postseason numbers are added together.
minutes_played = stats_df_players['minutes'] + stats_df_players['PostMinutes']
three_made = stats_df_players['threeMade'] + stats_df_players['PostthreeMade']
assists = stats_df_players['assists'] + stats_df_players['PostAssists']
fg_made = stats_df_players['fgMade'] + stats_df_players['PostfgMade']
ft_made = stats_df_players['ftMade'] + stats_df_players['PostftMade']
turnovers = stats_df_players['turnovers'] + stats_df_players['PostTurnovers']
fg_missed = (stats_df_players['fgAttempted'] + stats_df_players['PostfgAttempted']) - fg_made
ft_missed = (stats_df_players['ftAttempted'] + stats_df_players['PostftAttempted']) - ft_made
# Steals and blocks are summed like every other stat; a product would be zero for any player
# without postseason steals/blocks.
steals = stats_df_players['steals'] + stats_df_players['PostSteals']
blocks = stats_df_players['blocks'] + stats_df_players['PostBlocks']
fouls = stats_df_players['PF'] + stats_df_players['PostPF']
# NOTE(review): rebounds use the regular-season columns only, unlike the other stats --
# confirm whether postseason rebound columns exist and should be added.
def_rebounds = stats_df_players['rebounds'] - stats_df_players['oRebounds']
off_rebounds = stats_df_players['oRebounds']

# Division by zero minutes / zero team field goals yields inf or NaN; treat those as 0.
per_minute = (1 / minutes_played).replace([np.inf, -np.inf, np.nan], 0)
team_ast_ratio = (stats_df_players['o_asts'] / stats_df_players['o_fgm']).replace([np.inf, -np.inf, np.nan], 0)
foul_cost = (lg['lg_FT'] / lg['lg_PF']) - 0.44 * (lg['lg_FTA'] / lg['lg_PF']) * vop

# Per-minute efficiency rating (appears to follow the unadjusted PER formula -- TODO confirm).
stats_df_players['PER'] = per_minute * (
    three_made
    + (2 / 3) * assists
    + (2 - factor * team_ast_ratio) * fg_made
    + ft_made * 0.5 * (1 + (1 - team_ast_ratio) + (2 / 3) * team_ast_ratio)
    - vop * turnovers
    - vop * drb * fg_missed
    - vop * 0.44 * (0.44 + (0.56 * drb)) * ft_missed
    + vop * (1 - drb) * def_rebounds
    + vop * drb * off_rebounds
    + vop * steals
    + vop * drb * blocks
    - fouls * foul_cost
)

# Shift the ratings so that the smallest one equals 1.
minimumPer = -stats_df_players['PER'].min()
stats_df_players['PER'] = stats_df_players['PER'] + minimumPer + 1

# Players with fewer than 100 total minutes get a rating of 0. A single .loc call is required:
# df[mask]['PER'] = 0 assigns into a temporary copy and leaves the frame untouched.
low_minutes = minutes_played < 100
stats_df_players.loc[low_minutes, 'PER'] = 0

stats_df_players['Cumulative PER'] = 0.0
def calculate_cumulative_per(group):
    """Return a copy of one player's rows with a decayed running PER from earlier rows.

    The running value is ``running[0] = PER[0]`` and
    ``running[i] = PER[i] * 1 + running[i-1] * 0.4``. The stored 'Cumulative PER' is that
    running value shifted down by one row, so each row only reflects the rows before it and
    the first row is NaN.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single player. Must contain a 'PER' column. If a 'year' column is present
        the rows are put in ascending year order first (stable, so ties keep their order).

    Returns
    -------
    pandas.DataFrame
        A new frame with the 'Cumulative PER' column set; the input frame is not modified.
    """
    # Work on a copy so the caller's frame (or a groupby slice) is never mutated.
    group = group.copy()
    # The recurrence is only meaningful in chronological order.
    if 'year' in group.columns:
        group = group.sort_values('year', kind='stable')

    running_values = []
    previous = None
    for per in group['PER']:
        current = per if previous is None else per * 1 + previous * 0.4
        running_values.append(current)
        previous = current

    running = pd.Series(running_values, index=group.index, dtype=float)
    # Shift by one row so a season's value never includes that season's own PER.
    group['Cumulative PER'] = running.shift(1)
    return group
# Run calculate_cumulative_per once per player and stack the results in playerID order.
# Iterating over the groups (instead of groupby.apply) keeps the 'playerID' column in every
# group: apply operating on the grouping column is deprecated since pandas 2.2, and the
# later merge on ['playerID', 'year'] needs that column. Each group is passed as a copy so
# the function never writes into a groupby slice.
player_frames = [
    calculate_cumulative_per(player_rows.copy())
    for _, player_rows in stats_df_players.groupby('playerID')
]
if player_frames:
    stats_df_players = pd.concat(player_frames).reset_index(drop=True)


# Individual awards that earn a player a small rating bonus.
award_types = [
    "All-Star Game Most Valuable Player",
    "Defensive Player of the Year",
    "Most Improved Player",
    "Most Valuable Player",
    "Rookie of the Year",
    "WNBA Finals Most Valuable Player"
]
# Bonus added to 'Cumulative PER' for each award won in the player's previous row.
award_bonus = 0.005

# Number of listed awards per (playerID, year).
relevant_awards = df_awards[df_awards['award'].isin(award_types)]
award_counts = (
    relevant_awards
    .groupby(['playerID', 'year'])
    .size()
    .rename('awards')
    .reset_index()
)
stats_df_players = stats_df_players.merge(
    award_counts,
    how='left',
    on=['playerID', 'year'],
)
# Player-seasons without a listed award have no match in award_counts.
stats_df_players['awards'] = stats_df_players['awards'].fillna(0).astype(int)

# Move each award count to the same player's next row, so a row only sees awards from the
# row before it. The shift is done per player: a frame-wide shift(1) would hand every
# player's first row the last award count of whichever player happens to sit above it.
# A player's first row has no earlier row and therefore counts 0 awards.
stats_df_players['awards'] = (
    stats_df_players
    .groupby('playerID')['awards']
    .shift(1)
    .fillna(0)
)
stats_df_players['Cumulative PER'] = stats_df_players['Cumulative PER'] + stats_df_players['awards'] * award_bonus

# Order every (team, season) roster from the highest to the lowest 'Cumulative PER'.
ranked_players = stats_df_players.sort_values(
    by=['tmID', 'year', 'Cumulative PER'],
    ascending=[True, True, False],
)
# Keep the first five rows of each roster and average their rating.
top_five_per_roster = ranked_players.groupby(['tmID', 'year']).head(5)
team_stats = (
    top_five_per_roster
    .groupby(['tmID', 'year'])['Cumulative PER']
    .mean()
    .reset_index()
)

# Within each team, pull the value of the following row (the next season present for that
# team) onto the current row; the team's last row is left without a value.
following_row_rating = team_stats.groupby('tmID', group_keys=False)['Cumulative PER'].shift(-1)
team_stats['Cumulative PER'] = following_row_rating

# Rows left empty by the shift (or by rosters with no rating) get the overall mean.
overall_mean_rating = team_stats['Cumulative PER'].mean()
team_stats['Cumulative PER'] = team_stats['Cumulative PER'].fillna(overall_mean_rating)

# Add the team rating to the team table and write the result to disk.
df_teams = pd.merge(df_teams, team_stats, on=['tmID', 'year'], how='left')
df_teams.to_csv('dataset/processed/teams_test.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stats_df_players[stats_df_players['minutes'] + stats_df_players['PostMinutes'] < 100]['PER'] = 0


        playerID  year       PER  Cumulative PER  minutes  PostMinutes
78    balesal01w     9  3.045896        3.093603    389.0          0.0
298   castriz01w     9  3.046349        5.158704    671.0          0.0
411   desouer01w     9  3.167192        4.234853    277.0          0.0
682   haynikr01w     9  3.003172        4.900484    486.0          0.0
921    lacyje01w     9  3.002775        4.164531    605.0          0.0
944   lattaiv01w     9  3.110455        3.124637    960.0          0.0
973   lennobe01w     9  3.176331        5.407602   1010.0          0.0
998   littlca01w     9  3.010430        3.108353    221.0          0.0
1012  lovelst01w     9  3.072002        4.990861    258.0          0.0
1058   mannkr01w     9  2.974779        4.770619    223.0          0.0
1265  nnamach01w     9  2.889071             NaN     84.0          0.0
1683  strotan01w     9  3.017127        4.378082    160.0          0.0
1739  terryka01w     9  3.000097        3.923019    275.0          0.0
2012  

  stats_df_players = stats_df_players.groupby('playerID').apply(calculate_cumulative_per).reset_index(drop=True)
