In [3]:
# Imports
import pandas as pd
import numpy as np

df_players = pd.read_csv('dataset/processed/players_teams_processed.csv')
df_awards = pd.read_csv('dataset/processed/awards_processed.csv')

stats_df_players = df_players[['playerID', 'year', 'tmID']].copy()

stats_df_players['minutes'] = df_players['minutes'] + df_players['PostMinutes']

# (FG + 0.5 * 3P) / FGA
shooting = (
    np.where(df_players['fgAttempted'] > 0, df_players['fgMade'] + 0.5 * df_players['threeMade'] / df_players['fgAttempted'], 0) * 0.4 +
    np.where(df_players['PostfgAttempted'] > 0, df_players['PostfgMade'] + 0.5 * df_players['PostthreeMade'] / df_players['PostfgAttempted'], 0) * 0.6
)

# TOV / (FGA + 0.44 * FTA + TOV)
turnovers = (
    np.where((df_players['fgAttempted'] + 0.44 * df_players['ftAttempted'] + df_players['turnovers']) > 0, df_players['turnovers'] / (df_players['fgAttempted'] + 0.44 * df_players['ftAttempted'] + df_players['turnovers']), 0) * 0.4 +
    np.where((df_players['PostfgAttempted'] + 0.44 * df_players['PostftAttempted'] + df_players['PostTurnovers']) > 0, df_players['PostTurnovers'] / (df_players['PostfgAttempted'] + 0.44 * df_players['PostftAttempted'] + df_players['PostTurnovers']), 0) * 0.6
)

# ORB / (ORB + Opp DRB)
rebounding = (
    np.where(df_players['rebounds'] > 0, df_players['oRebounds'] / df_players['rebounds'], 0) * 0.4 +
    np.where(df_players['PostRebounds'] > 0, df_players['PostoRebounds'] / df_players['PostRebounds'], 0) * 0.6
)

# FT / FGA
throws = (
    np.where(df_players['fgAttempted'] > 0, df_players['ftMade'] / df_players['fgAttempted'], 0) * 0.4 +
    np.where(df_players['PostfgAttempted'] > 0, df_players['PostftMade'] / df_players['PostfgAttempted'], 0) * 0.6
)

stats_df_players["Attacking Score"] = (0.4 * shooting) + (0.25 * turnovers) + (0.20 * rebounding) + (0.15 * throws)

stats_df_players['Cumulative Attacking Score'] = 0.0
def calculate_cumulative_attacking_score(group):
    group.loc[group.index[0], 'Cumulative Attacking Score'] = group.loc[group.index[0], 'Attacking Score']
    for i in range(1, len(group)):
        group.loc[group.index[i], 'Cumulative Attacking Score'] = (
            group.loc[group.index[i], 'Attacking Score'] * 0.7 + group.loc[group.index[i-1], 'Attacking Score'] * 0.3
        )
    group['Cumulative Attacking Score'] = group['Cumulative Attacking Score'].shift(1)
    return group

stats_df_players = stats_df_players.groupby('playerID').apply(calculate_cumulative_attacking_score).reset_index(drop=True)

blocking = (
    np.where(df_players['minutes'] > 0, df_players['blocks'] / df_players['minutes'], 0) * 0.4 +
    np.where(df_players['PostMinutes'] > 0, df_players['PostBlocks'] / df_players['PostMinutes'], 0) * 0.6
)

steals = (
    np.where(df_players['turnovers'] > 0, df_players['steals'] / df_players['turnovers'], df_players['steals']) * 0.4 +
    np.where(df_players['PostTurnovers'] > 0, df_players['PostSteals'] / df_players['PostTurnovers'], df_players['steals']) * 0.6
)

rebounding = (
    np.where(df_players['rebounds'] > 0, df_players['dRebounds'] / df_players['rebounds'], 0) * 0.4 +
    np.where(df_players['PostRebounds'] > 0, df_players['PostdRebounds'] / df_players['PostRebounds'], 0) * 0.6
)

stats_df_players["Defensive Score"] = (0.45 * blocking) + (0.30 * steals) + (0.25 * rebounding)

stats_df_players['Cumulative Defensive Score'] = 0.0
def calculate_cumulative_defensive_score(group):
    group.loc[group.index[0], 'Cumulative Defensive Score'] = group.loc[group.index[0], 'Defensive Score']
    for i in range(1, len(group)):
        group.loc[group.index[i], 'Cumulative Defensive Score'] = (
            group.loc[group.index[i], 'Defensive Score'] * 0.7 + group.loc[group.index[i-1], 'Defensive Score'] * 0.3
        )
    group['Cumulative Defensive Score'] = group['Cumulative Defensive Score'].shift(1)
    return group

stats_df_players = stats_df_players.groupby('playerID').apply(calculate_cumulative_defensive_score).reset_index(drop=True)

'''stats_df_players["Defensive Score"] = np.where(
    df_players['PostMinutes'] == 0,
    df_players['blocks'] / df_players['minutes'],
    np.where(
        df_players['minutes'] == 0,
        df_players['PostBlocks'] / df_players['PostMinutes'],
        (df_players['blocks'] / df_players['minutes']) * 0.4 + (df_players['PostBlocks'] / df_players['PostMinutes']) * 0.6
    )
)'''


#awards_count = df_awards.groupby(['playerID', 'year']).size().reset_index(name='awards_count')
#stats_df_players = awards_count.merge(stats_df_players, how="outer", on=["playerID", "year"])

#stats_df_players["Awards Count"] = stats_df_players["awards_count"].fillna(0)

#stats_df_players.dropna(axis=0, how='any', inplace=True)
#stats_df_players.isna().sum()

#gajos com 0 minutos
#df_players.loc[df_players['minutes'] <= 0, ['minutes', 'PostMinutes']]

stats_df_players.head()

df_teams = pd.read_csv('dataset/processed/teams_processed.csv')

team_stats = stats_df_players.groupby(['tmID', 'year'])[['Attacking Score', 'Defensive Score']].mean().reset_index()

team_stats[['Attacking Score', 'Defensive Score']] = (
    team_stats.groupby('tmID')[['Attacking Score', 'Defensive Score']].shift(1)
)

df_teams = pd.merge(df_teams, team_stats, on=['tmID', 'year'], how='left')

df_teams.to_csv('dataset/processed/teams_test.csv', index=False)


df_teams.head()
stats_df_players.head()

#df_teams["Defensive Score"] = 
#df_teams["Awards Count"] 

  stats_df_players = stats_df_players.groupby('playerID').apply(calculate_cumulative_attacking_score).reset_index(drop=True)
  stats_df_players = stats_df_players.groupby('playerID').apply(calculate_cumulative_defensive_score).reset_index(drop=True)


Unnamed: 0,playerID,year,tmID,minutes,Attacking Score,Cumulative Attacking Score,Defensive Score,Cumulative Defensive Score
0,abrossv01w,2,MIN,846,18.304108,,7.696496,
1,abrossv01w,3,MIN,805,19.100396,18.304108,7.686197,7.696496
2,abrossv01w,4,MIN,861,19.536924,18.861509,0.343874,7.689287
3,abrossv01w,5,MIN,529,9.897709,19.405965,0.329587,2.546571
4,abrossv01w,6,MIN,777,17.503381,12.789473,8.786287,0.333873
