In [None]:
import pandas as pd

In [None]:
# Read the master table and discard its "Unnamed: 0" column in one chained
# expression (presumably a row index saved by an earlier export - confirm).
# A KeyError is raised if the column is not present.
df = pd.read_csv("master_df.csv").drop(columns="Unnamed: 0")

In [None]:
# Keep a handle on the column labels as they are at this point in the
# notebook, then display them for inspection.
original_columns = df.columns

original_columns

In [None]:
# Integer code (1-30) for every team abbreviation. Codes are assigned in
# listing order, five teams per division; the division labels are kept as
# trailing comments together with the code range each row receives.
team_encoding = {
    abbreviation: code
    for code, abbreviation in enumerate(
        [
            "TOR", "BOS", "NYK", "BRK", "PHI",  # Atlantic:  1-5
            "CLE", "IND", "DET", "CHI", "MIL",  # Central:   6-10
            "MIA", "ATL", "CHO", "WAS", "ORL",  # Southeast: 11-15
            "OKC", "POR", "UTA", "DEN", "MIN",  # Northwest: 16-20
            "GSW", "LAC", "SAC", "PHO", "LAL",  # Pacific:   21-25
            "SAS", "DAL", "MEM", "HOU", "NOP",  # Southwest: 26-30
        ],
        start=1,
    )
}

In [None]:
# Raw per-game stat names, recovered from the team-0 columns ("<stat>_team0").
# The "_team0" suffix is stripped explicitly instead of splitting on "_", so a
# stat whose own name contains an underscore keeps its full name. Derived
# "<stat>_cumulative_team0" columns are skipped, so re-running this cell after
# they have been added to df does not list every stat a second time.
stats = [
    col[: -len("_team0")]
    for col in df.columns
    if col.endswith("_team0") and not col.endswith("_cumulative_team0")
]
# Key / column stem under which each stat's running value is stored.
unique_stats = [f"{stat}_cumulative" for stat in stats]
# Every team abbreviation present in the encoding table.
teams = team_encoding.keys()

In [None]:
def initialize_cumulative_average():
    """Build a fresh, zeroed accumulator table for all teams.

    Reads the module-level ``teams`` and ``unique_stats``.

    Returns:
        dict: one entry per name in ``teams``. Each value is its own dict
        holding ``0`` for every name in ``unique_stats`` followed by a
        ``"games_played"`` counter, also ``0``.
    """
    return {
        team: {**dict.fromkeys(unique_stats, 0), "games_played": 0}
        for team in teams
    }

In [None]:
# Pre-create one float column per cumulative stat for each side of the game,
# filled with 0.0. Columns are added in team0, team1 order for every stat.
for stat_name in unique_stats:
    for column in (f'{stat_name}_team0', f'{stat_name}_team1'):
        df[column] = 0.0

In [None]:
# Walk the games in row order and fill each "<stat>_cumulative_<side>" cell
# with the team's average of that stat over the games it has already played in
# the current season (the current game is excluded).
# NOTE(review): this relies on df being ordered so that each season's rows are
# contiguous and chronological - confirm against how master_df.csv is built.
cumulative_averages = initialize_cumulative_average()
current_season = None
total_games = len(df)

# enumerate() supplies a positional counter so the progress report does not
# depend on the index labels being integers.
for position, (index, row) in enumerate(df.iterrows()):
    sides = (("team0", row['team0']), ("team1", row['team1']))
    game_season = row['season']

    # A change of season value starts every team's running sums from zero.
    if game_season != current_season:
        cumulative_averages = initialize_cumulative_average()
        current_season = game_season

    # Step 1: write the pre-game averages. This must happen before the sums
    # are updated so the current game does not leak into its own features.
    # A team with no earlier game this season has its cells left untouched.
    for side, team in sides:
        totals = cumulative_averages[team]
        games_played = totals["games_played"]
        if games_played > 0:
            for stat in unique_stats:
                df.at[index, f"{stat}_{side}"] = totals[stat] / games_played

    # Step 2: fold this game's raw stats into each team's running sums.
    for side, team in sides:
        totals = cumulative_averages[team]
        for stat in stats:
            totals[f"{stat}_cumulative"] += row[f"{stat}_{side}"]
        totals['games_played'] += 1

    # Lightweight progress report every 100 rows.
    if position % 100 == 0:
        print(f"{position} / {total_games}")

In [None]:
# Display the frame to eyeball the filled-in cumulative columns.
df

In [None]:
# Identifier / label columns that are kept regardless of their name.
exceptions = ['team1', 'winner', 'season', 'date', 'team0']

# Keep, in their existing order, every column whose name contains
# "cumulative" plus the exceptions listed above.
cols_to_keep = list(
    filter(lambda name: "cumulative" in name or name in exceptions, df.columns)
)

df_filtered = df[cols_to_keep]


In [None]:
# Write both tables to disk: the reduced frame and the full frame.
# index=True is pandas' default, spelled out here because it means the row
# index is written as the first, header-less column of each file.
df_filtered.to_csv("cumulative_average.csv", index=True)
df.to_csv("regular_cumulative_average.csv", index=True)

In [None]:
# Display the reduced frame for a final visual check.
df_filtered