In [7]:
import pandas as pd
from functools import reduce

def _with_team_key(df):
    """Return a copy of ``df`` with a ``Team_clean`` join key appended.

    ``Team_clean`` is the ``Team`` column with one trailing ``*`` removed, so a
    team matches across tables whether or not its name carries the asterisk.
    """
    return df.assign(Team_clean=df['Team'].str.replace(r'\*$', '', regex=True))


def _merge_on_team(left, right):
    """Inner-join two tables on ``Team_clean``, keeping the left copy of shared columns."""
    joined = pd.merge(left, right, on='Team_clean', how='inner', suffixes=('', '_dup'))
    # Drop the right-hand duplicates straight away so successive merges never
    # pile up several columns sharing the same '*_dup' label. The explicit
    # copy makes the result safe to mutate (no SettingWithCopyWarning later).
    return joined.loc[:, ~joined.columns.str.endswith('_dup')].copy()


def yearly_data_generator(file_TPGS, file_OPGS, file_TAdv, file_TSS, file_OSS, new_file, year):
    """Merge one season's five team tables into a single cleaned CSV.

    Each input is read with ``pd.read_csv``, opponent tables get an ``Opp.``
    prefix on their stat columns, and the five frames are inner-joined on the
    team name (trailing ``*`` stripped). When a column name appears in more
    than one table, the copy from the earliest table in the merge order
    (per-game, opponent per-game, advanced, shooting, opponent shooting) wins.

    Args:
        file_TPGS: Path to the team per-game stats CSV (header on the first row).
        file_OPGS: Path to the opponent per-game stats CSV (header on the first row).
        file_TAdv: Path to the advanced team stats CSV; its first row is skipped
            and the second row is used as the header.
        file_TSS: Path to the team shooting stats CSV; its first two rows are
            skipped and fixed column names are applied.
        file_OSS: Path to the opponent shooting stats CSV; same layout as
            ``file_TSS``.
        new_file: Destination path for the merged CSV (written without index).
        year: Value stored in the output's ``Year`` column for every row.

    Returns:
        None. The result is written to ``new_file``.

    Raises:
        KeyError: If an expected column (e.g. ``Team``, ``Arena``, ``Attend.``,
            ``Attend./G``, ``Dunks``, ``Layups``, ``G``) is missing.
    """
    #######################################################################################
    # Team Per Game Stats
    team_per_game = _with_team_key(
        pd.read_csv(file_TPGS).rename(columns={'MP': 'MPA'})
    )

    #######################################################################################
    # Opponent Per Game Stats: prefix everything, then restore the shared identifiers.
    opp_per_game = _with_team_key(
        pd.read_csv(file_OPGS)
        .rename(columns=lambda col: f"Opp.{col}")
        .rename(columns={'Opp.Rk': 'Rk', 'Opp.Team': 'Team', 'Opp.G': 'G', 'Opp.MP': 'Opp.MPA'})
    )

    #######################################################################################
    # Advanced Team Stats: drop all-empty spacer columns; the '.1' columns are the
    # second occurrence of each repeated header, relabelled here as opponent values.
    advanced = _with_team_key(
        pd.read_csv(file_TAdv, skiprows=1)
        .dropna(axis=1, how='all')
        .rename(columns={
            '3PAr': '%FGA_3P',
            'eFG%.1': 'Opp.eFG%',
            'TOV%.1': 'Opp.TOV%',
            'FT/FGA.1': 'Opp.FT/FGA',
        })
    )

    #######################################################################################
    # Team Shooting Stats: single-letter names ('a'..'g') are placeholders for
    # columns expected to be entirely empty; dropna removes them.
    shooting_column_names = [
        'Rk', 'Team', 'G', 'MP', 'FG%', 'FGA_Dist', 'a', '%FGA_2P', '%FGA_0-3ft',
        '%FGA_3-10ft', '%FGA_10-16ft', '%FGA_16-3P', '%FGA_3P', 'b', '2P%', '0-3ft_FG%',
        '3-10ft_FG%', '10-16ft_FG%', '16-3P_FG%', '3P%', 'c', '%ASTd_2P', '%ASTd_3P', 'd',
        '%FGA_Dunks', 'Dunks', 'e', '%FGA_Layups', 'Layups', 'f', '%FGA_corner3P',
        'corner3P%', 'g', 'HeavesA', 'Heaves'
    ]
    team_shooting = _with_team_key(
        pd.read_csv(file_TSS, skiprows=2, header=None, names=shooting_column_names)
        .dropna(axis=1, how='all')
    )

    #######################################################################################
    # Opponent Shooting Stats
    opp_shooting_column_names = [f"Opp.{col}" for col in shooting_column_names]
    opp_shooting = _with_team_key(
        pd.read_csv(file_OSS, skiprows=2, header=None, names=opp_shooting_column_names)
        .rename(columns={'Opp.Rk': 'Rk', 'Opp.Team': 'Team', 'Opp.G': 'G'})
        .dropna(axis=1, how='all')
    )

    #######################################################################################
    # Merging the 5 DataFrames (inner join: a team missing from any table is dropped)
    merged = reduce(
        _merge_on_team,
        [team_per_game, opp_per_game, advanced, team_shooting, opp_shooting],
    )
    merged['Year'] = year

    # Dropping unnecessary columns (will raise an error if they don't exist)
    merged = merged.drop(columns=['Arena', 'Attend.', 'Attend./G', 'Team'])

    # Standardizing certain columns: divide by games played (G from the per-game table).
    games = merged['G']
    per_game = {
        col: merged[col] / games
        for col in ('Dunks', 'Opp.Dunks', 'Layups', 'Opp.Layups')
    }

    # Replace the originals with the per-game versions (appended at the end, in
    # the order above) and expose the cleaned key under the name 'Team'.
    merged = (
        merged.drop(columns=list(per_game))
        .assign(**per_game)
        .rename(columns={'Team_clean': 'Team'})
    )

    # Save the final dataset
    merged.to_csv(new_file, index=False)


In [None]:
# 24-25 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2024

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2024-2025/24-25 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2024-2025/24-25 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2024-2025/24-25 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2024-2025/24-25 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2024-2025/24-25 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/24-25 Team Data.csv"

yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [11]:
# 23-24 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2023

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2023-2024/23-24 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2023-2024/23-24 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2023-2024/23-24 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2023-2024/23-24 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2023-2024/23-24 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/23-24 Team Data.csv"

yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)

In [19]:
# 22-23 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2022

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2022-2023/22-23 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2022-2023/22-23 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2022-2023/22-23 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2022-2023/22-23 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2022-2023/22-23 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/22-23 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [21]:
# 21-22 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2021

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2021-2022/21-22 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2021-2022/21-22 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2021-2022/21-22 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2021-2022/21-22 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2021-2022/21-22 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/21-22 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)

In [23]:
# 20-21 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2020

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2020-2021/20-21 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2020-2021/20-21 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2020-2021/20-21 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2020-2021/20-21 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2020-2021/20-21 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/20-21 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [25]:
# 19-20 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2019

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2019-2020/19-20 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2019-2020/19-20 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2019-2020/19-20 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2019-2020/19-20 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2019-2020/19-20 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/19-20 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [27]:
# 18-19 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2018

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2018-2019/18-19 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2018-2019/18-19 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2018-2019/18-19 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2018-2019/18-19 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2018-2019/18-19 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/18-19 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [29]:
# 17-18 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2017

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2017-2018/17-18 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2017-2018/17-18 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2017-2018/17-18 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2017-2018/17-18 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2017-2018/17-18 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/17-18 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [31]:
# 16-17 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2016

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2016-2017/16-17 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2016-2017/16-17 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2016-2017/16-17 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2016-2017/16-17 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2016-2017/16-17 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/16-17 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)

In [33]:
# 15-16 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2015

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2015-2016/15-16 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2015-2016/15-16 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2015-2016/15-16 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2015-2016/15-16 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2015-2016/15-16 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/15-16 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)

In [35]:
# 14-15 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2014

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2014-2015/14-15 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2014-2015/14-15 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2014-2015/14-15 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2014-2015/14-15 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2014-2015/14-15 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/14-15 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [37]:
# 13-14 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2013

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2013-2014/13-14 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2013-2014/13-14 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2013-2014/13-14 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2013-2014/13-14 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2013-2014/13-14 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/13-14 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [41]:
# 12-13 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2012

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2012-2013/12-13 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2012-2013/12-13 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2012-2013/12-13 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2012-2013/12-13 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2012-2013/12-13 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/12-13 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [49]:
# 11-12 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2011

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2011-2012/11-12 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2011-2012/11-12 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2011-2012/11-12 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2011-2012/11-12 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2011-2012/11-12 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/11-12 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [45]:
# 10-11 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2010

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2010-2011/10-11 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2010-2011/10-11 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2010-2011/10-11 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2010-2011/10-11 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2010-2011/10-11 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/10-11 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)


In [47]:
# 09-10 season: merge the five raw team tables into one cleaned CSV.

# Year value recorded for this season (the season's first calendar year).
year = 2009

# Raw input CSVs for this season.
file_TPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2009-2010/09-10 Per Game TS.csv"  # Team Per Game Stats
file_OPGS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2009-2010/09-10 Opponent TS.csv"  # Opponent Per Game Stats
file_TAdv = "C:/Users/hagen/Downloads/NBA DATA/Teams/2009-2010/09-10 Advanced TS.csv"  # Advanced Team Stats
file_TSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2009-2010/09-10 Shooting TS.csv"  # Team Shooting Stats
file_OSS = "C:/Users/hagen/Downloads/NBA DATA/Teams/2009-2010/09-10 Opponent Shooting TS.csv"  # Opponent Shooting Stats

# Destination for the merged, cleaned table.
new_file = "C:/Users/hagen/Downloads/NBA DATA/Teams/Cleaned Yearly Data/09-10 Team Data.csv"

# Build and save this season's dataset.
yearly_data_generator(
    file_TPGS=file_TPGS,
    file_OPGS=file_OPGS,
    file_TAdv=file_TAdv,
    file_TSS=file_TSS,
    file_OSS=file_OSS,
    new_file=new_file,
    year=year,
)
