This model projects each statistic for a player as the mean of that player's previous seasons, using up to the three most recent ones.
First-year players have no previous seasons, so no projection is produced for them.

# Import Packages and Data

In [1]:
# Import necessary libraries
import pandas as pd

In [2]:
# Read the prepared player-season data from disk into a DataFrame
input_path = '../Resources/properly_formatted_data.csv'
df = pd.read_csv(input_path)

In [3]:
# Identifying columns that are kept alongside the statistics
important_columns = ["IDfg", "Season", "Name", "Team", "Age"]

# Statistic columns, split into the two groups this notebook calls
# "pa" and "non-pa"
pa_list = [
    'H', '1B', '2B', '3B', 'HR', 'R', 'RBI',
    'BB', 'SH', 'SB', 'L-WAR', 'wRC', 'WAR',
]
non_pa_list = [
    'G', 'AB', 'PA', 'AVG', 'BB%', 'OBP', 'SLG',
    'OPS', 'ISO', 'wOBA', 'wRAA', 'wRC+', 'BB%+',
]

# Every column to keep: identifiers first, then both statistic groups
filtered_list = [*important_columns, *pa_list, *non_pa_list]

In [4]:
# Keep only the identifier and statistic columns listed in filtered_list
df = df[filtered_list]

In [5]:
# Single list of every statistic to project: pa_list entries followed by non_pa_list entries
combined_stats = [*pa_list, *non_pa_list]

# Automate

In [6]:
def add_projected_values(dataframe, stat, window=3):
    """
    Add a ``Projected_<stat>`` column holding, for every row, the mean of
    ``stat`` over the same player's previous seasons.

    Only seasons belonging to the same player are averaged. A player's first
    season has no previous seasons and therefore gets NaN; the second and
    third seasons are averaged over the one or two seasons available.

    Players are identified by the 'Name' column.
    NOTE(review): distinct players sharing a name would be pooled together;
    confirm names are unique or group on an ID column instead.

    Args:
        dataframe (pd.DataFrame): Player statistics with 'Name' and 'Season'
            columns and a column named ``stat``.
        stat (str): The name of the statistic to project.
        window (int): Maximum number of previous seasons to average.
            Defaults to 3.

    Returns:
        pd.DataFrame: A copy of the input sorted by 'Name' then 'Season',
        with the new ``Projected_<stat>`` column added. The input DataFrame
        is not modified.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # Sort chronologically within each player so "previous row" means
    # "previous season". sort_values returns a new object, so the caller's
    # DataFrame is left untouched.
    dataframe = dataframe.sort_values(by=['Name', 'Season'], ascending=[True, True])

    per_player = dataframe.groupby('Name')[stat]

    # Build one lagged copy of the stat per previous season. Each shift is
    # done inside the player's group, so a lag never reaches into another
    # player's rows (a plain rolling window over the shifted column would).
    lagged = pd.concat(
        [per_player.shift(lag) for lag in range(1, window + 1)],
        axis=1,
    )

    # Row-wise mean ignores missing lags; a row with no previous seasons at
    # all stays NaN.
    dataframe[f"Projected_{stat}"] = lagged.mean(axis=1)

    return dataframe

# Add one Projected_<stat> column to df for every entry in combined_stats
for stat_name in combined_stats:
    df = add_projected_values(dataframe=df, stat=stat_name)


# Organize and Export

In [7]:
# Build the column order: each stat immediately followed by its projection.
# The stat itself is always listed; the projection only if df actually has it.
available_columns = set(df.columns)
ordered_columns = [
    column
    for stat in combined_stats
    for column in (stat, f"Projected_{stat}")
    if column == stat or column in available_columns
]

# Everything not placed yet keeps its current relative order at the end
already_placed = set(ordered_columns)
remaining_columns = [column for column in df.columns if column not in already_placed]
ordered_columns += remaining_columns

# Apply the new order
df = df[ordered_columns]


In [8]:
# Move the identifier columns to the front of the DataFrame.
# They are selected by name from important_columns rather than by position
# ("the last five columns"), so the result stays correct if the identifier
# list changes and re-running this cell does not reshuffle the stat columns.
columns_to_move = list(important_columns)

# All other columns keep their current relative order
remaining_columns = [col for col in df.columns if col not in columns_to_move]

# Identifiers first, then the stat / projected-stat columns
new_column_order = columns_to_move + remaining_columns

# Reorder the DataFrame
df = df[new_column_order]


In [9]:
# Preview the first rows to check the column order and projected values
df.head()

Unnamed: 0,IDfg,Season,Name,Team,Age,H,Projected_H,1B,Projected_1B,2B,...,ISO,Projected_ISO,wOBA,Projected_wOBA,wRAA,Projected_wRAA,wRC+,Projected_wRC+,BB%+,Projected_BB%+
84726,1015773,1925,A. J. Lockhart,WMP,27.0,0.0,,0.0,,0.0,...,0.0,,0.0,,-0.9,,-100.0,,0.0,
3438,512,1999,A.J. Burnett,FLA,22.0,2.0,,2.0,,0.0,...,0.0,,0.106,,-3.4,,-50.0,,0.0,
3439,512,2000,A.J. Burnett,FLA,23.0,7.0,2.0,4.0,2.0,1.0,...,0.24,0.0,0.375,0.106,0.9,-3.4,122.0,-50.0,100.0,0.0
3440,512,2001,A.J. Burnett,FLA,24.0,4.0,4.5,3.0,3.0,1.0,...,0.02,0.12,0.103,0.2405,-11.2,-1.25,-46.0,36.0,19.0,50.0
3441,512,2002,A.J. Burnett,FLA,25.0,6.0,4.333333,3.0,3.0,2.0,...,0.088,0.086667,0.173,0.194667,-8.7,-4.566667,-1.0,8.666667,78.0,39.666667


In [10]:
# Save the projections as CSV; the DataFrame index is not written
output_path = '../Projection_Results/model_one.csv'
df.to_csv(output_path, index=False)
