In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
import plotly.express as px
pd.options.display.max_columns = 150

In [None]:
starters_train = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
starters_test = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_22.csv')

starters_train = starters_train.fillna(0)
starters_test = starters_test.fillna(0)


In [None]:
starters_train.columns

starter_features = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)','ERA_x','xERA','FIP_x','xFIP_x','WAR','CG','ShO','SV_y','HLD','BS','IP_y','TBF','H','R','ER','HR','BB','IBB','HBP','WP','BK','K%','BB%','K-BB%','AVG',	'ERA-',	'FIP-', 'xFIP-', 'E-F',	'SIERA', 'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS','RS/9','Balls','Strikes','Pitches','Pull%','Cent%','Oppo%','Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%','Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%','CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT','wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C']
starter_targets = ['W_x', 'SO', 'ERA','WHIP']
starters_train[starter_features] = starters_train[starter_features].replace({'%':''}, regex=True)
starters_test[starter_features] = starters_test[starter_features].replace({'%':''}, regex=True)
starters_test_dropped_cols = starters_test.drop(starter_targets,axis=1)

# Not used for now: 'L_x','SV_x','G_x','GS_x','IP_x',

In [None]:
# Train a random forest regressor
rf = RandomForestRegressor(n_estimators=100)
maes = []
for target in starter_targets:
    rf.fit(starters_train[starter_features], starters_train[target])
    # Make predictions on the test set
    predictions = rf.predict(starters_test_dropped_cols[starter_features])
    predict_string = "Predicted_"+str(target)
    # Assign the predictions to the players in the test set
    starters_test_dropped_cols[predict_string] = predictions
    mae = mean_absolute_error(predictions, starters_test[target])
    maes.append(mae)

In [None]:
maes

In [None]:
starters_test_dropped_cols

In [None]:
predicted_vs_actual_ws = px.scatter(starters_test_dropped_cols, x='W_y', y='Predicted_W_x', title='Predicted vs Actual Wins', hover_data=['Name','Team'])
predicted_vs_actual_ws.show()

In [None]:
starters_ws_vs_ks = px.scatter(starters_test_dropped_cols, x='Predicted_SO', y='Predicted_W_x', color='Predicted_ERA', title='Predicted Strikeouts vs Wins 2022', hover_data=['Name', 'Team'])
plot_filename='plots/predicted_starters_ws_vs_ks.html'
starters_ws_vs_ks.write_html(plot_filename)
starters_ws_vs_ks.show()

In [None]:
# Strikeouts Model

import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Load the data into a pandas dataframe
df = pd.read_csv("fantasy_baseball_data.csv")

# Preprocess the data
# ... (fill in missing values, handle outliers, etc.)

# Select features for the model
features = ["AVG", "HR", "RBI", "SB", "OPS", "Games", "AtBats", "Hits"]

# Split the data into training and testing sets
train_data = df[df["Year"] != 2022]
test_data = df[df["Year"] == 2022]

# Train a random forest regressor
rf = RandomForestRegressor(n_estimators=100)
rf.fit(train_data[features], train_data["FantasyValue"])

# Make predictions on the test set
predictions = rf.predict(test_data[features])

# Assign the predictions to the players in the test set
test_data["PredictedFantasyValue"] = predictions

# Sort the players by their predicted fantasy value
sorted_data = test_data.sort_values("PredictedFantasyValue", ascending=False)

# Display the top 10 players with the highest predicted fantasy value
print(sorted_data[["Player", "PredictedFantasyValue"]].head(10))


## Reliever Models

In [None]:
relievers_train = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
relievers_test = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_22.csv')

relievers_train = relievers_train.fillna(0)
relievers_test = relievers_test.fillna(0)


relievers_train.columns

relievers_features = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)','ERA_x','xERA','FIP_x','xFIP_x','WAR','CG','ShO','BS','IP_y','TBF','H','R','ER','HR','BB','IBB','HBP','WP','BK','K%','BB%','K-BB%','AVG',	'ERA-',	'FIP-', 'xFIP-', 'E-F',	'SIERA', 'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS','RS/9','Balls','Strikes','Pitches','Pull%','Cent%','Oppo%','Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%','Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%','CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT','wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C']
relievers_targets = ['W_x', 'SV_y', 'HLD', 'SO', 'ERA', 'WHIP']
relievers_train[relievers_features] = relievers_train[relievers_features].replace({'%':''}, regex=True)
relievers_test[relievers_features] = relievers_test[relievers_features].replace({'%':''}, regex=True)
relievers_test_dropped_cols = relievers_test.drop(relievers_targets,axis=1)

# Not used for now: 'L_x','SV_x','G_x','GS_x','IP_x',

# Train a random forest regressor
rf = RandomForestRegressor(n_estimators=100)
maes = []
for target in relievers_targets:
    rf.fit(relievers_train[relievers_features], relievers_train[target])
    # Make predictions on the test set
    predictions = rf.predict(relievers_test_dropped_cols[relievers_features])
    predict_string = "Predicted_"+str(target)
    # Assign the predictions to the players in the test set
    relievers_test_dropped_cols[predict_string] = predictions
    mae = mean_absolute_error(predictions, relievers_test[target])
    maes.append(mae)

In [None]:
relievers_vs_actual_saves = px.scatter(relievers_test_dropped_cols, x=relievers_test['SV_x'], y=relievers_test_dropped_cols['Predicted_SV_y'], title='Relievers Predicted vs Actual Saves 2022', hover_data=['Name', 'Team'])
plot_filename='plots/relievers_predicted_vs actual_saves.html'
relievers_vs_actual_saves.write_html(plot_filename)
relievers_vs_actual_saves.show()

In [None]:
relievers_vs_actual_saves = px.scatter(relievers_test_dropped_cols, x=relievers_test['SO'], y=relievers_test_dropped_cols['Predicted_SO'], title='Relievers Predicted vs Actual Strikeouts 2022', hover_data=['Name', 'Team'])
plot_filename='plots/relievers_predicted_vs_actual_saves.html'
relievers_vs_actual_saves.write_html(plot_filename)
relievers_vs_actual_saves.show()

In [None]:
relievers_ws_vs_ks = px.scatter(relievers_test_dropped_cols, x='Predicted_SO', y='Predicted_SV_y', color='Predicted_ERA', title='Predicted Strikeouts vs Saves 2022', hover_data=['Name', 'Team'])
plot_filename='plots/relievers_predicted_svs_vs_ks.html'
relievers_ws_vs_ks.write_html(plot_filename)
relievers_ws_vs_ks.show()

In [None]:
relievers_ws_vs_ks = px.scatter(relievers_test_dropped_cols, x='Predicted_SO', y='Predicted_HLD', color='Predicted_ERA', title='Predicted Strikeouts vs Holds 2022', hover_data=['Name', 'Team'])
plot_filename='plots/predicted_relievers_hlds_vs_ks.html'
relievers_ws_vs_ks.write_html(plot_filename)
relievers_ws_vs_ks.show()

### Projection Building Functions

In [29]:
def predict_starting_pitchers(model_obj, year_to_project, train_dataset, test_dataset):

    # Preprocess the data and select features for the model

    train_dataset = train_dataset.fillna(0)
    test_dataset = test_dataset.fillna(0)

    train_dataset['Sum_of_ERA_WHIP'] = train_dataset['ERA_x'] + train_dataset['WHIP']
    test_dataset['Sum_of_ERA_WHIP'] = test_dataset['ERA_x'] + test_dataset['WHIP']

    chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                   'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG','FIP-', 'xFIP-', 
                   'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                   'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                   'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                   'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                   'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                   'W_x', 'SO', 'Sum_of_ERA_WHIP']

    train_data_chosen_cols = train_dataset[chosen_cols]
    test_data_chosen_cols = test_dataset[chosen_cols]

    # Remove % signs
    train_data_chosen_cols = train_data_chosen_cols.replace('%','', regex=True)
    test_data_chosen_cols = test_data_chosen_cols.replace('%','', regex=True)

    # Convert cols to floats
    train_data_chosen_cols.astype(np.float64)
    test_data_chosen_cols.astype(np.float64)

    #starter_targets = ['W_x', 'SO', 'Sum_of_ERA_WHIP']
    starter_targets = ['Sum_of_ERA_WHIP']

    # Split the data into training and testing sets
    
    train_data_x = train_data_chosen_cols.drop(starter_targets, axis=1)
    test_data_x = test_data_chosen_cols.drop(starter_targets, axis=1)

    for target in starter_targets:
        train_data_y = train_data_chosen_cols[target]
        test_data_y = test_data_chosen_cols[target]
        # Scale x variable datasets
        scaler = MinMaxScaler()
        train_data_x_scaled = scaler.fit_transform(train_data_x)
        test_data_x_scaled = scaler.fit_transform(test_data_x)

        # ML Model using using fit and predict QB Fantasy Points
        model = model_obj
        model_name = type(model).__name__
        print('Predict Starting Pitchers with a '+ model_name + ' model')
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            model = model.fit(train_data_x_scaled,train_data_y)
        else:
            model = model.fit(train_data_x_scaled,train_data_y.values.ravel())
        y_preds = model.predict(test_data_x_scaled)
        
        mean_sq_err = None
        r2_err = None
        if year_to_project == 2023:
            pass
        else:
            # The mean squared error
            mean_sq_err = mean_squared_error(test_data_y, y_preds)
            #print("Mean squared error: %.2f" % mean_sq_err)
            
            # The mean absolute error
            mean_ab_err = mean_squared_error(test_data_y, y_preds)

            # The coefficient of determination: 1 is perfect prediction
            r2_err = r2_score(test_data_y.values.ravel(), y_preds)

        # Create new dataframe for projections
        player_point_proj = None
        proj_col_name  = 'Model_Projection_'+target
        actuals_col_name  = 'Actual_'+target
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            #player_point_proj = pd.DataFrame({'Age': test_data_x['Age'], 'SLG': test_data_x['SLG'], proj_col_name: y_preds[:,0], actuals_col_name: test_data_y[target]})
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        else:
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        # Merge player names and info back in
        #player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])

        player_point_proj_wnames = player_point_proj.merge(test_dataset[['Name', 'K/9_x', 'BABIP', target]], how='inner', on=['K/9_x', 'BABIP'])
        player_point_proj_wnames = player_point_proj_wnames.drop(columns=[target])

        # Calculate Model vs Actual Delta
        player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames[proj_col_name] - player_point_proj_wnames[actuals_col_name]

        # Prep Dataframe for csv output
        player_point_proj_wnames = player_point_proj_wnames.sort_values(by=proj_col_name, ascending=False)
        player_point_proj_wnames = player_point_proj_wnames[['Name', 'K/9_x', proj_col_name, actuals_col_name, 'Model_v_Actual_Delta']]
        # Save dataframes
        if year_to_project == 2023:
            import time
            timestr = time.strftime("%Y%m%d-%H%M%S")
            df = player_point_proj_wnames
            adp = pd.read_csv('data/FantasyPros_2023_Overall_MLB_ADP_Rankings.csv')
            adp = adp.rename({'Player': 'Name'}, axis=1)
            df_w_adp = df.merge(adp, how='left', on='Name')
            df_w_adp = df_w_adp.rename({'AVG': 'ADP'}, axis=1)
            df_w_adp = df_w_adp[['Name',proj_col_name, actuals_col_name,'ADP']]
            df_w_adp = df_w_adp.sort_values(by=proj_col_name)
            df_w_adp = df_w_adp.drop_duplicates()
            #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'
            filename = 'projections/pitchers/starters/'+str(model_name)+'2023_projections_'+timestr+'.csv'
            df_w_adp.to_csv(filename)
        else:
            return model_name, mean_sq_err, mean_ab_err, r2_err
        

def fantasy_points_predictor(models, year_to_project, train_dataset, test_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        model_name, mean_sq_err, mean_ab_err, r2_err = predict_starting_pitchers(model, year_to_project, train_dataset, test_dataset)
        result = [model_name, mean_sq_err, mean_ab_err, r2_err]
        results.append(result)
            #position_projs_df.append(result)
        #avg_position_projs = position_projs_df.groupby('Player')

    results_df = pd.DataFrame(results, columns=['Model Name', 'Mean Square Error', 'Mean Absolute Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Mean Absolute Error'], ascending=[False])

    top_sp_model = results_df.tail(1)
    
    if year_to_project == 2023:
        print('Simulation complete! Check the projections folder to find your ranked players by position for this years draft.')
    else:
        results_filename = 'projections/pitchers/starters/2023_model_results_summary_'+timestr+'.csv'
        results_df.to_csv(results_filename)
        top_model_filename = 'projections/pitchers/starters/2023_top_models_by_position_summary_'+timestr+'.csv'
        top_sp_model.to_csv(top_model_filename)
        print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')
        return top_sp_model

def model_object_generator(model_name):
    if model_name == 'LinearRegression':
        return LinearRegression()
    elif model_name == 'Ridge':
        return Ridge()
    elif model_name == 'Lasso':
        return Lasso()
    elif model_name == 'BayesianRidge':
        return BayesianRidge()
    elif model_name == 'RandomForestRegressor':
        #return RandomForestRegressor(n_estimators=1000, min_samples_leaf=4, min_samples_split=10)
        return RandomForestRegressor(n_estimators=1000)
    elif model_name == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    elif model_name == 'MLPRegressor':
        return MLPRegressor()
    elif model_name == 'Elastic_Net':
        return ElasticNet()
    else:
        print('Model not supported by model_object_generator function at the moment.')
        return None


In [30]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_22.csv')

top_sp_models = fantasy_points_predictor(models_list, 2022, train_dataset_2022, test_dataset_2022)

Predict Starting Pitchers with a LinearRegression model
Predict Starting Pitchers with a Ridge model
Predict Starting Pitchers with a Lasso model
Predict Starting Pitchers with a BayesianRidge model
Predict Starting Pitchers with a ElasticNet model
Predict Starting Pitchers with a RandomForestRegressor model
Predict Starting Pitchers with a KNeighborsRegressor model
Predict Starting Pitchers with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.


In [31]:
top_sp_models

Unnamed: 0,Model Name,Mean Square Error,Mean Absolute Error,R2 Score
7,MLPRegressor,0.423929,0.423929,0.681401


In [32]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_20_22.csv')

predict_starting_pitchers(MLPRegressor(max_iter=1000), 2023, train_dataset, test_dataset)

Predict Starting Pitchers with a MLPRegressor model


In [25]:
def predict_starting_pitchers(model_obj, proj_method, year_to_project, train_dataset, test_dataset):

    # Preprocess the data and select features for the model

    train_dataset = train_dataset.fillna(0)
    test_dataset = test_dataset.fillna(0)

    chosen_cols = []
    starter_targets = []
    if proj_method == "W+SV+HLD":
        train_dataset['Sum_of_W_SV_HLD'] = train_dataset['W_x'] + train_dataset['SV_y'] + train_dataset['HLD']
        test_dataset['Sum_of_W_SV_HLD'] = test_dataset['W_x'] + test_dataset['SV_y'] + test_dataset['HLD']

        chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                        'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG', 'FIP-', 'xFIP-', 
                        'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                        'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                        'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                        'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                        'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                        'W_x', 'SO', 'Sum_of_W_SV_HLD']  
        starter_targets = ['Sum_of_W_SV_HLD']
    
    elif proj_method == "ERA+WHIP":
        train_dataset['Sum_of_ERA_WHIP'] = train_dataset['ERA_x'] + train_dataset['WHIP']
        test_dataset['Sum_of_ERA_WHIP'] = test_dataset['ERA_x'] + test_dataset['WHIP']

        chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                        'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG', 'FIP-', 'xFIP-', 
                        'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                        'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                        'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                        'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                        'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                        'W_x', 'SO', 'Sum_of_ERA_WHIP']  
        starter_targets = ['Sum_of_ERA_WHIP']


    train_data_chosen_cols = train_dataset[chosen_cols]
    test_data_chosen_cols = test_dataset[chosen_cols]

    # Remove % signs
    train_data_chosen_cols = train_data_chosen_cols.replace('%','', regex=True)
    test_data_chosen_cols = test_data_chosen_cols.replace('%','', regex=True)

    # Convert cols to floats
    train_data_chosen_cols.astype(np.float64)
    test_data_chosen_cols.astype(np.float64)

    # Split the data into training and testing sets
    
    train_data_x = train_data_chosen_cols.drop(starter_targets, axis=1)
    test_data_x = test_data_chosen_cols.drop(starter_targets, axis=1)

    for target in starter_targets:
        train_data_y = train_data_chosen_cols[target]
        test_data_y = test_data_chosen_cols[target]
        # Scale x variable datasets
        scaler = MinMaxScaler()
        train_data_x_scaled = scaler.fit_transform(train_data_x)
        test_data_x_scaled = scaler.fit_transform(test_data_x)

        # ML Model using using fit and predict QB Fantasy Points
        model = model_obj
        model_name = type(model).__name__
        print('Predict Relief Pitchers targeting ' + proj_method + ' with a '+ model_name + ' model')
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            model = model.fit(train_data_x_scaled,train_data_y)
        else:
            model = model.fit(train_data_x_scaled,train_data_y.values.ravel())
        y_preds = model.predict(test_data_x_scaled)
        
        mean_sq_err = None
        r2_err = None
        if year_to_project == 2023:
            pass
        else:
            # The mean squared error
            mean_sq_err = mean_squared_error(test_data_y, y_preds)
            #print("Mean squared error: %.2f" % mean_sq_err)
            
            # The mean absolute error
            mean_ab_err = mean_squared_error(test_data_y, y_preds)

            # The coefficient of determination: 1 is perfect prediction
            r2_err = r2_score(test_data_y.values.ravel(), y_preds)

        # Create new dataframe for projections
        player_point_proj = None
        proj_col_name  = 'Model_Projection_'+target
        actuals_col_name  = 'Actual_'+target
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            #player_point_proj = pd.DataFrame({'Age': test_data_x['Age'], 'SLG': test_data_x['SLG'], proj_col_name: y_preds[:,0], actuals_col_name: test_data_y[target]})
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        else:
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        # Merge player names and info back in
        #player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])

        player_point_proj_wnames = player_point_proj.merge(test_dataset[['Name', 'K/9_x', 'BABIP', target]], how='inner', on=['K/9_x', 'BABIP'])
        player_point_proj_wnames = player_point_proj_wnames.drop(columns=[target])

        # Calculate Model vs Actual Delta
        player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames[proj_col_name] - player_point_proj_wnames[actuals_col_name]

        # Prep Dataframe for csv output
        player_point_proj_wnames = player_point_proj_wnames.sort_values(by=proj_col_name, ascending=False)
        player_point_proj_wnames = player_point_proj_wnames[['Name', 'K/9_x', proj_col_name, actuals_col_name, 'Model_v_Actual_Delta']]
        # Save dataframes
        if year_to_project == 2023:
            import time
            timestr = time.strftime("%Y%m%d-%H%M%S")
            df = player_point_proj_wnames
            adp = pd.read_csv('data/FantasyPros_2023_Overall_MLB_ADP_Rankings.csv')
            adp = adp.rename({'Player': 'Name'}, axis=1)
            df_w_adp = df.merge(adp, how='left', on='Name')
            df_w_adp = df_w_adp.rename({'AVG': 'ADP'}, axis=1)
            df_w_adp = df_w_adp[['Name',proj_col_name, actuals_col_name,'ADP']]
            if proj_method == "W+SV+HLD":
                df_w_adp = df_w_adp.drop_duplicates()
                df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=False)
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Luis Garcia']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Javy Guerra']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Will Smith']
                filename = 'projections/pitchers/starters/'+str(model_name)+'2023_W_SV_HLD_projections_'+timestr+'.csv'
                df_w_adp.to_csv(filename)
            elif proj_method == "ERA+WHIP":
                df_w_adp = df_w_adp.drop_duplicates()
                df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=True)
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Luis Garcia']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Javy Guerra']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Will Smith']
                filename = 'projections/pitchers/starters/'+str(model_name)+'2023_ERA_WHIP_projections_'+timestr+'.csv'
                df_w_adp.to_csv(filename)            
            #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'

        else:
            return model_name, mean_sq_err, mean_ab_err, r2_err
        

def fantasy_points_predictor(models, proj_method, year_to_project, train_dataset, test_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        model_name, mean_sq_err, mean_ab_err, r2_err = predict_starting_pitchers(model, proj_method, year_to_project, train_dataset, test_dataset)
        result = [model_name, mean_sq_err, mean_ab_err, r2_err]
        results.append(result)
            #position_projs_df.append(result)
        #avg_position_projs = position_projs_df.groupby('Player')

    results_df = pd.DataFrame(results, columns=['Model Name', 'Mean Square Error', 'Mean Absolute Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Mean Absolute Error'])

    top_rp_model = results_df.head(1)
    
    if year_to_project == 2023:
        print('Simulation complete! Check the projections folder to find your ranked players by position for this years draft.')
    else:
        if proj_method == "W+SV+HLD":
            results_filename = 'projections/pitchers/starters/2023_model_results_W_SV_HLD_summary_'+timestr+'.csv'
            results_df.to_csv(results_filename)
            top_model_filename = 'projections/pitchers/starters/2023_top_models_W_SV_HLD_by_position_summary_'+timestr+'.csv'
            top_rp_model.to_csv(top_model_filename)
        elif proj_method == "ERA+WHIP":
            results_filename = 'projections/pitchers/starters/2023_model_results_ERA_WHIP_summary_'+timestr+'.csv'
            results_df.to_csv(results_filename)
            top_model_filename = 'projections/pitchers/starters/2023_top_models_ERA_WHIP_by_position_summary_'+timestr+'.csv'
            top_rp_model.to_csv(top_model_filename)
        print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')
        return top_rp_model

def model_object_generator(model_name):
    if model_name == 'LinearRegression':
        return LinearRegression()
    elif model_name == 'Ridge':
        return Ridge()
    elif model_name == 'Lasso':
        return Lasso()
    elif model_name == 'BayesianRidge':
        return BayesianRidge()
    elif model_name == 'RandomForestRegressor':
        #return RandomForestRegressor(n_estimators=1000, min_samples_leaf=4, min_samples_split=10)
        return RandomForestRegressor(n_estimators=1000)
    elif model_name == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    elif model_name == 'MLPRegressor':
        return MLPRegressor()
    elif model_name == 'Elastic_Net':
        return ElasticNet()
    else:
        print('Model not supported by model_object_generator function at the moment.')
        return None

def combined_starter_rankings(wsvhld_dataset, erawhip_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")
    
    #wsvhld_dataset = wsvhld_dataset.iloc[:,1:]
    print(wsvhld_dataset.columns)
    wsvhld_min = wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD'].min()
    wsvhld_max = wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD'].max()
    wsvhld_dataset['Scaled_WSVHLD_Rank'] = (wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD']-wsvhld_min)/(wsvhld_max-wsvhld_min)

    #erawhip_dataset = wsvhld_dataset.iloc[:,1:]
    erawhip_min = erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP'].min()
    erawhip_max = erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP'].max()
    erawhip_dataset['Scaled_ERAWHIP_Rank'] = 1-(erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP']-erawhip_min)/(erawhip_max-erawhip_min)

    ovr_rp_rankings = wsvhld_dataset.merge(erawhip_dataset, how='left', on='Name')
    ovr_rp_rankings['Scaled_OVR_Score'] = (ovr_rp_rankings['Scaled_WSVHLD_Rank'] + ovr_rp_rankings['Scaled_ERAWHIP_Rank']) * 50
    ovr_rp_rankings = ovr_rp_rankings.sort_values(by='Scaled_OVR_Score', ascending=False)
    ovr_rp_rankings['OVR_Rank'] = np.arange(len(ovr_rp_rankings))+1
    
    ovr_rp_rankings_filename = 'projections/pitchers/starters/2023_ovr_sp_rankings_'+timestr+'.csv'
    ovr_rp_rankings.to_csv(ovr_rp_rankings_filename)



In [26]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_22.csv')
top_sp_models_w = fantasy_points_predictor(models_list, "W+SV+HLD", 2022, train_dataset_2022, test_dataset_2022)
top_sp_models_era = fantasy_points_predictor(models_list, "ERA+WHIP", 2022, train_dataset_2022, test_dataset_2022)

Predict Relief Pitchers targeting W+SV+HLD with a LinearRegression model
Predict Relief Pitchers targeting W+SV+HLD with a Ridge model
Predict Relief Pitchers targeting W+SV+HLD with a Lasso model
Predict Relief Pitchers targeting W+SV+HLD with a BayesianRidge model
Predict Relief Pitchers targeting W+SV+HLD with a ElasticNet model
Predict Relief Pitchers targeting W+SV+HLD with a RandomForestRegressor model
Predict Relief Pitchers targeting W+SV+HLD with a KNeighborsRegressor model
Predict Relief Pitchers targeting W+SV+HLD with a MLPRegressor model



Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.



Simulation complete! Check the predictor_tool_results folder to find summary of models.
Predict Relief Pitchers targeting ERA+WHIP with a LinearRegression model
Predict Relief Pitchers targeting ERA+WHIP with a Ridge model
Predict Relief Pitchers targeting ERA+WHIP with a Lasso model
Predict Relief Pitchers targeting ERA+WHIP with a BayesianRidge model
Predict Relief Pitchers targeting ERA+WHIP with a ElasticNet model
Predict Relief Pitchers targeting ERA+WHIP with a RandomForestRegressor model
Predict Relief Pitchers targeting ERA+WHIP with a KNeighborsRegressor model
Predict Relief Pitchers targeting ERA+WHIP with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.


KeyError: 'Model_Name'

In [27]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_20_22.csv')
predict_starting_pitchers(ElasticNet(), "W+SV+HLD", 2023, train_dataset, test_dataset)
predict_starting_pitchers(MLPRegressor(max_iter=1000), "ERA+WHIP", 2023, train_dataset, test_dataset)

Predict Relief Pitchers targeting W+SV+HLD with a ElasticNet model
Predict Relief Pitchers targeting ERA+WHIP with a MLPRegressor model


In [28]:
wsvhld_dataset = pd.read_csv('projections/pitchers/starters/ElasticNet2023_W_SV_HLD_projections_20230329-163803.csv')
erawhip_dataset = pd.read_csv('projections/pitchers/starters/MLPRegressor2023_ERA_WHIP_projections_20230329-163805.csv')
combined_starter_rankings(wsvhld_dataset, erawhip_dataset)

Index(['Unnamed: 0', 'Name', 'Model_Projection_Sum_of_W_SV_HLD',
       'Actual_Sum_of_W_SV_HLD', 'ADP', 'ADP.1'],
      dtype='object')


### Relievers Projection Building Functions

In [47]:
def predict_relief_pitchers(model_obj, proj_method, year_to_project, train_dataset, test_dataset):

    # Preprocess the data and select features for the model

    train_dataset = train_dataset.fillna(0)
    test_dataset = test_dataset.fillna(0)

    chosen_cols = []
    starter_targets = []
    if proj_method == "W+SV+HLD":
        train_dataset['Sum_of_W_SV_HLD'] = train_dataset['W_x'] + train_dataset['SV_y'] + train_dataset['HLD']
        test_dataset['Sum_of_W_SV_HLD'] = test_dataset['W_x'] + test_dataset['SV_y'] + test_dataset['HLD']

        chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                        'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG', 'FIP-', 'xFIP-', 
                        'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                        'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                        'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                        'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                        'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                        'W_x', 'SO', 'Sum_of_W_SV_HLD']  
        starter_targets = ['Sum_of_W_SV_HLD']
    
    elif proj_method == "ERA+WHIP":
        train_dataset['Sum_of_ERA_WHIP'] = train_dataset['ERA_x'] + train_dataset['WHIP']
        test_dataset['Sum_of_ERA_WHIP'] = test_dataset['ERA_x'] + test_dataset['WHIP']

        chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                        'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG', 'FIP-', 'xFIP-', 
                        'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                        'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                        'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                        'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                        'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                        'W_x', 'SO', 'Sum_of_ERA_WHIP']  
        starter_targets = ['Sum_of_ERA_WHIP']


    train_data_chosen_cols = train_dataset[chosen_cols]
    test_data_chosen_cols = test_dataset[chosen_cols]

    # Remove % signs
    train_data_chosen_cols = train_data_chosen_cols.replace('%','', regex=True)
    test_data_chosen_cols = test_data_chosen_cols.replace('%','', regex=True)

    # Convert cols to floats
    train_data_chosen_cols.astype(np.float64)
    test_data_chosen_cols.astype(np.float64)

    # Split the data into training and testing sets
    
    train_data_x = train_data_chosen_cols.drop(starter_targets, axis=1)
    test_data_x = test_data_chosen_cols.drop(starter_targets, axis=1)

    for target in starter_targets:
        train_data_y = train_data_chosen_cols[target]
        test_data_y = test_data_chosen_cols[target]
        # Scale x variable datasets
        scaler = MinMaxScaler()
        train_data_x_scaled = scaler.fit_transform(train_data_x)
        test_data_x_scaled = scaler.fit_transform(test_data_x)

        # ML Model using using fit and predict QB Fantasy Points
        model = model_obj
        model_name = type(model).__name__
        print('Predict Relief Pitchers targeting ' + proj_method + ' with a '+ model_name + ' model')
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            model = model.fit(train_data_x_scaled,train_data_y)
        else:
            model = model.fit(train_data_x_scaled,train_data_y.values.ravel())
        y_preds = model.predict(test_data_x_scaled)
        
        mean_sq_err = None
        r2_err = None
        if year_to_project == 2023:
            pass
        else:
            # The mean squared error
            mean_sq_err = mean_squared_error(test_data_y, y_preds)
            #print("Mean squared error: %.2f" % mean_sq_err)
            
            # The mean absolute error
            mean_ab_err = mean_squared_error(test_data_y, y_preds)

            # The coefficient of determination: 1 is perfect prediction
            r2_err = r2_score(test_data_y.values.ravel(), y_preds)

        # Create new dataframe for projections
        player_point_proj = None
        proj_col_name  = 'Model_Projection_'+target
        actuals_col_name  = 'Actual_'+target
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            #player_point_proj = pd.DataFrame({'Age': test_data_x['Age'], 'SLG': test_data_x['SLG'], proj_col_name: y_preds[:,0], actuals_col_name: test_data_y[target]})
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        else:
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        # Merge player names and info back in
        #player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])

        player_point_proj_wnames = player_point_proj.merge(test_dataset[['Name', 'K/9_x', 'BABIP', target]], how='inner', on=['K/9_x', 'BABIP'])
        player_point_proj_wnames = player_point_proj_wnames.drop(columns=[target])

        # Calculate Model vs Actual Delta
        player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames[proj_col_name] - player_point_proj_wnames[actuals_col_name]

        # Prep Dataframe for csv output
        player_point_proj_wnames = player_point_proj_wnames.sort_values(by=proj_col_name, ascending=False)
        player_point_proj_wnames = player_point_proj_wnames[['Name', 'K/9_x', proj_col_name, actuals_col_name, 'Model_v_Actual_Delta']]
        # Save dataframes
        if year_to_project == 2023:
            import time
            timestr = time.strftime("%Y%m%d-%H%M%S")
            df = player_point_proj_wnames
            adp = pd.read_csv('data/FantasyPros_2023_Overall_MLB_ADP_Rankings.csv')
            adp = adp.rename({'Player': 'Name'}, axis=1)
            df_w_adp = df.merge(adp, how='left', on='Name')
            df_w_adp = df_w_adp.rename({'AVG': 'ADP'}, axis=1)
            df_w_adp = df_w_adp[['Name',proj_col_name, actuals_col_name,'ADP']]
            if proj_method == "W+SV+HLD":
                df_w_adp = df_w_adp.drop_duplicates()
                df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=False)
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Luis Garcia']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Javy Guerra']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Will Smith']
                filename = 'projections/pitchers/relievers/'+str(model_name)+'2023_W_SV_HLD_projections_'+timestr+'.csv'
                df_w_adp.to_csv(filename)
            elif proj_method == "ERA+WHIP":
                df_w_adp = df_w_adp.drop_duplicates()
                df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=True)
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Luis Garcia']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Javy Guerra']
                df_w_adp = df_w_adp[df_w_adp['Name'] != 'Will Smith']
                filename = 'projections/pitchers/relievers/'+str(model_name)+'2023_ERA_WHIP_projections_'+timestr+'.csv'
                df_w_adp.to_csv(filename)            
            #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'

        else:
            return model_name, mean_sq_err, mean_ab_err, r2_err
        

def fantasy_points_predictor(models, proj_method, year_to_project, train_dataset, test_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        model_name, mean_sq_err, mean_ab_err, r2_err = predict_relief_pitchers(model, proj_method, year_to_project, train_dataset, test_dataset)
        result = [model_name, mean_sq_err, mean_ab_err, r2_err]
        results.append(result)
            #position_projs_df.append(result)
        #avg_position_projs = position_projs_df.groupby('Player')

    results_df = pd.DataFrame(results, columns=['Model Name', 'Mean Square Error', 'Mean Absolute Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Mean Absolute Error'])

    top_rp_model = results_df.head(1)
    
    if year_to_project == 2023:
        print('Simulation complete! Check the projections folder to find your ranked players by position for this years draft.')
    else:
        if proj_method == "W+SV+HLD":
            results_filename = 'projections/pitchers/relievers/2023_model_results_W_SV_HLD_summary_'+timestr+'.csv'
            results_df.to_csv(results_filename)
            top_model_filename = 'projections/pitchers/relievers/2023_top_models_W_SV_HLD_by_position_summary_'+timestr+'.csv'
            top_rp_model.to_csv(top_model_filename)
        elif proj_method == "ERA+WHIP":
            results_filename = 'projections/pitchers/relievers/2023_model_results_ERA_WHIP_summary_'+timestr+'.csv'
            results_df.to_csv(results_filename)
            top_model_filename = 'projections/pitchers/relievers/2023_top_models_ERA_WHIP_by_position_summary_'+timestr+'.csv'
            top_rp_model.to_csv(top_model_filename)
        print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')
        return top_rp_model

def model_object_generator(model_name):
    if model_name == 'LinearRegression':
        return LinearRegression()
    elif model_name == 'Ridge':
        return Ridge()
    elif model_name == 'Lasso':
        return Lasso()
    elif model_name == 'BayesianRidge':
        return BayesianRidge()
    elif model_name == 'RandomForestRegressor':
        #return RandomForestRegressor(n_estimators=1000, min_samples_leaf=4, min_samples_split=10)
        return RandomForestRegressor(n_estimators=1000)
    elif model_name == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    elif model_name == 'MLPRegressor':
        return MLPRegressor()
    elif model_name == 'Elastic_Net':
        return ElasticNet()
    else:
        print('Model not supported by model_object_generator function at the moment.')
        return None

def combined_reliever_rankings(wsvhld_dataset, erawhip_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")
    
    #wsvhld_dataset = wsvhld_dataset.iloc[:,1:]
    print(wsvhld_dataset.columns)
    wsvhld_min = wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD'].min()
    wsvhld_max = wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD'].max()
    wsvhld_dataset['Scaled_WSVHLD_Rank'] = (wsvhld_dataset['Model_Projection_Sum_of_W_SV_HLD']-wsvhld_min)/(wsvhld_max-wsvhld_min)

    #erawhip_dataset = wsvhld_dataset.iloc[:,1:]
    erawhip_min = erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP'].min()
    erawhip_max = erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP'].max()
    erawhip_dataset['Scaled_ERAWHIP_Rank'] = 1-(erawhip_dataset['Model_Projection_Sum_of_ERA_WHIP']-erawhip_min)/(erawhip_max-erawhip_min)

    ovr_rp_rankings = wsvhld_dataset.merge(erawhip_dataset, how='left', on='Name')
    ovr_rp_rankings['Scaled_OVR_Score'] = (ovr_rp_rankings['Scaled_WSVHLD_Rank'] + ovr_rp_rankings['Scaled_ERAWHIP_Rank']) * 50
    ovr_rp_rankings = ovr_rp_rankings.sort_values(by='Scaled_OVR_Score', ascending=False)
    ovr_rp_rankings['OVR_Rank'] = np.arange(len(ovr_rp_rankings))+1
    
    ovr_rp_rankings_filename = 'projections/pitchers/relievers/2023_ovr_rp_rankings_'+timestr+'.csv'
    ovr_rp_rankings.to_csv(ovr_rp_rankings_filename)



In [48]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_22.csv')

top_rp_models_sv = fantasy_points_predictor(models_list, "W+SV+HLD", 2022, train_dataset_2022, test_dataset_2022)

Predict Relief Pitchers targeting W+SV+HLD with a LinearRegression model
Predict Relief Pitchers targeting W+SV+HLD with a Ridge model
Predict Relief Pitchers targeting W+SV+HLD with a Lasso model
Predict Relief Pitchers targeting W+SV+HLD with a BayesianRidge model
Predict Relief Pitchers targeting W+SV+HLD with a ElasticNet model
Predict Relief Pitchers targeting W+SV+HLD with a RandomForestRegressor model
Predict Relief Pitchers targeting W+SV+HLD with a KNeighborsRegressor model
Predict Relief Pitchers targeting W+SV+HLD with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.




In [49]:
top_rp_models_sv

Unnamed: 0,Model Name,Mean Square Error,Mean Absolute Error,R2 Score
7,MLPRegressor,54.9572,54.9572,0.516485


In [50]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_20_22.csv')

predict_relief_pitchers(MLPRegressor(max_iter=1000), "W+SV+HLD", 2023, train_dataset, test_dataset)

Predict Relief Pitchers targeting W+SV+HLD with a MLPRegressor model




In [51]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_22.csv')

top_rp_models_era = fantasy_points_predictor(models_list, "ERA+WHIP", 2022, train_dataset_2022, test_dataset_2022)

Predict Relief Pitchers targeting ERA+WHIP with a LinearRegression model
Predict Relief Pitchers targeting ERA+WHIP with a Ridge model
Predict Relief Pitchers targeting ERA+WHIP with a Lasso model
Predict Relief Pitchers targeting ERA+WHIP with a BayesianRidge model
Predict Relief Pitchers targeting ERA+WHIP with a ElasticNet model
Predict Relief Pitchers targeting ERA+WHIP with a RandomForestRegressor model
Predict Relief Pitchers targeting ERA+WHIP with a KNeighborsRegressor model
Predict Relief Pitchers targeting ERA+WHIP with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.


In [52]:
top_rp_models_era

Unnamed: 0,Model Name,Mean Square Error,Mean Absolute Error,R2 Score
1,Ridge,1.274435,1.274435,0.703104


In [53]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_20_22.csv')

predict_relief_pitchers(Ridge(), "ERA+WHIP", 2023, train_dataset, test_dataset)

Predict Relief Pitchers targeting ERA+WHIP with a Ridge model


In [54]:
wsvhld_dataset = pd.read_csv('projections/pitchers/relievers/MLPRegressor2023_W_SV_HLD_projections_20230312-163802.csv')
erawhip_dataset = pd.read_csv('projections/pitchers/relievers/Ridge2023_ERA_WHIP_projections_20230312-163804.csv')
combined_reliever_rankings(wsvhld_dataset, erawhip_dataset)

Index(['Unnamed: 0', 'Name', 'Model_Projection_Sum_of_W_SV_HLD',
       'Actual_Sum_of_W_SV_HLD', 'ADP'],
      dtype='object')


## ESPN Projections

In [8]:
def predict_starting_pitchers_espn(model_obj, year_to_project, train_dataset, test_dataset):

    # Preprocess the data and select features for the model

    train_dataset = train_dataset.fillna(0)
    test_dataset = test_dataset.fillna(0)

    train_dataset['Sum_of_ERA_WHIP'] = train_dataset['ERA_x'] + train_dataset['WHIP']
    test_dataset['Sum_of_ERA_WHIP'] = test_dataset['ERA_x'] + test_dataset['WHIP']

    train_dataset['Fantasy_Points'] = train_dataset['IP_x'] - train_dataset['ER']*2 + train_dataset['W_x']*2 - train_dataset['L_x']*2 + train_dataset['SV_x']*5 - train_dataset['BS'] + train_dataset['SO'] - train_dataset['H'] - train_dataset['BB'] + train_dataset['ShO']*2 - train_dataset['HBP'] + train_dataset['CG']*2 + train_dataset['HLD']*2
    test_dataset['Fantasy_Points'] = test_dataset['IP_x'] - test_dataset['ER']*2 + test_dataset['W_x']*2 - test_dataset['L_x']*2 + test_dataset['SV_x']*5 - test_dataset['BS'] + test_dataset['SO'] - test_dataset['H'] - test_dataset['BB'] + test_dataset['ShO']*2 - test_dataset['HBP'] + test_dataset['CG']*2 + test_dataset['HLD']*2

    chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                   'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG','FIP-', 'xFIP-', 
                   'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                   'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                   'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                   'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                   'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                   'W_x', 'SO', 'Sum_of_ERA_WHIP', 'Fantasy_Points']

    train_data_chosen_cols = train_dataset[chosen_cols]
    test_data_chosen_cols = test_dataset[chosen_cols]

    # Remove % signs
    train_data_chosen_cols = train_data_chosen_cols.replace('%','', regex=True)
    test_data_chosen_cols = test_data_chosen_cols.replace('%','', regex=True)

    # Convert cols to floats
    train_data_chosen_cols.astype(np.float64)
    test_data_chosen_cols.astype(np.float64)

    #starter_targets = ['W_x', 'SO', 'Sum_of_ERA_WHIP']
    starter_targets = ['Fantasy_Points']

    # Split the data into training and testing sets
    
    train_data_x = train_data_chosen_cols.drop(starter_targets, axis=1)
    test_data_x = test_data_chosen_cols.drop(starter_targets, axis=1)

    for target in starter_targets:
        train_data_y = train_data_chosen_cols[target]
        test_data_y = test_data_chosen_cols[target]
        # Scale x variable datasets
        scaler = MinMaxScaler()
        train_data_x_scaled = scaler.fit_transform(train_data_x)
        test_data_x_scaled = scaler.fit_transform(test_data_x)

        # ML Model using using fit and predict QB Fantasy Points
        model = model_obj
        model_name = type(model).__name__
        print('Predict Starting Pitchers with a '+ model_name + ' model')
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            model = model.fit(train_data_x_scaled,train_data_y)
        else:
            model = model.fit(train_data_x_scaled,train_data_y.values.ravel())
        y_preds = model.predict(test_data_x_scaled)
        
        mean_sq_err = None
        r2_err = None
        if year_to_project == 2023:
            pass
        else:
            # The mean squared error
            mean_sq_err = mean_squared_error(test_data_y, y_preds)
            #print("Mean squared error: %.2f" % mean_sq_err)
            
            # The mean absolute error
            mean_ab_err = mean_squared_error(test_data_y, y_preds)

            # The coefficient of determination: 1 is perfect prediction
            r2_err = r2_score(test_data_y.values.ravel(), y_preds)

        # Create new dataframe for projections
        player_point_proj = None
        proj_col_name  = 'Model_Projection_'+target
        actuals_col_name  = 'Actual_'+target
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            #player_point_proj = pd.DataFrame({'Age': test_data_x['Age'], 'SLG': test_data_x['SLG'], proj_col_name: y_preds[:,0], actuals_col_name: test_data_y[target]})
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        else:
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        # Merge player names and info back in
        #player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])

        player_point_proj_wnames = player_point_proj.merge(test_dataset[['Name', 'K/9_x', 'BABIP', target]], how='inner', on=['K/9_x', 'BABIP'])
        player_point_proj_wnames = player_point_proj_wnames.drop(columns=[target])

        # Calculate Model vs Actual Delta
        player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames[proj_col_name] - player_point_proj_wnames[actuals_col_name]

        # Prep Dataframe for csv output
        player_point_proj_wnames = player_point_proj_wnames.sort_values(by=proj_col_name, ascending=False)
        player_point_proj_wnames = player_point_proj_wnames[['Name', 'K/9_x', proj_col_name, actuals_col_name, 'Model_v_Actual_Delta']]
        # Save dataframes
        if year_to_project == 2023:
            import time
            timestr = time.strftime("%Y%m%d-%H%M%S")
            df = player_point_proj_wnames
            adp = pd.read_csv('data/FantasyPros_2023_Overall_MLB_ADP_Rankings.csv')
            adp = adp.rename({'Player': 'Name'}, axis=1)
            df_w_adp = df.merge(adp, how='left', on='Name')
            df_w_adp = df_w_adp.rename({'AVG': 'ADP'}, axis=1)
            df_w_adp = df_w_adp[['Name',proj_col_name, actuals_col_name,'ADP']]
            df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=False)
            df_w_adp = df_w_adp.drop_duplicates()
            #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'
            filename = 'espn_projections/pitchers/starters/'+str(model_name)+'2023_projections_'+timestr+'.csv'
            df_w_adp.to_csv(filename)
        else:
            return model_name, mean_sq_err, mean_ab_err, r2_err
        

def fantasy_points_predictor_espn(models, year_to_project, train_dataset, test_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        model_name, mean_sq_err, mean_ab_err, r2_err = predict_starting_pitchers_espn(model, year_to_project, train_dataset, test_dataset)
        result = [model_name, mean_sq_err, mean_ab_err, r2_err]
        results.append(result)
            #position_projs_df.append(result)
        #avg_position_projs = position_projs_df.groupby('Player')

    results_df = pd.DataFrame(results, columns=['Model Name', 'Mean Square Error', 'Mean Absolute Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Mean Absolute Error'], ascending=[False])

    top_sp_model = results_df.tail(1)
    
    if year_to_project == 2023:
        print('Simulation complete! Check the projections folder to find your ranked players by position for this years draft.')
    else:
        results_filename = 'espn_projections/pitchers/starters/2023_model_results_summary_'+timestr+'.csv'
        results_df.to_csv(results_filename)
        top_model_filename = 'espn_projections/pitchers/starters/2023_top_models_by_position_summary_'+timestr+'.csv'
        top_sp_model.to_csv(top_model_filename)
        print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')
        return top_sp_model

def model_object_generator(model_name):
    if model_name == 'LinearRegression':
        return LinearRegression()
    elif model_name == 'Ridge':
        return Ridge()
    elif model_name == 'Lasso':
        return Lasso()
    elif model_name == 'BayesianRidge':
        return BayesianRidge()
    elif model_name == 'RandomForestRegressor':
        #return RandomForestRegressor(n_estimators=1000, min_samples_leaf=4, min_samples_split=10)
        return RandomForestRegressor(n_estimators=1000)
    elif model_name == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    elif model_name == 'MLPRegressor':
        return MLPRegressor()
    elif model_name == 'Elastic_Net':
        return ElasticNet()
    else:
        print('Model not supported by model_object_generator function at the moment.')
        return None


In [9]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_22.csv')

top_sp_models = fantasy_points_predictor_espn(models_list, 2022, train_dataset_2022, test_dataset_2022)

Predict Starting Pitchers with a LinearRegression model
Predict Starting Pitchers with a Ridge model
Predict Starting Pitchers with a Lasso model
Predict Starting Pitchers with a BayesianRidge model
Predict Starting Pitchers with a ElasticNet model
Predict Starting Pitchers with a RandomForestRegressor model
Predict Starting Pitchers with a KNeighborsRegressor model
Predict Starting Pitchers with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.




In [10]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/starters/pitchers_sp_20_22.csv')

predict_starting_pitchers_espn(Ridge(), 2023, train_dataset, test_dataset)

Predict Starting Pitchers with a Ridge model


In [21]:
def predict_relief_pitchers_espn(model_obj, year_to_project, train_dataset, test_dataset):

    # Preprocess the data and select features for the model

    train_dataset = train_dataset.fillna(0)
    test_dataset = test_dataset.fillna(0)

    train_dataset['Sum_of_W_SV_HLD'] = train_dataset['W_x'] + train_dataset['SV_y'] + train_dataset['HLD']
    test_dataset['Sum_of_W_SV_HLD'] = test_dataset['W_x'] + test_dataset['SV_y'] + test_dataset['HLD']

    train_dataset['Fantasy_Points'] = train_dataset['IP_x'] - train_dataset['ER']*2 + train_dataset['W_x']*2 - train_dataset['L_x']*2 + train_dataset['SV_x']*5 - train_dataset['BS'] + train_dataset['SO'] - train_dataset['H'] - train_dataset['BB'] + train_dataset['ShO']*2 - train_dataset['HBP'] + train_dataset['CG']*2 + train_dataset['HLD']*2
    test_dataset['Fantasy_Points'] = test_dataset['IP_x'] - test_dataset['ER']*2 + test_dataset['W_x']*2 - test_dataset['L_x']*2 + test_dataset['SV_x']*5 - test_dataset['BS'] + test_dataset['SO'] - test_dataset['H'] - test_dataset['BB'] + test_dataset['ShO']*2 - test_dataset['HBP'] + test_dataset['CG']*2 + test_dataset['HLD']*2


    chosen_cols = ['K/9_x','BB/9_x','HR/9_x','BABIP_x','LOB%_x','GB%_x','HR/FB_x','vFA (pi)',
                    'FIP_x','xFIP_x','CG','ShO','BS','K%','BB%','K-BB%','AVG', 'FIP-', 'xFIP-', 
                    'E-F',	'BABIP', 'GB/FB', 'LD%','FB%_x','IFFB%','RS/9','Pull%','Cent%','Oppo%',
                    'Soft%','Med%','Hard%','O-Swing%','Z-Swing%','Swing%','O-Contact%','Z-Contact%',
                    'Contact%','Zone%','F-Strike%','SwStr%','CStr%','CSW%','FBv','SL%','SLv','CT%',
                    'CTv','CB%','CBv','CH%','CHv','SF%','SFv','KN%','KNv','XX%','wFB','wSL','wCT',
                    'wCB','wCH','wSF','wKN','wFB/C','wSL/C','wCT/C','wCB/C','wCH/C','wSF/C','wKN/C', 
                    'W_x', 'SO', 'Sum_of_W_SV_HLD', 'Fantasy_Points']  
    reliever_targets = ['Fantasy_Points']

    train_data_chosen_cols = train_dataset[chosen_cols]
    test_data_chosen_cols = test_dataset[chosen_cols]

    # Remove % signs
    train_data_chosen_cols = train_data_chosen_cols.replace('%','', regex=True)
    test_data_chosen_cols = test_data_chosen_cols.replace('%','', regex=True)

    # Convert cols to floats
    train_data_chosen_cols.astype(np.float64)
    test_data_chosen_cols.astype(np.float64)

    # Split the data into training and testing sets
    
    train_data_x = train_data_chosen_cols.drop(reliever_targets, axis=1)
    test_data_x = test_data_chosen_cols.drop(reliever_targets, axis=1)

    for target in reliever_targets:
        train_data_y = train_data_chosen_cols[target]
        test_data_y = test_data_chosen_cols[target]
        # Scale x variable datasets
        scaler = MinMaxScaler()
        train_data_x_scaled = scaler.fit_transform(train_data_x)
        test_data_x_scaled = scaler.fit_transform(test_data_x)

        # ML Model using using fit and predict QB Fantasy Points
        model = model_obj
        model_name = type(model).__name__
        print('Predict Relief Pitchers targeting Fantasy Points with a '+ model_name + ' model')
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            model = model.fit(train_data_x_scaled,train_data_y)
        else:
            model = model.fit(train_data_x_scaled,train_data_y.values.ravel())
        y_preds = model.predict(test_data_x_scaled)
        
        mean_sq_err = None
        r2_err = None
        if year_to_project == 2023:
            pass
        else:
            # The mean squared error
            mean_sq_err = mean_squared_error(test_data_y, y_preds)
            #print("Mean squared error: %.2f" % mean_sq_err)
            
            # The mean absolute error
            mean_ab_err = mean_squared_error(test_data_y, y_preds)

            # The coefficient of determination: 1 is perfect prediction
            r2_err = r2_score(test_data_y.values.ravel(), y_preds)

        # Create new dataframe for projections
        player_point_proj = None
        proj_col_name  = 'Model_Projection_'+target
        actuals_col_name  = 'Actual_'+target
        if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
            #player_point_proj = pd.DataFrame({'Age': test_data_x['Age'], 'SLG': test_data_x['SLG'], proj_col_name: y_preds[:,0], actuals_col_name: test_data_y[target]})
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        else:
            player_point_proj = pd.DataFrame({'K/9_x': test_data_x['K/9_x'], 'BABIP': test_data_x['BABIP'], proj_col_name: y_preds, actuals_col_name: test_data_y})
        # Merge player names and info back in
        #player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])

        player_point_proj_wnames = player_point_proj.merge(test_dataset[['Name', 'K/9_x', 'BABIP', target]], how='inner', on=['K/9_x', 'BABIP'])
        player_point_proj_wnames = player_point_proj_wnames.drop(columns=[target])

        # Calculate Model vs Actual Delta
        player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames[proj_col_name] - player_point_proj_wnames[actuals_col_name]

        # Prep Dataframe for csv output
        player_point_proj_wnames = player_point_proj_wnames.sort_values(by=proj_col_name, ascending=False)
        player_point_proj_wnames = player_point_proj_wnames[['Name', 'K/9_x', proj_col_name, actuals_col_name, 'Model_v_Actual_Delta']]
        # Save dataframes
        if year_to_project == 2023:
            import time
            timestr = time.strftime("%Y%m%d-%H%M%S")
            df = player_point_proj_wnames
            adp = pd.read_csv('data/FantasyPros_2023_Overall_MLB_ADP_Rankings.csv')
            adp = adp.rename({'Player': 'Name'}, axis=1)
            df_w_adp = df.merge(adp, how='left', on='Name')
            df_w_adp = df_w_adp.rename({'AVG': 'ADP'}, axis=1)
            df_w_adp = df_w_adp[['Name',proj_col_name, actuals_col_name,'ADP']]
            df_w_adp = df_w_adp.drop_duplicates()
            df_w_adp = df_w_adp.sort_values(by=proj_col_name, ascending=False)
            df_w_adp = df_w_adp[df_w_adp['Name'] != 'Luis Garcia']
            df_w_adp = df_w_adp[df_w_adp['Name'] != 'Javy Guerra']
            df_w_adp = df_w_adp[df_w_adp['Name'] != 'Will Smith']
            filename = 'espn_projections/pitchers/relievers/'+str(model_name)+'2023_ERA_WHIP_projections_'+timestr+'.csv'
            df_w_adp.to_csv(filename)            
            #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'

        else:
            return model_name, mean_sq_err, mean_ab_err, r2_err
        

def fantasy_points_predictor_rp_espn(models, year_to_project, train_dataset, test_dataset):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        model_name, mean_sq_err, mean_ab_err, r2_err = predict_relief_pitchers_espn(model, year_to_project, train_dataset, test_dataset)
        result = [model_name, mean_sq_err, mean_ab_err, r2_err]
        results.append(result)
            #position_projs_df.append(result)
        #avg_position_projs = position_projs_df.groupby('Player')

    results_df = pd.DataFrame(results, columns=['Model Name', 'Mean Square Error', 'Mean Absolute Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Mean Absolute Error'])

    top_rp_model = results_df.head(1)
    
    if year_to_project == 2023:
        print('Simulation complete! Check the projections folder to find your ranked players by position for this years draft.')
    else:
        results_filename = 'espn_projections/pitchers/relievers/2023_model_results_Fantasy_Points_summary_'+timestr+'.csv'
        results_df.to_csv(results_filename)
        top_model_filename = 'espn_projections/pitchers/relievers/2023_top_models_Fantasy_Points_by_position_summary_'+timestr+'.csv'
        top_rp_model.to_csv(top_model_filename)
        print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')
        return top_rp_model

def model_object_generator(model_name):
    if model_name == 'LinearRegression':
        return LinearRegression()
    elif model_name == 'Ridge':
        return Ridge()
    elif model_name == 'Lasso':
        return Lasso()
    elif model_name == 'BayesianRidge':
        return BayesianRidge()
    elif model_name == 'RandomForestRegressor':
        #return RandomForestRegressor(n_estimators=1000, min_samples_leaf=4, min_samples_split=10)
        return RandomForestRegressor(n_estimators=1000)
    elif model_name == 'KNeighborsRegressor':
        return KNeighborsRegressor()
    elif model_name == 'MLPRegressor':
        return MLPRegressor()
    elif model_name == 'Elastic_Net':
        return ElasticNet()
    else:
        print('Model not supported by model_object_generator function at the moment.')
        return None


In [19]:
models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), ElasticNet(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor(max_iter=1000)]
train_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset_2022 = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_22.csv')

top_rp_models = fantasy_points_predictor_rp_espn(models_list, 2022, train_dataset_2022, test_dataset_2022)

Predict Relief Pitchers targeting Fantasy Points with a LinearRegression model
Predict Relief Pitchers targeting Fantasy Points with a Ridge model
Predict Relief Pitchers targeting Fantasy Points with a Lasso model
Predict Relief Pitchers targeting Fantasy Points with a BayesianRidge model
Predict Relief Pitchers targeting Fantasy Points with a ElasticNet model
Predict Relief Pitchers targeting Fantasy Points with a RandomForestRegressor model
Predict Relief Pitchers targeting Fantasy Points with a KNeighborsRegressor model
Predict Relief Pitchers targeting Fantasy Points with a MLPRegressor model
Simulation complete! Check the predictor_tool_results folder to find summary of models.




In [22]:
train_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv')
test_dataset = pd.read_csv('data/fangraphs/pitchers/relievers/pitchers_rp_20_22.csv')

predict_relief_pitchers_espn(Lasso(), 2023, train_dataset, test_dataset)

Predict Relief Pitchers targeting Fantasy Points with a Lasso model
