In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import plotly.express as px
pd.options.display.max_columns = 150

In [None]:
# Read the starter training and test CSVs, replacing every missing cell with 0
# as part of the same chained expression.
starters_train = pd.read_csv(
    'data/fangraphs/pitchers/starters/pitchers_sp_19_21.csv'
).fillna(0)
starters_test = pd.read_csv(
    'data/fangraphs/pitchers/starters/pitchers_sp_22.csv'
).fillna(0)


In [None]:
# Model inputs for starting pitchers. Column order is kept exactly as before so
# the feature matrix handed to the model is unchanged.
# NOTE(review): inputs such as 'ERA_x', 'ER', 'H' and 'BB' sit next to the
# 'ERA' / 'WHIP' targets; if they describe the same season as the targets they
# leak the answer into the features — confirm against the CSV schema.
starter_features = [
    'K/9_x', 'BB/9_x', 'HR/9_x', 'BABIP_x', 'LOB%_x', 'GB%_x', 'HR/FB_x', 'vFA (pi)',
    'ERA_x', 'xERA', 'FIP_x', 'xFIP_x', 'WAR', 'CG', 'ShO', 'SV_y', 'HLD', 'BS',
    'IP_y', 'TBF', 'H', 'R', 'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK',
    'K%', 'BB%', 'K-BB%', 'AVG', 'ERA-', 'FIP-', 'xFIP-', 'E-F', 'SIERA', 'BABIP',
    'GB/FB', 'LD%', 'FB%_x', 'IFFB%', 'RS', 'RS/9', 'Balls', 'Strikes', 'Pitches',
    'Pull%', 'Cent%', 'Oppo%', 'Soft%', 'Med%', 'Hard%',
    'O-Swing%', 'Z-Swing%', 'Swing%', 'O-Contact%', 'Z-Contact%', 'Contact%',
    'Zone%', 'F-Strike%', 'SwStr%', 'CStr%', 'CSW%',
    'FBv', 'SL%', 'SLv', 'CT%', 'CTv', 'CB%', 'CBv', 'CH%', 'CHv', 'SF%', 'SFv', 'KN%', 'KNv', 'XX%',
    'wFB', 'wSL', 'wCT', 'wCB', 'wCH', 'wSF', 'wKN',
    'wFB/C', 'wSL/C', 'wCT/C', 'wCB/C', 'wCH/C', 'wSF/C', 'wKN/C',
]
# Columns the models are asked to predict.
starter_targets = ['W_x', 'SO', 'ERA', 'WHIP']

# Remove '%' from any text values (presumably percentages such as '12.3%') and
# cast to float, so the feature columns are explicitly numeric instead of
# object-dtype strings that the estimator would have to coerce implicitly.
starters_train[starter_features] = (
    starters_train[starter_features].replace({'%': ''}, regex=True).astype(float)
)
starters_test[starter_features] = (
    starters_test[starter_features].replace({'%': ''}, regex=True).astype(float)
)

# Copy of the test frame without the targets; the training loop writes its
# Predicted_<target> columns here while starters_test keeps the actual values.
starters_test_dropped_cols = starters_test.drop(starter_targets, axis=1)

# Not used for now: 'L_x','SV_x','G_x','GS_x','IP_x',

In [None]:
# Train a random forest regressor per target and score it on the test frame.
# random_state is pinned so that Restart & Run All reproduces the same
# predictions and errors instead of a fresh random forest on every run.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
maes = []  # one mean absolute error per entry of starter_targets, in loop order
for target in starter_targets:
    # The same estimator object is refit for each target.
    rf.fit(starters_train[starter_features], starters_train[target])
    # Make predictions on the test set
    predictions = rf.predict(starters_test_dropped_cols[starter_features])
    predict_string = f"Predicted_{target}"
    # Assign the predictions to the players in the test set
    starters_test_dropped_cols[predict_string] = predictions
    # Argument order follows scikit-learn's (y_true, y_pred) signature.
    mae = mean_absolute_error(starters_test[target], predictions)
    maes.append(mae)

In [None]:
# Mean absolute errors collected by the training loop: one value per starter target, in loop order.
maes

In [None]:
# Test frame without the target columns, now carrying the Predicted_<target> columns added by the training loop.
starters_test_dropped_cols

In [None]:
# Scatter of the W_y column (x axis) against the model's Predicted_W_x (y axis);
# hovering a point shows the pitcher's Name and Team.
# NOTE(review): the model is trained and scored on W_x while this axis reads
# W_y — confirm both columns carry the same win totals.
predicted_vs_actual_ws = px.scatter(
    starters_test_dropped_cols,
    x='W_y',
    y='Predicted_W_x',
    title='Predicted vs Actual Wins',
    hover_data=['Name', 'Team'],
)
predicted_vs_actual_ws.show()

In [None]:
# Predicted strikeouts (x) against predicted wins (y), coloured by predicted ERA.
# The figure is saved as a standalone HTML file and then rendered inline.
plot_filename = 'plots/predicted_starters_ws_vs_ks.html'
starters_ws_vs_ks = px.scatter(
    starters_test_dropped_cols,
    x='Predicted_SO',
    y='Predicted_W_x',
    color='Predicted_ERA',
    title='Predicted Strikeouts vs Wins 2022',
    hover_data=['Name', 'Team'],
)
starters_ws_vs_ks.write_html(plot_filename)
starters_ws_vs_ks.show()

In [None]:
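# Strikeouts Model
# NOTE(review): the code in this cell predicts the "FantasyValue" column from
# columns such as AVG/HR/RBI, not strikeouts — confirm whether it is a placeholder.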

import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Load the data into a pandas dataframe
df = pd.read_csv("fantasy_baseball_data.csv")

# Preprocess the data
# ... (fill in missing values, handle outliers, etc.)

# Select features for the model
features = ["AVG", "HR", "RBI", "SB", "OPS", "Games", "AtBats", "Hits"]

# Split the data into training and testing sets: rows whose Year is 2022 are
# held out, every other row is used for training.
train_data = df[df["Year"] != 2022]
# .copy() gives the test rows their own frame, so the prediction column added
# below is not written onto a slice of df (which triggers SettingWithCopyWarning).
test_data = df[df["Year"] == 2022].copy()

# Train a random forest regressor; random_state is pinned so reruns give the
# same forest and therefore the same ranking.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(train_data[features], train_data["FantasyValue"])

# Make predictions on the test set
predictions = rf.predict(test_data[features])

# Assign the predictions to the players in the test set
test_data["PredictedFantasyValue"] = predictions

# Sort the players by their predicted fantasy value
sorted_data = test_data.sort_values("PredictedFantasyValue", ascending=False)

# Display the top 10 players with the highest predicted fantasy value.
# Left as the cell's last expression so the notebook renders it as a table.
sorted_data[["Player", "PredictedFantasyValue"]].head(10)


## Reliever Models

In [None]:
# Read the reliever training and test CSVs, replacing every missing cell with 0
# as part of the same chained expression.
relievers_train = pd.read_csv(
    'data/fangraphs/pitchers/relievers/pitchers_rp_19_21.csv'
).fillna(0)
relievers_test = pd.read_csv(
    'data/fangraphs/pitchers/relievers/pitchers_rp_22.csv'
).fillna(0)


# Model inputs for relief pitchers. 'SV_y' and 'HLD' are absent here because
# they are prediction targets below. Column order is kept exactly as before so
# the feature matrix handed to the model is unchanged.
# NOTE(review): inputs such as 'ERA_x', 'ER', 'H' and 'BB' sit next to the
# 'ERA' / 'WHIP' targets; if they describe the same season as the targets they
# leak the answer into the features — confirm against the CSV schema.
relievers_features = [
    'K/9_x', 'BB/9_x', 'HR/9_x', 'BABIP_x', 'LOB%_x', 'GB%_x', 'HR/FB_x', 'vFA (pi)',
    'ERA_x', 'xERA', 'FIP_x', 'xFIP_x', 'WAR', 'CG', 'ShO', 'BS',
    'IP_y', 'TBF', 'H', 'R', 'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK',
    'K%', 'BB%', 'K-BB%', 'AVG', 'ERA-', 'FIP-', 'xFIP-', 'E-F', 'SIERA', 'BABIP',
    'GB/FB', 'LD%', 'FB%_x', 'IFFB%', 'RS', 'RS/9', 'Balls', 'Strikes', 'Pitches',
    'Pull%', 'Cent%', 'Oppo%', 'Soft%', 'Med%', 'Hard%',
    'O-Swing%', 'Z-Swing%', 'Swing%', 'O-Contact%', 'Z-Contact%', 'Contact%',
    'Zone%', 'F-Strike%', 'SwStr%', 'CStr%', 'CSW%',
    'FBv', 'SL%', 'SLv', 'CT%', 'CTv', 'CB%', 'CBv', 'CH%', 'CHv', 'SF%', 'SFv', 'KN%', 'KNv', 'XX%',
    'wFB', 'wSL', 'wCT', 'wCB', 'wCH', 'wSF', 'wKN',
    'wFB/C', 'wSL/C', 'wCT/C', 'wCB/C', 'wCH/C', 'wSF/C', 'wKN/C',
]
# Columns the models are asked to predict.
relievers_targets = ['W_x', 'SV_y', 'HLD', 'SO', 'ERA', 'WHIP']

# Remove '%' from any text values (presumably percentages such as '12.3%') and
# cast to float, so the feature columns are explicitly numeric instead of
# object-dtype strings that the estimator would have to coerce implicitly.
relievers_train[relievers_features] = (
    relievers_train[relievers_features].replace({'%': ''}, regex=True).astype(float)
)
relievers_test[relievers_features] = (
    relievers_test[relievers_features].replace({'%': ''}, regex=True).astype(float)
)

# Copy of the test frame without the targets; the training loop writes its
# Predicted_<target> columns here while relievers_test keeps the actual values.
relievers_test_dropped_cols = relievers_test.drop(relievers_targets, axis=1)

# Not used for now: 'L_x','SV_x','G_x','GS_x','IP_x',

# Train a random forest regressor per target and score it on the test frame.
# random_state is pinned so that Restart & Run All reproduces the same
# predictions and errors instead of a fresh random forest on every run.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
maes = []  # one mean absolute error per entry of relievers_targets, in loop order
for target in relievers_targets:
    # The same estimator object is refit for each target.
    rf.fit(relievers_train[relievers_features], relievers_train[target])
    # Make predictions on the test set
    predictions = rf.predict(relievers_test_dropped_cols[relievers_features])
    predict_string = f"Predicted_{target}"
    # Assign the predictions to the players in the test set
    relievers_test_dropped_cols[predict_string] = predictions
    # Argument order follows scikit-learn's (y_true, y_pred) signature.
    mae = mean_absolute_error(relievers_test[target], predictions)
    maes.append(mae)

In [None]:
# SV_x from the untouched test frame (x axis) against the model's Predicted_SV_y
# (y axis); the figure is saved as HTML and then rendered inline.
# NOTE(review): the model is trained and scored on SV_y while this axis reads
# SV_x — confirm both columns carry the same save totals.
# NOTE(review): the output file name contains a space ('vs actual'). It is kept
# byte-for-byte so existing outputs are unaffected; before renaming, check that
# no other cell writes to the underscore variant of this path.
plot_filename = 'plots/relievers_predicted_vs actual_saves.html'
relievers_vs_actual_saves = px.scatter(
    relievers_test_dropped_cols,
    x=relievers_test['SV_x'],
    y=relievers_test_dropped_cols['Predicted_SV_y'],
    title='Relievers Predicted vs Actual Saves 2022',
    hover_data=['Name', 'Team'],
)
relievers_vs_actual_saves.write_html(plot_filename)
relievers_vs_actual_saves.show()

In [None]:
# Actual strikeouts (SO from the untouched test frame) against the model's
# Predicted_SO. The figure previously reused the saves variable and a "saves"
# file name left over from the cell it was copied from; both now say
# strikeouts, so the HTML export is labelled for what it actually shows.
relievers_vs_actual_strikeouts = px.scatter(
    relievers_test_dropped_cols,
    x=relievers_test['SO'],
    y=relievers_test_dropped_cols['Predicted_SO'],
    title='Relievers Predicted vs Actual Strikeouts 2022',
    hover_data=['Name', 'Team'],
)
plot_filename = 'plots/relievers_predicted_vs_actual_strikeouts.html'
relievers_vs_actual_strikeouts.write_html(plot_filename)
relievers_vs_actual_strikeouts.show()

In [None]:
# Predicted strikeouts (x) against predicted saves (y), coloured by predicted ERA.
# The figure is saved as a standalone HTML file and then rendered inline.
plot_filename = 'plots/relievers_predicted_svs_vs_ks.html'
relievers_ws_vs_ks = px.scatter(
    relievers_test_dropped_cols,
    x='Predicted_SO',
    y='Predicted_SV_y',
    color='Predicted_ERA',
    title='Predicted Strikeouts vs Saves 2022',
    hover_data=['Name', 'Team'],
)
relievers_ws_vs_ks.write_html(plot_filename)
relievers_ws_vs_ks.show()

In [None]:
# Predicted strikeouts (x) against predicted holds (y), coloured by predicted ERA.
# The figure is saved as a standalone HTML file and then rendered inline.
plot_filename = 'plots/predicted_relievers_hlds_vs_ks.html'
relievers_ws_vs_ks = px.scatter(
    relievers_test_dropped_cols,
    x='Predicted_SO',
    y='Predicted_HLD',
    color='Predicted_ERA',
    title='Predicted Strikeouts vs Holds 2022',
    hover_data=['Name', 'Team'],
)
relievers_ws_vs_ks.write_html(plot_filename)
relievers_ws_vs_ks.show()