<a href="https://colab.research.google.com/github/starlingomez/preworkmac/blob/master/SEPTIEMBRE18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# JUSTIN STEELE

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start pitching log: one list entry per outing, every column the same length.
# NOTE(review): IP looks like box-score notation (x.1 / x.2 = one / two thirds of an
# inning) rather than a true decimal — confirm before doing arithmetic on it.
data = {
    'IP': [7, 6, 7.2, 6, 3.1, 4, 6.2, 6, 4, 3, 4, 4.2, 7, 1.1, 6, 7, 6, 5, 5.2, 0.2, 1, 6.2, 5, 6.1, 4, 5, 5, 6],
    'TBF': [28, 24, 27, 23, 18, 20, 27, 25, 12, 11, 22, 20, 25, 6, 23, 25, 23, 23, 24, 3, 5, 27, 26, 26, 15, 22, 22, 26],
    'K/9': [12.9, 3, 4.7, 7.5, 8.1, 6.8, 6.8, 10.5, 6.8, 18, 2.3, 9.6, 10.3, 6.8, 13.5, 6.4, 13.5, 10.8, 11.1, 13.5, 18, 9.5, 12.6, 8.5, 9, 14.4, 9, 16.5],
    'BB/9': [2.6, 4.5, 3.5, 1.5, 5.4, 9, 0, 3, 0, 0, 2.3, 3.9, 0, 6.8, 1.5, 1.3, 0, 0, 0, 0, 18, 1.4, 1.8, 1.4, 0, 3.6, 3.6, 4.5],
    'BABIP': [0.375, 0.222, 0.15, 0.133, 0.538, 0.25, 0.3, 0.313, 0, 0.25, 0.412, 0.091, 0.188, 0.25, 0.25, 0.118, 0.429, 0.438, 0.529, 0.5, 0, 0.333, 0.471, 0.263, 0.273, 0.5, 0.286, 0.364],
    'STRIKEOUT': [10, 2, 4, 5, 3, 3, 5, 7, 3, 6, 1, 5, 8, 1, 9, 5, 9, 6, 7, 1, 2, 7, 7, 6, 4, 8, 5, 11]
}

# Tabulate the log, then separate the predictors from the strikeout target.
# NOTE(review): K/9 and IP are same-game stats and together appear to determine
# STRIKEOUT (K ~= K/9 * IP / 9) — confirm this is intended, it looks like target leakage.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
df = pd.DataFrame(data)
X = df[feature_columns]
y = df['STRIKEOUT']

# Squared-error boosted regressor: 100 estimators, fixed seed.
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds; each fold is tested on rows that come after its training rows.
# NOTE(review): assumes the rows are listed oldest start first — TODO confirm.
tscv = TimeSeriesSplit(n_splits=5)

# One MSE value per fold.
mse_scores = []

for fold_train, fold_test in tscv.split(X):
    # Fit on this fold's training rows.
    model.fit(X.iloc[fold_train], y.iloc[fold_train])

    # Score the held-out rows.
    actual = y.iloc[fold_test]
    y_pred = model.predict(X.iloc[fold_test])
    fold_mse = mean_squared_error(actual, y_pred)
    mse_scores.append(fold_mse)

    print(f'Test Indices: {fold_test}')
    print(f'Predicted Strikeouts: {np.round(y_pred, 2)}')
    print(f'Actual Strikeouts: {actual.values}')
    print(f'Mean Squared Error: {fold_mse}\n')

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every recorded start.
model.fit(X, y)

# Hand-entered example stat line for the upcoming start.
next_game_data = {
    'IP': [6],
    'K/9': [9.65],
    'TBF': [25],
    'BB/9': [1.95],
    'BABIP': [0.25],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [4.95 7.   3.66 3.77]
Actual Strikeouts: [3 6 1 5]
Mean Squared Error: 3.358303932222441

Test Indices: [12 13 14 15]
Predicted Strikeouts: [4.41 2.99 7.   4.14]
Actual Strikeouts: [8 1 9 5]
Mean Squared Error: 5.399230089916571

Test Indices: [16 17 18 19]
Predicted Strikeouts: [8.63 7.99 7.99 6.  ]
Actual Strikeouts: [9 6 7 1]
Mean Squared Error: 7.518659046857579

Test Indices: [20 21 22 23]
Predicted Strikeouts: [3.06 5.   6.02 4.41]
Actual Strikeouts: [2 7 7 6]
Mean Squared Error: 2.1525186345978256

Test Indices: [24 25 26 27]
Predicted Strikeouts: [2.47 8.29 5.7  8.82]
Actual Strikeouts: [ 4  8  5 11]
Mean Squared Error: 1.9193139412159894

Average Mean Squared Error across all folds: 4.069605128962081
Predicted Strikeouts for Next Game: 5.834019660949707


# AUSTIN GOMBER

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Game-by-game pitching log (one list position per start).
data = {
    'IP': [4.2, 6, 3.1, 5.1, 5, 6, 6.2, 4.2, 7, 1, 5, 5.1, 5.2, 2.2, 3, 1, 7, 6, 3, 5, 3.2, 5, 6, 3, 3, 6.2],
    'TBF': [23, 24, 16, 26, 20, 27, 25, 23, 26, 7, 21, 29, 23, 16, 18, 8, 25, 26, 19, 23, 20, 23, 26, 18, 14, 27],
    'K/9': [3.9, 4.5, 8.1, 3.4, 5.4, 3, 14.8, 17.4, 5.1, 0, 7.2, 3.4, 11.1, 10.1, 12, 18, 9, 9, 0, 10.8, 9.8, 9, 4.5, 12, 6, 2.7],
    'BB/9': [7.7, 4.5, 8.1, 6.8, 3.6, 6, 0, 1.9, 2.6, 9, 1.8, 3.4, 4.8, 3.4, 15, 18, 3.9, 0, 3, 1.8, 0, 7.2, 0, 9, 6, 2.7],
    'BABIP': [0.077, 0.125, 0.444, 0.35, 0.214, 0.316, 0.167, 0.545, 0.316, 0.5, 0.214, 0.348, 0.385, 0.545, 0.25, 0.5, 0.133, 0.278, 0.4, 0.308, 0.462, 0.429, 0.364, 0.5, 0.333, 0.227],
    'STRIKEOUT': [2, 3, 3, 2, 3, 2, 11, 9, 4, 0, 4, 2, 7, 3, 4, 2, 7, 6, 0, 6, 4, 5, 3, 4, 2, 2]
}

df = pd.DataFrame(data)

# Predictors and target; the next-game frame below lists its columns in this same order.
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Regressor and splitter used for the walk-forward backtest.
# NOTE(review): TimeSeriesSplit only makes sense if rows are in chronological order — TODO confirm.
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []
for train_rows, test_rows in tscv.split(X):
    X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
    X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Same four report lines per fold, emitted with a single print call.
    print(
        f'Test Indices: {test_rows}',
        f'Predicted Strikeouts: {y_pred.round(2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.asarray(mse_scores).mean()}')

# Example stat line for the next start (hand-entered values).
next_game_data = {'IP': [6], 'K/9': [6], 'TBF': [24], 'BB/9': [2.15], 'BABIP': [0.29]}
next_game_df = pd.DataFrame(next_game_data)

# Train on the full log, then score the example line.
model.fit(X, y)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [6 7 8 9]
Predicted Strikeouts: [3. 3. 3. 2.]
Actual Strikeouts: [11  9  4  0]
Mean Squared Error: 26.25337843001813

Test Indices: [10 11 12 13]
Predicted Strikeouts: [3.   2.   3.   0.87]
Actual Strikeouts: [4 2 7 3]
Mean Squared Error: 5.3843291551854415

Test Indices: [14 15 16 17]
Predicted Strikeouts: [7. 9. 3. 4.]
Actual Strikeouts: [4 2 7 6]
Mean Squared Error: 19.493214808625808

Test Indices: [18 19 20 21]
Predicted Strikeouts: [1.8  8.96 6.33 5.41]
Actual Strikeouts: [0 6 4 5]
Mean Squared Error: 4.402925953408218

Test Indices: [22 23 24 25]
Predicted Strikeouts: [4.14 3.49 1.61 5.42]
Actual Strikeouts: [3 4 2 2]
Mean Squared Error: 3.341643901279852

Average Mean Squared Error across all folds: 11.775098449703489
Predicted Strikeouts for Next Game: 4.030830383300781


# DEAN KREMER

In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Pitching log keyed by stat name; list position i is the i-th recorded start.
data = {
    "IP": [6, 4.1, 4, 6, 4.2, 7, 5, 6, 7, 6, 5.1, 4, 6.2, 6, 7, 6, 6, 4.1, 5.1, 4, 6, 6, 6, 5, 3.1],
    "TBF": [26, 19, 16, 24, 25, 25, 19, 22, 25, 21, 24, 20, 26, 21, 27, 25, 25, 22, 20, 23, 22, 23, 22, 18, 18],
    "K/9": [6, 10.4, 9, 13.5, 5.8, 9, 7.2, 12, 9, 12, 5.1, 4.5, 12.1, 12, 10.3, 12, 6, 14.5, 3.4, 2.3, 12, 15, 12, 3.6, 8.1],
    "BB/9": [1.5, 4.2, 2.3, 0, 1.9, 0, 1.8, 4.5, 2.6, 3, 3.4, 2.3, 2.7, 6, 0, 1.5, 3, 2.1, 1.7, 2.3, 3, 3, 0, 1.8, 5.4],
    "BABIP": [0.35, 0.333, 0.364, 0.25, 0.45, 0.176, 0.083, 0.182, 0.125, 0.182, 0.316, 0.308, 0.214, 0, 0.316, 0.333, 0.278, 0.455, 0.125, 0.444, 0.167, 0.364, 0.231, 0.071, 0.417],
    "STRIKEOUT": [4, 5, 4, 9, 3, 7, 4, 8, 7, 8, 3, 2, 9, 8, 8, 8, 4, 7, 2, 1, 8, 10, 8, 2, 3]
}

# Frame, feature matrix and target vector.
df = pd.DataFrame(data)
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Estimator settings gathered in one place, then unpacked into the regressor.
xgb_params = dict(objective='reg:squarederror', n_estimators=100, random_state=42)
model = XGBRegressor(**xgb_params)

# Ordered 5-fold splitter; the folds are materialised up front and walked in order.
tscv = TimeSeriesSplit(n_splits=5)
folds = list(tscv.split(X))

# Per-fold mean squared errors.
mse_scores = []

for train_index, test_index in folds:
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    rounded_pred = np.round(y_pred, 2)
    print(f'Test Indices: {test_index}')
    print(f'Predicted Strikeouts: {rounded_pred}')
    print(f'Actual Strikeouts: {y_test.to_numpy()}')
    print(f'Mean Squared Error: {mse}\n')

average_mse = np.mean(mse_scores)
print(f'Average Mean Squared Error across all folds: {average_mse}')

# Refit using all starts before scoring the example line.
model.fit(X, y)

# Example inputs for the upcoming start.
# NOTE(review): if IP uses box-score thirds notation (x.0 / x.1 / x.2), 5.15 is not a
# valid innings value — confirm which scale is intended here.
next_game_data = {
    'IP': [5.15],
    'K/9': [7.9],
    'TBF': [23],
    'BB/9': [3.6],
    'BABIP': [0.256],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [5 6 7 8]
Predicted Strikeouts: [4. 4. 5. 4.]
Actual Strikeouts: [7 4 8 7]
Mean Squared Error: 6.748066960477729

Test Indices: [ 9 10 11 12]
Predicted Strikeouts: [8.   3.65 3.48 8.  ]
Actual Strikeouts: [8 3 2 9]
Mean Squared Error: 0.9067961979433932

Test Indices: [13 14 15 16]
Predicted Strikeouts: [8.   5.81 6.62 5.79]
Actual Strikeouts: [8 8 8 4]
Mean Squared Error: 2.4814903413242178

Test Indices: [17 18 19 20]
Predicted Strikeouts: [5.4  3.22 2.   8.  ]
Actual Strikeouts: [7 2 1 8]
Mean Squared Error: 1.259694791901694

Test Indices: [21 22 23 24]
Predicted Strikeouts: [9.   8.   2.08 3.63]
Actual Strikeouts: [10  8  2  3]
Mean Squared Error: 0.35265559922869727

Average Mean Squared Error across all folds: 2.3497407781751463
Predicted Strikeouts for Next Game: 3.334728479385376


In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Same game log as the DEAN KREMER cell above; only the next-game inputs at the
# bottom of this cell differ.
data = {
    "IP": [6, 4.1, 4, 6, 4.2, 7, 5, 6, 7, 6, 5.1, 4, 6.2, 6, 7, 6, 6, 4.1, 5.1, 4, 6, 6, 6, 5, 3.1],
    "TBF": [26, 19, 16, 24, 25, 25, 19, 22, 25, 21, 24, 20, 26, 21, 27, 25, 25, 22, 20, 23, 22, 23, 22, 18, 18],
    "K/9": [6, 10.4, 9, 13.5, 5.8, 9, 7.2, 12, 9, 12, 5.1, 4.5, 12.1, 12, 10.3, 12, 6, 14.5, 3.4, 2.3, 12, 15, 12, 3.6, 8.1],
    "BB/9": [1.5, 4.2, 2.3, 0, 1.9, 0, 1.8, 4.5, 2.6, 3, 3.4, 2.3, 2.7, 6, 0, 1.5, 3, 2.1, 1.7, 2.3, 3, 3, 0, 1.8, 5.4],
    "BABIP": [0.35, 0.333, 0.364, 0.25, 0.45, 0.176, 0.083, 0.182, 0.125, 0.182, 0.316, 0.308, 0.214, 0, 0.316, 0.333, 0.278, 0.455, 0.125, 0.444, 0.167, 0.364, 0.231, 0.071, 0.417],
    "STRIKEOUT": [4, 5, 4, 9, 3, 7, 4, 8, 7, 8, 3, 2, 9, 8, 8, 8, 4, 7, 2, 1, 8, 10, 8, 2, 3]
}

# Build the frame and pull out predictors / target.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
df = pd.DataFrame(data)
X = df[feature_columns]
y = df['STRIKEOUT']

# Boosted regressor (squared error, 100 estimators, seed 42).
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Ordered splitter for the backtest.
# NOTE(review): assumes rows run oldest -> newest — TODO confirm.
tscv = TimeSeriesSplit(n_splits=5)

# Collected fold errors.
mse_scores = []

for fold_train, fold_test in tscv.split(X):
    # Train on the earlier rows of this fold.
    model.fit(X.iloc[fold_train], y.iloc[fold_train])

    # Evaluate on the later rows.
    actual = y.iloc[fold_test]
    y_pred = model.predict(X.iloc[fold_test])
    fold_mse = mean_squared_error(actual, y_pred)
    mse_scores.append(fold_mse)

    print(f'Test Indices: {fold_test}')
    print(f'Predicted Strikeouts: {np.round(y_pred, 2)}')
    print(f'Actual Strikeouts: {actual.values}')
    print(f'Mean Squared Error: {fold_mse}\n')

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Train on the complete log.
model.fit(X, y)

# Alternative example inputs for the upcoming start.
next_game_data = {
    'IP': [5.5],
    'K/9': [8.45],
    'TBF': [22.5],
    'BB/9': [2.95],
    'BABIP': [0.2627],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [5 6 7 8]
Predicted Strikeouts: [4. 4. 5. 4.]
Actual Strikeouts: [7 4 8 7]
Mean Squared Error: 6.748066960477729

Test Indices: [ 9 10 11 12]
Predicted Strikeouts: [8.   3.65 3.48 8.  ]
Actual Strikeouts: [8 3 2 9]
Mean Squared Error: 0.9067961979433932

Test Indices: [13 14 15 16]
Predicted Strikeouts: [8.   5.81 6.62 5.79]
Actual Strikeouts: [8 8 8 4]
Mean Squared Error: 2.4814903413242178

Test Indices: [17 18 19 20]
Predicted Strikeouts: [5.4  3.22 2.   8.  ]
Actual Strikeouts: [7 2 1 8]
Mean Squared Error: 1.259694791901694

Test Indices: [21 22 23 24]
Predicted Strikeouts: [9.   8.   2.08 3.63]
Actual Strikeouts: [10  8  2  3]
Mean Squared Error: 0.35265559922869727

Average Mean Squared Error across all folds: 2.3497407781751463
Predicted Strikeouts for Next Game: 3.339564561843872


# SPENCER SCHWELLENBACH

In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Start-by-start pitching log; all six lists are index-aligned.
data = {
    'IP': [5, 7, 4.1, 6, 5, 3.1, 6, 5.1, 6, 5, 7, 4, 6, 5.2, 4.2, 0.2, 6, 6.2, 6.2, 6.2, 6.2, 5, 5, 6, 6.2, 3, 3.2, 4],
    'TBF': [22, 28, 19, 19, 22, 20, 25, 23, 21, 21, 24, 19, 26, 28, 26, 11, 23, 24, 25, 25, 28, 25, 21, 22, 27, 15, 18, 19],
    'K/9': [9, 9, 12.5, 13.5, 7.2, 8.1, 7.5, 5.1, 7.5, 7.2, 6.4, 11.3, 6, 11.1, 5.8, 0, 9, 12.1, 8.1, 10.8, 9.5, 10.8, 12.6, 13.5, 8.1, 9, 9.8, 4.5],
    'BB/9': [3.6, 2.6, 4.2, 1.5, 3.6, 8.1, 1.5, 5.1, 0, 1.8, 1.3, 0, 4.5, 6.4, 7.7, 40.5, 1.5, 2.7, 2.7, 2.7, 2.7, 9, 1.8, 3, 2.7, 9, 7.4, 4.5],
    'BABIP': [0.286, 0.316, 0.5, 0.222, 0.333, 0.5, 0.235, 0.294, 0.188, 0.154, 0.222, 0.364, 0.333, 0.438, 0.421, 0.571, 0.25, 0.1, 0.294, 0.333, 0.333, 0.429, 0.385, 0.182, 0.278, 0.556, 0.3, 0.333],
    'STRIKEOUT': [5, 7, 6, 9, 4, 3, 5, 3, 5, 4, 5, 5, 4, 7, 3, 0, 6, 9, 6, 8, 7, 6, 7, 9, 6, 3, 4, 2]
}

df = pd.DataFrame(data)

# Five predictors and the strikeout count to be predicted.
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Model and ordered 5-fold splitter.
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []
for train_rows, test_rows in tscv.split(X):
    X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
    X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report: indices, rounded predictions, actuals, error.
    print(
        f'Test Indices: {test_rows}',
        f'Predicted Strikeouts: {y_pred.round(2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.asarray(mse_scores).mean()}')

# Example stat line for the next start (hand-entered values).
next_game_data = {'IP': [6], 'K/9': [10.5], 'TBF': [23.5], 'BB/9': [1.55], 'BABIP': [0.27]}
next_game_df = pd.DataFrame(next_game_data)

# Fit on everything, then predict the example line.
model.fit(X, y)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [5.   4.   5.43 5.48]
Actual Strikeouts: [5 4 5 5]
Mean Squared Error: 0.10511588611410616

Test Indices: [12 13 14 15]
Predicted Strikeouts: [3.64 5.04 3.   3.  ]
Actual Strikeouts: [4 7 3 0]
Mean Squared Error: 3.2440244012806403

Test Indices: [16 17 18 19]
Predicted Strikeouts: [6.46 6.46 4.   6.99]
Actual Strikeouts: [6 9 6 8]
Mean Squared Error: 2.9161299411968002

Test Indices: [20 21 22 23]
Predicted Strikeouts: [6.81 5.21 6.04 9.  ]
Actual Strikeouts: [7 6 7 9]
Mean Squared Error: 0.3943508042056578

Test Indices: [24 25 26 27]
Predicted Strikeouts: [5.99 3.94 5.01 3.05]
Actual Strikeouts: [6 3 4 2]
Mean Squared Error: 0.7519063969233031

Average Mean Squared Error across all folds: 1.4823054859441016
Predicted Strikeouts for Next Game: 6.406247138977051


# RYAN WEATHERS

In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Pitching log, one entry per start in each list.
data = {
    'IP': [6.1, 4, 4.2, 4.1, 6, 5, 4.1, 5, 3.1, 5, 5.1, 5.2, 4, 6, 6.2, 6, 0.2, 5, 4.1, 7, 3, 6, 6, 6, 6, 3.2],
    'TBF': [24, 19, 24, 18, 25, 19, 19, 22, 21, 24, 27, 27, 20, 23, 26, 22, 7, 21, 21, 26, 21, 23, 23, 23, 23, 18],
    'K/9': [7.1, 11.3, 13.5, 2.1, 7.5, 10.8, 6.2, 7.2, 8.1, 12.6, 1.7, 15.9, 2.3, 15, 9.5, 9, 0, 7.2, 4.2, 5.1, 6, 9, 9, 9, 10.5, 9.8],
    'BB/9': [2.8, 4.5, 3.9, 4.2, 0, 3.6, 4.2, 0, 2.7, 3.6, 1.7, 3.2, 4.5, 3, 5.4, 0, 40.5, 5.4, 8.3, 0, 12, 1.5, 3, 3, 4.5, 2.5],
    'BABIP': [0.067, 0.3, 0.467, 0.286, 0.278, 0.1, 0.357, 0.188, 0.588, 0.385, 0.318, 0.533, 0.375, 0.273, 0.267, 0.143, 0.5, 0.231, 0.267, 0.211, 0.364, 0.2, 0.333, 0.2, 0.167, 0.364],
    'STRIKEOUT': [5, 5, 7, 1, 5, 6, 3, 4, 3, 7, 1, 10, 1, 10, 7, 6, 0, 4, 2, 4, 2, 6, 6, 6, 7, 4]
}

# Frame, feature matrix and target vector.
df = pd.DataFrame(data)
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Estimator settings collected in a dict and unpacked into the regressor.
xgb_params = dict(objective='reg:squarederror', n_estimators=100, random_state=42)
model = XGBRegressor(**xgb_params)

# Ordered 5-fold splitter; folds are listed first and then evaluated in order.
tscv = TimeSeriesSplit(n_splits=5)
folds = list(tscv.split(X))

# Per-fold mean squared errors.
mse_scores = []

for train_index, test_index in folds:
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    rounded_pred = np.round(y_pred, 2)
    print(f'Test Indices: {test_index}')
    print(f'Predicted Strikeouts: {rounded_pred}')
    print(f'Actual Strikeouts: {y_test.to_numpy()}')
    print(f'Mean Squared Error: {mse}\n')

average_mse = np.mean(mse_scores)
print(f'Average Mean Squared Error across all folds: {average_mse}')

# Refit using all starts before scoring the example line.
model.fit(X, y)

# Example inputs for the upcoming start.
next_game_data = {
    'IP': [6],
    'K/9': [10.3],
    'TBF': [24],
    'BB/9': [1.5],
    'BABIP': [0.238],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [6 7 8 9]
Predicted Strikeouts: [1. 6. 5. 6.]
Actual Strikeouts: [3 4 3 7]
Mean Squared Error: 3.2470953951706214

Test Indices: [10 11 12 13]
Predicted Strikeouts: [2.08 7.   1.   7.  ]
Actual Strikeouts: [ 1 10  1 10]
Mean Squared Error: 4.793929339591131

Test Indices: [14 15 16 17]
Predicted Strikeouts: [4.99 5.04 1.   4.28]
Actual Strikeouts: [7 6 0 4]
Mean Squared Error: 1.5118961076463258

Test Indices: [18 19 20 21]
Predicted Strikeouts: [3.2  3.81 0.   7.  ]
Actual Strikeouts: [2 4 2 6]
Mean Squared Error: 1.6144889889038128

Test Indices: [22 23 24 25]
Predicted Strikeouts: [5.99 6.   6.31 5.  ]
Actual Strikeouts: [6 6 7 4]
Mean Squared Error: 0.37003090232786917

Average Mean Squared Error across all folds: 2.3074881467279518
Predicted Strikeouts for Next Game: 6.588809013366699


# LANDON KNACK

In [7]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start pitching log; the six lists are index-aligned.
data = {
    'IP': [5.1, 5.1, 5.2, 4.2, 7, 7, 5, 5.2, 6, 4, 7, 5, 6, 5.2, 6, 5, 6, 4.2, 2.1, 6, 2, 7.1, 6.1, 5, 6, 6, 2],
    'TBF': [26, 23, 28, 22, 27, 27, 22, 27, 25, 24, 26, 18, 26, 26, 20, 19, 21, 25, 18, 24, 6, 25, 25, 20, 22, 22, 14],
    'K/9': [6.8, 8.4, 6.4, 0, 3.9, 3.9, 3.6, 3.2, 7.5, 6.8, 10.3, 10.8, 6, 12.7, 13.5, 7.2, 10.5, 9.6, 0, 13.5, 4.5, 6.1, 7.1, 9, 9, 7.5, 9],
    'BB/9': [3.4, 1.7, 1.6, 3.9, 2.6, 1.3, 3.6, 7.9, 1.5, 2.3, 1.3, 0, 0, 4.8, 1.5, 3.6, 1.5, 3.9, 11.6, 1.5, 0, 3.7, 0, 1.8, 1.5, 3, 4.5],
    'BABIP': [0.444, 0.353, 0.381, 0.35, 0.1, 0.19, 0.389, 0.316, 0.278, 0.444, 0.235, 0.273, 0.333, 0.467, 0.1, 0.231, 0.154, 0.438, 0.5, 0.429, 0, 0.118, 0.158, 0.231, 0.2, 0.133, 0.636],
    'STRIKEOUT': [4, 5, 4, 0, 3, 3, 2, 2, 5, 3, 8, 6, 4, 8, 9, 4, 7, 5, 0, 9, 1, 5, 5, 5, 6, 5, 2]
}

# Tabulate, then split into predictors and the strikeout target.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
df = pd.DataFrame(data)
X = df[feature_columns]
y = df['STRIKEOUT']

# Boosted regressor (squared error, 100 estimators, seed 42).
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Ordered splitter for the backtest.
tscv = TimeSeriesSplit(n_splits=5)

# Collected fold errors.
mse_scores = []

for fold_train, fold_test in tscv.split(X):
    # Train on the earlier rows of this fold.
    model.fit(X.iloc[fold_train], y.iloc[fold_train])

    # Evaluate on the later rows.
    actual = y.iloc[fold_test]
    y_pred = model.predict(X.iloc[fold_test])
    fold_mse = mean_squared_error(actual, y_pred)
    mse_scores.append(fold_mse)

    print(f'Test Indices: {fold_test}')
    print(f'Predicted Strikeouts: {np.round(y_pred, 2)}')
    print(f'Actual Strikeouts: {actual.values}')
    print(f'Mean Squared Error: {fold_mse}\n')

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Train on the complete log.
model.fit(X, y)

# Example inputs for the upcoming start.
# NOTE(review): 4.6 does not fit x.0 / x.1 / x.2 innings notation, which the IP column
# above appears to use — confirm the intended scale.
next_game_data = {
    'IP': [4.6],
    'K/9': [9.3],
    'TBF': [19],
    'BB/9': [2],
    'BABIP': [0.2095],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [4.   4.   0.   4.29]
Actual Strikeouts: [2 5 3 8]
Mean Squared Error: 6.944034904560157

Test Indices: [11 12 13 14]
Predicted Strikeouts: [5.91 3.03 5.91 5.91]
Actual Strikeouts: [6 4 8 9]
Mean Squared Error: 3.717092882994976

Test Indices: [15 16 17 18]
Predicted Strikeouts: [3.52 7.86 4.98 0.02]
Actual Strikeouts: [4 7 5 0]
Mean Squared Error: 0.24241126057159296

Test Indices: [19 20 21 22]
Predicted Strikeouts: [9.   3.37 3.91 4.  ]
Actual Strikeouts: [9 1 5 5]
Mean Squared Error: 1.9505224538474977

Test Indices: [23 24 25 26]
Predicted Strikeouts: [4.98 4.98 4.98 3.25]
Actual Strikeouts: [5 6 5 2]
Mean Squared Error: 0.6531442719182934

Average Mean Squared Error across all folds: 2.7014411547785033
Predicted Strikeouts for Next Game: 5.139510631561279


# FRAMBER VALDEZ

In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Game-by-game pitching log (one list position per start).
data = {
    "IP": [5, 6, 5, 5, 2.1, 7.1, 6.1, 4.1, 5.2, 5.2, 6, 2.1, 5, 3, 2, 4.1, 5.1, 4.2, 6, 4.1, 5, 6.2, 4, 4, 5, 5, 6],
    "TBF": [23, 22, 21, 23, 16, 29, 24, 23, 23, 27, 21, 17, 20, 17, 7, 22, 22, 21, 27, 18, 22, 23, 20, 19, 18, 22, 24],
    "K/9": [1.8, 7.5, 12.6, 9, 3.9, 2.5, 0, 4.2, 4.8, 4.8, 6, 7.7, 9, 12, 4.5, 4.2, 3.4, 11.6, 4.5, 6.2, 7.2, 6.8, 9, 4.5, 7.2, 5.4, 4.5],
    "BB/9": [3.6, 0, 3.6, 7.2, 0, 2.5, 2.8, 8.3, 1.6, 3.2, 0, 7.7, 5.4, 12, 0, 4.2, 1.7, 1.9, 3, 2.1, 1.8, 1.4, 2.3, 2.3, 3.6, 1.8, 1.5],
    "BABIP": [0.211, 0.188, 0.273, 0.357, 0.615, 0.2, 0.182, 0.267, 0.278, 0.364, 0.125, 0.583, 0.091, 0.375, 0.167, 0.353, 0.235, 0.385, 0.455, 0.231, 0.333, 0.063, 0.429, 0.333, 0.167, 0.412, 0.211],
    "STRIKEOUT": [1, 5, 7, 5, 1, 2, 0, 2, 3, 3, 4, 2, 5, 4, 1, 2, 2, 6, 3, 3, 4, 5, 4, 2, 4, 3, 3]
}

df = pd.DataFrame(data)

# Predictors and target; the next-game frame below lists its columns in this same order.
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Model and ordered 5-fold splitter.
# NOTE(review): TimeSeriesSplit only makes sense if rows are in chronological order — TODO confirm.
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []
for train_rows, test_rows in tscv.split(X):
    X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
    X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report: indices, rounded predictions, actuals, error.
    print(
        f'Test Indices: {test_rows}',
        f'Predicted Strikeouts: {y_pred.round(2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.asarray(mse_scores).mean()}')

# Example stat line for the next start (hand-entered values).
# NOTE(review): 6.05 does not fit x.0 / x.1 / x.2 innings notation, which the IP column
# above appears to use — confirm the intended scale.
next_game_data = {'IP': [6.05], 'K/9': [9], 'TBF': [25], 'BB/9': [2.6], 'BABIP': [0.2795]}
next_game_df = pd.DataFrame(next_game_data)

# Fit on everything, then predict the example line.
model.fit(X, y)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [1.   1.   1.   1.06]
Actual Strikeouts: [2 3 3 4]
Mean Squared Error: 4.404476041122745

Test Indices: [11 12 13 14]
Predicted Strikeouts: [4.91 5.   4.91 1.88]
Actual Strikeouts: [2 5 4 1]
Mean Squared Error: 2.5112521611917487

Test Indices: [15 16 17 18]
Predicted Strikeouts: [1.07 1.03 4.3  1.16]
Actual Strikeouts: [2 2 6 3]
Mean Squared Error: 2.017164226676101

Test Indices: [19 20 21 22]
Predicted Strikeouts: [3.45 3.77 3.87 4.78]
Actual Strikeouts: [3 4 5 4]
Mean Squared Error: 0.5359833028122125

Test Indices: [23 24 25 26]
Predicted Strikeouts: [2.03 3.77 2.55 1.83]
Actual Strikeouts: [2 4 3 3]
Mean Squared Error: 0.40674435369331263

Average Mean Squared Error across all folds: 1.975124017099224
Predicted Strikeouts for Next Game: 4.988246917724609


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# NOTE(review): this cell repeats the FRAMBER VALDEZ cell directly above — same game
# log and same next-game inputs. Confirm whether it is still needed.
data = {
    "IP": [5, 6, 5, 5, 2.1, 7.1, 6.1, 4.1, 5.2, 5.2, 6, 2.1, 5, 3, 2, 4.1, 5.1, 4.2, 6, 4.1, 5, 6.2, 4, 4, 5, 5, 6],
    "TBF": [23, 22, 21, 23, 16, 29, 24, 23, 23, 27, 21, 17, 20, 17, 7, 22, 22, 21, 27, 18, 22, 23, 20, 19, 18, 22, 24],
    "K/9": [1.8, 7.5, 12.6, 9, 3.9, 2.5, 0, 4.2, 4.8, 4.8, 6, 7.7, 9, 12, 4.5, 4.2, 3.4, 11.6, 4.5, 6.2, 7.2, 6.8, 9, 4.5, 7.2, 5.4, 4.5],
    "BB/9": [3.6, 0, 3.6, 7.2, 0, 2.5, 2.8, 8.3, 1.6, 3.2, 0, 7.7, 5.4, 12, 0, 4.2, 1.7, 1.9, 3, 2.1, 1.8, 1.4, 2.3, 2.3, 3.6, 1.8, 1.5],
    "BABIP": [0.211, 0.188, 0.273, 0.357, 0.615, 0.2, 0.182, 0.267, 0.278, 0.364, 0.125, 0.583, 0.091, 0.375, 0.167, 0.353, 0.235, 0.385, 0.455, 0.231, 0.333, 0.063, 0.429, 0.333, 0.167, 0.412, 0.211],
    "STRIKEOUT": [1, 5, 7, 5, 1, 2, 0, 2, 3, 3, 4, 2, 5, 4, 1, 2, 2, 6, 3, 3, 4, 5, 4, 2, 4, 3, 3]
}

# Frame, feature matrix and target vector.
df = pd.DataFrame(data)
X = df[['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df['STRIKEOUT']

# Estimator settings collected in a dict and unpacked into the regressor.
xgb_params = dict(objective='reg:squarederror', n_estimators=100, random_state=42)
model = XGBRegressor(**xgb_params)

# Ordered 5-fold splitter; folds are listed first and then evaluated in order.
tscv = TimeSeriesSplit(n_splits=5)
folds = list(tscv.split(X))

# Per-fold mean squared errors.
mse_scores = []

for train_index, test_index in folds:
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    rounded_pred = np.round(y_pred, 2)
    print(f'Test Indices: {test_index}')
    print(f'Predicted Strikeouts: {rounded_pred}')
    print(f'Actual Strikeouts: {y_test.to_numpy()}')
    print(f'Mean Squared Error: {mse}\n')

average_mse = np.mean(mse_scores)
print(f'Average Mean Squared Error across all folds: {average_mse}')

# Refit using all starts before scoring the example line.
model.fit(X, y)

# Example inputs for the upcoming start.
next_game_data = {
    'IP': [6.05],
    'K/9': [9],
    'TBF': [25],
    'BB/9': [2.6],
    'BABIP': [0.2795],
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [1.   1.   1.   1.06]
Actual Strikeouts: [2 3 3 4]
Mean Squared Error: 4.404476041122745

Test Indices: [11 12 13 14]
Predicted Strikeouts: [4.91 5.   4.91 1.88]
Actual Strikeouts: [2 5 4 1]
Mean Squared Error: 2.5112521611917487

Test Indices: [15 16 17 18]
Predicted Strikeouts: [1.07 1.03 4.3  1.16]
Actual Strikeouts: [2 2 6 3]
Mean Squared Error: 2.017164226676101

Test Indices: [19 20 21 22]
Predicted Strikeouts: [3.45 3.77 3.87 4.78]
Actual Strikeouts: [3 4 5 4]
Mean Squared Error: 0.5359833028122125

Test Indices: [23 24 25 26]
Predicted Strikeouts: [2.03 3.77 2.55 1.83]
Actual Strikeouts: [2 4 3 3]
Mean Squared Error: 0.40674435369331263

Average Mean Squared Error across all folds: 1.975124017099224
Predicted Strikeouts for Next Game: 4.988246917724609


# DYLAN CEASE

In [11]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 12 * 6 / 9 = 8), so the backtest may understate real forecasting error.
data = {
    "IP": [6, 5, 4.2, 5.2, 5, 5.2, 6, 6, 7, 6, 6, 7, 5, 5, 2.2, 5.2, 6.1, 4.2, 4, 4.2, 6, 6.2, 7, 2, 5, 3.1, 6, 7],
    "TBF": [26, 23, 18, 26, 24, 22, 26, 25, 26, 28, 24, 28, 22, 19, 13, 26, 26, 23, 23, 23, 24, 27, 28, 16, 25, 18, 23, 27],
    "K/9": [12, 3.6, 7.7, 3.2, 5.4, 4.8, 7.5, 9, 7.7, 0, 9, 11.6, 18, 9, 6.7, 11.1, 4.3, 9.6, 2.3, 7.7, 1.5, 2.7, 2.6, 0, 7.2, 8.1, 4.5, 9],
    "BB/9": [0, 5.4, 1.9, 3.2, 3.6, 3.2, 3, 3, 1.3, 1.5, 1.5, 1.3, 3.6, 1.8, 6.7, 4.8, 5.7, 3.9, 6.8, 3.9, 3, 1.4, 0, 22.5, 5.4, 5.4, 3, 1.3],
    "BABIP": [0.471, 0.25, 0.25, 0.364, 0.375, 0.133, 0.368, 0.353, 0.211, 0.462, 0.294, 0.294, 0.444, 0.182, 0.222, 0.375, 0.211, 0.5, 0.389, 0.313, 0.19, 0.292, 0.24, 0.5, 0.353, 0.364, 0.167, 0.263],
    "STRIKEOUT": [8, 2, 4, 2, 3, 3, 5, 6, 6, 0, 6, 9, 10, 5, 2, 7, 3, 5, 1, 4, 1, 2, 2, 0, 4, 3, 3, 7]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [6],
    'K/9': [10.5],
    'TBF': [23],
    'BB/9': [3],
    'BABIP': [0.267],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [5.83 4.46 6.   6.  ]
Actual Strikeouts: [6 0 6 9]
Mean Squared Error: 7.227012711731788

Test Indices: [12 13 14 15]
Predicted Strikeouts: [7.23 5.87 2.97 5.71]
Actual Strikeouts: [10  5  2  7]
Mean Squared Error: 2.756226426087096

Test Indices: [16 17 18 19]
Predicted Strikeouts: [1.01 4.14 1.51 4.  ]
Actual Strikeouts: [3 5 1 4]
Mean Squared Error: 1.2383916286711454

Test Indices: [20 21 22 23]
Predicted Strikeouts: [0.32 1.23 1.28 0.  ]
Actual Strikeouts: [1 2 2 0]
Mean Squared Error: 0.3945092004709285

Test Indices: [24 25 26 27]
Predicted Strikeouts: [2.55 3.96 3.06 6.09]
Actual Strikeouts: [4 3 3 7]
Mean Squared Error: 0.9601767395312351

Average Mean Squared Error across all folds: 2.5152633412984384
Predicted Strikeouts for Next Game: 5.998529434204102


# TANNER BIBEE

In [12]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 9 * 6 / 9 = 6), so the backtest may understate real forecasting error.
data = {
    'IP': [6, 4.2, 4.1, 7, 6, 6, 5, 4.2, 6, 5.2, 6, 3, 4, 6, 2, 7, 6, 5, 7, 6, 5.2, 5.2, 6.1, 6, 5.1, 2.2, 5],
    'TBF': [25, 20, 27, 27, 26, 23, 20, 22, 24, 24, 22, 13, 22, 24, 7, 27, 22, 21, 23, 24, 29, 25, 24, 22, 22, 16, 22],
    'K/9': [9, 3.9, 6.2, 6.4, 10.5, 10.5, 12.6, 15.4, 9, 6.4, 7.5, 9, 4.5, 6, 4.5, 12.9, 10.5, 9, 9, 12, 6.4, 4.8, 5.7, 9, 8.4, 16.9, 12.6],
    'BB/9': [4.5, 1.9, 2.1, 2.6, 3, 4.5, 0, 5.8, 1.5, 6.4, 1.5, 6, 9, 3, 0, 1.3, 1.5, 0, 1.3, 3, 4.8, 1.6, 2.8, 0, 1.7, 10.1, 3.6],
    'BABIP': [0.267, 0.375, 0.524, 0.25, 0.333, 0.091, 0.308, 0.455, 0.353, 0.25, 0.188, 0.375, 0.4, 0.176, 0.167, 0.286, 0.286, 0.286, 0.267, 0.308, 0.316, 0.333, 0.125, 0.2, 0.375, 0.625, 0.385],
    'STRIKEOUT': [6, 2, 3, 5, 7, 7, 7, 8, 6, 4, 5, 3, 2, 4, 1, 10, 7, 5, 7, 8, 4, 3, 4, 6, 5, 5, 7]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [5.2],
    'K/9': [9.9],
    'TBF': [23],
    'BB/9': [1.8],
    'BABIP': [0.294],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [2.96 6.   5.79 5.79]
Actual Strikeouts: [8 6 4 5]
Mean Squared Error: 7.298848695363972

Test Indices: [11 12 13 14]
Predicted Strikeouts: [7.12 2.64 3.76 2.64]
Actual Strikeouts: [3 2 4 1]
Mean Squared Error: 5.033156562651882

Test Indices: [15 16 17 18]
Predicted Strikeouts: [7.   7.   5.75 6.  ]
Actual Strikeouts: [10  7  5  7]
Mean Squared Error: 2.638485090699362

Test Indices: [19 20 21 22]
Predicted Strikeouts: [7.   4.01 3.83 3.78]
Actual Strikeouts: [8 4 3 4]
Mean Squared Error: 0.43436874767195377

Test Indices: [23 24 25 26]
Predicted Strikeouts: [5.8  4.8  7.39 7.46]
Actual Strikeouts: [6 5 5 7]
Mean Squared Error: 1.5046485308104138

Average Mean Squared Error across all folds: 3.381901525439517
Predicted Strikeouts for Next Game: 6.055200099945068


# BAILEY OBER

In [13]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 9 * 3 / 9 = 3), so the backtest may understate real forecasting error.
data = {
    'IP': [3, 7, 6, 6, 2, 6, 4.2, 4, 1.2, 6, 4, 6, 5.1, 5, 5.2, 7, 6, 1, 7.1, 2.2, 5.1, 3, 2, 6, 2, 4.1, 6.1, 4.2],
    'TBF': [19, 23, 23, 26, 15, 23, 21, 20, 7, 29, 20, 21, 27, 20, 24, 24, 22, 4, 25, 13, 22, 15, 12, 20, 7, 21, 28, 19],
    'K/9': [9, 5.1, 3, 7.5, 4.5, 4.5, 3.9, 9, 5.4, 10.5, 4.5, 7.5, 11.8, 5.4, 4.8, 5.1, 12, 0, 7.4, 16.9, 6.8, 12, 4.5, 7.5, 9, 12.5, 5.7, 9.6],
    'BB/9': [18, 0, 7.5, 1.5, 9, 3, 1.9, 9, 5.4, 0, 2.3, 4.5, 0, 1.8, 3.2, 1.3, 1.5, 0, 0, 0, 1.7, 3, 9, 0, 0, 6.2, 2.8, 1.9],
    'BABIP': [0.222, 0.158, 0.063, 0.35, 0.545, 0.118, 0.267, 0.273, 0.2, 0.421, 0.412, 0.077, 0.5, 0.267, 0.278, 0.316, 0.167, 0.25, 0.211, 0.571, 0.412, 0.556, 0.444, 0.133, 0.2, 0.417, 0.381, 0.333],
    'STRIKEOUT': [3, 4, 2, 5, 1, 3, 2, 4, 1, 7, 2, 5, 7, 3, 3, 4, 8, 0, 6, 5, 4, 4, 1, 5, 2, 6, 4, 5]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [6],
    'K/9': [9],
    'TBF': [23],
    'BB/9': [2.3],
    'BABIP': [0.2],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [2.78 5.   2.16 4.  ]
Actual Strikeouts: [1 7 2 5]
Mean Squared Error: 2.052252495126396

Test Indices: [12 13 14 15]
Predicted Strikeouts: [7.   2.71 2.22 4.  ]
Actual Strikeouts: [7 3 3 4]
Mean Squared Error: 0.17187487175212368

Test Indices: [16 17 18 19]
Predicted Strikeouts: [5.73 1.46 3.99 3.23]
Actual Strikeouts: [8 0 6 5]
Mean Squared Error: 3.613066583207388

Test Indices: [20 21 22 23]
Predicted Strikeouts: [3.02 6.8  1.   5.01]
Actual Strikeouts: [4 4 1 5]
Mean Squared Error: 2.205956864706099

Test Indices: [24 25 26 27]
Predicted Strikeouts: [5.09 4.25 3.14 3.98]
Actual Strikeouts: [2 6 4 5]
Mean Squared Error: 3.604243941889095

Average Mean Squared Error across all folds: 2.3294789513362204
Predicted Strikeouts for Next Game: 5.265422821044922


# TANNER HOUCK

In [14]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 9 * 6 / 9 = 6), so the backtest may understate real forecasting error.
data = {
    'IP': [6, 7.2, 7, 5, 6, 5, 7, 7, 6, 5.2, 5, 4.1, 5, 3.2, 5, 1, 6.2, 6, 5.2, 6, 6, 5.1, 6, 5.1, 6, 5, 5, 4.2],
    'TBF': [24, 27, 29, 20, 24, 25, 26, 23, 22, 24, 24, 28, 20, 19, 21, 4, 28, 23, 26, 26, 23, 29, 26, 24, 25, 18, 20, 19],
    'K/9': [9, 5.9, 12.9, 10.8, 13.5, 9, 9, 12.9, 15, 6.4, 7.2, 6.2, 7.2, 4.9, 12.6, 0, 12.1, 10.5, 6.4, 4.5, 6, 3.4, 13.5, 8.4, 13.5, 10.8, 3.6, 11.6],
    'BB/9': [4.5, 1.2, 1.3, 5.4, 0, 3.6, 2.6, 0, 0, 3.2, 5.4, 8.3, 5.4, 4.9, 1.8, 0, 1.4, 4.5, 1.6, 3, 1.5, 3.4, 4.5, 3.4, 0, 1.8, 1.8, 0],
    'BABIP': [0.133, 0.1, 0.353, 0.182, 0.385, 0.5, 0.125, 0.083, 0.333, 0.294, 0.375, 0.5, 0.154, 0.4, 0.385, 0.25, 0.389, 0.167, 0.316, 0.3, 0.222, 0.48, 0.231, 0.375, 0.429, 0.273, 0.235, 0.2],
    'STRIKEOUT': [6, 5, 10, 6, 9, 5, 7, 10, 10, 4, 4, 3, 4, 2, 7, 0, 9, 7, 4, 3, 4, 2, 9, 5, 9, 6, 2, 6]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [6],
    'K/9': [8.1],
    'TBF': [26],
    'BB/9': [2.65],
    'BABIP': [0.27],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [9.   5.99 5.88 5.  ]
Actual Strikeouts: [10  4  4  3]
Mean Squared Error: 3.1262856502883665

Test Indices: [12 13 14 15]
Predicted Strikeouts: [5.18 3.   5.05 4.11]
Actual Strikeouts: [4 2 7 0]
Mean Squared Error: 5.778611603751429

Test Indices: [16 17 18 19]
Predicted Strikeouts: [6.76 6.   4.09 5.08]
Actual Strikeouts: [9 7 4 3]
Mean Squared Error: 2.5905339869191266

Test Indices: [20 21 22 23]
Predicted Strikeouts: [3.44 2.98 7.65 4.  ]
Actual Strikeouts: [4 2 9 5]
Mean Squared Error: 1.0277561359847738

Test Indices: [24 25 26 27]
Predicted Strikeouts: [9.   5.68 2.14 6.91]
Actual Strikeouts: [9 6 2 6]
Mean Squared Error: 0.23797524610536414

Average Mean Squared Error across all folds: 2.552232524609812
Predicted Strikeouts for Next Game: 4.007012844085693


# DJ HERZ

In [15]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (second row: 10.5 * 6 / 9 = 7), so the backtest may understate real forecasting error.
data = {
    'IP': [4.2, 6, 4.1, 6.1, 7, 5, 3.2, 4.2, 5.1, 6.1, 5, 5, 3.1, 5, 6, 5, 1, 5, 4.2, 2, 6, 8, 4.1, 3, 7, 7.1, 3.2],
    'TBF': [22, 23, 20, 28, 22, 23, 18, 24, 22, 25, 23, 21, 13, 23, 20, 21, 5, 17, 19, 7, 26, 28, 21, 17, 27, 26, 19],
    'K/9': [9.6, 10.5, 4.2, 9.9, 11.6, 12.6, 2.5, 11.6, 8.4, 5.7, 5.4, 10.8, 21.6, 7.2, 12, 12.6, 27, 9, 3.9, 13.5, 12, 1.1, 14.5, 9, 9, 6.1, 2.5],
    'BB/9': [5.8, 4.5, 4.2, 1.4, 0, 5.4, 7.4, 1.9, 1.7, 0, 7.2, 1.8, 0, 1.8, 3, 1.8, 0, 1.8, 3.9, 0, 1.5, 1.1, 4.2, 9, 1.3, 1.2, 4.9],
    'BABIP': [0.308, 0.154, 0.286, 0.45, 0.077, 0.182, 0.231, 0.438, 0.267, 0.316, 0.267, 0.25, 0.5, 0.412, 0, 0.333, 1, 0.091, 0.143, 0, 0.333, 0, 0.4, 0.545, 0.278, 0.2, 0.4],
    'STRIKEOUT': [5, 7, 2, 7, 9, 7, 1, 6, 5, 4, 3, 6, 8, 4, 8, 7, 3, 5, 2, 3, 8, 1, 7, 3, 7, 5, 1]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [5],
    'K/9': [10.4],
    'TBF': [20],
    'BB/9': [3.5],
    'BABIP': [0.25],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [5. 7. 7. 7.]
Actual Strikeouts: [6 5 4 3]
Mean Squared Error: 7.49987864789415

Test Indices: [11 12 13 14]
Predicted Strikeouts: [7.   5.41 3.05 7.  ]
Actual Strikeouts: [6 8 4 8]
Mean Squared Error: 2.404834039420791

Test Indices: [15 16 17 18]
Predicted Strikeouts: [6.84 8.   5.19 2.85]
Actual Strikeouts: [7 3 5 2]
Mean Squared Error: 6.444101596976736

Test Indices: [19 20 21 22]
Predicted Strikeouts: [4.92 7.05 3.53 6.93]
Actual Strikeouts: [3 8 1 7]
Mean Squared Error: 2.748788461292577

Test Indices: [23 24 25 26]
Predicted Strikeouts: [3.01 6.21 4.61 1.  ]
Actual Strikeouts: [3 7 5 1]
Mean Squared Error: 0.19573861527178238

Average Mean Squared Error across all folds: 3.858668272171207
Predicted Strikeouts for Next Game: 6.013082027435303


# JOSE QUINTANA

In [16]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (fourth row: 7.2 * 5 / 9 = 4), so the backtest may understate real forecasting error.
data = {
    'IP': [4.2, 5.2, 5.1, 5, 5, 8, 2.2, 5, 5.1, 6, 6, 4, 3.2, 6, 6.1, 4, 7, 7, 5.2, 5, 6, 5, 6.2, 4, 5, 6.1, 5, 6.2, 7],
    'TBF': [21, 26, 23, 21, 24, 29, 19, 22, 21, 21, 26, 21, 18, 21, 25, 21, 28, 24, 25, 22, 24, 22, 27, 23, 24, 24, 23, 25, 23],
    'K/9': [7.7, 6.4, 6.8, 7.2, 7.2, 3.4, 6.7, 5.4, 6.8, 6, 4.5, 9, 2.5, 9, 11.4, 15.8, 1.3, 6.4, 12.7, 10.8, 7.5, 7.2, 10.8, 2.3, 5.4, 0, 10.8, 8.1, 5.1],
    'BB/9': [3.9, 6.4, 3.4, 5.4, 5.4, 1.1, 3.4, 3.6, 0, 0, 0, 6.8, 4.9, 3, 1.4, 6.8, 3.9, 1.3, 1.6, 9, 1.5, 7.2, 2.7, 9, 3.6, 2.8, 5.4, 2.7, 0],
    'BABIP': [0.357, 0.235, 0.294, 0.286, 0.375, 0.12, 0.625, 0.214, 0.25, 0.2, 0.286, 0.2, 0.429, 0.083, 0.267, 0.5, 0.174, 0.059, 0.167, 0.2, 0.278, 0.231, 0.2, 0.375, 0.353, 0.182, 0.429, 0.294, 0.158],
    'STRIKEOUT': [4, 4, 4, 4, 4, 3, 2, 3, 4, 4, 3, 4, 1, 6, 8, 7, 1, 5, 8, 6, 5, 4, 8, 1, 3, 0, 6, 6, 4]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [5.1],
    'K/9': [6.8],
    'TBF': [23],
    'BB/9': [3.4],
    'BABIP': [0.25],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 9 10 11 12]
Predicted Strikeouts: [3.   3.   2.31 2.27]
Actual Strikeouts: [4 3 4 1]
Mean Squared Error: 1.363493593224078

Test Indices: [13 14 15 16]
Predicted Strikeouts: [4.   4.   4.   2.54]
Actual Strikeouts: [6 8 7 1]
Mean Squared Error: 7.840620756880298

Test Indices: [17 18 19 20]
Predicted Strikeouts: [3.66 7.91 5.91 4.04]
Actual Strikeouts: [5 8 6 5]
Mean Squared Error: 0.683300540107723

Test Indices: [21 22 23 24]
Predicted Strikeouts: [4.01 6.86 1.03 2.99]
Actual Strikeouts: [4 8 1 3]
Mean Squared Error: 0.3258415818590663

Test Indices: [25 26 27 28]
Predicted Strikeouts: [1.   7.   5.06 3.01]
Actual Strikeouts: [0 6 6 4]
Mean Squared Error: 0.9707430125351841

Average Mean Squared Error across all folds: 2.23679989692127
Predicted Strikeouts for Next Game: 4.003602981567383


# FREDDY PERALTA

In [18]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 18 * 5 / 9 = 10), so the backtest may understate real forecasting error.
data = {
    'IP': [5, 7, 6.2, 6, 5, 5.1, 5.2, 5, 7.2, 5.2, 6, 7, 5.2, 5.1, 6, 5.1, 2, 4.2, 7, 1, 6, 5.1, 7, 6, 7.2, 5, 5.2],
    'TBF': [21, 24, 24, 24, 25, 27, 25, 22, 29, 27, 23, 28, 22, 27, 24, 25, 16, 19, 24, 5, 26, 20, 23, 22, 28, 20, 24],
    'K/9': [18, 5.1, 12.1, 7.5, 9, 5.1, 7.9, 5.4, 12.9, 11.1, 9, 5.1, 3.2, 11.8, 9, 10.1, 4.5, 7.7, 7.7, 0, 9, 6.8, 5.1, 7.5, 7, 10.8, 15.9],
    'BB/9': [1.8, 1.3, 1.4, 0, 0, 1.7, 3.2, 1.8, 4.7, 6.4, 1.5, 5.1, 1.6, 3.4, 0, 8.4, 0, 0, 1.3, 18, 3, 1.7, 0, 1.5, 1.2, 1.8, 3.2],
    'BABIP': [0.5, 0.211, 0.214, 0.316, 0.526, 0.5, 0.353, 0.353, 0.143, 0.286, 0.2, 0.167, 0.263, 0.5, 0.267, 0.308, 0.571, 0.4, 0.176, 0, 0.25, 0.143, 0.158, 0.071, 0.19, 0.5, 0.4],
    'STRIKEOUT': [10, 4, 9, 5, 5, 3, 5, 3, 11, 7, 6, 4, 2, 7, 6, 6, 1, 4, 6, 0, 6, 4, 4, 5, 6, 6, 10]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [5.1],
    'K/9': [10.5],
    'TBF': [23],
    'BB/9': [3],
    'BABIP': [0.2585],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [4.05 9.   3.95 5.  ]
Actual Strikeouts: [ 3 11  7  6]
Mean Squared Error: 3.8582557310230214

Test Indices: [11 12 13 14]
Predicted Strikeouts: [3.99 4.06 6.86 5.3 ]
Actual Strikeouts: [4 2 7 6]
Mean Squared Error: 1.1870720998297202

Test Indices: [15 16 17 18]
Predicted Strikeouts: [5.12 2.   4.92 5.85]
Actual Strikeouts: [6 1 4 6]
Mean Squared Error: 0.6598681450651753

Test Indices: [19 20 21 22]
Predicted Strikeouts: [1.  6.  3.2 4. ]
Actual Strikeouts: [0 6 4 4]
Mean Squared Error: 0.4100736575282049

Test Indices: [23 24 25 26]
Predicted Strikeouts: [ 5.63  4.37  4.85 10.73]
Actual Strikeouts: [ 5  6  6 10]
Mean Squared Error: 1.2285415264176436

Average Mean Squared Error across all folds: 1.4687622319727531
Predicted Strikeouts for Next Game: 6.00051736831665


# AARON NOLA

In [19]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (third row: 4.5 * 6 / 9 = 3), so the backtest may understate real forecasting error.
data = {
    'IP': [5.2, 5.1, 6, 3.2, 5, 5.2, 5, 4.2, 3.2, 4, 5.1, 2, 3.1, 5, 4.2, 7, 6, 3.2, 6, 5, 5, 5, 5, 1.2, 5, 5, 7],
    'TBF': [24, 22, 22, 21, 24, 21, 23, 21, 19, 18, 23, 9, 14, 24, 18, 26, 23, 16, 22, 19, 21, 17, 22, 14, 23, 22, 24],
    'K/9': [3.2, 8.4, 4.5, 7.4, 14.4, 7.9, 7.2, 13.5, 14.7, 13.5, 5.1, 22.5, 13.5, 5.4, 13.5, 9, 9, 2.5, 6, 5.4, 14.4, 7.2, 12.6, 0, 12.6, 9, 7.7],
    'BB/9': [6.4, 3.4, 1.5, 2.5, 7.2, 1.6, 5.4, 5.8, 9.8, 9, 1.7, 4.5, 0, 9, 1.9, 1.3, 4.5, 0, 0, 0, 3.6, 1.8, 5.4, 16.2, 3.6, 5.4, 0],
    'BABIP': [0.333, 0.467, 0.111, 0.467, 0.364, 0.077, 0.143, 0.3, 0.375, 0.25, 0.294, 0.333, 0.25, 0.214, 0.125, 0.222, 0.143, 0.25, 0.167, 0.313, 0.364, 0.167, 0.364, 0.6, 0.385, 0.286, 0.222],
    'STRIKEOUT': [2, 5, 3, 3, 8, 5, 4, 7, 6, 6, 3, 5, 5, 3, 7, 7, 6, 1, 4, 3, 8, 4, 7, 0, 7, 5, 6]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [6],
    'K/9': [7.95],
    'TBF': [24.5],
    'BB/9': [2.4],
    'BABIP': [0.297],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [6.96 8.   6.96 3.16]
Actual Strikeouts: [7 6 6 3]
Mean Squared Error: 1.2356321243850772

Test Indices: [11 12 13 14]
Predicted Strikeouts: [6.   6.   4.33 7.  ]
Actual Strikeouts: [5 5 3 7]
Mean Squared Error: 0.9398595147526976

Test Indices: [15 16 17 18]
Predicted Strikeouts: [6.23 6.24 2.2  3.08]
Actual Strikeouts: [7 6 1 4]
Mean Squared Error: 0.7316821158843965

Test Indices: [19 20 21 22]
Predicted Strikeouts: [2.98 6.03 3.87 6.19]
Actual Strikeouts: [3 8 4 7]
Mean Squared Error: 1.1332799027208296

Test Indices: [23 24 25 26]
Predicted Strikeouts: [1.   7.   6.87 3.55]
Actual Strikeouts: [0 7 5 6]
Mean Squared Error: 2.6300921061570683

Average Mean Squared Error across all folds: 1.3341091527800137
Predicted Strikeouts for Next Game: 6.011133193969727


# TARIK SKUBAL

In [20]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Per-start game log; the row order is the order the time-series backtest relies on.
# NOTE(review): IP uses only .0/.1/.2 decimals here, which looks like thirds-of-an-inning
# notation rather than true decimals -- confirm before doing arithmetic on it.
# NOTE(review): STRIKEOUT appears derivable from the same row's K/9 and IP
# (first row: 9 * 4 / 9 = 4), so the backtest may understate real forecasting error.
data = {
    'IP': [4, 6, 6.1, 3, 5, 5, 5, 1, 5, 5, 6.2, 7, 7, 5, 7, 6, 4, 5, 7, 4.1, 6, 5.1, 5, 4, 6, 4, 2.1, 4.1],
    'TBF': [20, 30, 28, 18, 19, 23, 25, 5, 20, 25, 24, 24, 26, 21, 24, 24, 20, 25, 23, 19, 26, 24, 22, 20, 26, 20, 12, 21],
    'K/9': [9, 3, 1.4, 6, 10.8, 9, 1.8, 18, 10.8, 10.8, 5.4, 9, 15.4, 5.4, 7.7, 9, 6.8, 7.2, 9, 4.2, 13.5, 6.8, 9, 11.3, 9, 11.3, 11.6, 6.2],
    'BB/9': [6.8, 3, 2.8, 3, 1.8, 0, 7.2, 18, 3.6, 3.6, 2.7, 3.9, 0, 7.2, 2.6, 4.5, 2.3, 1.8, 0, 6.2, 1.5, 3.4, 3.6, 4.5, 1.5, 6.8, 3.9, 8.3],
    'BABIP': [0.333, 0.423, 0.292, 0.462, 0.333, 0.375, 0.333, 0, 0.182, 0.357, 0.111, 0, 0.357, 0.214, 0.063, 0.143, 0.4, 0.421, 0.067, 0.214, 0.214, 0.389, 0.286, 0.273, 0.176, 0.5, 0.429, 0.429],
    'STRIKEOUT': [4, 2, 1, 2, 6, 5, 1, 2, 6, 6, 4, 7, 12, 3, 6, 6, 3, 4, 7, 2, 9, 4, 5, 5, 6, 5, 3, 3]
}

# Build the frame, then separate the predictor matrix from the label column.
# The predictor column order below is the order the model is trained with.
df = pd.DataFrame(data)
X = df.loc[:, ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']]
y = df.loc[:, 'STRIKEOUT']

# Squared-error XGBoost regressor: 100 estimators, seeded for repeatable runs
model = XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)

# Five ordered folds: every fold is tested on rows that come after its training rows
tscv = TimeSeriesSplit(n_splits=5)

# Backtest: collect one MSE per fold and print a per-fold report
mse_scores = []
for train_index, test_index in tscv.split(X):
    # Positional slices of predictors and labels for this fold
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training rows only, then score the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # One print call; sep plus the trailing "\n" reproduce the blank line between folds
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final model: train on every available row before forecasting
model.fit(X, y)

# Hard-coded single-row input for the upcoming start; keys follow the training column order
next_game_data = {
    'IP': [6],
    'K/9': [10.3],
    'TBF': [25],
    'BB/9': [1.4],
    'BABIP': [0.235],
}
next_game_df = pd.DataFrame(next_game_data)

predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [3.92 3.99 5.   3.92]
Actual Strikeouts: [6 6 4 7]
Mean Squared Error: 4.724316964381316

Test Indices: [12 13 14 15]
Predicted Strikeouts: [6.19 2.14 5.31 5.67]
Actual Strikeouts: [12  3  6  6]
Mean Squared Error: 8.772848838954985

Test Indices: [16 17 18 19]
Predicted Strikeouts: [2.21 2.63 6.41 3.04]
Actual Strikeouts: [3 4 7 2]
Mean Squared Error: 0.9839893713375147

Test Indices: [20 21 22 23]
Predicted Strikeouts: [5.86 2.79 6.   4.  ]
Actual Strikeouts: [9 4 5 5]
Mean Squared Error: 3.326640300809018

Test Indices: [24 25 26 27]
Predicted Strikeouts: [7.47 4.2  4.29 2.19]
Actual Strikeouts: [6 5 3 3]
Mean Squared Error: 1.2773277547418616

Average Mean Squared Error across all folds: 3.8170246460449393
Predicted Strikeouts for Next Game: 5.991445541381836


# ALEC MARSH

In [21]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Game log for this pitcher: each key is a stat column, each list position is one start.
# NOTE(review): IP values such as 5.2 / 6.1 look like baseball innings notation
# (.1 / .2 = thirds of an inning) but are used below as plain decimals - confirm intent.
data = {
    'IP': [5.2, 5, 7, 6, 3, 4, 5, 5, 5, 6, 6, 6, 4, 6, 2.2, 6.1, 4.1, 6, 5, 4, 3.1, 5.2, 6.2, 7, 5, 4.2, 7],
    'TBF': [23, 23, 24, 26, 17, 24, 23, 22, 22, 22, 24, 25, 12, 23, 14, 24, 21, 22, 24, 21, 19, 22, 25, 23, 19, 24, 27],
    'K/9': [11.1, 9, 12.9, 7.5, 3, 9, 7.2, 9, 7.2, 6, 9, 9, 11.3, 12, 13.5, 7.1, 12.5, 9, 9, 9, 2.7, 9.5, 4.1, 9, 14.4, 7.7, 7.7],
    'BB/9': [1.6, 3.6, 0, 3, 3, 9, 1.8, 1.8, 7.2, 0, 3, 1.5, 0, 3, 6.7, 1.4, 0, 1.5, 1.8, 11.3, 0, 3.2, 1.4, 1.3, 3.6, 3.9, 5.1],
    'BABIP': [0.4, 0.286, 0.231, 0.263, 0.5, 0.5, 0.389, 0.231, 0.308, 0.294, 0.214, 0.389, 0, 0.231, 0.429, 0.278, 0.533, 0.2, 0.375, 0.364, 0.5, 0.214, 0.2, 0.133, 0.125, 0.412, 0.25],
    'STRIKEOUT': [7, 5, 10, 5, 1, 4, 4, 5, 4, 4, 6, 6, 5, 8, 4, 5, 6, 6, 5, 4, 1, 6, 3, 7, 8, 4, 6],
}
df = pd.DataFrame.from_dict(data)

# Predictors in a fixed column order; the strikeout count is the regression target.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
X = df.loc[:, feature_columns]
y = df.loc[:, 'STRIKEOUT']

# Boosted-tree regressor with squared-error loss; the seed is pinned so reruns match.
model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)

# Ordered 5-way split: the row order of X is treated as the game order.
tscv = TimeSeriesSplit(n_splits=5)

# NOTE(review): IP, K/9 and STRIKEOUT come from the same rows, and the target looks
# derivable from the first two (K ~= K/9 * IP / 9), so these fold errors may be
# optimistic - confirm the features would actually be known before a game.
mse_scores = []  # one mean squared error per fold
for train_index, test_index in tscv.split(X):
    # Positional row selection for this fold.
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # The same estimator object is fitted again on every fold's training rows.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report; the trailing newline leaves a blank line between folds.
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final fit uses the complete X / y rather than a single training fold.
model.fit(X, y)

# One hypothetical stat line for the upcoming start (placeholder values, edit as needed).
# Keys and their order match the feature columns selected into X in this cell.
next_game_data = {'IP': [5], 'K/9': [9], 'TBF': [22], 'BB/9': [2.6], 'BABIP': [0.278]}
next_game_df = pd.DataFrame.from_dict(next_game_data)

# Only one input row is supplied, so element 0 is the whole prediction.
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 7  8  9 10]
Predicted Strikeouts: [5. 5. 5. 5.]
Actual Strikeouts: [5 4 4 6]
Mean Squared Error: 0.7491732787549381

Test Indices: [11 12 13 14]
Predicted Strikeouts: [4.01 6.99 9.96 6.96]
Actual Strikeouts: [6 5 8 4]
Mean Squared Error: 5.134729179972112

Test Indices: [15 16 17 18]
Predicted Strikeouts: [4.09 4.27 6.   4.06]
Actual Strikeouts: [5 6 6 5]
Mean Squared Error: 1.1780745684660587

Test Indices: [19 20 21 22]
Predicted Strikeouts: [4.05 1.   5.97 3.91]
Actual Strikeouts: [4 1 6 3]
Mean Squared Error: 0.20638644436836628

Test Indices: [23 24 25 26]
Predicted Strikeouts: [8.72 6.12 4.79 8.15]
Actual Strikeouts: [7 8 4 6]
Mean Squared Error: 2.9307510673712045

Average Mean Squared Error across all folds: 2.0398229077865357
Predicted Strikeouts for Next Game: 5.016567707061768


# BRYCE MILLER

In [22]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Game log for this pitcher: each key is a stat column, each list position is one start.
# NOTE(review): IP values such as 5.1 / 6.2 look like baseball innings notation
# (.1 / .2 = thirds of an inning) but are used below as plain decimals - confirm intent.
data = {
    'IP': [5.1, 4, 4.1, 6.2, 5.1, 6, 4, 6, 6, 5.1, 7, 4, 5, 5, 7, 5, 5.2, 6, 5, 7, 6, 5.1, 5, 5.1, 4.1, 4.2],
    'TBF': [24, 20, 20, 27, 24, 27, 19, 22, 26, 21, 27, 19, 21, 20, 28, 22, 24, 25, 22, 24, 23, 23, 20, 21, 21, 24],
    'K/9': [3.4, 11.3, 10.4, 4.1, 8.4, 6, 11.3, 9, 9, 13.5, 7.7, 11.3, 3.6, 14.4, 7.7, 7.2, 7.9, 9, 5.4, 12.9, 10.5, 6.8, 3.6, 3.4, 14.5, 13.5],
    'BB/9': [6.8, 6.8, 8.3, 2.7, 6.8, 4.5, 4.5, 3, 1.5, 1.7, 0, 0, 0, 1.8, 3.9, 7.2, 4.8, 1.5, 5.4, 0, 4.5, 5.1, 5.4, 3.4, 4.2, 1.9],
    'BABIP': [0.125, 0.4, 0.3, 0.15, 0.286, 0.278, 0.364, 0.143, 0.294, 0.273, 0.238, 0.462, 0.368, 0.3, 0.167, 0.357, 0.188, 0.333, 0.267, 0.214, 0.167, 0.214, 0.133, 0.235, 0.4, 0.5],
    'STRIKEOUT': [2, 5, 5, 3, 5, 4, 5, 6, 6, 8, 6, 5, 2, 8, 6, 4, 5, 6, 3, 10, 7, 4, 2, 2, 7, 7],
}
df = pd.DataFrame.from_dict(data)

# Predictors in a fixed column order; the strikeout count is the regression target.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
X = df.loc[:, feature_columns]
y = df.loc[:, 'STRIKEOUT']

# Boosted-tree regressor with squared-error loss; the seed is pinned so reruns match.
model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)

# Ordered 5-way split: the row order of X is treated as the game order.
tscv = TimeSeriesSplit(n_splits=5)

# NOTE(review): IP, K/9 and STRIKEOUT come from the same rows, and the target looks
# derivable from the first two (K ~= K/9 * IP / 9), so these fold errors may be
# optimistic - confirm the features would actually be known before a game.
mse_scores = []  # one mean squared error per fold
for train_index, test_index in tscv.split(X):
    # Positional row selection for this fold.
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # The same estimator object is fitted again on every fold's training rows.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report; the trailing newline leaves a blank line between folds.
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final fit uses the complete X / y rather than a single training fold.
model.fit(X, y)

# One hypothetical stat line for the upcoming start (placeholder values, edit as needed).
# Keys and their order match the feature columns selected into X in this cell.
next_game_data = {'IP': [6], 'K/9': [9], 'TBF': [23], 'BB/9': [1.7], 'BABIP': [0.25]}
next_game_df = pd.DataFrame.from_dict(next_game_data)

# Only one input row is supplied, so element 0 is the whole prediction.
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [6 7 8 9]
Predicted Strikeouts: [5. 4. 4. 5.]
Actual Strikeouts: [5 6 6 8]
Mean Squared Error: 4.2506505012762545

Test Indices: [10 11 12 13]
Predicted Strikeouts: [4.01 6.12 2.   8.  ]
Actual Strikeouts: [6 5 2 8]
Mean Squared Error: 1.3035769587739452

Test Indices: [14 15 16 17]
Predicted Strikeouts: [6.   3.14 5.   6.  ]
Actual Strikeouts: [6 4 5 6]
Mean Squared Error: 0.1829770881099222

Test Indices: [18 19 20 21]
Predicted Strikeouts: [2.92 6.   5.99 4.  ]
Actual Strikeouts: [ 3 10  7  4]
Mean Squared Error: 4.256292204903346

Test Indices: [22 23 24 25]
Predicted Strikeouts: [2. 2. 8. 8.]
Actual Strikeouts: [2 2 7 7]
Mean Squared Error: 0.4999506825934077

Average Mean Squared Error across all folds: 2.098689487131375
Predicted Strikeouts for Next Game: 6.002712249755859


# SONNY GRAY

In [23]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Game log for this pitcher: each key is a stat column, each list position is one start.
# NOTE(review): IP values such as 4.2 / 1.2 look like baseball innings notation
# (.1 / .2 = thirds of an inning) but are used below as plain decimals - confirm intent.
data = {
    'IP': [4, 7, 5, 4.2, 2, 6, 3, 6.2, 5, 1.2, 4.1, 6, 6, 7, 5, 7, 5, 5, 3.1, 6, 5.1, 5, 2, 5.1, 5, 5, 7, 6],
    'TBF': [18, 26, 22, 21, 6, 23, 13, 27, 23, 16, 24, 21, 20, 26, 20, 25, 24, 18, 19, 29, 25, 24, 12, 23, 22, 25, 26, 23],
    'K/9': [9, 10.3, 14.4, 17.4, 9, 6, 3, 4.1, 10.8, 10.8, 6.2, 12, 9, 3.9, 9, 9, 7.2, 9, 13.5, 7.5, 3.4, 3.6, 0, 18.6, 14.4, 10.8, 11.6, 7.5],
    'BB/9': [2.3, 0, 1.8, 5.8, 0, 3, 0, 0, 3.6, 16.2, 2.1, 0, 0, 1.3, 1.8, 2.6, 5.4, 5.4, 10.8, 0, 1.7, 5.4, 4.5, 3.4, 5.4, 7.2, 1.3, 3],
    'BABIP': [0.333, 0.278, 0.417, 0.286, 0, 0.125, 0.417, 0.304, 0.4, 0.7, 0.421, 0.308, 0.143, 0.136, 0.357, 0, 0.412, 0, 0.6, 0.333, 0.238, 0.333, 0.25, 0.5, 0.364, 0.4, 0.25, 0.2],
    'STRIKEOUT': [4, 8, 8, 9, 2, 4, 1, 3, 6, 2, 3, 8, 6, 3, 5, 7, 4, 5, 5, 5, 2, 2, 0, 11, 8, 6, 9, 5],
}
df = pd.DataFrame.from_dict(data)

# Predictors in a fixed column order; the strikeout count is the regression target.
feature_columns = ['IP', 'K/9', 'TBF', 'BB/9', 'BABIP']
X = df.loc[:, feature_columns]
y = df.loc[:, 'STRIKEOUT']

# Boosted-tree regressor with squared-error loss; the seed is pinned so reruns match.
model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)

# Ordered 5-way split: the row order of X is treated as the game order.
tscv = TimeSeriesSplit(n_splits=5)

# NOTE(review): IP, K/9 and STRIKEOUT come from the same rows, and the target looks
# derivable from the first two (K ~= K/9 * IP / 9), so these fold errors may be
# optimistic - confirm the features would actually be known before a game.
mse_scores = []  # one mean squared error per fold
for train_index, test_index in tscv.split(X):
    # Positional row selection for this fold.
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # The same estimator object is fitted again on every fold's training rows.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report; the trailing newline leaves a blank line between folds.
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final fit uses the complete X / y rather than a single training fold.
model.fit(X, y)

# One hypothetical stat line for the upcoming start (placeholder values, edit as needed).
# Keys and their order match the feature columns selected into X in this cell.
next_game_data = {'IP': [6], 'K/9': [10.8], 'TBF': [24], 'BB/9': [1.8], 'BABIP': [0.267]}
next_game_df = pd.DataFrame.from_dict(next_game_data)

# Only one input row is supplied, so element 0 is the whole prediction.
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [ 8  9 10 11]
Predicted Strikeouts: [8.   8.76 4.   8.  ]
Actual Strikeouts: [6 2 3 8]
Mean Squared Error: 12.685597764264358

Test Indices: [12 13 14 15]
Predicted Strikeouts: [6.01 4.35 5.38 7.72]
Actual Strikeouts: [6 3 5 7]
Mean Squared Error: 0.622565954652373

Test Indices: [16 17 18 19]
Predicted Strikeouts: [5.37 4.84 4.38 5.35]
Actual Strikeouts: [4 5 5 5]
Mean Squared Error: 0.6059740772364535

Test Indices: [20 21 22 23]
Predicted Strikeouts: [2.72 3.35 1.58 8.49]
Actual Strikeouts: [ 2  2  0 11]
Mean Squared Error: 2.779678591842

Test Indices: [24 25 26 27]
Predicted Strikeouts: [7.66 5.99 7.99 5.1 ]
Actual Strikeouts: [8 6 9 5]
Mean Squared Error: 0.28675366205283126

Average Mean Squared Error across all folds: 3.3961140100096032
Predicted Strikeouts for Next Game: 6.971662521362305


TEST

In [7]:
# Import necessary libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error

# Betting-line sample: each list position is one game (moneyline, over/under total, strikeouts).
data = {
    'Moneyline': [-190, -175, -162, -158, -156, -150, -130, -128, -125, -115, -115, -110, -110],
    'O/U': [9, 9, 11, 8.5, 8, 8, 7.5, 9, 9.5, 8.5, 9, 8, 9],
    'Strikeout': [3, 3, 3, 7, 5, 3, 10, 3, 10, 6, 2, 5, 6],
}
df = pd.DataFrame.from_dict(data)

# The two betting columns are the predictors; the strikeout count is the regression target.
feature_columns = ['Moneyline', 'O/U']
X = df.loc[:, feature_columns]
y = df.loc[:, 'Strikeout']

# Boosted-tree regressor with squared-error loss; the seed is pinned so reruns match.
model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)

# Ordered 5-way split: the row order of X is treated as the game order.
# NOTE(review): the rows in this cell look sorted by Moneyline rather than by date -
# confirm the order is chronological, since TimeSeriesSplit relies on it.
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []  # one mean squared error per fold
for train_index, test_index in tscv.split(X):
    # Positional row selection for this fold.
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # The same estimator object is fitted again on every fold's training rows.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    # Per-fold report; the trailing newline leaves a blank line between folds.
    print(
        f'Test Indices: {test_index}',
        f'Predicted Strikeouts: {np.round(y_pred, 2)}',
        f'Actual Strikeouts: {y_test.values}',
        f'Mean Squared Error: {mse}\n',
        sep='\n',
    )

print(f'Average Mean Squared Error across all folds: {np.mean(mse_scores)}')

# Final fit uses the complete X / y rather than a single training fold.
model.fit(X, y)

# One hypothetical upcoming game: example moneyline and over/under total (edit as needed).
# Keys and their order match the feature columns selected into X in this cell.
next_game_data = {'Moneyline': [-130], 'O/U': [9]}
next_game_df = pd.DataFrame.from_dict(next_game_data)

# Only one input row is supplied, so element 0 is the whole prediction.
predicted_strikeouts = model.predict(next_game_df)
print(f'Predicted Strikeouts for Next Game: {predicted_strikeouts[0]}')


Test Indices: [3 4]
Predicted Strikeouts: [3. 3.]
Actual Strikeouts: [7 5]
Mean Squared Error: 10.0

Test Indices: [5 6]
Predicted Strikeouts: [5. 5.]
Actual Strikeouts: [ 3 10]
Mean Squared Error: 14.49855255525108

Test Indices: [7 8]
Predicted Strikeouts: [10. 10.]
Actual Strikeouts: [ 3 10]
Mean Squared Error: 24.492290756440525

Test Indices: [ 9 10]
Predicted Strikeouts: [10. 10.]
Actual Strikeouts: [6 2]
Mean Squared Error: 39.98784740027986

Test Indices: [11 12]
Predicted Strikeouts: [3.98 2.  ]
Actual Strikeouts: [5 6]
Mean Squared Error: 8.510305058405407

Average Mean Squared Error across all folds: 19.497799154075373
Predicted Strikeouts for Next Game: 3.0003983974456787
