<a href="https://colab.research.google.com/github/starlingomez/preworkmac/blob/master/SEPTIEMBRE22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Paul Skenes

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    'GAME': list(range(1, 22)),
    'TBF': [20, 19, 23, 22, 23, 23, 24, 27, 24, 27, 23, 29, 25, 22, 25, 25, 19, 21, 23, 24, 22],
    'K%': [0.35, 0.579, 0.13, 0.409, 0.348, 0.348, 0.292, 0.296, 0.375, 0.296, 0.478, 0.276, 0.24, 0.182, 0.32, 0.24, 0.474, 0.286, 0.261, 0.375, 0.318],
    'STRIKEOUT': [7, 11, 3, 9, 8, 8, 7, 8, 9, 8, 11, 8, 6, 4, 8, 6, 9, 6, 6, 9, 7]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'avg_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'avg_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'avg_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game mean, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).mean().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling mean.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_mean = {
    raw_col: np.mean(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_mean[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_mean[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [6 7]
Ponches predichos: [ 9.52 11.  ]
Ponches reales: [8 6]
Error Cuadrático Medio: 13.63318105964754

Índices de prueba: [8 9]
Ponches predichos: [9.05 9.27]
Ponches reales: [4 8]
Error Cuadrático Medio: 13.542359406960259

Índices de prueba: [10 11]
Ponches predichos: [10.91 10.96]
Ponches reales: [6 9]
Error Cuadrático Medio: 13.97384978864011

Índices de prueba: [12 13]
Ponches predichos: [6.59 7.95]
Ponches reales: [6 6]
Error Cuadrático Medio: 2.07082280640509

Índices de prueba: [14 15]
Ponches predichos: [7.88 7.78]
Ponches reales: [9 7]
Error Cuadrático Medio: 0.9355907355432009

Promedio de Error Cuadrático Medio en todos los pliegues: 8.83116075943924
Ponches predichos para el próximo juego: 9.022903442382812


# SHANE BAZ

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    'GAME': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'TBF': [25, 20, 18, 22, 23, 22, 29, 27, 20, 22, 20, 24],
    'K%': [0.24, 0.25, 0.056, 0.318, 0.217, 0.091, 0.207, 0.148, 0.25, 0.227, 0.2, 0.25],
    'STRIKEOUT': [6, 5, 1, 7, 5, 2, 6, 4, 5, 5, 4, 6]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'avg_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'avg_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'avg_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game mean, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).mean().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling mean.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_mean = {
    raw_col: np.mean(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_mean[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_mean[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [2]
Ponches predichos: [2.]
Ponches reales: [4]
Error Cuadrático Medio: 3.9927134349281914

Índices de prueba: [3]
Ponches predichos: [4.]
Ponches reales: [5]
Error Cuadrático Medio: 0.9987701385043692

Índices de prueba: [4]
Ponches predichos: [4.]
Ponches reales: [5]
Error Cuadrático Medio: 0.99855189334653

Índices de prueba: [5]
Ponches predichos: [4.77]
Ponches reales: [4]
Error Cuadrático Medio: 0.5913368809908661

Índices de prueba: [6]
Ponches predichos: [4.09]
Ponches reales: [6]
Error Cuadrático Medio: 3.653591537266948

Promedio de Error Cuadrático Medio en todos los pliegues: 2.046992777007381
Ponches predichos para el próximo juego: 5.995274066925049


# BLAKE SNELL

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    'GAME': list(range(1, 20)),
    'TBF': [14, 20, 24, 19, 20, 22, 18, 21, 23, 22, 29, 23, 23, 28, 15, 26, 8, 21, 21],
    'K%': [0.357, 0.2, 0.125, 0.263, 0.35, 0.318, 0.167, 0.381, 0.174, 0.682, 0.379, 0.348, 0.478, 0.357, 0.333, 0.308, 0.25, 0.381, 0.571],
    'STRIKEOUT': [5, 4, 3, 5, 7, 7, 3, 8, 4, 15, 11, 8, 11, 10, 5, 8, 2, 8, 12]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'avg_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'avg_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'avg_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game mean, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).mean().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling mean.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_mean = {
    raw_col: np.mean(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_mean[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_mean[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [4 5]
Ponches predichos: [3.01 3.  ]
Ponches reales: [15 11]
Error Cuadrático Medio: 103.91319245708942

Índices de prueba: [6 7]
Ponches predichos: [10.57 14.51]
Ponches reales: [ 8 11]
Error Cuadrático Medio: 9.478158616796009

Índices de prueba: [8 9]
Ponches predichos: [ 7.13 13.75]
Ponches reales: [10  5]
Error Cuadrático Medio: 42.376402252477874

Índices de prueba: [10 11]
Ponches predichos: [11.    5.28]
Ponches reales: [8 2]
Error Cuadrático Medio: 9.88475795776776

Índices de prueba: [12 13]
Ponches predichos: [8.83 5.1 ]
Ponches reales: [ 8 12]
Error Cuadrático Medio: 24.121505433295624

Promedio de Error Cuadrático Medio en todos los pliegues: 37.95480334348534
Ponches predichos para el próximo juego: 10.008591651916504


In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    'GAME': list(range(1, 20)),
    'TBF': [14, 20, 24, 19, 20, 22, 18, 21, 23, 22, 29, 23, 23, 28, 15, 26, 8, 21, 21],
    'K%': [0.357, 0.2, 0.125, 0.263, 0.35, 0.318, 0.167, 0.381, 0.174, 0.682, 0.379, 0.348, 0.478, 0.357, 0.333, 0.308, 0.25, 0.381, 0.571],
    'STRIKEOUT': [5, 4, 3, 5, 7, 7, 3, 8, 4, 15, 11, 8, 11, 10, 5, 8, 2, 8, 12]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'med_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'med_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'med_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game median, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).median().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling median.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_median = {
    raw_col: np.median(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_median[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_median[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [4 5]
Ponches predichos: [4. 4.]
Ponches reales: [15 11]
Error Cuadrático Medio: 85.05126619639123

Índices de prueba: [6 7]
Ponches predichos: [10.99 10.98]
Ponches reales: [ 8 11]
Error Cuadrático Medio: 4.479648394798005

Índices de prueba: [8 9]
Ponches predichos: [11. 11.]
Ponches reales: [10  5]
Error Cuadrático Medio: 18.494133245229477

Índices de prueba: [10 11]
Ponches predichos: [11.   9.7]
Ponches reales: [8 2]
Error Cuadrático Medio: 34.16457318089397

Índices de prueba: [12 13]
Ponches predichos: [ 8.78 11.72]
Ponches reales: [ 8 12]
Error Cuadrático Medio: 0.34444405512476806

Promedio de Error Cuadrático Medio en todos los pliegues: 28.506813014487488
Ponches predichos para el próximo juego: 11.995311737060547


# JORDAN MONTGOMERY

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    'GAME': list(range(1, 21)),
    'TBF': [22, 28, 17, 28, 24, 24, 26, 25, 16, 23, 21, 24, 20, 18, 22, 12, 28, 26, 28, 22],
    'K%': [0.136, 0.143, 0.059, 0.071, 0.292, 0.125, 0.154, 0.16, 0.063, 0.13, 0.333, 0.208, 0.05, 0.111, 0.045, 0.25, 0.143, 0.308, 0.143, 0.182],
    'STRIKEOUT': [3, 4, 1, 2, 7, 3, 4, 4, 1, 3, 7, 5, 1, 2, 1, 3, 4, 8, 4, 4]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'med_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'med_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'med_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game median, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).median().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling median.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_median = {
    raw_col: np.median(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_median[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_median[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [5 6]
Ponches predichos: [4. 3.]
Ponches reales: [7 5]
Error Cuadrático Medio: 6.502998684791152

Índices de prueba: [7 8]
Ponches predichos: [4.26 5.4 ]
Ponches reales: [1 2]
Error Cuadrático Medio: 11.119200447159756

Índices de prueba: [ 9 10]
Ponches predichos: [2.   3.99]
Ponches reales: [1 3]
Error Cuadrático Medio: 0.996148629314348

Índices de prueba: [11 12]
Ponches predichos: [1.29 2.26]
Ponches reales: [4 8]
Error Cuadrático Medio: 20.12528117066836

Índices de prueba: [13 14]
Ponches predichos: [3.01 3.56]
Ponches reales: [4 4]
Error Cuadrático Medio: 0.5878409556537747

Promedio de Error Cuadrático Medio en todos los pliegues: 7.866293977517477
Ponches predichos para el próximo juego: 2.352091073989868


# GRIFFIN CANNING

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    "GAME_RANGE": list(range(1, 30)),
    "TBF": [23, 23, 20, 22, 22, 22, 23, 22, 25, 21, 26, 26, 28, 24, 24, 25, 28, 21, 21, 18, 21, 26, 22, 25, 21, 22, 26, 29, 21],
    "K%": [0.087, 0.174, 0.3, 0.182, 0.182, 0.045, 0.217, 0.136, 0.2, 0.095, 0.192, 0.154, 0.071, 0.125, 0.167, 0.2, 0.143, 0.19, 0.143, 0.167, 0.286, 0.231, 0.364, 0, 0.143, 0.182, 0.269, 0.138, 0.286],
    "STRIKEOUT": [2, 4, 6, 4, 4, 1, 5, 3, 5, 2, 5, 4, 2, 3, 4, 5, 4, 4, 3, 3, 6, 6, 8, 0, 3, 4, 7, 4, 6]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'med_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'med_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'med_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game median, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).median().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling median.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_median = {
    raw_col: np.median(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_median[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_median[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [4 5 6 7]
Ponches predichos: [3. 5. 3. 5.]
Ponches reales: [2 5 4 2]
Error Cuadrático Medio: 2.748201262288717

Índices de prueba: [ 8  9 10 11]
Ponches predichos: [5.   5.   2.3  2.31]
Ponches reales: [3 4 5 4]
Error Cuadrático Medio: 3.781434597466003

Índices de prueba: [12 13 14 15]
Ponches predichos: [2.41 1.61 3.91 2.5 ]
Ponches reales: [4 3 3 6]
Error Cuadrático Medio: 4.388969330768621

Índices de prueba: [16 17 18 19]
Ponches predichos: [3.   2.24 3.   3.06]
Ponches reales: [6 8 0 3]
Error Cuadrático Medio: 12.797472728814512

Índices de prueba: [20 21 22 23]
Ponches predichos: [3.24 1.01 2.27 4.  ]
Ponches reales: [4 7 4 6]
Error Cuadrático Medio: 10.876563449118706

Promedio de Error Cuadrático Medio en todos los pliegues: 6.918528273691311
Ponches predichos para el próximo juego: 3.8968093395233154


# SPENCER ARRIGHETTI

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Game log for the pitcher: batters faced (TBF), strikeout rate (K%)
# and strikeouts per outing, in chronological order.
data = {
    "GAME_RANGE": list(range(1, 28)),
    "TBF": [19, 19, 20, 23, 25, 21, 27, 23, 23, 15, 24, 12, 22, 24, 21, 24, 24, 26, 24, 23, 24, 25, 22, 29, 11, 27, 22],
    "K%": [0.158, 0.263, 0.35, 0.261, 0.16, 0.238, 0.222, 0.304, 0.348, 0.267, 0.25, 0.167, 0.091, 0.417, 0.238, 0.25, 0.292, 0.115, 0.333, 0.522, 0.542, 0.2, 0.273, 0.379, 0, 0.259, 0.136],
    "STRIKEOUT": [3, 5, 7, 6, 4, 5, 6, 7, 8, 4, 6, 2, 2, 10, 5, 6, 7, 3, 8, 12, 13, 5, 6, 11, 0, 7, 3]
}

df = pd.DataFrame(data)

# (raw column, 5-game rolling column, previous-game column, difference column)
feature_specs = [
    ('TBF', 'med_TBF_last5', 'last_TBF', 'diff_TBF'),
    ('K%', 'med_K%_last5', 'last_K%', 'diff_K%'),
    ('STRIKEOUT', 'med_STRIKEOUT_last5', 'last_STRIKEOUT', 'diff_STRIKEOUT'),
]

# Rolling 5-game median, shifted by one row so each game only sees earlier games.
for raw_col, roll_col, _, _ in feature_specs:
    df[roll_col] = df[raw_col].rolling(window=5).median().shift(1)

# Value observed in the immediately preceding game.
for raw_col, _, lag_col, _ in feature_specs:
    df[lag_col] = df[raw_col].shift(1)

# How far the previous game deviated from the 5-game rolling median.
for _, roll_col, lag_col, delta_col in feature_specs:
    df[delta_col] = df[lag_col] - df[roll_col]

# Drop the leading rows that do not yet have a full 5-game history (NaN features).
df = df.dropna().reset_index(drop=True)

# Feature matrix (rolling, lag, diff columns in that order) and target.
feature_cols = (
    [spec[1] for spec in feature_specs]
    + [spec[2] for spec in feature_specs]
    + [spec[3] for spec in feature_specs]
)
X = df[feature_cols]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count.
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Evaluate with chronological folds from TimeSeriesSplit.
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for fit_rows, eval_rows in tscv.split(X):
    # Refit on the training rows of this fold.
    model.fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Score the held-out rows of this fold.
    y_test = y.iloc[eval_rows]
    y_pred = model.predict(X.iloc[eval_rows])
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(
        f'Índices de prueba: {eval_rows}',
        f'Ponches predichos: {np.round(y_pred, 2)}',
        f'Ponches reales: {y_test.values}',
        f'Error Cuadrático Medio: {mse}\n',
        sep='\n',
    )

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Final fit on every available row before forecasting.
model.fit(X, y)

# Build the feature row for the upcoming game from the five most recent games.
recent_median = {
    raw_col: np.median(df[raw_col].iloc[-5:].values)
    for raw_col, _, _, _ in feature_specs
}
most_recent = {raw_col: df[raw_col].iloc[-1] for raw_col, _, _, _ in feature_specs}

# Keys are inserted in the same order as the training feature columns.
next_game_data = {
    **{roll_col: [recent_median[raw_col]] for raw_col, roll_col, _, _ in feature_specs},
    **{lag_col: [most_recent[raw_col]] for raw_col, _, lag_col, _ in feature_specs},
    **{
        delta_col: [most_recent[raw_col] - recent_median[raw_col]]
        for raw_col, _, _, delta_col in feature_specs
    },
}

next_game_df = pd.DataFrame(next_game_data)
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [7 8 9]
Ponches predichos: [2.93 6.   4.  ]
Ponches reales: [ 2 10  5]
Error Cuadrático Medio: 5.955352989855119

Índices de prueba: [10 11 12]
Ponches predichos: [8.41 7.62 7.  ]
Ponches reales: [6 7 3]
Error Cuadrático Medio: 7.396944582994517

Índices de prueba: [13 14 15]
Ponches predichos: [5.57 3.   4.8 ]
Ponches reales: [ 8 12 13]
Error Cuadrático Medio: 51.38574939531336

Índices de prueba: [16 17 18]
Ponches predichos: [13.   11.93 10.3 ]
Ponches reales: [ 5  6 11]
Error Cuadrático Medio: 33.20235473752655

Índices de prueba: [19 20 21]
Ponches predichos: [5.09 2.24 5.2 ]
Ponches reales: [0 7 3]
Error Cuadrático Medio: 17.783780379446625

Promedio de Error Cuadrático Medio en todos los pliegues: 23.144836417027232
Ponches predichos para el próximo juego: 7.606191635131836


# ANDRE PALLANTE

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'GAME_RANGE': range(1, 19),
    'TBF': [22, 19, 18, 14, 24, 23, 24, 25, 23, 24, 21, 24, 26, 27, 27, 24, 25, 26],
    'K%': [0.182, 0.053, 0.333, 0.286, 0.208, 0.217, 0.208, 0.2, 0.13, 0.208, 0.095, 0.167, 0.192, 0.185, 0.111, 0.125, 0.24, 0.346],
    'STRIKEOUT': [4, 1, 6, 4, 5, 5, 5, 5, 3, 5, 2, 4, 5, 5, 3, 3, 6, 9]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [3 4]
Ponches predichos: [5. 5.]
Ponches reales: [3 5]
Error Cuadrático Medio: 2.0000038147009036

Índices de prueba: [5 6]
Ponches predichos: [5. 5.]
Ponches reales: [2 4]
Error Cuadrático Medio: 4.99759519492568

Índices de prueba: [7 8]
Ponches predichos: [2.12 3.  ]
Ponches reales: [5 5]
Error Cuadrático Medio: 6.134280254098542

Índices de prueba: [ 9 10]
Ponches predichos: [5. 5.]
Ponches reales: [3 3]
Error Cuadrático Medio: 3.987969749888407

Índices de prueba: [11 12]
Ponches predichos: [5.   2.88]
Ponches reales: [6 9]
Error Cuadrático Medio: 19.233025227882194

Promedio de Error Cuadrático Medio en todos los pliegues: 7.2705748482991455
Ponches predichos para el próximo juego: 8.579649925231934


# GAVIN WILLIAMS

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'GAME_RANGE': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'TBF': [21, 21, 20, 21, 25, 19, 22, 23, 21, 20, 20, 24, 7, 24, 22],
    'K%': [0.095, 0.238, 0.1, 0.238, 0.36, 0.421, 0.364, 0.13, 0.238, 0.25, 0.3, 0.25, 0, 0.208, 0.318],
    'STRIKEOUT': [2, 5, 2, 5, 9, 8, 8, 3, 5, 5, 6, 6, 0, 5, 7]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [5]
Ponches predichos: [8.]
Ponches reales: [6]
Error Cuadrático Medio: 3.9929630738172364

Índices de prueba: [6]
Ponches predichos: [6.]
Ponches reales: [6]
Error Cuadrático Medio: 7.781864042044617e-09

Índices de prueba: [7]
Ponches predichos: [6.]
Ponches reales: [0]
Error Cuadrático Medio: 36.00171091205107

Índices de prueba: [8]
Ponches predichos: [6.1]
Ponches reales: [5]
Error Cuadrático Medio: 1.205491021033822

Índices de prueba: [9]
Ponches predichos: [0.42]
Ponches reales: [7]
Error Cuadrático Medio: 43.33105627909635

Promedio de Error Cuadrático Medio en todos los pliegues: 16.906244258756068
Ponches predichos para el próximo juego: 4.356662273406982


# SHOTA IMANAGA

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    "GAME_RANGE": list(range(1, 29)),
    "TBF": [21, 14, 24, 23, 26, 24, 28, 23, 26, 22, 20, 25, 26, 21, 25, 25, 25, 25, 22, 30, 24, 22, 20, 27, 23, 25, 26, 26],
    "K%": [0.429, 0.214, 0.167, 0.217, 0.269, 0.292, 0.286, 0.348, 0.269, 0.045, 0.3, 0.28, 0.231, 0.143, 0.12, 0.32, 0.24, 0.4, 0.136, 0.233, 0.417, 0.136, 0.3, 0.111, 0.348, 0.28, 0.154, 0.423],
    "STRIKEOUT": [9, 3, 4, 5, 7, 7, 8, 8, 7, 1, 6, 7, 6, 3, 3, 8, 6, 10, 3, 7, 10, 3, 6, 3, 8, 7, 4, 11]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [ 8  9 10]
Ponches predichos: [6.19 6.12 6.19]
Ponches reales: [3 3 8]
Error Cuadrático Medio: 7.7205586788613045

Índices de prueba: [11 12 13]
Ponches predichos: [8.   5.82 7.92]
Ponches reales: [ 6 10  3]
Error Cuadrático Medio: 15.226985992470114

Índices de prueba: [14 15 16]
Ponches predichos: [7.48 3.   3.  ]
Ponches reales: [ 7 10  3]
Error Cuadrático Medio: 16.395007238533594

Índices de prueba: [17 18 19]
Ponches predichos: [7.62 3.82 6.74]
Ponches reales: [6 3 8]
Error Cuadrático Medio: 1.6297853973247622

Índices de prueba: [20 21 22]
Ponches predichos: [6.98 5.99 3.05]
Ponches reales: [ 7  4 11]
Error Cuadrático Medio: 22.382660689243266

Promedio de Error Cuadrático Medio en todos los pliegues: 12.670999599286606
Ponches predichos para el próximo juego: 1.0017138719558716


# BRYAN WOO

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'Game_Range': np.arange(1, 21),
    'TBF': [15, 20, 20, 24, 19, 21, 17, 14, 20, 20, 18, 26, 26, 25, 24, 25, 24, 23, 23, 24],
    'K%': [0.2, 0.25, 0.35, 0.042, 0.105, 0.286, 0.059, 0.214, 0.05, 0.25, 0.167, 0.231, 0.269, 0.24, 0.208, 0.28, 0.125, 0.261, 0.217, 0.292],
    'STRIKEOUT': [3, 5, 7, 1, 2, 6, 1, 3, 1, 5, 3, 6, 7, 6, 5, 7, 3, 6, 5, 7]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [5 6]
Ponches predichos: [1.   2.51]
Ponches reales: [3 6]
Error Cuadrático Medio: 8.094298586997674

Índices de prueba: [7 8]
Ponches predichos: [3. 3.]
Ponches reales: [7 6]
Error Cuadrático Medio: 12.501602225238457

Índices de prueba: [ 9 10]
Ponches predichos: [7. 7.]
Ponches reales: [5 7]
Error Cuadrático Medio: 1.9935445401388279

Índices de prueba: [11 12]
Ponches predichos: [3.44 5.32]
Ponches reales: [3 6]
Error Cuadrático Medio: 0.3279669217619414

Índices de prueba: [13 14]
Ponches predichos: [4.66 6.99]
Ponches reales: [5 7]
Error Cuadrático Medio: 0.05628971749104039

Promedio de Error Cuadrático Medio en todos los pliegues: 4.594740398325588
Ponches predichos para el próximo juego: 3.9193012714385986


# JOEY ESTES

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'Game_Range': list(range(1, 23)),
    'TBF': [17, 23, 27, 20, 20, 24, 18, 24, 23, 30, 14, 24, 26, 22, 21, 23, 27, 21, 24, 20, 27, 12],
    'K%': [0.294, 0.174, 0.222, 0.25, 0.25, 0.125, 0.111, 0.042, 0.348, 0.133, 0.071, 0.208, 0.154, 0.273, 0.19, 0.13, 0.185, 0.238, 0.292, 0.15, 0.074, 0.083],
    'STRIKEOUT': [5, 4, 6, 5, 5, 3, 2, 1, 8, 4, 1, 5, 4, 6, 4, 3, 5, 5, 7, 3, 2, 1]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [7 8]
Ponches predichos: [1. 1.]
Ponches reales: [4 6]
Error Cuadrático Medio: 16.98721092304858

Índices de prueba: [ 9 10]
Ponches predichos: [3.84 3.36]
Ponches reales: [4 3]
Error Cuadrático Medio: 0.07951789297851519

Índices de prueba: [11 12]
Ponches predichos: [2.27 5.8 ]
Ponches reales: [5 5]
Error Cuadrático Medio: 4.050790689169048

Índices de prueba: [13 14]
Ponches predichos: [3. 4.]
Ponches reales: [7 3]
Error Cuadrático Medio: 8.494259816472663

Índices de prueba: [15 16]
Ponches predichos: [6.63 2.86]
Ponches reales: [2 1]
Error Cuadrático Medio: 12.418006749913047

Promedio de Error Cuadrático Medio en todos los pliegues: 8.40595721431637
Ponches predichos para el próximo juego: 3.4327902793884277


# LUIS GIL

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'GAME_RANGE': list(range(1, 28)),
    'TBF': [18, 20, 23, 23, 24, 22, 23, 22, 24, 22, 28, 21, 23, 23, 15, 22, 19, 23, 24, 21, 23, 22, 20, 19, 21, 22, 22],
    'K%': [0.333, 0.4, 0.261, 0.391, 0.25, 0.227, 0.217, 0.136, 0.583, 0.364, 0.321, 0.286, 0.217, 0.261, 0.067, 0.091, 0.158, 0.391, 0.292, 0.286, 0.348, 0.273, 0.15, 0.158, 0.333, 0.227, 0.227],
    'STRIKEOUT': [6, 8, 6, 9, 6, 5, 5, 3, 14, 8, 9, 6, 5, 6, 1, 2, 3, 9, 7, 6, 8, 6, 3, 3, 7, 5, 5]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [7 8 9]
Ponches predichos: [6.   5.99 5.  ]
Ponches reales: [5 6 1]
Error Cuadrático Medio: 5.669839383411954

Índices de prueba: [10 11 12]
Ponches predichos: [ 5.   14.   13.94]
Ponches reales: [2 3 9]
Error Cuadrático Medio: 51.45844562023717

Índices de prueba: [13 14 15]
Ponches predichos: [9.   9.   1.01]
Ponches reales: [7 6 8]
Error Cuadrático Medio: 20.59543259296684

Índices de prueba: [16 17 18]
Ponches predichos: [6.03 1.01 3.39]
Ponches reales: [6 3 3]
Error Cuadrático Medio: 1.3671579494762607

Índices de prueba: [19 20 21]
Ponches predichos: [3.07 8.   9.  ]
Ponches reales: [7 5 5]
Error Cuadrático Medio: 13.480528766087446

Promedio de Error Cuadrático Medio en todos los pliegues: 18.514280862435935
Ponches predichos para el próximo juego: 5.00108528137207


# ZACK WHEELER

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Input data: one entry per game, in chronological order
data = {
    'GAME_RANGE': list(range(1, 31)),
    'TBF': [23, 25, 28, 24, 26, 22, 22, 26, 21, 27, 28, 21, 26, 24, 26, 24, 27, 26, 21, 26, 23, 27, 25, 22, 24, 25, 26, 21, 26, 24],
    'K%': [0.217, 0.4, 0.179, 0.417, 0.308, 0.364, 0.273, 0.423, 0.095, 0.222, 0.179, 0.429, 0.231, 0.167, 0.154, 0.333, 0.185, 0.269, 0.333, 0.269, 0.261, 0.333, 0.32, 0.273, 0.333, 0.24, 0.269, 0.333, 0.346, 0.25],
    'STRIKEOUT': [5, 10, 5, 10, 8, 8, 6, 11, 2, 6, 5, 9, 6, 4, 4, 8, 5, 7, 7, 7, 6, 9, 8, 6, 8, 6, 7, 7, 9, 6]
}

# Build the DataFrame
df = pd.DataFrame(data)

# Lagged features, computed for the three stats at once. Everything is
# shifted by one game so that a row only sees games played before it:
#   med_*_last5 -> median of the five preceding games
#   last_*      -> value from the immediately preceding game
#   diff_*      -> last_* minus med_*_last5
base = df[['TBF', 'K%', 'STRIKEOUT']]
prev_game = base.shift(1)
prev_median = base.rolling(window=5).median().shift(1)

df = pd.concat(
    [
        df,
        prev_median.rename(columns={'TBF': 'med_TBF_last5',
                                    'K%': 'med_K%_last5',
                                    'STRIKEOUT': 'med_STRIKEOUT_last5'}),
        prev_game.rename(columns={'TBF': 'last_TBF',
                                  'K%': 'last_K%',
                                  'STRIKEOUT': 'last_STRIKEOUT'}),
        (prev_game - prev_median).rename(columns={'TBF': 'diff_TBF',
                                                  'K%': 'diff_K%',
                                                  'STRIKEOUT': 'diff_STRIKEOUT'}),
    ],
    axis=1,
)

# The first five games have no complete 5-game history, so their rows
# contain NaN and are dropped
df = df.dropna().reset_index(drop=True)

# Features and target
feature_columns = ['med_TBF_last5', 'med_K%_last5', 'med_STRIKEOUT_last5',
                   'last_TBF', 'last_K%', 'last_STRIKEOUT',
                   'diff_TBF', 'diff_K%', 'diff_STRIKEOUT']
X = df[feature_columns]
y = df['STRIKEOUT']

# XGBoost regressor with a Poisson objective, since the target is a count
model = XGBRegressor(objective='count:poisson', n_estimators=100, random_state=42)

# Validate with time-ordered splits
tscv = TimeSeriesSplit(n_splits=5)
mse_scores = []

for train_index, test_index in tscv.split(X):
    # Fit on the training rows of this fold, then score its test rows
    model.fit(X.iloc[train_index], y.iloc[train_index])
    y_test = y.iloc[test_index]
    y_pred = model.predict(X.iloc[test_index])

    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

    print(f'Índices de prueba: {test_index}')
    print(f'Ponches predichos: {np.round(y_pred, 2)}')
    print(f'Ponches reales: {y_test.values}')
    print(f'Error Cuadrático Medio: {mse}\n')

print(f'Promedio de Error Cuadrático Medio en todos los pliegues: {np.mean(mse_scores)}')

# Refit on the full data set before forecasting
model.fit(X, y)

# Next-game features: medians over the five most recent games plus the
# values of the most recent game itself
recent = df.tail(5)
med_TBF_last5 = np.median(recent['TBF'].values)
med_K_last5 = np.median(recent['K%'].values)
med_STRIKEOUT_last5 = np.median(recent['STRIKEOUT'].values)

last_TBF = df['TBF'].iloc[-1]
last_K = df['K%'].iloc[-1]
last_STRIKEOUT = df['STRIKEOUT'].iloc[-1]

# Column order must match the training feature order
next_game_df = pd.DataFrame({
    'med_TBF_last5': [med_TBF_last5],
    'med_K%_last5': [med_K_last5],
    'med_STRIKEOUT_last5': [med_STRIKEOUT_last5],
    'last_TBF': [last_TBF],
    'last_K%': [last_K],
    'last_STRIKEOUT': [last_STRIKEOUT],
    'diff_TBF': [last_TBF - med_TBF_last5],
    'diff_K%': [last_K - med_K_last5],
    'diff_STRIKEOUT': [last_STRIKEOUT - med_STRIKEOUT_last5]
})
predicted_strikeouts = model.predict(next_game_df)
print(f'Ponches predichos para el próximo juego: {predicted_strikeouts[0]}')


Índices de prueba: [5 6 7 8]
Ponches predichos: [6. 8. 2. 8.]
Ponches reales: [5 9 6 4]
Error Cuadrático Medio: 8.49452564143104

Índices de prueba: [ 9 10 11 12]
Ponches predichos: [9.   9.   5.16 5.04]
Ponches reales: [4 8 5 7]
Error Cuadrático Medio: 7.462938591410989

Índices de prueba: [13 14 15 16]
Ponches predichos: [4.01 6.   6.5  8.08]
Ponches reales: [7 7 6 9]
Error Cuadrático Medio: 2.7662641831197448

Índices de prueba: [17 18 19 20]
Ponches predichos: [7.21 5.91 6.96 5.59]
Ponches reales: [8 6 8 6]
Error Cuadrático Medio: 0.4729942925285968

Índices de prueba: [21 22 23 24]
Ponches predichos: [5.28 8.22 6.68 7.76]
Ponches reales: [7 7 9 6]
Error Cuadrático Medio: 3.2257011062659444

Promedio de Error Cuadrático Medio en todos los pliegues: 4.484484762951263
Ponches predichos para el próximo juego: 6.662768363952637
