In [26]:
import importlib
from sklearn.model_selection import train_test_split # type: ignore
import torch

In [27]:
import Data_Prep
importlib.reload(Data_Prep)
from Data_Prep import Data_Prep, Player_IO
import Prep_Map
importlib.reload(Prep_Map)
import Output_Map
importlib.reload(Output_Map)

<module 'Output_Map' from 'c:\\Users\\nitzr\\source\\repos\\BaseballModels\\BaseballModels\\Model\\Output_Map.py'>

In [28]:
# Build the preprocessing pipeline from the base input map and base output map.
data_prep = Data_Prep(
    Prep_Map.base_prep_map,
    Output_Map.base_output_map,
)

# Player filter handed to Generate_IO_Hitters: the three "?" placeholders are
# paired, in order, with the values in hitter_filter_params.
hitter_filter_sql = "WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?"
hitter_filter_params = (2025,2015,1)
hitter_io_list = data_prep.Generate_IO_Hitters(
    hitter_filter_sql,
    hitter_filter_params,
    use_cutoff=True,
)

                                                                        

In [30]:
# Scratch check: show how reshape lays 18 consecutive integers out as 3 rows of 6.
test = torch.arange(1, 19)
print(test.reshape(3, 6))

tensor([[ 1,  2,  3,  4,  5,  6],
        [ 7,  8,  9, 10, 11, 12],
        [13, 14, 15, 16, 17, 18]])


In [29]:
# Read the hitter-value mean / deviation tensors stored on data_prep.
# The attribute names start with a double underscore, so they are looked up by
# string; the built-in getattr() is the idiomatic way to do that instead of
# calling the __getattribute__ dunder directly.
mlb_value_mean : torch.Tensor = getattr(data_prep, '__hittervalues_means')
mlb_value_stds : torch.Tensor = getattr(data_prep, '__hittervalues_devs')

print(mlb_value_mean)
print(mlb_value_stds)

tensor([ 9.2478e-02, -6.9007e-03, -1.9603e-02,  1.9402e-02,  8.3321e-01,
         2.7641e+01,  9.8841e-02, -9.6873e-03, -2.7119e-02,  1.8577e-02,
         9.0121e-01,  2.9815e+01,  1.0406e-01, -9.6932e-03, -3.4777e-02,
         1.6868e-02,  9.5601e-01,  3.1596e+01])
tensor([  0.5854,   3.4474,   1.9235,   0.6232,   3.1872, 105.7870,   0.6050,
          3.5818,   1.9948,   0.6445,   3.3044, 109.4425,   0.6216,   3.7099,
          2.0429,   0.6558,   3.3996, 112.4668])


In [None]:
def _pad_field(players, field):
    """Run pad_sequence (default settings) over the `field` tensor of every player."""
    return torch.nn.utils.rnn.pad_sequence([getattr(player, field) for player in players])


# Fixed random_state keeps the 75/25 split identical between runs.
io_train : list[Player_IO]
io_test : list[Player_IO]
io_train, io_test = train_test_split(hitter_io_list, test_size=0.25, random_state=0)

# Per-player sequence lengths, taken before padding.
train_lengths = torch.tensor([player.length for player in io_train])
test_lengths = torch.tensor([player.length for player in io_test])

# Model inputs plus prospect targets and masks.
x_train_padded = _pad_field(io_train, "input")
x_test_padded = _pad_field(io_test, "input")
y_prospect_train_padded = _pad_field(io_train, "output")
y_prospect_test_padded = _pad_field(io_test, "output")
mask_prospect_train_padded = _pad_field(io_train, "prospect_mask")
mask_prospect_test_padded = _pad_field(io_test, "prospect_mask")
mask_level_train_padded = _pad_field(io_train, "stat_level_mask")
mask_level_test_padded = _pad_field(io_test, "stat_level_mask")

# Per-year stat / position targets and their mask.
mask_year_train_padded = _pad_field(io_train, "year_level_mask")
mask_year_test_padded = _pad_field(io_test, "year_level_mask")
y_year_stats_train_padded = _pad_field(io_train, "year_stat_output")
y_year_stats_test_padded = _pad_field(io_test, "year_stat_output")
y_year_position_train_padded = _pad_field(io_train, "year_pos_output")
y_year_position_test_padded = _pad_field(io_test, "year_pos_output")

# MLB-value targets and their mask.
mlb_value_mask_train_padded = _pad_field(io_train, "mlb_value_mask")
mlb_value_mask_test_padded = _pad_field(io_test, "mlb_value_mask")
mlb_value_stats_train_padded = _pad_field(io_train, "mlb_value_stats")
mlb_value_stats_test_padded = _pad_field(io_test, "mlb_value_stats")

In [None]:
import Player_Dataset
importlib.reload(Player_Dataset)
from Player_Dataset import Player_Dataset

# Wrap the padded tensors in datasets. Arguments are positional, so the order
# below must stay in step with Player_Dataset's constructor.
train_hitters_dataset = Player_Dataset(
    x_train_padded,
    train_lengths,
    y_prospect_train_padded,
    mask_prospect_train_padded,
    mask_level_train_padded,
    mask_year_train_padded,
    y_year_stats_train_padded,
    y_year_position_train_padded,
    mlb_value_mask_train_padded,
    mlb_value_stats_train_padded,
)
test_hitters_dataset = Player_Dataset(
    x_test_padded,
    test_lengths,
    y_prospect_test_padded,
    mask_prospect_test_padded,
    mask_level_test_padded,
    mask_year_test_padded,
    y_year_stats_test_padded,
    y_year_position_test_padded,
    mlb_value_mask_test_padded,
    mlb_value_stats_test_padded,
)

Train Model

In [None]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Iterates ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is set; frozen parameters are
    skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
import Player_Model
importlib.reload(Player_Model)
from Player_Model import RNN_Model
from torch.optim import lr_scheduler
import Model_Train
importlib.reload(Model_Train)
from Model_Train import trainAndGraph
from Constants import device

# --- Hyperparameters -------------------------------------------------------
batch_size = 200
num_layers = 4
hidden_size = 20
num_epochs = 100

# Mutators are generated for this batch size and the longest sequence in the
# full hitter list.
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, Player_IO.GetMaxLength(hitter_io_list))

# The first constructor argument is read from the padded training input:
# x_train_padded[0].shape[1].
network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map, is_hitter=True)
network = network.to(device)

print("Num. Parameters:", count_parameters(network))

optimizer = torch.optim.Adam(network.parameters(), lr=0.004)
# `verbose` is intentionally not passed: the keyword is deprecated in
# ReduceLROnPlateau and rejected by newer PyTorch releases, and False was the
# default behaviour anyway.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, patience=50, cooldown=10)

# Only the training loader is shuffled; the test loader keeps a fixed order.
training_generator = torch.utils.data.DataLoader(train_hitters_dataset, batch_size=batch_size, shuffle=True)
testing_generator = torch.utils.data.DataLoader(test_hitters_dataset, batch_size=batch_size, shuffle=False)

trainAndGraph(network, training_generator, testing_generator, len(train_hitters_dataset), len(test_hitters_dataset), optimizer, scheduler, num_epochs, logging_interval=25, early_stopping_cutoff=2000, should_output=True, model_name="Models/default_hitter.pt", save_last=True)

Testing

In [None]:
import torch.nn as nn
import sqlite3
from tqdm import tqdm
# SQLite file that receives the prediction / actual tables written further down.
# The path is relative, so it resolves against the kernel's working directory.
test_db_path = 'test.db'
test_db = sqlite3.connect(test_db_path)

In [None]:
# Restore the weights saved by the training cell. map_location remaps the
# stored tensors onto `device`, so a checkpoint written on one device (e.g. a
# GPU) still loads when only another device (e.g. the CPU) is available.
network.load_state_dict(torch.load("Models/default_hitter.pt", map_location=device))
# Switch to evaluation mode before running inference.
network.eval()
network = network.to(device)

In [None]:
# Empty the four result tables first so rows from earlier runs do not accumulate.
cursor = test_db.cursor()
cursor.execute("DELETE FROM StatPred")
cursor.execute("DELETE FROM StatAct")
cursor.execute("DELETE FROM MlbValuePredH")
cursor.execute("DELETE FROM MlbValueActH")
test_db.commit()

# Applied to one 1-D slice of position outputs at a time, hence dim=0.
softmax = nn.Softmax(dim=0)

# Statistics used to undo the normalisation: value * stds + mean.
mlb_value_mean : torch.Tensor = getattr(data_prep, '__hittervalues_means').to(device)
mlb_value_stds : torch.Tensor = getattr(data_prep, '__hittervalues_devs').to(device)

# Inference only: no_grad() stops autograd from recording a graph for every
# player, which saves memory and time without changing the numbers produced.
with torch.no_grad():
    for io in tqdm(hitter_io_list):
        hitter = io.player
        # Add a leading dimension of size 1 so the single player is a batch of one.
        model_input = io.input.unsqueeze(0)
        seq_length = torch.tensor([model_input.shape[1]])
        twar, pwar, level, pa, value, year_stats, year_positions, mlb_value = network(model_input.to(device), seq_length.to(device))

        # Drop the batch dimension once instead of on every row below.
        year_stats = year_stats.squeeze(0)
        year_positions = year_positions.squeeze(0)
        mlb_value = mlb_value.squeeze(0)

        mlb_value_norm = (mlb_value * mlb_value_stds) + mlb_value_mean
        mlb_value_act = (io.mlb_value_stats.to(device) * mlb_value_stds) + mlb_value_mean

        stat_pred_rows = []
        stat_act_rows = []
        value_pred_rows = []
        value_act_rows = []
        for i in range(io.year_stat_output.size(0)):
            # Columns 1 and 2 of io.dates identify the row in all four tables.
            date_key = tuple(io.dates[i,1:3].tolist())
            # NOTE(review): the two constant 1s are written exactly as before;
            # their meaning comes from the table schema, which is not shown here.
            stat_key = (hitter.mlbId, 1, 1) + date_key
            value_key = (hitter.mlbId,) + date_key

            year_position_probs = softmax(year_positions[i,:9])
            stat_pred_rows.append(stat_key + tuple(year_stats[i,:11].tolist()) + tuple(year_position_probs.tolist()))
            stat_act_rows.append(stat_key + tuple(io.year_stat_output[i,:].tolist()) + tuple(io.year_pos_output[i,:].tolist()))
            value_pred_rows.append(value_key + tuple(mlb_value_norm[i,:].tolist()))
            value_act_rows.append(value_key + tuple(mlb_value_act[i,:].tolist()))

        # One executemany per table and player instead of one execute per row.
        cursor.executemany("INSERT INTO StatPred VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", stat_pred_rows)
        cursor.executemany("INSERT INTO StatAct VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", stat_act_rows)
        cursor.executemany("INSERT INTO MlbValuePredH VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", value_pred_rows)
        cursor.executemany("INSERT INTO MlbValueActH VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", value_act_rows)
test_db.commit()

In [37]:
import Player_Model
importlib.reload(Player_Model)
from Player_Model import RNN_Model

# Debug cell: run Mlb_Value_Loss for a single player after reloading Player_Model.
DEBUG_MLB_ID = 545361

mlb_value_mean : torch.Tensor = getattr(data_prep, '__hittervalues_means').to(device)
mlb_value_stds : torch.Tensor = getattr(data_prep, '__hittervalues_devs').to(device)

# NOTE(review): this rebinds `network` to a freshly constructed model with no
# weights loaded. Re-run the cell that loads "Models/default_hitter.pt" before
# using `network` for predictions again.
network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map, is_hitter=True)
network = network.to(device)

# Nothing is back-propagated here, so skip autograd bookkeeping.
with torch.no_grad():
    for io in hitter_io_list:
        hitter = io.player
        if hitter.mlbId != DEBUG_MLB_ID:
            continue

        # Add a leading dimension of size 1 so the single player is a batch of one.
        model_input = io.input.unsqueeze(0)
        seq_length = torch.tensor([model_input.shape[1]])
        twar, pwar, level, pa, value, year_stats, year_positions, mlb_value = network(model_input.to(device), seq_length.to(device))

        mlb_value = mlb_value.squeeze(0)
        # De-normalised prediction and target; not used by the loss call below,
        # kept so they can be inspected after the cell runs.
        mlb_value_norm = (mlb_value * mlb_value_stds) + mlb_value_mean
        mlb_value_act = (io.mlb_value_stats.to(device) * mlb_value_stds) + mlb_value_mean

        # The loss receives the still-normalised prediction, target and mask,
        # each with the leading dimension of size 1 restored.
        Player_Model.Mlb_Value_Loss(mlb_value.unsqueeze(0), io.mlb_value_stats.to(device).unsqueeze(0), io.mlb_value_mask.to(device).unsqueeze(0))

torch.Size([1, 91, 18])
torch.Size([1, 91, 18])
tensor([[2.3017e-01, 2.6668e-01, 4.1732e-02, 9.6268e-03, 1.9814e-01, 4.3401e-01],
        [2.2095e-01, 3.9762e-01, 9.2483e-02, 1.0283e-01, 2.0019e-01, 4.3303e-01],
        [3.4158e-01, 4.4290e-01, 1.0720e-01, 1.9973e-02, 2.7550e-01, 6.2088e-01],
        [9.7238e-03, 6.7266e-01, 2.8123e-01, 6.4915e-03, 1.2548e-02, 1.4460e-01],
        [2.5002e-01, 4.9229e-01, 1.8721e-01, 1.0721e-01, 3.3460e-01, 5.3078e-01],
        [2.0028e-01, 7.0680e-01, 2.2104e-01, 2.9310e-02, 2.4320e-01, 6.1682e-01],
        [3.3787e-01, 6.1234e-01, 2.0227e-01, 1.6829e-02, 2.8987e-01, 6.0407e-01],
        [3.2518e-01, 3.1998e-01, 2.2718e-01, 9.1675e-01, 2.0952e-01, 2.3301e-01],
        [9.7460e-01, 1.3277e+00, 4.8762e-01, 1.2067e+00, 4.4825e-01, 9.8662e-02],
        [1.1808e+00, 7.8477e-02, 1.0312e+00, 2.8964e+00, 1.1886e+00, 9.8656e-01],
        [8.3097e-01, 4.5823e-01, 1.1554e+00, 3.0899e+00, 1.0673e+00, 9.1174e-01],
        [3.7679e+00, 2.1001e+00, 2.1681e+00, 6.382

Stats-only pretraining

In [None]:
# Rebuild the preprocessing pipeline and hitter IO list for the stats-only run.
# NOTE(review): the maps and the player filter are the same as in the first
# data-prep cell of this notebook — confirm whether a different output map was
# intended for this section.
data_prep = Data_Prep(
    Prep_Map.base_prep_map,
    Output_Map.base_output_map,
)
hitter_io_list = data_prep.Generate_IO_Hitters(
    "WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?",
    (2025,2015,1),
    use_cutoff=True,
)

In [None]:
# Fixed random_state keeps the 75/25 split identical between runs.
io_train : list[Player_IO]
io_test : list[Player_IO]
io_train, io_test = train_test_split(hitter_io_list, test_size=0.25, random_state=0)

# Short alias; every call below uses pad_sequence with its default settings.
pad = torch.nn.utils.rnn.pad_sequence

# ---- training split ----
train_lengths = torch.tensor([p.length for p in io_train])
x_train_padded = pad([p.input for p in io_train])
y_prospect_train_padded = pad([p.output for p in io_train])
mask_prospect_train_padded = pad([p.prospect_mask for p in io_train])
mask_level_train_padded = pad([p.stat_level_mask for p in io_train])
mask_year_train_padded = pad([p.year_level_mask for p in io_train])
y_year_stats_train_padded = pad([p.year_stat_output for p in io_train])
y_year_position_train_padded = pad([p.year_pos_output for p in io_train])
mlb_value_mask_train_padded = pad([p.mlb_value_mask for p in io_train])
mlb_value_stats_train_padded = pad([p.mlb_value_stats for p in io_train])

# ---- test split ----
test_lengths = torch.tensor([p.length for p in io_test])
x_test_padded = pad([p.input for p in io_test])
y_prospect_test_padded = pad([p.output for p in io_test])
mask_prospect_test_padded = pad([p.prospect_mask for p in io_test])
mask_level_test_padded = pad([p.stat_level_mask for p in io_test])
mask_year_test_padded = pad([p.year_level_mask for p in io_test])
y_year_stats_test_padded = pad([p.year_stat_output for p in io_test])
y_year_position_test_padded = pad([p.year_pos_output for p in io_test])
mlb_value_mask_test_padded = pad([p.mlb_value_mask for p in io_test])
mlb_value_stats_test_padded = pad([p.mlb_value_stats for p in io_test])

In [None]:
# Arguments are positional; the train and test calls must list the same
# fields in the same order.
_train_fields = (
    x_train_padded,
    train_lengths,
    y_prospect_train_padded,
    mask_prospect_train_padded,
    mask_level_train_padded,
    mask_year_train_padded,
    y_year_stats_train_padded,
    y_year_position_train_padded,
    mlb_value_mask_train_padded,
    mlb_value_stats_train_padded,
)
_test_fields = (
    x_test_padded,
    test_lengths,
    y_prospect_test_padded,
    mask_prospect_test_padded,
    mask_level_test_padded,
    mask_year_test_padded,
    y_year_stats_test_padded,
    y_year_position_test_padded,
    mlb_value_mask_test_padded,
    mlb_value_stats_test_padded,
)
train_hitters_dataset = Player_Dataset(*_train_fields)
test_hitters_dataset = Player_Dataset(*_test_fields)

In [None]:
# --- Hyperparameters -------------------------------------------------------
batch_size = 200
num_layers = 4
hidden_size = 20
num_epochs = 100

# Mutators are generated for this batch size and the longest sequence in the
# full hitter list.
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, Player_IO.GetMaxLength(hitter_io_list))

# The first constructor argument is read from the padded training input:
# x_train_padded[0].shape[1].
network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map, is_hitter=True)
network = network.to(device)

print("Num. Parameters:", count_parameters(network))

optimizer = torch.optim.Adam(network.parameters(), lr=0.004)
# `verbose` is intentionally not passed: the keyword is deprecated in
# ReduceLROnPlateau and rejected by newer PyTorch releases, and False was the
# default behaviour anyway.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, patience=50, cooldown=10)

# Only the training loader is shuffled; the test loader keeps a fixed order.
training_generator = torch.utils.data.DataLoader(train_hitters_dataset, batch_size=batch_size, shuffle=True)
testing_generator = torch.utils.data.DataLoader(test_hitters_dataset, batch_size=batch_size, shuffle=False)

# Same training entry point as the default model; the result is saved under a
# separate file name.
trainAndGraph(network, training_generator, testing_generator, len(train_hitters_dataset), len(test_hitters_dataset), optimizer, scheduler, num_epochs, logging_interval=25, early_stopping_cutoff=2000, should_output=True, model_name="Models/default_statsonly_hitter.pt", save_last=True)