In [None]:
import importlib

In [None]:
# Reload each project module right after importing it so that edits made on disk
# are picked up without restarting the kernel.
import Data_Prep
importlib.reload(Data_Prep)
# NOTE: this rebinds the name `Data_Prep` from the module to the object imported
# from it. Re-running the cell still works because the `import Data_Prep`
# statement above binds the module again before `reload` is called.
from Data_Prep import Data_Prep, Player_IO
import Prep_Map
importlib.reload(Prep_Map)
import Output_Map
importlib.reload(Output_Map)

# Build the data-preparation helper from the stats-only prep map and the base output map.
data_prep = Data_Prep(Prep_Map.statsonly_prep_map, Output_Map.base_output_map)

In [None]:
# Parameterized WHERE clause; the tuple fills the three `?` placeholders in order
# (lastMLBSeason upper bound, signingYear upper bound, isPitcher flag).
pitcher_where_clause = "WHERE lastMLBSeason<? AND signingYear<? AND isPitcher=?"
pitcher_where_params = (2025, 2015, 1)
pitcher_io_list = data_prep.Generate_IO_Pitchers(
    pitcher_where_clause,
    pitcher_where_params,
    use_cutoff=True,
)

In [None]:
from sklearn.model_selection import train_test_split # type: ignore
import torch
# Hold out 25% of the players for evaluation; random_state is fixed so the split
# is the same on every run.
io_train : list[Player_IO]
io_test : list[Player_IO]
io_train, io_test = train_test_split(pitcher_io_list, test_size=0.25, random_state=0)


def _sequence_lengths(io_list):
    """Collect the `length` attribute of every Player_IO into a 1-D tensor."""
    return torch.tensor([player_io.length for player_io in io_list])


def _pad_field(io_list, field_name):
    """Pad the tensors stored under `field_name` on each Player_IO to a common length."""
    per_player = [getattr(player_io, field_name) for player_io in io_list]
    return torch.nn.utils.rnn.pad_sequence(per_player)


train_lengths = _sequence_lengths(io_train)
test_lengths = _sequence_lengths(io_test)

# Model inputs, prospect targets and their masks.
x_train_padded = _pad_field(io_train, "input")
x_test_padded = _pad_field(io_test, "input")
y_prospect_train_padded = _pad_field(io_train, "output")
y_prospect_test_padded = _pad_field(io_test, "output")
mask_prospect_train_padded = _pad_field(io_train, "prospect_mask")
mask_prospect_test_padded = _pad_field(io_test, "prospect_mask")
mask_level_train_padded = _pad_field(io_train, "stat_level_mask")
mask_level_test_padded = _pad_field(io_test, "stat_level_mask")

# Per-year mask and per-year stat / position targets.
mask_year_train_padded = _pad_field(io_train, "year_level_mask")
mask_year_test_padded = _pad_field(io_test, "year_level_mask")
y_year_stats_train_padded = _pad_field(io_train, "year_stat_output")
y_year_stats_test_padded = _pad_field(io_test, "year_stat_output")
y_year_position_train_padded = _pad_field(io_train, "year_pos_output")
y_year_position_test_padded = _pad_field(io_test, "year_pos_output")

In [None]:
import Player_Dataset
importlib.reload(Player_Dataset)
from Player_Dataset import Player_Dataset

# Wrap the padded tensors for each split in a Player_Dataset. The positional
# argument order is identical for both splits.
train_pitchers_dataset = Player_Dataset(
    x_train_padded,
    train_lengths,
    y_prospect_train_padded,
    mask_prospect_train_padded,
    mask_level_train_padded,
    mask_year_train_padded,
    y_year_stats_train_padded,
    y_year_position_train_padded,
)
test_pitchers_dataset = Player_Dataset(
    x_test_padded,
    test_lengths,
    y_prospect_test_padded,
    mask_prospect_test_padded,
    mask_level_test_padded,
    mask_year_test_padded,
    y_year_stats_test_padded,
    y_year_position_test_padded,
)

Train Model

In [None]:
def count_parameters(model):
    """Return the total number of elements across the trainable parameters of `model`.

    Only parameters whose `requires_grad` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
import Player_Model
importlib.reload(Player_Model)
from Player_Model import RNN_Model, Classification_Loss, Stats_L1_Loss
from torch.optim import lr_scheduler
import Model_Train
importlib.reload(Model_Train)
from Model_Train import trainAndGraph
from Constants import device

# --- Model construction -------------------------------------------------------
batch_size = 200
mutators = data_prep.Generate_Pitching_Mutators(batch_size, Player_IO.GetMaxLength(pitcher_io_list))

num_layers = 4
hidden_size = 20
# pad_sequence defaults to a time-major layout, so indexing the first time step and
# taking shape[1] yields the per-step feature width (assumes each player's input
# is a 2-D (time, features) tensor -- TODO confirm).
input_size = x_train_padded[0].shape[1]
network = RNN_Model(input_size, num_layers, hidden_size, mutators, output_map=data_prep.output_map, is_hitter=False)
network = network.to(device)

print("Num. Parameters:", count_parameters(network))

# --- Optimisation setup -------------------------------------------------------
optimizer = torch.optim.Adam(network.parameters(), lr=0.004)
# `verbose` is intentionally not passed: the keyword is deprecated in PyTorch's
# LR schedulers and False was already the default, so behaviour is unchanged
# while the call stays valid on newer PyTorch releases.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, patience=50, cooldown=10)
loss_function = Classification_Loss
loss_function_stats = Stats_L1_Loss
loss_function_position = Player_Model.Position_Classification_Loss

# --- Training -----------------------------------------------------------------
num_epochs = 100
# Shuffle only the training batches; evaluation order is kept fixed.
training_generator = torch.utils.data.DataLoader(train_pitchers_dataset, batch_size=batch_size, shuffle=True)
testing_generator = torch.utils.data.DataLoader(test_pitchers_dataset, batch_size=batch_size, shuffle=False)

trainAndGraph(
    network,
    training_generator,
    testing_generator,
    len(train_pitchers_dataset),
    len(test_pitchers_dataset),
    loss_function,
    loss_function_stats,
    loss_function_position,
    optimizer,
    scheduler,
    num_epochs,
    logging_interval=25,
    early_stopping_cutoff=2000,
    should_output=True,
    model_name="Models/default_statsonly_pitcher.pt",
    save_last=True,
)

Testing

In [None]:
import torch.nn as nn
import sqlite3
from tqdm import tqdm
# Open a connection to the SQLite file 'test.db' (relative path, so it is resolved
# against the current working directory; sqlite3 creates the file if it is missing).
test_db = sqlite3.connect('test.db')

In [None]:
# Load the checkpoint written by the training cell of this notebook. The previous
# path ("Models/default_pitcher.pt") pointed at a different model file than the
# one trained here ("Models/default_statsonly_pitcher.pt").
# NOTE(review): assumes trainAndGraph saves exactly to `model_name` -- confirm.
# map_location lets a checkpoint saved on another device (e.g. GPU) load on `device`.
network.load_state_dict(torch.load("Models/default_statsonly_pitcher.pt", map_location=device))
network = network.to(device)
# Switch to evaluation mode before running inference.
network.eval()

In [None]:
# Empty both tables first so re-running this cell does not accumulate duplicate rows.
cursor = test_db.cursor()
cursor.execute("DELETE FROM StatPredP")
cursor.execute("DELETE FROM StatActP")
test_db.commit()

# Applied to one row at a time, so dim=0 normalises across that row's entries.
softmax = nn.Softmax(dim=0)

insert_pred_sql = "INSERT INTO StatPredP VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
insert_act_sql = "INSERT INTO StatActP VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"

# Inference only: no_grad stops PyTorch from recording an autograd graph for every
# forward pass, which would otherwise waste memory and time.
with torch.no_grad():
    for io in tqdm(pitcher_io_list):
        pitcher = io.player
        model_input = io.input.unsqueeze(0)  # add a leading batch dimension of size 1
        seq_length = torch.tensor([model_input.shape[1]])
        # Only the per-year stat and position heads are stored; the other outputs are ignored.
        _, _, _, _, _, year_stats, year_positions = network(model_input.to(device), seq_length.to(device))

        # Drop the batch dimension once per player instead of once per row.
        year_stats = year_stats.squeeze(0)
        year_positions = year_positions.squeeze(0)

        pred_rows = []
        act_rows = []
        for i in range(io.year_stat_output.size(0)):
            # Leading columns shared by the predicted and the actual row.
            row_key = (pitcher.mlbId, 1, io.dates[i,1].item(), io.dates[i,2].item())
            year_position_probs = softmax(year_positions[i,:2])
            pred_rows.append(row_key + tuple(year_stats[i,:9].tolist()) + tuple(year_position_probs.tolist()))
            act_rows.append(row_key + tuple(io.year_stat_output[i,:].tolist()) + tuple(io.year_pos_output[i,:].tolist()))

        # One batched insert per table and player rather than one statement per row.
        cursor.executemany(insert_pred_sql, pred_rows)
        cursor.executemany(insert_act_sql, act_rows)
test_db.commit()