In [None]:
import importlib

In [None]:
# Import the project modules and reload each one so that edits made to the
# .py files are picked up without restarting the kernel.
import Data_Prep
importlib.reload(Data_Prep)
# From here on the name `Data_Prep` refers to the class rather than the module,
# which is why the reload above has to run before this from-import.
from Data_Prep import Data_Prep, Hitter_IO
import Prep_Map
importlib.reload(Prep_Map)
import Output_Map
importlib.reload(Output_Map)

# Build the data-preparation object from the base prep map and the WAR output map.
# Later cells use it to generate the hitter records and the hitting mutators.
data_prep = Data_Prep(Prep_Map.base_prep_map, Output_Map.war_map)

In [None]:
# Filter clause and the values for its `?` placeholders, kept separate so the
# cut-offs are easy to spot and change.
# NOTE(review): presumably the placeholders are bound positionally inside
# Generate_IO_Hitters - confirm in Data_Prep.
hitter_filter_clause = "WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?"
hitter_filter_values = (2025, 2015, 1)
hitter_io_list = data_prep.Generate_IO_Hitters(hitter_filter_clause, hitter_filter_values)

In [None]:
# Quick sanity check: display the `length` attribute of the first generated record.
hitter_io_list[0].length

In [None]:
from sklearn.model_selection import train_test_split # type: ignore
import torch

# Hold out 25% of the hitter records for testing; random_state=0 keeps the
# split identical from run to run.
io_train : list[Hitter_IO]
io_test : list[Hitter_IO]
io_train, io_test = train_test_split(hitter_io_list, test_size=0.25, random_state=0)


def _lengths_of(io_list):
    """Gather the `length` attribute of every record into one tensor."""
    return torch.tensor([record.length for record in io_list])


def _pad_field(io_list, field_name):
    """Pad the `field_name` attribute of every record to a common length.

    pad_sequence is called with its defaults (no batch_first, zero padding).
    """
    sequences = [getattr(record, field_name) for record in io_list]
    return torch.nn.utils.rnn.pad_sequence(sequences)


# Unpadded sequence lengths, one entry per record in each split.
train_lengths = _lengths_of(io_train)
test_lengths = _lengths_of(io_test)

# Model inputs.
x_train_padded = _pad_field(io_train, "input")
x_test_padded = _pad_field(io_test, "input")

# Targets: `output` feeds the y_prospect_* tensors, `stat_output` the y_stats_* tensors.
y_prospect_train_padded = _pad_field(io_train, "output")
y_prospect_test_padded = _pad_field(io_test, "output")
y_stats_train_padded = _pad_field(io_train, "stat_output")
y_stats_test_padded = _pad_field(io_test, "stat_output")

# Masks that accompany the two target groups.
mask_prospect_train_padded = _pad_field(io_train, "prospect_mask")
mask_prospect_test_padded = _pad_field(io_test, "prospect_mask")
mask_level_train_padded = _pad_field(io_train, "stat_level_mask")
mask_level_test_padded = _pad_field(io_test, "stat_level_mask")

In [None]:
import Hitter_Dataset
importlib.reload(Hitter_Dataset)
# After this line `Hitter_Dataset` names the class, not the module.
from Hitter_Dataset import Hitter_Dataset

# Wrap the padded tensors of each split in a dataset object.
# Positional order: inputs, lengths, prospect targets, stat targets,
# prospect mask, level mask.
train_hitters_dataset = Hitter_Dataset(
    x_train_padded,
    train_lengths,
    y_prospect_train_padded,
    y_stats_train_padded,
    mask_prospect_train_padded,
    mask_level_train_padded,
)
test_hitters_dataset = Hitter_Dataset(
    x_test_padded,
    test_lengths,
    y_prospect_test_padded,
    y_stats_test_padded,
    mask_prospect_test_padded,
    mask_level_test_padded,
)

Train Model

In [None]:
def count_parameters(model):
    """Count the trainable scalar values held by `model`.

    Walks `model.parameters()` and adds up `numel()` for every parameter
    whose `requires_grad` flag is truthy; other parameters are skipped.

    Returns:
        The summed element count (0 when nothing is trainable).
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [None]:
import Hitter_Model
importlib.reload(Hitter_Model)
from Hitter_Model import RNN_Model, Classification_Loss, Stats_L1_Loss
from torch.optim import lr_scheduler
import Model_Train
importlib.reload(Model_Train)
from Model_Train import trainAndGraph
from Constants import device

# The mutators are generated for the same batch size the data loaders use,
# together with the value Hitter_IO.GetMaxLength reports for the full record list.
batch_size = 1000
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, Hitter_IO.GetMaxLength(hitter_io_list))

# Network architecture.
num_layers = 3
hidden_size = 50
# The first argument is the size of axis 1 of the first slice of the padded
# training input. NOTE(review): presumably the per-step feature count - confirm
# against RNN_Model.
network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map)
network = network.to(device)

print("Num. Parameters:", count_parameters(network))

optimizer = torch.optim.Adam(network.parameters(), lr=0.003)
# Halve the learning rate after 20 epochs without improvement, then wait 5 epochs
# before monitoring again. The `verbose` argument is intentionally not passed:
# it is deprecated in recent PyTorch releases and False was its default, so
# omitting it keeps the same behaviour.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=20, cooldown=5)
loss_function = Classification_Loss
loss_function_stats = Stats_L1_Loss

num_epochs = 1000
# Only the training loader shuffles; the test loader keeps dataset order.
training_generator = torch.utils.data.DataLoader(train_hitters_dataset, batch_size=batch_size, shuffle=True)
testing_generator = torch.utils.data.DataLoader(test_hitters_dataset, batch_size=batch_size, shuffle=False)

# NOTE(review): graph_y_range is assigned here but not passed to trainAndGraph
# below - confirm whether it should be forwarded or can be removed.
graph_y_range=(0.1,0.5)
trainAndGraph(network, training_generator, testing_generator, len(train_hitters_dataset), len(test_hitters_dataset), loss_function, loss_function_stats, optimizer, scheduler, num_epochs, logging_interval=1, early_stopping_cutoff=40, should_output=True)

Testing

In [None]:
# Restore the weights stored in "no_name.pt" into the existing network.
# map_location=device remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine can still be opened on a CPU-only one.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
network.load_state_dict(torch.load("no_name.pt", map_location=device))
# Switch to evaluation mode before running the inference cells.
network.eval()
network = network.to(device)

In [None]:
import torch.nn.functional as F

def Check_Hitter(id : int, name : str):
    """Print the network's prediction for the hitter whose `mlbId` equals `id`.

    Scans the notebook-level `hitters` sequence for the first entry with a
    matching `mlbId`, feeds the corresponding row of `inputs` (as a batch of
    one) through `network` with gradients disabled, and prints:
      1. `name`,
      2. the softmax over the first network output,
      3. the scalar stored at `outputs[i][0][0]`.

    NOTE(review): `hitters`, `inputs` and `outputs` are not defined in any of
    the visible notebook cells; they must exist in the kernel before this is
    called, otherwise a NameError is raised.

    Args:
        id: Value compared against each hitter's `mlbId`.
        name: Label printed above the prediction; it is not used for lookup.

    Returns:
        None. When no hitter matches, a short "not found" message is printed
        instead of returning silently.
    """
    with torch.no_grad():
        for i, hitter in enumerate(hitters):
            if hitter.mlbId != id:
                continue

            # Add a leading batch axis so the network sees a batch of one.
            model_input = inputs[i].unsqueeze(0)
            expected = outputs[i]
            # Sequence length handed to the network: size of axis 1 of the batched input.
            seq_length = torch.tensor([model_input.shape[1]])
            twar, pwar, level, pa = network(model_input.to(device), seq_length.to(device))

            torch.set_printoptions(precision=3, sci_mode=False, linewidth=1000)
            print(name)
            print(F.softmax(twar.squeeze(0).squeeze(1), dim=1).cpu())
            #print(F.softmax(level.squeeze(0).squeeze(1), dim=1).cpu())
            print(expected[0][0].item())
            #print(expected[0][2].item())
            return

    # Reaching this point means the loop finished without a match.
    print(f"No hitter with mlbId {id} found ({name})")

Get Hitter Losses

In [None]:
# Spot-check a single player: the first argument is matched against `mlbId`,
# the second is only the label printed above the output.
# Swap which line is uncommented to inspect a different hitter.
#Check_Hitter(596146, "Max Kepler")
Check_Hitter(545361, "Mike Trout")
#Check_Hitter(518769, "Michael Harrington")
#Check_Hitter(542454, "Danny Santana")
#Check_Hitter(605381, "Levi Michael")