In [None]:
# Each project module is imported and then immediately passed to importlib.reload so
# that edits made to the corresponding .py file are picked up when this cell is
# re-run, without restarting the kernel.
import importlib
import Data_Prep
importlib.reload(Data_Prep)
# NOTE: this rebinds the name `Data_Prep` from the module to the callable of the same
# name exported by it (used below as a constructor). Re-running the cell still works
# because `import Data_Prep` above rebinds the name to the module first.
from Data_Prep import Data_Prep, Player_IO
import Prep_Map
importlib.reload(Prep_Map)
import Output_Map
importlib.reload(Output_Map)
import Player_Dataset
importlib.reload(Player_Dataset)
# Same module -> callable rebinding as for `Data_Prep` above.
from Player_Dataset import Player_Dataset, Create_Test_Train_Datasets
import Player_Model
importlib.reload(Player_Model)
from Player_Model import RNN_Model
import Model_Train
importlib.reload(Model_Train)
from Model_Train import trainAndGraph
# Constants is not reloaded here, so re-running this cell does not re-execute it;
# edits to Constants need an explicit reload or a kernel restart.
from Constants import device, DEFAULT_NUM_LAYERS_HITTER, DEFAULT_HIDDEN_SIZE_HITTER

In [None]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Walks ``model.parameters()`` and adds up ``numel()`` for every parameter
    whose ``requires_grad`` flag is set; frozen parameters are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

In [None]:
# Training hyperparameters passed to the mutator generation and trainAndGraph
# calls in the training cells below.
batch_size, num_epochs = 800, 61

In [None]:
import torch

# Tensor display settings for the notebook: two decimals, no scientific notation,
# wide lines, and 20 items shown at each edge of a summarised dimension.
# (A preceding call that set precision=8 was removed: both of the options it set
# were overridden by this call, so it had no lasting effect.)
torch.set_printoptions(
    precision=2,
    sci_mode=False,
    linewidth=500,
    edgeitems=20,
)

Base Pretraining

In [None]:
# Model architecture settings for the hitter networks, taken from the project-wide
# defaults imported from Constants.
num_layers, hidden_size = DEFAULT_NUM_LAYERS_HITTER, DEFAULT_HIDDEN_SIZE_HITTER

In [None]:
# Build the input/output tensors for the base feature set.
data_prep = Data_Prep(Prep_Map.base_prep_map, Output_Map.base_output_map)

# Parameterised WHERE clause and its bound values selecting which hitters to load.
hitter_query = "WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?"
hitter_query_params = (2025, 2015, 1)
hitter_io_list = data_prep.Generate_IO_Hitters(hitter_query, hitter_query_params, use_cutoff=True)

# NOTE(review): 0.25 and 0 are presumably the test fraction and the split seed;
# confirm against Create_Test_Train_Datasets.
train_dataset, test_dataset = Create_Test_Train_Datasets(hitter_io_list, 0.25, 0)

In [None]:
# Reload the model and training modules so that edits to them take effect when only
# this cell is re-run.
importlib.reload(Player_Model)
from Player_Model import RNN_Model
importlib.reload(Model_Train)
from Model_Train import trainAndGraph

max_sequence_length = Player_IO.GetMaxLength(hitter_io_list)
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, max_sequence_length)

# NOTE(review): the third argument is a literal 30, whereas the stats-only training
# cell passes `hidden_size` in the same position. Confirm this is an intentional
# experimental setting and not a leftover.
network = RNN_Model(
    train_dataset.get_input_size(),
    num_layers,
    30,
    hitting_mutators,
    data_prep=data_prep,
    is_hitter=True,
).to(device)

# Keyword options for the training run, collected in one place.
base_training_options = dict(
    num_epochs=num_epochs,
    logging_interval=10,
    early_stopping_cutoff=2000,
    should_output=True,
    model_name="Models/default_experimental_hitter",
    save_last=True,
    elements_to_save=[0],
)
best_losses = trainAndGraph(network, train_dataset, test_dataset, batch_size, **base_training_options)

print(best_losses)

Testing

In [None]:
from Player_Model import Stats_Loss

# Normalisation statistics read from data_prep by their literal attribute names.
# getattr with a string is used because the names start with a double underscore.
# The hitlvlstat_* tensors are currently unused: per-level stats are printed below in
# whatever units the model/dataset provide (no de-normalisation is applied to them).
hitlvlstat_means : torch.Tensor = getattr(data_prep, "__hitlvlstat_means").to(device)
hitlvlstat_devs : torch.Tensor = getattr(data_prep, "__hitlvlstat_devs").to(device)
hitpt_means : torch.Tensor = getattr(data_prep, "__hitlvlpt_means").to(device)
hitpt_devs : torch.Tensor = getattr(data_prep, "__hitlvlpt_devs").to(device)

torch.set_printoptions(
    precision=2,
    sci_mode=False,
    linewidth=500,
    edgeitems=200,
)

# What to print for the inspected player.
print_pa = True
print_stats = True
print_stats_level = 0

# Player and time steps to inspect.
# Other player ids listed in the earlier version of this cell:
# 596146, 571740, 593934, 608336.
target_mlb_id = 621439
input_debug_step = 50   # row of the input tensor to dump
inspect_step = 30       # row of the outputs to compare against the actual values

# Field labels, in column order, for the two printed tables.
pt_level_names = ("MLB", "AAA", "AA", "A+", "A", "A-", "Rk", "DSL")
stat_names = ("1B", "2B", "3B", "HR", "BB", "HBP", "K", "SB", "CS", "BSR", "DRAA", "PF")


def _format_row(label, date_row, names, values, value_format):
    """Format one 'label date : NAME=value,...' line.

    label        -- line prefix, e.g. "Act:  " or "Pred: "
    date_row     -- tensor row whose elements 1 and 2 are printed as "{:4d}-{}"
    names        -- field labels; values[k] is printed next to names[k]
    values       -- tensor indexable by position, one scalar per name
    value_format -- format spec applied to every value, e.g. "3.0f"
    """
    fields = ",".join(
        f"{name}={values[k].item():{value_format}}" for k, name in enumerate(names)
    )
    return f"{label}{date_row[1].item():4d}-{date_row[2].item()} : {fields}"


# Run inference in eval mode and restore the previous mode afterwards, so the printed
# predictions do not depend on which mode the training loop left the network in.
was_training = network.training
network.eval()
try:
    for hitter in hitter_io_list:
        if hitter.player.mlbId != target_mlb_id:
            continue

        data = hitter.input.to(device)
        length = torch.tensor([hitter.length]).to(device)
        # No gradients are needed for inspection.
        with torch.no_grad():
            output_war, output_level, output_pa, output_yearStats, output_yearPos, output_mlbValue, output_pt = network(data.unsqueeze(0), length)

        # Guard the debug dump: indexing a shorter sequence would raise IndexError.
        if data.size(0) > input_debug_step:
            print(data[input_debug_step])
        dates = hitter.dates

        # Clone before the in-place de-normalisation below. squeeze() returns a view
        # and .to(device) returns the same tensor when it is already on that device,
        # so without the clone the writes could land in hitter.pt_year_output and
        # compound every time this cell is re-run.
        output_pt = output_pt.squeeze(0).detach().clone()
        output_yearStats = output_yearStats.squeeze(0)
        actual_pt = hitter.pt_year_output.squeeze(-1).to(device).clone()

        actual_yearStats = hitter.year_stat_output.to(device)
        output_yearStats = output_yearStats.reshape(actual_yearStats.shape)

        loss = Stats_Loss(output_yearStats.unsqueeze(0), actual_yearStats.unsqueeze(0), hitter.year_level_mask.unsqueeze(0).to(device))

        # Only one time step is inspected; skip players whose sequence is too short.
        i = inspect_step
        if i >= dates.size(0):
            continue

        # Undo the normalisation of the playing-time values for the inspected step.
        for j in range(len(pt_level_names)):
            output_pt[i,j] = (output_pt[i,j] * hitpt_devs) + hitpt_means
            actual_pt[i,j] = (actual_pt[i,j] * hitpt_devs) + hitpt_means

        print(f"Mask: {hitter.stat_level_mask[i,print_stats_level].item()}")
        if print_pa:
            print(_format_row("Act:  ", dates[i], pt_level_names, actual_pt[i], "3.0f"))
            print(_format_row("Pred: ", dates[i], pt_level_names, output_pt[i], "3.0f") + "\n")

        if print_stats:
            a = actual_yearStats[i,print_stats_level]
            p = output_yearStats[i,print_stats_level]
            print(_format_row("Act:  ", dates[i], stat_names, a, "2.2f"))
            print(_format_row("Pred: ", dates[i], stat_names, p, "2.2f") + "\n")
finally:
    network.train(was_training)

Stats-Only Pretraining

In [None]:
# Rebuild the data with the stats-only input map; the output map is unchanged.
# This rebinds data_prep, hitter_io_list and both datasets used by earlier cells.
data_prep = Data_Prep(Prep_Map.statsonly_prep_map, Output_Map.base_output_map)

statsonly_query = "WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?"
statsonly_query_params = (2025, 2015, 1)
hitter_io_list = data_prep.Generate_IO_Hitters(
    statsonly_query,
    statsonly_query_params,
    use_cutoff=True,
)

# NOTE(review): 0.25 and 0 are presumably the test fraction and the split seed;
# confirm against Create_Test_Train_Datasets.
train_dataset, test_dataset = Create_Test_Train_Datasets(hitter_io_list, 0.25, 0)

In [None]:
# Mutators are generated for the configured batch size and the longest sequence in
# the stats-only hitter list.
longest_sequence = Player_IO.GetMaxLength(hitter_io_list)
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, longest_sequence)

# Stats-only hitter model using the default layer count and hidden size.
input_size = train_dataset.get_input_size()
network = RNN_Model(
    input_size,
    num_layers,
    hidden_size,
    hitting_mutators,
    data_prep=data_prep,
    is_hitter=True,
).to(device)

# Keyword options for the training run, collected in one place.
statsonly_training_options = {
    "num_epochs": num_epochs,
    "logging_interval": 10,
    "early_stopping_cutoff": 2000,
    "should_output": True,
    "model_name": "Models/default_statsonly_hitter",
    "save_last": True,
    "elements_to_save": [0],
}
best_losses = trainAndGraph(
    network,
    train_dataset,
    test_dataset,
    batch_size,
    **statsonly_training_options,
)

print(best_losses)