In [72]:
import importlib

In [73]:
import Data_Prep
importlib.reload(Data_Prep)
from Data_Prep import Data_Prep, Hitter_IO
import Prep_Map
importlib.reload(Prep_Map)
import Output_Map
importlib.reload(Output_Map)

data_prep = Data_Prep(Prep_Map.base_prep_map, Output_Map.war_map)

In [74]:
hitter_io_list = data_prep.Generate_IO_Hitters("WHERE lastMLBSeason<? AND signingYear<? AND isHitter=?", (2025,2015,1))

In [75]:
from sklearn.model_selection import train_test_split # type: ignore
import torch
io_train : list[Hitter_IO]
io_test : list[Hitter_IO]
io_train, io_test = train_test_split(hitter_io_list, test_size=0.25, random_state=0)

train_lengths = torch.tensor([io.length for io in io_train])
test_lengths = torch.tensor([io.length for io in io_test])

x_train_padded = torch.nn.utils.rnn.pad_sequence([io.input for io in io_train])
x_test_padded = torch.nn.utils.rnn.pad_sequence([io.input for io in io_test])
y_prospect_train_padded = torch.nn.utils.rnn.pad_sequence([io.output for io in io_train])
y_prospect_test_padded = torch.nn.utils.rnn.pad_sequence([io.output for io in io_test])
y_stats_train_padded = torch.nn.utils.rnn.pad_sequence([io.stat_output for io in io_train])
y_stats_test_padded = torch.nn.utils.rnn.pad_sequence([io.stat_output for io in io_test])
y_position_train_padded = torch.nn.utils.rnn.pad_sequence([io.position_output for io in io_train])
y_position_test_padded = torch.nn.utils.rnn.pad_sequence([io.position_output for io in io_test])
mask_prospect_train_padded = torch.nn.utils.rnn.pad_sequence([io.prospect_mask for io in io_train])
mask_prospect_test_padded = torch.nn.utils.rnn.pad_sequence([io.prospect_mask for io in io_test])
mask_level_train_padded = torch.nn.utils.rnn.pad_sequence([io.stat_level_mask for io in io_train])
mask_level_test_padded = torch.nn.utils.rnn.pad_sequence([io.stat_level_mask for io in io_test])

mask_year_train_padded = torch.nn.utils.rnn.pad_sequence([io.year_level_mask for io in io_train])
mask_year_test_padded = torch.nn.utils.rnn.pad_sequence([io.year_level_mask for io in io_test])
y_year_stats_train_padded = torch.nn.utils.rnn.pad_sequence([io.year_stat_output for io in io_train])
y_year_stats_test_padded = torch.nn.utils.rnn.pad_sequence([io.year_stat_output for io in io_test])
y_year_position_train_padded = torch.nn.utils.rnn.pad_sequence([io.year_pos_output for io in io_train])
y_year_position_test_padded = torch.nn.utils.rnn.pad_sequence([io.year_pos_output for io in io_test])

In [76]:
import Hitter_Dataset
importlib.reload(Hitter_Dataset)
from Hitter_Dataset import Hitter_Dataset

train_hitters_dataset = Hitter_Dataset(x_train_padded, train_lengths, y_prospect_train_padded, y_stats_train_padded, y_position_train_padded, mask_prospect_train_padded, mask_level_train_padded, mask_year_train_padded, y_year_stats_train_padded, y_year_position_train_padded)
test_hitters_dataset = Hitter_Dataset(x_test_padded, test_lengths, y_prospect_test_padded, y_stats_test_padded, y_position_test_padded, mask_prospect_test_padded, mask_level_test_padded, mask_year_test_padded, y_year_stats_test_padded, y_year_position_test_padded)

Train Model

In [77]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
import Hitter_Model
importlib.reload(Hitter_Model)
from Hitter_Model import RNN_Model, Classification_Loss, Stats_L1_Loss
from torch.optim import lr_scheduler
import Model_Train
importlib.reload(Model_Train)
from Model_Train import trainAndGraph
from Constants import device

batch_size = 200
hitting_mutators = data_prep.Generate_Hitting_Mutators(batch_size, Hitter_IO.GetMaxLength(hitter_io_list))

num_layers = 3
hidden_size = 35
network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map)
network = network.to(device)

print("Num. Parameters:", count_parameters(network))

optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=40, cooldown=20, verbose=False)
loss_function = Classification_Loss
loss_function_stats = Stats_L1_Loss
loss_function_position = Hitter_Model.Position_Classification_Loss

num_epochs = 1000
training_generator = torch.utils.data.DataLoader(train_hitters_dataset, batch_size=batch_size, shuffle=True)
testing_generator = torch.utils.data.DataLoader(test_hitters_dataset, batch_size=batch_size, shuffle=False)

trainAndGraph(network, training_generator, testing_generator, len(train_hitters_dataset), len(test_hitters_dataset), loss_function, loss_function_stats, loss_function_position, optimizer, scheduler, num_epochs, logging_interval=1, early_stopping_cutoff=40, should_output=True)

                                                                              

Num. Parameters: 38893
Epoch [1/1000], Train Loss: 6.3067, Test Loss: 6.4469
Epoch [2/1000], Train Loss: 6.3021, Test Loss: 6.4436
Epoch [3/1000], Train Loss: 6.2976, Test Loss: 6.4415
Epoch [4/1000], Train Loss: 6.2935, Test Loss: 6.4389
Epoch [5/1000], Train Loss: 6.2903, Test Loss: 6.4366
Epoch [6/1000], Train Loss: 6.2872, Test Loss: 6.4354
Epoch [7/1000], Train Loss: 6.2847, Test Loss: 6.4346
Epoch [8/1000], Train Loss: 6.2827, Test Loss: 6.4344
Epoch [9/1000], Train Loss: 6.2809, Test Loss: 6.4335
Epoch [10/1000], Train Loss: 6.2794, Test Loss: 6.4338
Epoch [11/1000], Train Loss: 6.2784, Test Loss: 6.4336
Epoch [12/1000], Train Loss: 6.2773, Test Loss: 6.4341
Epoch [13/1000], Train Loss: 6.2764, Test Loss: 6.4341
Epoch [14/1000], Train Loss: 6.2758, Test Loss: 6.4342
Epoch [15/1000], Train Loss: 6.2752, Test Loss: 6.4340
Epoch [16/1000], Train Loss: 6.2748, Test Loss: 6.4339
Epoch [17/1000], Train Loss: 6.2745, Test Loss: 6.4341
Epoch [18/1000], Train Loss: 6.2742, Test Loss: 6.4

Testing

In [None]:
import torch.nn as nn
import sqlite3
from tqdm import tqdm
test_db = sqlite3.connect('test.db')

In [None]:
network.load_state_dict(torch.load("no_name.pt"))
network.eval()
network = network.to(device)

In [None]:
cursor = test_db.cursor()
cursor.execute("DELETE FROM StatPred")
cursor.execute("DELETE FROM StatAct")
test_db.commit()
cursor = test_db

softmax = nn.Softmax(dim=0)

for io in tqdm(hitter_io_list):
    hitter = io.hitter
    input = io.input.unsqueeze(0)  
    output = io.output
    l = torch.tensor([input.shape[1]])
    twar, pwar, level, pa, stats, positions, year_stats, year_positions = network(input.to(device), l.to(device))
    
    for i in range(io.stat_output.size(0)):
        position_probs = softmax(positions.squeeze(0)[i,:9])
        year_position_probs = softmax(year_positions.squeeze(0)[i,:9])
        test_db.execute("INSERT INTO StatPred VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", (hitter.mlbId, 1, 0, io.dates[i,1].item(), io.dates[i,2].item()) + tuple(stats.squeeze(0)[i,:11].tolist()) + tuple(position_probs.tolist()))
        test_db.execute("INSERT INTO StatAct VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", (hitter.mlbId, 1, 0, io.dates[i,1].item(), io.dates[i,2].item()) + tuple(io.stat_output[i,:].tolist()) + tuple(io.position_output[i,:].tolist()))
        test_db.execute("INSERT INTO StatPred VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", (hitter.mlbId, 1, 1, io.dates[i,1].item(), io.dates[i,2].item()) + tuple(year_stats.squeeze(0)[i,:11].tolist()) + tuple(year_position_probs.tolist()))
        test_db.execute("INSERT INTO StatAct VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", (hitter.mlbId, 1, 1, io.dates[i,1].item(), io.dates[i,2].item()) + tuple(io.year_stat_output[i,:].tolist()) + tuple(io.year_pos_output[i,:].tolist()))
test_db.commit()

100%|██████████| 8916/8916 [02:20<00:00, 63.46it/s]


In [None]:
# import Hitter_Model
# importlib.reload(Hitter_Model)
# from Hitter_Model import RNN_Model, Classification_Loss, Stats_L1_Loss

# network = RNN_Model(x_train_padded[0].shape[1], num_layers, hidden_size, hitting_mutators, output_map=data_prep.output_map)
# network.load_state_dict(torch.load("no_name.pt"))
# network.eval()
# network = network.to(device)

In [None]:
import torch.nn.functional as F

def Check_Hitter(id : int, name : str):
    with torch.no_grad():
        for io in hitter_io_list:
            hitter = io.hitter
            if (hitter.mlbId == id):
                input = io.input.unsqueeze(0)  
                output = io.output
                l = torch.tensor([input.shape[1]])
                twar, pwar, level, pa, stats, positions = network(input.to(device), l.to(device))
                
                pos_loss = Hitter_Model.Position_Classification_Loss(positions, 
                                        io.position_output.unsqueeze(0).to(device), 
                                        io.stat_level_mask.unsqueeze(0).to(device))
                torch.set_printoptions(precision=3, sci_mode=False, linewidth=1000, threshold=500000)
                print(name)
                print(positions.squeeze(0)[:,:9])
                #print(F.softmax(twar.squeeze(1), dim=1).cpu())
                #print(F.softmax(level.squeeze(0).squeeze(1), dim=1).cpu())
                #print(output[0][0].item())
                #print(output[0][2].item())
                return

Get Hitter Losses

In [None]:
#Check_Hitter(596146, "Max Kepler")
Check_Hitter(545361, "Mike Trout")
#Check_Hitter(518769, "Michael Harrington")
#Check_Hitter(542454, "Danny Santana")
#Check_Hitter(605381, "Levi Michael")

ValueError: too many values to unpack (expected 6)