In [1]:
import json
import time
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader

from dataloaders.valid_data_fields import valid_fields
from dataloaders.vanilla_slam_loaders import YearOpenSplitLoader, YearOpenSplitDataSet
from metrics.base_metrics import get_interval_success_rates
from models.loss_functions import weighted_loss, auto_weighted_loss
from models.tennis_lstm import TennisLSTM

# --- Reproducibility and run configuration ----------------------------------
torch.manual_seed(1)

batch_size = 1
save_model = True
SAVE_PATH = 'saved_models/tennis_lstm'

# Keyword arguments for the TennisLSTM constructor. The data splits are added
# to this same dict only after the model is built, so they are never passed
# to the constructor.
model_info = {
    'input_dim': len(valid_fields),
    'hidden_dim': 25,
    'batch_size': batch_size,
    'predict_mask': True,
    'num_layers': 1,
}
model = TennisLSTM(**model_info)

# --- Tournament splits --------------------------------------------------------
train_slam_years = [
    '2011-ausopen', '2011-frenchopen', '2011-usopen', '2011-wimbledon',
    '2012-ausopen', '2012-frenchopen', '2012-usopen', '2012-wimbledon',
    # The 2013, 2015 and 2016 seasons are currently left out of training.
]
val_slam_years = [
    '2017-ausopen', '2017-frenchopen', '2017-usopen', '2017-wimbledon',
]
test_slam_years = [
    '2014-ausopen', '2014-frenchopen', '2014-usopen', '2014-wimbledon',
]

# Record which tournaments the run trained and tested on.
model_info.update({
    'train_data': train_slam_years,
    'test_data': test_slam_years,
})

# --- Datasets and loaders -----------------------------------------------------
# Built in train / val / test order.
train_data_set, val_data_set, test_data_set = [
    YearOpenSplitDataSet(slam_years)
    for slam_years in (train_slam_years, val_slam_years, test_slam_years)
]

# Both the training and the validation loader shuffle their matches.
train_data_loader, val_data_loader = [
    DataLoader(data_set, batch_size=batch_size, shuffle=True, num_workers=4)
    for data_set in (train_data_set, val_data_set)
]

['2011-ausopen', '2011-frenchopen', '2011-usopen', '2011-wimbledon', '2012-ausopen', '2012-frenchopen', '2012-usopen', '2012-wimbledon']
dropped 1 matches
dropped 1 matches
dropped 0 matches
dropped 0 matches


  result = method(y)


dropped 2 matches
dropped 6 matches
dropped 1 matches
dropped 2 matches
['2017-ausopen', '2017-frenchopen', '2017-usopen', '2017-wimbledon']
dropped 4 matches
dropped 9 matches
dropped 165 matches
dropped 10 matches
['2014-ausopen', '2014-frenchopen', '2014-usopen', '2014-wimbledon']
dropped 0 matches
dropped 32 matches
dropped 6 matches
dropped 2 matches


In [5]:
# Loader over the held-out tournaments; shuffle=False keeps dataset order.
test_data_loader = DataLoader(
    test_data_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

In [2]:
# Train `model` on `train_data_loader` with plain SGD and a step-wise LR schedule.
#
# num_epochs = 1 is a single pass over the training data; raise it to train
# longer. The MultiStepLR milestones (epochs 20 and 30) only take effect once
# num_epochs exceeds them.
num_epochs = 1

# SGD is the optimizer that actually drives training. An Adam optimizer used
# to be constructed on the line above and was immediately overwritten by this
# assignment, so it never had any effect.
optimizer = optim.SGD(model.parameters(), lr=0.0001)
scheduler = MultiStepLR(optimizer, [20, 30], gamma=0.1, last_epoch=-1)

# NOTE(review): the device is only reported here. Neither the model nor the
# batches are moved to it, so training runs wherever the model already lives.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'torch device is {device}')
loss_fn = weighted_loss

losses = []  # per-batch loss values across all epochs, as Python floats
model.train()
for epoch in range(num_epochs):
    epoch_losses = []
    for X_train, prematch_probs, y_train in train_data_loader:
        X_train = X_train.float()
        y_train = y_train.float()
        prematch_probs = prematch_probs.float()

        # Clear stale gradients before the forward/backward pass. The
        # optimizer holds every model parameter, so one call is enough.
        optimizer.zero_grad()

        model.set_prematch_probs(prematch_probs)
        # The model also returns a mask; it is not used by this loss call.
        y_pred, mask = model(X_train)
        loss = loss_fn(y_train, y_pred)

        loss.backward()
        optimizer.step()

        # .item() returns a plain Python float and works on any device,
        # unlike .data.numpy(), which fails for CUDA tensors.
        epoch_losses.append(loss.item())

    # Advance the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()
    losses.extend(epoch_losses)
    print(f'epoch avg loss {np.mean(epoch_losses)}')

torch device is cpu
epoch avg loss 21.438184193602325


In [3]:
# Persist the trained weights together with the run metadata in `model_info`.
# Paths are timestamped so repeated runs never overwrite each other.
save_base = f'{SAVE_PATH}-{time.time()}'
info_save_path = f'{save_base}-info.json'
save_path = f'{save_base}.pt'

if save_model:
    # Create the target directory on first use so a fresh checkout can save.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), save_path)

    # Write the hyper-parameters and data splits next to the weights so the
    # checkpoint can be rebuilt and traced back to its training data.
    with open(info_save_path, 'w') as info_file:
        json.dump(model_info, info_file, indent=2)

    print(save_path)
else:
    print('save_model is False; checkpoint not written')


saved_models/tennis_lstm-1575242236.280942.pt


In [6]:
# Rebuild a fresh TennisLSTM, load the checkpoint written to `save_path`, and
# measure its interval success rates on the held-out test loader.
# NOTE(review): test_my_model is imported but not called in this cell.
from test_model import test_my_model

# Must match the architecture the checkpoint was trained with.
test_model_info = {'input_dim':len(valid_fields), 'hidden_dim':25, 
          'batch_size': batch_size, 
          'predict_mask':True, 
          'num_layers':1}

test_model = TennisLSTM(**test_model_info)
state_dict = torch.load(save_path)
print(state_dict.keys())
test_model.load_state_dict(state_dict)
# Print the attribute dict once; wrapping it in a second print() also echoed
# the inner call's None return value.
print(test_model.__dict__)

# Switch to inference mode before scoring; a freshly built module starts in
# training mode.
test_model.eval()

print("testing within loop")
interval_metrics = get_interval_success_rates(test_model, test_data_loader)


odict_keys(['lstm.weight_ih_l0', 'lstm.weight_hh_l0', 'lstm.bias_ih_l0', 'lstm.bias_hh_l0', 'linear.weight', 'linear.bias'])
{'training': True, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_backward_hooks': OrderedDict(), '_forward_hooks': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_modules': OrderedDict([('lstm', LSTM(47, 25)), ('linear', Linear(in_features=25, out_features=2, bias=True))]), 'input_dim': 47, 'hidden_dim': 25, 'batch_size': 1, 'num_layers': 1, 'predict_mask': True, 'prematch_probs': None}
None
testing within loop
testing on 812 examples
at 0.5 way through the match, predicted 585 out of 812 correctly
at 0.75 way through the match, predicted 602 out of 812 correctly
at 1 way through the match, predicted 611 out of 812 correctly
predicted 0.6527873576955384 of all points correctly


In [None]:
# Debug aid: dump the attribute dictionary of the reloaded model's LSTM layer.
print(vars(test_model.lstm))

In [None]:
# Evaluate a previously saved checkpoint (hard-coded path below) on a freshly
# built copy of the test split.
test_data_set_2 = YearOpenSplitDataSet(test_slam_years)
test_data_loader_2 = DataLoader(test_data_set_2, batch_size=batch_size, shuffle=False, num_workers=4)

# NOTE(review): test_my_model is imported but not called in this cell.
from test_model import test_my_model
# Must match the architecture the checkpoint was trained with.
test_model_info = {'input_dim':len(valid_fields), 'hidden_dim':25, 
          'batch_size': 1, 
          'predict_mask':True, 
          'num_layers':1}

test_model_2 = TennisLSTM(**test_model_info)
state_dict_2 = torch.load('saved_models/tennis_lstm-1575239969.151928.pt')
print(state_dict_2.keys())
test_model_2.load_state_dict(state_dict_2)
# Print the attribute dict once; a nested print() would also emit "None".
print(test_model_2.__dict__)

# Switch to inference mode before scoring; a freshly built module starts in
# training mode.
test_model_2.eval()

print("testing within loop")
interval_metrics = get_interval_success_rates(test_model_2, test_data_loader_2)

In [None]:
# Spot-check the reloaded model: print inputs, predictions and targets for the
# first twelve batches (i = 0..11) of the second test loader.
for i, data in enumerate(test_data_loader_2):
    # Unpack the three tensors of the batch and cast each one to float32.
    X_train, prematch_probs, y_train = (tensor.float() for tensor in data)

    test_model_2.set_prematch_probs(prematch_probs)
    y_pred, mask = test_model_2(X_train)

    print(X_train, y_pred, y_train)
    if i >= 11:
        break


In [None]:
# List every attribute name on the LSTM layer that mentions "hidden".
hidden_attr_names = (attr for attr in dir(test_model_2.lstm) if 'hidden' in attr)
for attr_name in hidden_attr_names:
    print(attr_name)

In [None]:
# Display the LSTM layer's parameter tensors as the cell output.
[param for param in test_model_2.lstm.parameters()]