In [10]:
import pickle
import pandas as pd
import os

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm, trange
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import wandb
from torch.utils.data.sampler import SubsetRandomSampler
import pprint
import matplotlib.pyplot as plt
import torch

from sklearn.preprocessing import MinMaxScaler
import math
from torch.profiler import profile, record_function, ProfilerActivity

from operator import itemgetter
import operator
from random import randint
from rnn_classes import Dog, DogInput, Race, Races, GRUNet
from raceDB import build_dataset, build_pred_dataset
import importlib
import datetime
import model_predictions
from rnn_classes import smalll_lin_GRUNet, smalll_prelin_GRUNet
import val_model_external
import temperature_scaling_raceDB

In [11]:
def model_predictions(predDB:Races,model_location, net_size = 'small', new_hidden_state=None):
    torch_saved_dict = torch.load(model_location,map_location=torch.device('cpu'))
    predDB.to_cpu()

    if new_hidden_state:
        predDB.fill_hidden_states_from_dict(hidden_dict=new_hidden_state)
    else:
        predDB.fill_hidden_states_from_dict(hidden_dict=torch_saved_dict['db'])

    predDB.to_cpu()
    input_size = predDB.get_race_input([0,1])[0].full_input.shape[0]


    if net_size == 'small_lin':
        model = smalll_prelin_GRUNet(input_size,64, output='raw')
    else:
        model = GRUNet(input_size,64, output='softmax')

        
        
    model.load_state_dict(torch_saved_dict['model_state_dict'])
    model.eval()
    outs = model(predDB.racesDict.values()).to('cpu')

    outs_scaled = outs/(1/1.835)

    outs_sftmax = F.softmax(outs,1)
    adj_outs_sftmax = F.softmax(outs_scaled,1)

    predDB.to_cpu()
    x = predDB.racesDict.values()
    logts = [item for sublist in outs.tolist() for item in sublist]
    outs_list = [item for sublist in outs_sftmax.tolist() for item in sublist]
    outs_adj = [item for sublist in adj_outs_sftmax.tolist() for item in sublist]
    dogs = [dog.dog.dog_name for sublist in [r.dogs for r in x] for dog in sublist]
    box = [r for s in [[1,2,3,4,5,6,7,8] for r in x] for r in s]
    dogId = [dog.dog.dogid for sublist in [r.dogs for r in x] for dog in sublist]
    times = [r for s in [[r.race_time]*8 for r in x] for r in s]
    rid = [r for s in [[r.raceid]*8 for r in x] for r in s]
    track = [r for s in [[r.track_name]*8 for r in x] for r in s]
    rnum = [r for s in [[int(r.race_num)]*8 for r in x] for r in s]
    outs = pd.DataFrame(data = {"raceid":rid, "track":track, "racetime":times,"box":box, "dogs":dogs, "dogid":dogId, "conf":outs_list,'adj_out':outs_adj,"race_num":rnum })
    outs['pred_price'] = outs['conf'].apply(lambda x: 1/(x))
    outs['pred_price2'] = outs['adj_out'].apply(lambda x: 1/(x))
    outs = outs.sort_values(['track','race_num','box'])

    return outs

In [12]:
os.getcwd()
os.chdir(r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\DATA")
#dog_stats_file = open( 'new gru input 2023-01.npy', 'rb')
hidden_size = 64
raceDB = build_dataset(r'C:\Users\Nick\Documents\GitHub\grvmodel\Python\DATA\gru_inputs_new.npy', hidden_size ,state_filter=['NSW'])

['speed_avg_30', 'speed_max_30', 'split_speed_avg_30', 'split_speed_max_30', 'split_margin_avg_30', 'margin_avg_30', 'first_out_avg_30', 'post_change_avg_30', 'races_30', 'wins_30', 'speed_avg_5', 'speed_max_5', 'split_speed_avg_5', 'split_speed_max_5', 'split_margin_avg_5', 'margin_avg_5', 'first_out_avg_5', 'post_change_avg_5', 'races_5', 'wins_5', 'speed_avg_1', 'speed_max_1', 'split_speed_avg_1', 'split_speed_max_1', 'split_margin_avg_1', 'margin_avg_1', 'first_out_avg_1', 'post_change_avg_1', 'races_1', 'wins_1', 'speed_avgdist_10', 'speed_maxdist_10', 'split_speed_avgdist_10', 'split_speed_maxdist_10', 'split_margin_avgdist_10', 'margin_avgdist_10', 'first_out_avgdist_10', 'post_change_avgdist_10', 'racesdist_10', 'winsdist_10', 'speed_avgbox_10', 'speed_maxbox_10', 'split_speed_avgbox_10', 'split_speed_maxbox_10', 'split_margin_avgbox_10', 'margin_avgbox_10', 'first_out_avgbox_10', 'post_change_avgbox_10', 'racesbox_10', 'winsbox_10', 'speed_avgtrack_box_10', 'speed_maxtrack_box

  0%|          | 0/15026 [00:00<?, ?it/s]

  for obj in iterable:


  0%|          | 0/41693 [00:00<?, ?it/s]

  for obj in iterable:


number of races = 41693, number of unique dogs = 15026


In [13]:
date = datetime.datetime.strptime("2022-08-01", "%Y-%m-%d").date()
today = datetime.datetime.today().strftime('%Y-%m-%d')
raceDB.create_test_split_date(date)

Train examples 32452, Test examples 9241


In [14]:
def update_hidden_state(raceDB:Races,model_name):
    nsw_model_loaded = torch.load(model_name,map_location=torch.device('cuda:0'))
    raceDB.fill_hidden_states_from_dict(hidden_dict=nsw_model_loaded['db'])
    input_size = raceDB.get_race_input([0,1])[0].full_input.shape[0]
    print(input_size)
    raceDB.to_cuda()
    nsw_model_full = smalll_prelin_GRUNet(input_size=input_size,hidden_size=64).to('cuda:0')
    nsw_model_full.load_state_dict(nsw_model_loaded['model_state_dict'])

    races = raceDB.get_test_input(range(0,len(raceDB.test_race_ids)))

    X = races
    y = torch.stack([x.classes for x in races])
    data = (X,y)

    scaled_model = temperature_scaling_raceDB.ModelWithTemperature(nsw_model_full)
    scaled_model.set_temperature(data)

    nsw_model_full.eval()
    with torch.no_grad():
        last = 0
        for i in trange(500,len(raceDB.test_race_ids)-500,500):
            sub_races = races[last:i]
            try:
                nsw_model_full(sub_races)
            except:
                print('failed')
            last = i

        outs_full = nsw_model_full(races)
    raceDB.to_cuda()
    raceDB.create_hidden_states_dict()
    
    new_rdb = raceDB.hidden_states_dict_gru
    return new_rdb

In [15]:
def gen_predictions(model_name, raceDB, predDB):
    model = f"C:/Users/Nick/Documents/GitHub/grvmodel/Python/pytorch/New Model/savedmodel/{model_name}/{model_name}_450.pt"
    hidden_states = update_hidden_state(raceDB, model)
    outs = model_predictions(predDB, model, net_size='small_lin', new_hidden_state=hidden_states )
    return outs

In [16]:
os.chdir(r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\DATA")
today = datetime.datetime.today().strftime('%Y-%m-%d')
pred_df = pd.read_pickle(f'C:/Users/Nick/Documents/GitHub/grvmodel/Python/Database Updater/testing new outs {today}.npy')
pred_df
state = ['NSW']
pred_df_nsw = pred_df[pred_df['state'].isin(state)]
hidden_size = 64
predDB_nsw = build_pred_dataset(pred_df_nsw, hidden_size)

77


  0%|          | 0/140 [00:00<?, ?it/s]

  for obj in iterable:


  0%|          | 0/18 [00:00<?, ?it/s]

number of races = 18, number of unique dogs = 140


  for obj in iterable:


In [17]:
# model_name = "zany-wood-268"
# torch_saved_dict = torch.load(f"C:/Users/Nick/Documents/GitHub/grvmodel/Python/pytorch/New Model/savedmodel/{model_name}/{model_name}_450.pt")
# input_size = raceDB.get_race_input([0,1])[0].full_input.shape[0]
# model = smalll_prelin_GRUNet(input_size=input_size,hidden_size=64).to('cuda:0')
# model.load_state_dict(torch_saved_dict['model_state_dict'])
# importlib.reload(val_model_external)
# val_model_external.validate_model(model,raceDB,model_name)

In [None]:
test_ids = raceDB.test_race_ids

for i in test_ids:
    
    raceDB.racesDict[i].race_date

In [10]:
model_name = "neat-universe-4"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name

today = datetime.datetime.today().strftime('%Y-%m-%d %H_%M_%S')
outs.replace('Richmond Straight','Richmond',inplace = True)
outs.replace('Richmond Straight','Richmond',inplace = True)

outs_1 = outs # [outs['track'].isin(['Dubbo', 'Wentworth Park'])]
outs_1.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs_1.to_excel(f'./model_outputs/output {model_name} {state}-{today}.xlsx')

  0%|          | 0/41108 [00:00<?, ?it/s]

filled =2464
empty  =326400
0.0074924588887807725null_dog=0
686
tensor([5, 5, 3,  ..., 0, 2, 7], device='cuda:0')
Before temperature - NLL: 2.003, ECE: 0.107
tensor([5, 5, 3,  ..., 0, 2, 7], device='cuda:0')
Optimal temperature: 1.841
After temperature - NLL: 1.938, ECE: 0.013


100%|██████████| 16/16 [00:00<00:00, 39.46it/s]


  0%|          | 0/23 [00:00<?, ?it/s]

filled =165
empty  =19
0.8967391304347826null_dog=0


In [None]:
outs_1 = outs # [outs['track'].isin(['Dubbo', 'Wentworth Park'])]

In [18]:
model_name = "polar-paper-2"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name

today = datetime.datetime.today().strftime('%Y-%m-%d %H_%M_%S')
#outs.replace('Richmond Straight','Richmond',inplace = True)

outs_1 = outs#[outs['track'].isin(['Gosford', 'Grafton'])]
outs_1.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs_1.to_excel(f'./model_outputs/output {model_name} {state}-{today}.xlsx')

  0%|          | 0/41693 [00:00<?, ?it/s]

filled =273145
empty  =60399
0.8189174441752812null_dog=0
686
tensor([2, 5, 0,  ..., 6, 3, 2], device='cuda:0')
Before temperature - NLL: 2.061, ECE: 0.112
tensor([2, 5, 0,  ..., 6, 3, 2], device='cuda:0')
Optimal temperature: 1.959
After temperature - NLL: 1.987, ECE: 0.011


100%|██████████| 17/17 [00:00<00:00, 33.83it/s]


  0%|          | 0/18 [00:00<?, ?it/s]

filled =125
empty  =19
0.8680555555555556null_dog=0


In [9]:
model_name = "fallen-planet-281"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name

today = datetime.datetime.today().strftime('%Y-%m-%d %H_%M_%S')
#outs.replace('Richmond Straight','Richmond',inplace = True)

outs_1 = outs[outs['track'].isin(['Richmond', 'Wentworth Park'])]
outs_1.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs_1.to_excel(f'./model_outputs/output {model_name} {state}-{today}.xlsx')
outs_carress = outs

  0%|          | 0/41187 [00:00<?, ?it/s]

filled =272758
empty  =56738
0.8278036759171583null_dog=0
686


RuntimeError: Error(s) in loading state_dict for smalll_prelin_GRUNet:
	size mismatch for batchnorm.weight: copying a param with shape torch.Size([510]) from checkpoint, the shape in current model is torch.Size([686]).
	size mismatch for batchnorm.bias: copying a param with shape torch.Size([510]) from checkpoint, the shape in current model is torch.Size([686]).
	size mismatch for batchnorm.running_mean: copying a param with shape torch.Size([510]) from checkpoint, the shape in current model is torch.Size([686]).
	size mismatch for batchnorm.running_var: copying a param with shape torch.Size([510]) from checkpoint, the shape in current model is torch.Size([686]).
	size mismatch for gru1.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru2.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru3.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru4.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru5.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru6.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru7.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).
	size mismatch for gru8.weight_ih: copying a param with shape torch.Size([192, 510]) from checkpoint, the shape in current model is torch.Size([192, 686]).

In [8]:
model_name = "true-grass-258"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name

today = datetime.datetime.today().strftime('%Y-%m-%d %H_%M_%S')
outs.replace('Richmond Straight','Richmond',inplace = True)
outs_1 = outs[outs['track'].isin(['Healesville', 'Taree', 'Richmond', 'Wentworth Park'])]
outs_1.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs_1.to_excel(f'./model_outputs/output {model_name} {state}-{today}.xlsx')
outs_carress = outs

100%|██████████| 89344/89344 [00:33<00:00, 2695.36it/s]


filled =605112
empty  =109640
0.8466041368194842null_dog=0
510


100%|██████████| 32/32 [00:01<00:00, 21.02it/s]
100%|██████████| 80/80 [00:00<00:00, 2351.35it/s]


filled =541
empty  =99
0.8453125null_dog=0


In [29]:
model_name = "vibrant-energy-237"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.replace('Richmond Straight','Richmond',inplace = True)
today = datetime.datetime.today().strftime('%Y-%m-%d %H_%M_%S')
outs_2= outs[outs['track'].isin(['Dubbo','Wentworth Park'])]
outs_2.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs_2.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 89184/89184 [00:16<00:00, 5414.67it/s] 


filled =274383
empty  =439089
0.38457430705059203null_dog=0
510


100%|██████████| 32/32 [00:01<00:00, 23.61it/s]
100%|██████████| 57/57 [00:00<00:00, 2372.91it/s]


filled =411
empty  =45
0.9013157894736842null_dog=0


In [30]:
outs_combined = pd.concat([outs_1,outs_2])
outs_combined.to_pickle(f'./model_outputs/output {model_name} combined {today}.npy')
outs_combined.to_csv(f'./model_outputs/output combined {model_name} {state}-{today}.csv')

In [17]:
model_name = "upbeat-plant-202"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.replace('Richmond Straight','Richmond',inplace = True)
outs = outs[outs['track'].isin(['Richmond'])]
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 162850/162850 [00:30<00:00, 5360.34it/s]


filled =275245
empty  =1027555
0.21127187595947192null_dog=0
510


100%|██████████| 59/59 [00:04<00:00, 12.99it/s]
100%|██████████| 24/24 [00:00<00:00, 2397.89it/s]


filled =172
empty  =20
0.8958333333333334null_dog=0


In [None]:
model_name = "fearless-shadow-200"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.replace('Richmond Straight','Richmond',inplace = True)
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 161848/161848 [00:21<00:00, 7458.84it/s]


filled =273708
empty  =1021076
0.21139278829519054null_dog=0
510


100%|██████████| 57/57 [00:05<00:00, 10.50it/s]
100%|██████████| 42/42 [00:00<00:00, 2797.49it/s]


filled =293
empty  =43
0.8720238095238095null_dog=0


In [None]:
model_name = "effortless-sweep-1"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 161871/161871 [00:20<00:00, 7836.88it/s]


filled =273509
empty  =1021459
0.21120908006993222null_dog=0
512


100%|██████████| 57/57 [00:04<00:00, 11.86it/s]
100%|██████████| 18/18 [00:00<00:00, 2441.15it/s]


filled =126
empty  =18
0.875null_dog=0


In [None]:
model_name = "dulcet-waterfall-189"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 161871/161871 [00:20<00:00, 7843.07it/s]


filled =273509
empty  =1021459
0.21120908006993222null_dog=0
512


100%|██████████| 57/57 [00:05<00:00, 10.47it/s]
100%|██████████| 31/31 [00:00<00:00, 2581.01it/s]


filled =223
empty  =25
0.8991935483870968null_dog=0


In [None]:

model_name = "forthright-caress-156"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
outs['Model Name'] = model_name
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_carress = outs

100%|██████████| 161871/161871 [00:19<00:00, 8152.61it/s]


filled =273415
empty  =1021553
0.2111364914036486null_dog=0
512


RuntimeError: Error(s) in loading state_dict for smalll_lin_GRUNet:
	size mismatch for batchnorm.weight: copying a param with shape torch.Size([416]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for batchnorm.bias: copying a param with shape torch.Size([416]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for batchnorm.running_mean: copying a param with shape torch.Size([416]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for batchnorm.running_var: copying a param with shape torch.Size([416]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for fc0.weight: copying a param with shape torch.Size([416, 416]) from checkpoint, the shape in current model is torch.Size([512, 512]).
	size mismatch for fc0.bias: copying a param with shape torch.Size([416]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for gru1.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru2.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru3.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru4.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru5.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru6.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru7.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).
	size mismatch for gru8.weight_ih: copying a param with shape torch.Size([192, 416]) from checkpoint, the shape in current model is torch.Size([192, 512]).

In [None]:
model_name = "stilted-sweep-15"
outs = gen_predictions("stilted-sweep-15", raceDB, predDB_nsw)
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_stilted = outs

100%|██████████| 161170/161170 [00:22<00:00, 7209.10it/s]


filled =271623
empty  =1017737
0.2106649810758826null_dog=0


100%|██████████| 73/73 [00:06<00:00, 11.94it/s]
100%|██████████| 34/34 [00:00<00:00, 2426.43it/s]


filled =251
empty  =21
0.9227941176470589null_dog=0


In [None]:
model_name = "visionary-sweep-1"
outs = gen_predictions(model_name, raceDB, predDB_nsw)
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_vision = outs

100%|██████████| 161170/161170 [00:23<00:00, 6890.25it/s]


filled =271623
empty  =1017737
0.2106649810758826null_dog=0


100%|██████████| 73/73 [00:06<00:00, 11.67it/s]
100%|██████████| 34/34 [00:00<00:00, 2264.49it/s]


filled =251
empty  =21
0.9227941176470589null_dog=0


In [None]:
model_name = 'forthright-quiver-153'
outs = gen_predictions(model_name, raceDB, predDB_nsw)
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_quiver = outs

100%|██████████| 161170/161170 [00:22<00:00, 7077.21it/s]


filled =272214
empty  =1017146
0.21112334801762114null_dog=0


100%|██████████| 73/73 [00:04<00:00, 15.35it/s]
100%|██████████| 34/34 [00:00<00:00, 1698.54it/s]


filled =252
empty  =20
0.9264705882352942null_dog=0


In [None]:
model_name = 'prime-sweep-8'
outs = gen_predictions(model_name, raceDB, predDB_nsw)
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'./model_outputs/output {model_name} {today}.npy')
outs.to_csv(f'./model_outputs/output {model_name} {state}-{today}.csv')
outs_prime = outs

100%|██████████| 161170/161170 [00:22<00:00, 7161.66it/s]


filled =272214
empty  =1017146
0.21112334801762114null_dog=0


100%|██████████| 73/73 [00:05<00:00, 12.26it/s]
100%|██████████| 34/34 [00:00<00:00, 2264.17it/s]


filled =251
empty  =21
0.9227941176470589null_dog=0


In [None]:
all_preds = outs[['raceid','track','racetime','box','dogs','dogid']]
all_preds_merge = all_preds.merge(outs_stilted, on=['raceid','box'])

NameError: name 'outs_carress' is not defined

In [None]:
outs = gen_predictions("pleasant-sweep-3", raceDB, predDB_nsw)

In [None]:

nsw_model =  f"C:/Users/Nick/Documents/GitHub/grvmodel/Python/pytorch/New Model/savedmodel/{model_name}/{model_name}_450.pt"
hidden_states = update_hidden_state(raceDB, nsw_model)

  0%|          | 0/39546 [00:00<?, ?it/s]

filled =267422
empty  =48946
0.8452877661457543null_dog=0


In [None]:
model_name = "pleasant-sweep-3"
nsw_model =  f"C:/Users/Nick/Documents/GitHub/grvmodel/Python/pytorch/New Model/savedmodel/{model_name}/{model_name}_450.pt"
hidden_states_pleasent = update_hidden_state(raceDB, nsw_model)

  0%|          | 0/39546 [00:00<?, ?it/s]

filled =267422
empty  =48946
0.8452877661457543null_dog=0


In [None]:
outs_nsw = model_predictions(predDB_nsw, nsw_model, net_size='small_lin', new_hidden_state=hidden_states )
outs = outs_nsw

  0%|          | 0/34 [00:00<?, ?it/s]

filled =247
empty  =25
0.9080882352941176null_dog=0


In [None]:
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'output {model_name} {today}.npy')
outs.to_csv(f'output {model_name} {state}-{today}.csv')

In [None]:
nsw_model_loaded = torch.load(r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\pytorch\New Model\savedmodel\pleasant-sweep-3\pleasant-sweep-3_450.pt",map_location=torch.device('cuda:0'))
raceDB.fill_hidden_states_from_dict(hidden_dict=nsw_model_loaded['db'])
input_size = raceDB.get_race_input([0,1])[0].full_input.shape[0]
raceDB.to_cuda()
nsw_model_full = smalll_lin_GRUNet(input_size=input_size,hidden_size=64).to('cuda:0')
nsw_model_full.load_state_dict(nsw_model_loaded['model_state_dict'])


  0%|          | 0/39546 [00:00<?, ?it/s]

filled =267422
empty  =48946
0.8452877661457543null_dog=0


<All keys matched successfully>

In [None]:
races = raceDB.get_test_input(range(0,2542))

In [None]:
nsw_model_full.eval()
with torch.no_grad():
    outs_full = nsw_model_full(races)

In [None]:
raceDB.create_hidden_states_dict()
new_rdb = raceDB.hidden_states_dict


  0%|          | 0/34 [00:00<?, ?it/s]

filled =241
empty  =31
0.8860294117647058null_dog=0


In [None]:
state = "NZ"
pred_df_nz = pred_df[pred_df['state'].str.contains(state, na=False)]
hidden_size = 64
predDB_nz = build_pred_dataset(pred_df_nz, hidden_size)

  0%|          | 0/175 [00:00<?, ?it/s]

  for obj in iterable:


  0%|          | 0/24 [00:00<?, ?it/s]

number of races = 24, number of unique dogs = 175


  for obj in iterable:


In [None]:
(pred_df_nsw['prev_race']==-1).sum()

11

In [None]:
nsw_model =  r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\pytorch\New Model\savedmodel\pleasant-sweep-3\pleasant-sweep-3_450.pt"
nz_model = r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\pytorch\New Model\savedmodel\very long run RUN NZ\very long run RUN NZ_450.pt"

In [None]:
all_data = r'C:\Users\Nick\Documents\GitHub\grvmodel\Python\Database Updater\DATA\new_windows_gru_REAL.npy'

In [None]:
importlib.reload(model_predictions)

<module 'model_predictions' from 'c:\\Users\\Nick\\Documents\\GitHub\\grvmodel\\Python\\pytorch\\New Model\\LSTM\\model_predictions.py'>

In [None]:
test_R = predDB_nsw.get_race_input([1,2])[0]

In [None]:
len(test_R.race_track)

71

In [None]:
outs_nsw = model_predictions.model_predictions(predDB_nsw, nsw_model, net_size='small_lin')
#outs_nz =  model_predictions.model_predictions(predDB_nz , nz_model)

  0%|          | 0/34 [00:00<?, ?it/s]

filled =223
empty  =49
0.8198529411764706


In [None]:
outs = pd.concat([outs_nsw,outs_nz])

In [None]:
outs = outs_nsw

In [None]:
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_pickle(f'output {today}.npy')
outs.to_csv(f'output {state }-{today}.csv')


In [None]:
pred_df = pd.read_pickle(r'C:\Users\Nick\Documents\GitHub\grvmodel\Python\Database Updater\prediction_input 2023-01-26.npy')
pred_df.track_name.value_counts()

Mandurah         95
Shepparton       95
Sandown Park     94
Gunnedah         94
Christchurch     92
Casino           92
Traralgon        90
Warrnambool      89
Albion Park      84
Waikato          83
Hobart           80
Angle Park       77
Dapto            75
Mount Gambier    74
Name: track_name, dtype: int64

In [None]:
len(pred_df.trackOHE.iloc[0])

71

In [None]:
x = predDB.racesDict.values()

In [None]:
torch_saved_dict = torch.load(r"C:\Users\Nick\Documents\GitHub\grvmodel\Python\pytorch\New Model\savedmodel\long nsw 2000 RUN\long nsw 2000 RUN_450.pt",map_location=torch.device('cpu'))

In [None]:
# model_state_dict = torch_saved_dict['model_state_dict']

In [None]:
predDB.to_cpu()

In [None]:
predDB.fill_hidden_states_from_dict(hidden_dict=torch_saved_dict['db'])

  0%|          | 0/34 [00:00<?, ?it/s]

filled =218
empty  =54
0.8014705882352942


In [None]:
predDB.racesDict.values()

dict_values([<rnn_classes.Race object at 0x000001C2B9ABD310>, <rnn_classes.Race object at 0x000001C2B9ABDD00>, <rnn_classes.Race object at 0x000001C2B9ABD6D0>, <rnn_classes.Race object at 0x000001C2B9ABD280>, <rnn_classes.Race object at 0x000001C2B9A80370>, <rnn_classes.Race object at 0x000001C2B9ABD790>, <rnn_classes.Race object at 0x000001C2B9ABDC70>, <rnn_classes.Race object at 0x000001C2B9A806D0>, <rnn_classes.Race object at 0x000001C2B981BF70>, <rnn_classes.Race object at 0x000001C2B9ABD9D0>, <rnn_classes.Race object at 0x000001C2B9ABDE20>, <rnn_classes.Race object at 0x000001C2B9A80490>, <rnn_classes.Race object at 0x000001C2B9A9B4F0>, <rnn_classes.Race object at 0x000001C2B9ABDE50>, <rnn_classes.Race object at 0x000001C2B9ABDE80>, <rnn_classes.Race object at 0x000001C2B9A807C0>, <rnn_classes.Race object at 0x000001C2B9AB8F70>, <rnn_classes.Race object at 0x000001C2B9ABD820>, <rnn_classes.Race object at 0x000001C2B9ABD700>, <rnn_classes.Race object at 0x000001C2B9A80940>, <rnn_cl

In [None]:
for r in predDB.racesDict.values():
    print(len(r.full_input))

232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232
232


In [None]:
input_size = predDB.get_race_input([0,1])[0].full_input.shape[0]
model = GRUNet(input_size,64, output='softmax')
model.load_state_dict(torch_saved_dict['model_state_dict'])

<All keys matched successfully>

In [None]:
model.eval()
outs = model(predDB.racesDict.values()).to('cpu')
predDB.to_cpu()
outs_list = [item for sublist in outs.tolist() for item in sublist]

In [None]:
dogs = [dog.dog.dog_name for sublist in [r.dogs for r in x] for dog in sublist]
box = [r for s in [[1,2,3,4,5,6,7,8] for r in x] for r in s]
dogId = [dog.dog.dogid for sublist in [r.dogs for r in x] for dog in sublist]
times = [r for s in [[r.race_time]*8 for r in x] for r in s]
rid = [r for s in [[r.raceid]*8 for r in x] for r in s]
track = [r for s in [[r.track_name]*8 for r in x] for r in s]
rnum = [r for s in [[int(r.race_num)]*8 for r in x] for r in s]

In [None]:
outs = pd.DataFrame(data = {"raceid":rid, "track":track, "racetime":times,"box":box, "dogs":dogs, "dogid":dogId, "conf":outs_list,"race_num":rnum })

In [None]:
outs['pred_price'] = outs['conf'].apply(lambda x: 1/(x))

In [None]:
outs = outs.sort_values(['track','race_num','box'])

In [None]:
outs.head(50)

Unnamed: 0,raceid,track,racetime,box,dogs,dogid,conf,race_num,pred_price
0,865597968,Casino,10:36AM,1,CUTTIN' GRASS,672317468,0.088841,1,11.256009
1,865597968,Casino,10:36AM,2,CURIOUS CAZA,714918126,0.112474,1,8.890962
2,865597968,Casino,10:36AM,3,SEVEN SINNERS,669960288,0.206793,1,4.835751
3,865597968,Casino,10:36AM,4,SPRITELY GLENNIS,587949066,0.079609,1,12.561409
4,865597968,Casino,10:36AM,5,AMARTEY PARTY,654230428,0.000446,1,2244.363565
5,865597968,Casino,10:36AM,6,CLOVER JULIET,556974388,0.225925,1,4.426243
6,865597968,Casino,10:36AM,7,MR FRESH,714917101,0.117305,1,8.524808
7,865597968,Casino,10:36AM,8,INTEGRITY SON,555068999,0.168607,1,5.930941
8,865597969,Casino,10:56AM,1,ALL THUNDER,592709649,0.168463,2,5.936028
9,865597969,Casino,10:56AM,2,IN THE THIRTIES,471933615,0.000509,2,1964.320351


In [None]:
outs.track.value_counts()

Casino      96
Gunnedah    96
Dapto       80
Name: track, dtype: int64

In [None]:
len(pred_df.trackOHE.iloc[0])

71

In [None]:
today = datetime.datetime.today().strftime('%Y-%m-%d')
outs.to_csv(f'output {state }{today}.csv')

In [None]:
outs.to_pickle('output 2023-01-24.npy')

In [None]:
outs.race_num

0       1
1       1
2       1
3       1
4       1
       ..
187    12
188    12
189    12
190    12
191    12
Name: race_num, Length: 272, dtype: int64

In [None]:
os.getcwd()

'C:\\Users\\Nick\\Documents\\GitHub\\grvmodel\\Python\\DATA'

In [None]:
betfair_df = pd.read_pickle(r'.\betfair races\betfair races 2023-01-24.npy')

In [None]:
betfair_df

Unnamed: 0,market_id,track,dist,race_num,runner_id,runnner_name
0,1.209048130,Temora,R2,2,51312709,STRIVE POLDA
1,1.209048130,Temora,R2,2,41385415,TAYLEN BALE
2,1.209048130,Temora,R2,2,51918883,DOUBLE DIPPIN
3,1.209048130,Temora,R2,2,51129843,TAMMY TURBO
4,1.209048130,Temora,R2,2,51918884,QUEST FOR STARS
...,...,...,...,...,...,...
719,1.209048284,Gosford,R6,6,39791347,SOUND OF SILENCE
720,1.209048284,Gosford,R6,6,51918814,WE BACK
721,1.209048284,Gosford,R6,6,39979942,ZIPPING ALABAMA
722,1.209048284,Gosford,R6,6,41962222,NOT A SHAME


In [None]:
betfair_df.track.value_counts()

Temora           86
Murray Bridge    86
Geelong          86
Ipswich          83
Richmond         80
Launceston       76
Horsham          70
Warragul         45
Grafton          44
Gosford          44
Townsville       24
Name: track, dtype: int64

In [None]:
outs.race_num = pd.to_numeric(outs.race_num)

In [None]:
merged = pd.merge(outs, betfair_df, how='left', left_on=['race_num', 'dogs'], right_on=['race_num', 'runnner_name'])

In [None]:
# betfair_df_9 = betfair_df[betfair_df['race_num']==7]
# outs = outs[outs['race_num']==7]
# merged = pd.merge(outs, betfair_df_9, how='left', left_on=['dogs'], right_on=['runnner_name'])

In [None]:
merged.track_x.value_counts()

Casino      96
Gunnedah    96
Dapto       80
Name: track_x, dtype: int64

In [None]:
merged.track_y.value_counts()

Series([], Name: track_y, dtype: int64)

In [None]:
merged.market_id.unique()

array([nan], dtype=object)