In [1]:
import numpy as np
import pandas as pd
import glob
import os

import torch
import torch.utils.data as data_utils
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
# from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from random import shuffle


In [2]:
# load and process data
# Builds two parallel lists from every CSV found under `odi_csv/`:
#   innings2 - one DataFrame per match (three value columns, see below)
#   target   - 1 if the team read from column 3 equals the match 'winner', else 0
# NOTE(review): the integer column positions used below presumably follow the
# Cricsheet ball-by-ball CSV layout - confirm against the actual files.
path = r'odi_csv/'
all_files = glob.glob(path + "/*.csv")
# In-place shuffle; no random seed is set in this cell, so the file order (and
# therefore the positional 80/20 split done later) changes from run to run.
shuffle(all_files)
innings2 = []
target=[]
for filename in all_files:
    # Pass 1: read only the first column and count the rows labelled 'info';
    # that count is used below as the length of the metadata section.
    # NOTE(review): `.loc['info', :]` looks 'info' up in the index, so this
    # relies on how pandas indexes this ragged file - confirm.
    df = pd.read_csv(filename,usecols=[0])
    skips = df.loc[ 'info' , : ].shape[0]

    # Pass 2: re-read just the metadata rows (first line skipped, no header),
    # drop column 0 and index the remainder by column 1 (presumably the info key).
    df = pd.read_csv(filename,nrows=skips,skiprows=1,header=None)
    df = df.drop(columns=0).set_index(df.columns[1])
    # Stays None when the file has no 'winner' row; such matches get label 0 below.
    winteam=None
    if 'winner' in df.index:
        winteam = df.loc['winner',:].values[0]
    
    # Pass 3: read everything after the metadata section (no header).
    df = pd.read_csv(filename,skiprows=skips+1,header=None)
    
    # Keep columns 1, 2, 7 and 8, index by column 1 and drop the rows whose
    # index label is 1, leaving three value columns per row.
    # NOTE(review): column 1 is presumably the innings number, so this keeps
    # everything except the first innings - TODO confirm.
    df2 = df[df.columns[[1,2,7,8]]].set_index(df.columns[1]).drop(index=1)
    if df2.shape[0]>0:
        innings2.append(df2)
        # Column 3 of the first remaining row (presumably the batting team).
        i2team = df[df.columns[[1,3]]].set_index(df.columns[1]).drop(index=1).values[0,0]
        # Label 1 when that team is the recorded winner, otherwise 0.
        if (i2team==winteam):
            target.append(1)
        else:
            target.append(0)


In [3]:
# torch tensor processing
# One tensor per match, built straight from the DataFrame values.
features = [torch.tensor(frame.values) for frame in innings2]

# Pad every match to the length of the longest one (pad value -1) so the
# whole dataset becomes a single (matches, steps, features) tensor.
features = pad_sequence(features, batch_first=True, padding_value=-1)
targets = torch.tensor(target)

# Positional split: the first 80% of matches train, the remainder test.
split = int(len(features) * 0.8)
X_train, X_test = features[:split], features[split:]
y_train, y_test = targets[:split], targets[split:]
print(X_train.size(), X_test.size(), sep="\n")

torch.Size([1244, 321, 3])
torch.Size([312, 321, 3])


In [4]:
# LSTM model
class matchRNN(nn.Module):
    """LSTM unrolled step by step that emits a score vector at every time step.

    Args:
        insize: number of input features per time step.
        hsize: size of the LSTM hidden and cell state.
        outsize: number of output scores per time step.
    """

    def __init__(self,insize,hsize,outsize):
        super(matchRNN,self).__init__()

        self.insize=insize
        self.hsize=hsize
        self.outsize = outsize

        # lstm cell
        self.lstm_cell = nn.LSTMCell(input_size=insize, hidden_size=hsize)
        self.fc_out = nn.Linear(in_features=hsize, out_features=outsize)
        # Not applied in forward(), which returns the raw linear outputs.
        # Kept so the attribute stays available to existing code.
        self.softmax = nn.Softmax(dim=1)

    def forward(self,feat):
        """Run the LSTM cell over every time step of ``feat``.

        Args:
            feat: tensor of shape (batch, steps, ...); each step is flattened
                to (batch, -1) and cast to float32 before entering the cell.

        Returns:
            Tensor of shape (batch, steps, outsize) holding the ``fc_out``
            projection of the hidden state after each step, on the same
            device as ``feat``.
        """
        batch_size = feat.size(0)
        num_steps = feat.size(1)
        # Allocate the recurrent state on the input's device so the module
        # also works after being moved off the CPU.
        device = feat.device
        hidden_state = torch.zeros((batch_size, self.hsize), device=device)
        cell_state = torch.zeros((batch_size, self.hsize), device=device)

        step_outputs = []
        for t in range(num_steps):
            step_input = feat[:,t,:].reshape(batch_size,-1).float()
            hidden_state, cell_state = self.lstm_cell(step_input, (hidden_state, cell_state))
            step_outputs.append(self.fc_out(hidden_state))

        # torch.stack rejects an empty list; keep the (batch, 0, outsize) result
        # for zero-length sequences.
        if not step_outputs:
            return torch.empty((batch_size, 0, self.outsize), device=device)
        return torch.stack(step_outputs, dim=1)

In [5]:
# Copy each match label across every time step: (matches,) -> (matches, steps),
# so the per-step outputs can all be scored against the match result.
ytrain_tiled = y_train.unsqueeze(1).repeat(1, X_train.size(1))
ytrain_tiled.shape

torch.Size([1244, 321])

In [6]:
def _batched_accuracy(model, X, y, j, batch_size):
    """Return the accuracy of ``model`` at time step ``j`` over whole batches of X/y."""
    num_batches = X.size(0) // batch_size
    # Only whole batches are scored; a trailing partial batch is ignored.
    total = num_batches * batch_size
    if total == 0:
        # Fewer samples than one batch: nothing to score.
        return float('nan')

    corr = 0
    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = start_idx + batch_size
        # Scores at the j-th time step (1-based), shape (batch, classes).
        req_op = model(X[start_idx:end_idx])[:, j-1]
        _, maxidx = torch.max(req_op, 1)
        corr += (maxidx == y[start_idx:end_idx]).sum().item()
    return corr / total


def evalfunc(model,j):
    """Print train and test accuracy of ``model`` using its output at step ``j``.

    Reads the notebook globals ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``batch_size``. The model is switched to eval mode and left there; the
    caller is responsible for calling ``model.train()`` again.

    Args:
        model: module mapping a (batch, steps, features) tensor to
            (batch, steps, classes) scores.
        j: 1-based time step whose output is used as the prediction.
    """
    model.eval()
    # No gradients are needed for evaluation; skipping autograd avoids building
    # a graph through every unrolled time step of every batch.
    with torch.no_grad():
        train_acc = _batched_accuracy(model, X_train, y_train, j, batch_size)
        test_acc = _batched_accuracy(model, X_test, y_test, j, batch_size)
    print('j = {}, Train Acc = {}, Test Acc = {}'.format(j,train_acc,test_acc))

In [7]:
### training parameters

insize=X_train.size(2)
hsize=64
outsize=2    #binary classification
model = matchRNN(insize,hsize,outsize)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
batch_size=16
# Whole batches only; a trailing partial batch is never trained on.
num_batches = X_train.size(0) // batch_size

# Time steps before this index are left out of the loss; every later step up
# to the end of the padded sequence is scored against the match label.
loss_start = 60
model_path = './models/cric_prediction_all_output_batch.pth'

# train iterations
for epoch in range(121):  # optimum 100-150 epochs
    epoch_loss=0
    model.train()   # evalfunc() leaves the model in eval mode
    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = start_idx + batch_size
        model.zero_grad()
        outputs = model(X_train[start_idx:end_idx])
        # Flatten (batch, steps, classes) -> (batch*steps, classes) and the
        # tiled labels -> (batch*steps,) as CrossEntropyLoss expects.
        loss = loss_function(outputs[:,loss_start:,:].reshape(-1,outsize),
                             ytrain_tiled[start_idx:end_idx,loss_start:].reshape(-1))
        loss.backward()
        epoch_loss+=loss.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

# Create the output directory first so a missing folder cannot make the save
# fail after the whole training run has finished.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)


Epoch = 0, loss = 53.343428790569305
j = 321, Train Acc = 0.5081168831168831, Test Acc = 0.4934210526315789
j = 250, Train Acc = 0.5673701298701299, Test Acc = 0.5296052631578947
j = 200, Train Acc = 0.5397727272727273, Test Acc = 0.5164473684210527
Epoch = 1, loss = 53.15908634662628
Epoch = 2, loss = 52.940744280815125
Epoch = 3, loss = 53.07577192783356
Epoch = 4, loss = 51.74091190099716
Epoch = 5, loss = 49.93058007955551
Epoch = 6, loss = 48.537143528461456
Epoch = 7, loss = 47.5353062748909
Epoch = 8, loss = 48.90459868311882
Epoch = 9, loss = 48.8341081738472
Epoch = 10, loss = 47.947246730327606
j = 321, Train Acc = 0.827922077922078, Test Acc = 0.8256578947368421
j = 250, Train Acc = 0.6436688311688312, Test Acc = 0.6118421052631579
j = 200, Train Acc = 0.5641233766233766, Test Acc = 0.5986842105263158
Epoch = 11, loss = 48.34395134449005
Epoch = 12, loss = 48.712593257427216
Epoch = 13, loss = 47.57472413778305
Epoch = 14, loss = 53.58564209938049
Epoch = 15, loss = 53.14413

In [9]:
# Continue training the existing `model` / `optimizer` for epochs 121-199.
# Time steps before index 60 are left out of the loss; every later step up to
# the end of the padded sequence is scored against the match label.
for epoch in range(121,200):
    epoch_loss=0
    model.train()   # evalfunc() leaves the model in eval mode
    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = start_idx + batch_size
        model.zero_grad()
        outputs = model(X_train[start_idx:end_idx])
        # Flatten (batch, steps, classes) -> (batch*steps, classes) and the
        # tiled labels -> (batch*steps,) as CrossEntropyLoss expects.
        loss = loss_function(outputs[:,60:,:].reshape(-1,outsize),
                             ytrain_tiled[start_idx:end_idx,60:].reshape(-1))
        loss.backward()
        epoch_loss+=loss.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

# Create the output directory first so a missing folder cannot make the save
# fail after the whole training run has finished.
os.makedirs('./models', exist_ok=True)
torch.save(model.state_dict(), './models/cric_prediction_all_output_batch.pth')

Epoch = 121, loss = 42.51482406258583
Epoch = 122, loss = 42.443438082933426
Epoch = 123, loss = 42.35908907651901
Epoch = 124, loss = 42.339952290058136
Epoch = 125, loss = 42.58680856227875
Epoch = 126, loss = 42.6234216094017
Epoch = 127, loss = 42.37803840637207
Epoch = 128, loss = 42.269956052303314
Epoch = 129, loss = 42.18683221936226
Epoch = 130, loss = 42.08628311753273
j = 321, Train Acc = 0.9147727272727273, Test Acc = 0.9144736842105263
j = 250, Train Acc = 0.711038961038961, Test Acc = 0.7236842105263158
j = 200, Train Acc = 0.6607142857142857, Test Acc = 0.6578947368421053
Epoch = 131, loss = 42.05293822288513
Epoch = 132, loss = 42.00405728816986
Epoch = 133, loss = 41.97373500466347
Epoch = 134, loss = 41.890100330114365
Epoch = 135, loss = 41.84604549407959
Epoch = 136, loss = 41.764572978019714
Epoch = 137, loss = 41.860697120428085
Epoch = 138, loss = 41.77369737625122
Epoch = 139, loss = 41.814272195100784
Epoch = 140, loss = 41.709476947784424
j = 321, Train Acc = 

In [8]:
# Results of the multi-output LSTM: logged training output for epochs 80-200, kept for reference.

# Epoch = 80, loss = 42.69335952401161
# j = 321, Train Acc = 0.9050324675324676, Test Acc = 0.9473684210526315
# j = 250, Train Acc = 0.7021103896103896, Test Acc = 0.7401315789473685
# j = 200, Train Acc = 0.6323051948051948, Test Acc = 0.6381578947368421
# Epoch = 81, loss = 42.64999434351921
# Epoch = 82, loss = 42.63620883226395
# Epoch = 83, loss = 42.57252901792526
# Epoch = 84, loss = 42.54386180639267
# Epoch = 85, loss = 42.543820798397064
# Epoch = 86, loss = 42.522236466407776
# Epoch = 87, loss = 43.12302175164223
# Epoch = 88, loss = 43.80488169193268
# Epoch = 89, loss = 43.530466586351395
# Epoch = 90, loss = 43.1253487765789
# j = 321, Train Acc = 0.9042207792207793, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.6996753246753247, Test Acc = 0.7467105263157895
# j = 200, Train Acc = 0.635551948051948, Test Acc = 0.6644736842105263
# Epoch = 91, loss = 42.85170575976372
# Epoch = 92, loss = 42.611751973629
# Epoch = 93, loss = 42.60165584087372
# Epoch = 94, loss = 42.472452610731125
# Epoch = 95, loss = 42.44126981496811
# Epoch = 96, loss = 42.43537795543671
# Epoch = 97, loss = 42.41048192977905
# Epoch = 98, loss = 42.40796732902527
# Epoch = 99, loss = 42.37926670908928
# Epoch = 100, loss = 42.36977231502533
# j = 321, Train Acc = 0.9066558441558441, Test Acc = 0.9572368421052632
# j = 250, Train Acc = 0.7021103896103896, Test Acc = 0.743421052631579
# j = 200, Train Acc = 0.635551948051948, Test Acc = 0.6578947368421053
# Epoch = 101, loss = 42.35787692666054
# Epoch = 102, loss = 42.33479583263397
# Epoch = 103, loss = 42.33716815710068
# Epoch = 104, loss = 42.291197776794434
# Epoch = 105, loss = 42.42200693488121
# Epoch = 106, loss = 42.384660959243774
# Epoch = 107, loss = 42.503537118434906
# Epoch = 108, loss = 42.54160389304161
# Epoch = 109, loss = 42.657554507255554
# Epoch = 110, loss = 46.20248129963875
# j = 321, Train Acc = 0.5048701298701299, Test Acc = 0.5526315789473685
# j = 250, Train Acc = 0.5876623376623377, Test Acc = 0.6052631578947368
# j = 200, Train Acc = 0.5909090909090909, Test Acc = 0.5953947368421053
# Epoch = 111, loss = 49.134992361068726
# Epoch = 112, loss = 43.54741933941841
# Epoch = 113, loss = 42.86708441376686
# Epoch = 114, loss = 42.59751954674721
# Epoch = 115, loss = 42.5811333656311
# Epoch = 116, loss = 42.51113286614418
# Epoch = 117, loss = 42.68782064318657
# Epoch = 118, loss = 42.618944466114044
# Epoch = 119, loss = 42.34784010052681
# Epoch = 120, loss = 42.301506608724594
# j = 321, Train Acc = 0.9066558441558441, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.713474025974026, Test Acc = 0.75
# j = 200, Train Acc = 0.650974025974026, Test Acc = 0.6710526315789473
# Epoch = 121, loss = 42.29225406050682
# Epoch = 122, loss = 42.695088654756546
# Epoch = 123, loss = 42.92030057311058
# Epoch = 124, loss = 42.58220049738884
# Epoch = 125, loss = 42.21531546115875
# Epoch = 126, loss = 42.28876554965973
# Epoch = 127, loss = 42.07214707136154
# Epoch = 128, loss = 42.15156552195549
# Epoch = 129, loss = 42.09659793972969
# Epoch = 130, loss = 43.25178563594818
# j = 321, Train Acc = 0.8522727272727273, Test Acc = 0.8322368421052632
# j = 250, Train Acc = 0.6566558441558441, Test Acc = 0.6414473684210527
# j = 200, Train Acc = 0.6112012987012987, Test Acc = 0.5822368421052632
# Epoch = 131, loss = 46.67593550682068
# Epoch = 132, loss = 43.80374363064766
# Epoch = 133, loss = 42.56290856003761
# Epoch = 134, loss = 42.68502974510193
# Epoch = 135, loss = 42.1684812605381
# Epoch = 136, loss = 42.16793215274811
# Epoch = 137, loss = 42.16465583443642
# Epoch = 138, loss = 42.15301898121834
# Epoch = 139, loss = 41.93092507123947
# Epoch = 140, loss = 42.10533806681633
# j = 321, Train Acc = 0.9066558441558441, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.7167207792207793, Test Acc = 0.75
# j = 200, Train Acc = 0.6550324675324676, Test Acc = 0.6743421052631579
# Epoch = 141, loss = 41.746142119169235
# Epoch = 142, loss = 42.21582242846489
# Epoch = 143, loss = 41.93819710612297
# Epoch = 144, loss = 41.87680941820145
# Epoch = 145, loss = 42.27157709002495
# Epoch = 146, loss = 41.94843155145645
# Epoch = 147, loss = 41.99655598402023
# Epoch = 148, loss = 42.1709089577198
# Epoch = 149, loss = 42.353795766830444
# Epoch = 150, loss = 42.91742631793022
# j = 321, Train Acc = 0.9050324675324676, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.7126623376623377, Test Acc = 0.7105263157894737
# j = 200, Train Acc = 0.6501623376623377, Test Acc = 0.6217105263157895
# Epoch = 151, loss = 42.41686064004898
# Epoch = 152, loss = 42.26151829957962
# Epoch = 153, loss = 42.90386986732483
# Epoch = 154, loss = 42.91596955060959
# Epoch = 155, loss = 42.06308516860008
# Epoch = 156, loss = 42.01913532614708
# Epoch = 157, loss = 41.886956721544266
# Epoch = 158, loss = 42.28912091255188
# Epoch = 159, loss = 41.89504021406174
# Epoch = 160, loss = 41.59468686580658
# j = 321, Train Acc = 0.9058441558441559, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.7224025974025974, Test Acc = 0.7368421052631579
# j = 200, Train Acc = 0.6574675324675324, Test Acc = 0.6447368421052632
# Epoch = 161, loss = 42.02638882398605
# Epoch = 162, loss = 42.76215770840645
# Epoch = 163, loss = 41.943300515413284
# Epoch = 164, loss = 41.744627594947815
# Epoch = 165, loss = 41.64369750022888
# Epoch = 166, loss = 41.82700064778328
# Epoch = 167, loss = 41.838323920965195
# Epoch = 168, loss = 42.99936231970787
# Epoch = 169, loss = 42.55475810170174
# Epoch = 170, loss = 42.43961876630783
# j = 321, Train Acc = 0.9050324675324676, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.724025974025974, Test Acc = 0.7236842105263158
# j = 200, Train Acc = 0.6525974025974026, Test Acc = 0.6513157894736842
# Epoch = 171, loss = 42.14726781845093
# Epoch = 172, loss = 42.02729895710945
# Epoch = 173, loss = 42.05796667933464
# Epoch = 174, loss = 42.047207325696945
# Epoch = 175, loss = 41.85863038897514
# Epoch = 176, loss = 41.78750139474869
# Epoch = 177, loss = 41.89787817001343
# Epoch = 178, loss = 41.723336696624756
# Epoch = 179, loss = 41.46990704536438
# Epoch = 180, loss = 41.31174489855766
# j = 321, Train Acc = 0.9147727272727273, Test Acc = 0.9539473684210527
# j = 250, Train Acc = 0.7386363636363636, Test Acc = 0.743421052631579
# j = 200, Train Acc = 0.6728896103896104, Test Acc = 0.6546052631578947
# Epoch = 181, loss = 41.147144973278046
# Epoch = 182, loss = 41.04915153980255
# Epoch = 183, loss = 41.02901268005371
# Epoch = 184, loss = 41.16104966402054
# Epoch = 185, loss = 41.052144914865494
# Epoch = 186, loss = 40.75493836402893
# Epoch = 187, loss = 40.8061888217926
# Epoch = 188, loss = 41.45923164486885
# Epoch = 189, loss = 40.893089562654495
# Epoch = 190, loss = 40.89757114648819
# j = 321, Train Acc = 0.9188311688311688, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.734577922077922, Test Acc = 0.7138157894736842
# j = 200, Train Acc = 0.6728896103896104, Test Acc = 0.6447368421052632
# Epoch = 191, loss = 40.91097801923752
# Epoch = 192, loss = 40.533461928367615
# Epoch = 193, loss = 40.0526128411293
# Epoch = 194, loss = 40.267620861530304
# Epoch = 195, loss = 40.43658027052879
# Epoch = 196, loss = 39.8879688680172
# Epoch = 197, loss = 40.29349622130394
# Epoch = 198, loss = 39.77166989445686
# Epoch = 199, loss = 40.42837768793106
# Epoch = 200, loss = 39.85610529780388
# j = 321, Train Acc = 0.9066558441558441, Test Acc = 0.9506578947368421
# j = 250, Train Acc = 0.7564935064935064, Test Acc = 0.7105263157894737
# j = 200, Train Acc = 0.698051948051948, Test Acc = 0.6381578947368421