In [1]:
import numpy as np
import pandas as pd
import glob
import os
from random import shuffle

import torch
import torch.utils.data as data_utils
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_sequence,pad_packed_sequence,pad_sequence
torch.__version__

'1.3.0'

In [3]:
# load and process data
path = r'odi_csv/'
all_files = glob.glob(path + "/*.csv")
shuffle(all_files)
innings2 = []
target=[]
for filename in all_files:
    df = pd.read_csv(filename,usecols=[0])
    skips = df.loc[ 'info' , : ].shape[0]

    df = pd.read_csv(filename,nrows=skips,skiprows=1,header=None)
    df = df.drop(columns=0).set_index(df.columns[1])
    winteam=None
    if 'winner' in df.index:
        winteam = df.loc['winner',:].values[0]
    
    df = pd.read_csv(filename,skiprows=skips+1,header=None)
    
    df2 = df[df.columns[[1,2,7,8]]].set_index(df.columns[1]).drop(index=1)
    if df2.shape[0]>0:
        innings2.append(df2)
        i2team = df[df.columns[[1,3]]].set_index(df.columns[1]).drop(index=1).values[0,0]
        if (i2team==winteam):
            target.append(1)
        else:
            target.append(0)

# len(innings2) = 1556
split = int(len(innings2) * 0.8)
innings2_train=innings2[:split]
innings2_test =innings2[split:]
target_train  =target[:split]
target_test   =target[split:]

In [4]:
# Random Sample generator
def X_train_generator(ing):
    features=[]
    # create targets first
    for i in range(len(ing)):
        balls = ing[i].shape[0]
        if balls>250:
            j = np.random.choice(np.arange(250,balls+1))
            features.append(torch.tensor(ing[i].values[:j]))
        else:
            features.append(torch.tensor(ing[i].values))

    # convert to fixed length sequence
    features,_ = pad_packed_sequence(pack_sequence(features,enforce_sorted = False), batch_first=True, padding_value=-1, total_length=321)
    return features

In [5]:
# # All overs generator
# def X_train_generator(ing):
#     features=[]
#     # create targets first
#     for i in range(len(ing)):
#         features.append(torch.tensor(ing[i].values))

#     # convert to fixed length sequence
#     features,_ = pad_packed_sequence(pack_sequence(features,enforce_sorted = False), batch_first=True,padding_value=-1, total_length=321)
#     return features

In [6]:
features=[]
# create targets first
for i in range(len(innings2_test)):
    features.append(torch.tensor(innings2_test[i].values))

# convert to fixed length sequence
X_test, _ = pad_packed_sequence(pack_sequence(features,enforce_sorted = False), batch_first=True,padding_value=-1, total_length=321)
y_train = torch.tensor(target_train)
y_test = torch.tensor(target_test)
print(X_test.size())

In [8]:
# LSTM model
class matchRNN(nn.Module):
    def __init__(self,insize,hsize,outsize):
        super(matchRNN,self).__init__()
        
        self.insize=insize
        self.hsize=hsize
        self.outsize = outsize
        
        # lstm cell
        self.lstm_cell = nn.LSTMCell(input_size=insize, hidden_size=hsize)
        self.fc_out = nn.Linear(in_features=hsize, out_features=outsize)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self,feat):
#         feat = torch.tensor(feat[np.newaxis,:],dtype=torch.float32)
        batch_size = feat.size(0)
        
        # init the hidden and cell states to zeros
        hidden_state = torch.zeros((batch_size, self.hsize))
        cell_state = torch.zeros((batch_size, self.hsize))
        for t in range(feat.size(1)):
            hidden_state, cell_state = self.lstm_cell(feat[:,t,:].view(batch_size,-1).float(), (hidden_state, cell_state))

        out = self.fc_out(hidden_state)

        return out

In [9]:
# evaluation

def X_generator(ing,j):
    features=[]
    # create targets first
    for i in range(len(ing)):
        features.append(torch.tensor(ing[i].values[:j]))

    # convert to fixed length sequence
    features,_ = pad_packed_sequence(pack_sequence(features,enforce_sorted = False), batch_first=True,padding_value=-1, total_length=321)
    return features

def evalfunc(model,j):
    global innings2_train,innings2_test,y_train,y_test,batch_size
    model.eval()
    # train
    X = X_generator(innings2_train,j)
    y = y_train
    corr=0
    num_batches = X.size(0) // batch_size

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        op = model(X[start_idx:end_idx])
        maxval,maxidx = torch.max(op,1)
        corr+= np.sum((maxidx==y[start_idx:end_idx]).numpy())
    total=num_batches*batch_size
    train_acc = corr / total
    
    # test
    X = X_generator(innings2_test,j)
    y = y_test
    corr=0
    num_batches = X.size(0) // batch_size

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        op = model(X[start_idx:end_idx])
        maxval,maxidx = torch.max(op,1)
        corr+= np.sum((maxidx==y[start_idx:end_idx]).numpy())
    total=num_batches*batch_size
    test_acc = corr / total
    print('j = {}, Train Acc = {}, Test Acc = {}'.format(j,train_acc,test_acc))

In [10]:
# training parameters

insize=X_test.size(2)
hsize=64
outsize=2    #binary classification
model = matchRNN(insize,hsize,outsize)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
batch_size = 16


# train iterations
for epoch in range(61):
    epoch_loss=0
    X_train = X_train_generator(innings2_train)
    num_batches = X_train.size(0) // batch_size
    model.train()

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        model.zero_grad()
        op = model(X_train[start_idx:end_idx])
        loss = loss_function(op.contiguous().view(-1,outsize), y_train[start_idx:end_idx].view(-1))
        loss.backward()
        epoch_loss+=loss.data.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

torch.save(model.state_dict(), './models/cric_prediction_M1B1_randomsample.pth')


Epoch = 0, loss = 53.54995155334473
j = 321, Train Acc = 0.49594155844155846, Test Acc = 0.48026315789473684
j = 250, Train Acc = 0.4967532467532468, Test Acc = 0.48355263157894735
j = 200, Train Acc = 0.4967532467532468, Test Acc = 0.48355263157894735
Epoch = 1, loss = 53.55238860845566
Epoch = 2, loss = 53.500824213027954
Epoch = 3, loss = 53.478012800216675
Epoch = 4, loss = 53.31806284189224
Epoch = 5, loss = 53.507573902606964
Epoch = 6, loss = 53.11656981706619
Epoch = 7, loss = 53.27751964330673
Epoch = 8, loss = 53.45483046770096
Epoch = 9, loss = 53.403779685497284
Epoch = 10, loss = 53.153639793395996
j = 321, Train Acc = 0.5381493506493507, Test Acc = 0.5230263157894737
j = 250, Train Acc = 0.5032467532467533, Test Acc = 0.5164473684210527
j = 200, Train Acc = 0.5032467532467533, Test Acc = 0.5164473684210527
Epoch = 11, loss = 53.10060238838196
Epoch = 12, loss = 53.39316636323929
Epoch = 13, loss = 53.37317854166031
Epoch = 14, loss = 53.2958744764328
Epoch = 15, loss = 53

In [13]:
# training parameters


# train iterations
for epoch in range(61,120):
    epoch_loss=0
    X_train = X_train_generator(innings2_train)
    num_batches = X_train.size(0) // batch_size
    model.train()

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        model.zero_grad()
        op = model(X_train[start_idx:end_idx])
        loss = loss_function(op.contiguous().view(-1,outsize), y_train[start_idx:end_idx].view(-1))
        loss.backward()
        epoch_loss+=loss.data.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

torch.save(model.state_dict(), './models/cric_prediction_M1B1_randomsample.pth')


Epoch = 61, loss = 52.29833424091339
Epoch = 62, loss = 52.21452534198761
Epoch = 63, loss = 51.61600208282471
Epoch = 64, loss = 51.97134047746658
Epoch = 65, loss = 51.952072739601135
Epoch = 66, loss = 51.27543890476227
Epoch = 67, loss = 50.14579477906227
Epoch = 68, loss = 48.916630417108536
Epoch = 69, loss = 52.01295745372772
Epoch = 70, loss = 52.61844131350517
j = 321, Train Acc = 0.5917207792207793, Test Acc = 0.5921052631578947
j = 250, Train Acc = 0.5917207792207793, Test Acc = 0.5921052631578947
j = 200, Train Acc = 0.5422077922077922, Test Acc = 0.5460526315789473
Epoch = 71, loss = 50.26707103848457
Epoch = 72, loss = 49.369433373212814
Epoch = 73, loss = 47.90370932221413
Epoch = 74, loss = 46.723154067993164
Epoch = 75, loss = 45.69775667786598
Epoch = 76, loss = 47.40668934583664
Epoch = 77, loss = 47.15504705905914
Epoch = 78, loss = 45.34402018785477
Epoch = 79, loss = 43.483223646879196
Epoch = 80, loss = 43.35786336660385
j = 321, Train Acc = 0.6996753246753247, T

In [15]:
# training parameters


# train iterations
for epoch in range(121,201):
    epoch_loss=0
    X_train = X_train_generator(innings2_train)
    num_batches = X_train.size(0) // batch_size
    model.train()

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        model.zero_grad()
        op = model(X_train[start_idx:end_idx])
        loss = loss_function(op.contiguous().view(-1,outsize), y_train[start_idx:end_idx].view(-1))
        loss.backward()
        epoch_loss+=loss.data.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

torch.save(model.state_dict(), './models/cric_prediction_M1B1_randomsample.pth')


Epoch = 121, loss = 40.30392572283745
Epoch = 122, loss = 40.647254556417465
Epoch = 123, loss = 40.238804042339325
Epoch = 124, loss = 41.13992765545845
Epoch = 125, loss = 40.291603058576584
Epoch = 126, loss = 40.16562223434448
Epoch = 127, loss = 40.237551778554916
Epoch = 128, loss = 40.17660480737686
Epoch = 129, loss = 40.281757324934006
Epoch = 130, loss = 40.34495559334755
j = 321, Train Acc = 0.7581168831168831, Test Acc = 0.7105263157894737
j = 250, Train Acc = 0.6761363636363636, Test Acc = 0.7039473684210527
j = 200, Train Acc = 0.5909090909090909, Test Acc = 0.5888157894736842
Epoch = 131, loss = 40.010081112384796
Epoch = 132, loss = 39.88279302418232
Epoch = 133, loss = 40.121012419462204
Epoch = 134, loss = 39.97076407074928
Epoch = 135, loss = 40.77816781401634
Epoch = 136, loss = 40.50542455911636
Epoch = 137, loss = 41.0333736538887
Epoch = 138, loss = 52.5030038356781
Epoch = 139, loss = 52.60395222902298
Epoch = 140, loss = 52.508822202682495
j = 321, Train Acc = 

In [17]:
# training parameters


# train iterations
for epoch in range(201,501):
    epoch_loss=0
    X_train = X_train_generator(innings2_train)
    num_batches = X_train.size(0) // batch_size
    model.train()

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx+1) * batch_size
        model.zero_grad()
        op = model(X_train[start_idx:end_idx])
        loss = loss_function(op.contiguous().view(-1,outsize), y_train[start_idx:end_idx].view(-1))
        loss.backward()
        epoch_loss+=loss.data.item()
        optimizer.step()
    print('Epoch = {}, loss = {}'.format(epoch,epoch_loss))
    if epoch%10==0:
        for j in [321,250,200]:
            evalfunc(model,j)

torch.save(model.state_dict(), './models/cric_prediction_M1B1_randomsample.pth')


Epoch = 201, loss = 39.4869422018528
Epoch = 202, loss = 41.72693482041359
Epoch = 203, loss = 39.47155658900738
Epoch = 204, loss = 40.056733787059784
Epoch = 205, loss = 39.68918797373772
Epoch = 206, loss = 39.41008400917053
Epoch = 207, loss = 39.27362395823002
Epoch = 208, loss = 39.68775573372841
Epoch = 209, loss = 39.70422703027725
Epoch = 210, loss = 39.538359716534615
j = 321, Train Acc = 0.7443181818181818, Test Acc = 0.7006578947368421
j = 250, Train Acc = 0.6891233766233766, Test Acc = 0.6743421052631579
j = 200, Train Acc = 0.5909090909090909, Test Acc = 0.5921052631578947
Epoch = 211, loss = 39.29075272381306
Epoch = 212, loss = 39.017792761325836
Epoch = 213, loss = 40.07129579782486
Epoch = 214, loss = 39.80070674419403
Epoch = 215, loss = 39.75534808635712
Epoch = 216, loss = 39.439828380942345
Epoch = 217, loss = 39.74226728081703
Epoch = 218, loss = 39.13374847173691
Epoch = 219, loss = 39.45093114674091
Epoch = 220, loss = 39.17780637741089
j = 321, Train Acc = 0.7

In [11]:
# # This result is for all overs as inputs case
# Epoch = 35, loss = 52.047237277030945
# Epoch = 36, loss = 52.01414090394974
# Epoch = 37, loss = 51.91944259405136
# Epoch = 38, loss = 51.50864785909653
# Epoch = 39, loss = 28.567791245877743
# Epoch = 40, loss = 32.422352373600006
# j = 321, Train Acc = 0.8409090909090909, Test Acc = 0.8355263157894737
# j = 200, Train Acc = 0.5560064935064936, Test Acc = 0.6052631578947368
# j = 100, Train Acc = 0.5194805194805194, Test Acc = 0.5493421052631579
# Epoch = 41, loss = 26.737921312451363
# Epoch = 42, loss = 23.24614042043686
# Epoch = 43, loss = 22.8752434104681
# Epoch = 44, loss = 53.03560835123062
# Epoch = 45, loss = 47.28141215443611
# Epoch = 46, loss = 35.23486316204071
# Epoch = 47, loss = 23.303925164043903
# Epoch = 48, loss = 22.12454342842102
# Epoch = 49, loss = 19.51815900206566
# Epoch = 50, loss = 18.069486137479544
# j = 321, Train Acc = 0.9204545454545454, Test Acc = 0.9473684210526315
# j = 200, Train Acc = 0.5762987012987013, Test Acc = 0.618421052631579
# j = 100, Train Acc = 0.5081168831168831, Test Acc = 0.5427631578947368
# Epoch = 51, loss = 17.860560324043036
# Epoch = 52, loss = 23.788215935230255
# Epoch = 53, loss = 20.787461169064045
# Epoch = 54, loss = 16.932700466364622
# Epoch = 55, loss = 16.28865105099976
# Epoch = 56, loss = 16.52232925966382
# Epoch = 57, loss = 16.135572612285614
# Epoch = 58, loss = 15.656426377594471
# Epoch = 59, loss = 15.42301008477807