In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

In [2]:
EPOCHS = 200
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
EMBED_DIM = 128
DROPOUT_RATE = 0.2
MAX_SEQUENCE = 50
MAX_LEARNING_RATE = 2e-3
LAST_HIT = 20
LAST_TIME = 101
HIT_PAD = 21
TIME_PAD = 102

In [3]:
seed_value = 42
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value) 

# Data Preprocessing

In [4]:
badminton_data = pd.read_csv('./dataset_test.csv')
badminton_data = badminton_data[['rally','player', 'type', 'hit_height', 'landing_area', 'player_location_area', 'time_proportion', 'getpoint_player']]
le = LabelEncoder()
badminton_data['type'] = le.fit_transform(badminton_data['type'])
BALL_TYPE = len(badminton_data.type.unique())
PLAYER = len(badminton_data.player.unique())
PLAYER_LIST = badminton_data.player.unique()
training_data = badminton_data[:18132].reset_index(drop=True)
testing_data = badminton_data[18132:].reset_index(drop=True)

In [5]:
ball_type_2gram = np.zeros((BALL_TYPE, BALL_TYPE))
mem = training_data['rally'][0]
for i in range(len(training_data)):
    if i == len(training_data) - 1:
        break
    if training_data['rally'][i] != mem:
        mem = training_data['rally'][i]
        continue
    ball_type_2gram[training_data['type'][i]][training_data['type'][i+1]] +=1

padding_vector = np.full((2, BALL_TYPE), -1)
ball_type_embedding = np.concatenate([ball_type_2gram, ball_type_2gram], axis=0)
ball_type_embedding = np.concatenate([ball_type_embedding, padding_vector], axis=0)
padding_vector = np.full((BALL_TYPE * 2 + 2, EMBED_DIM - BALL_TYPE), -1)
ball_type_embedding = np.concatenate([ball_type_embedding, padding_vector], axis=1)
ball_type_embedding = ball_type_embedding.astype(np.float32)
ball_type_embedding = torch.tensor(ball_type_embedding)

In [6]:
class BadmintonDataset(Dataset):
    def __init__(self, data, max_sequence=50):
        super(BadmintonDataset, self).__init__()        
        rally = []
        mem = data['rally'][0]
        tmp = []
        for i in range(len(data)):
            if i == len(data)-1:
                rally.append(tmp)
            if data['rally'][i] != mem:
                mem = data['rally'][i]
                rally.append(tmp)
                tmp = []
            tmp.append([data['player'][i], data['type'][i], round(data['time_proportion'][i], 2) * 100, data['hit_height'][i] - 1, data['getpoint_player'][i]])
               
        self.pattern = []
        self.getpoint_player = []
        self.label = []
        for i in range(len(rally)):
            A = []
            B = []
            for j in range(1, len(rally[i])):
                if rally[i][j][0] == 'CHOU Tien Chen':
                    A.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1))))
                else:
                    B.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1))))
            if len(A) == 0 or len(B) == 0:
                continue
                
            if rally[i][0][4] == 'CHOU Tien Chen':
                self.getpoint_player.append(np.array([1]))
                self.getpoint_player.append(np.array([0]))
            if rally[i][0][4] != 'CHOU Tien Chen':
                self.getpoint_player.append(np.array([0]))
                self.getpoint_player.append(np.array([1]))
                
            A.append(np.concatenate((np.array(LAST_HIT) * np.ones(1), np.array(LAST_TIME) * np.ones(1), np.array(0) * np.ones(1))))
            B.append(np.concatenate((np.array(LAST_HIT) * np.ones(1), np.array(LAST_TIME) * np.ones(1), np.array(0) * np.ones(1))))
            self.pattern.append(np.concatenate((np.array(A), np.full((max_sequence - len(A), 3), (HIT_PAD, TIME_PAD, 0)))))
            self.label.append(np.array([0]))
            self.pattern.append(np.concatenate((np.array(B), np.full((max_sequence - len(B), 3), (HIT_PAD, TIME_PAD, 0)))))
            self.label.append(np.array([1])) 
        self.pattern = np.array(self.pattern)
        self.label = np.array(self.label)
    def __len__(self):
        return len(self.label)

    def __getitem__(self, index):
        action = []
        time = []
        for i in range(len(self.pattern[index])):
            action.append(int(self.pattern[index][i][0] + BALL_TYPE * self.pattern[index][i][2]))
            time.append(self.pattern[index][i][1])
        return np.array(action), np.array(time), self.label[index], self.getpoint_player[index]

In [7]:
# class BadmintonDataset(Dataset):
#     def __init__(self, data, max_sequence=50):
#         super(BadmintonDataset, self).__init__()        
#         rally = []
#         mem = data['rally'][0]
#         tmp = []
#         for i in range(len(data)):
#             if i == len(data)-1:
#                 rally.append(tmp)
#             if data['rally'][i] != mem:
#                 mem = data['rally'][i]
#                 rally.append(tmp)
#                 tmp = []
#             tmp.append([data['player'][i], data['type'][i], round(data['time_proportion'][i], 2) * 100, data['hit_height'][i] - 1, data['getpoint_player'][i]])
               
#         self.pattern = []
#         self.getpoint_player = []
#         self.label = []
#         for i in range(len(rally)):
#             player_A = []
#             player_B = []
#             for j in range(1, len(rally[i])):
#                 if j % 2 == 0:
#                     player_A.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1))))
#                 else:
#                     player_B.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1))))
#             if len(player_A) == 0 or len(player_B) == 0:
#                 continue
#             if rally[i][0][4] == rally[i][1][0]:
#                 self.getpoint_player.append(np.array([0]))
#                 self.getpoint_player.append(np.array([1]))
#             elif rally[i][0][4] != rally[i][1][0]:
#                 self.getpoint_player.append(np.array([1]))
#                 self.getpoint_player.append(np.array([0]))
                
#             player_A.append(np.concatenate((np.array(LAST_HIT) * np.ones(1), np.array(LAST_TIME) * np.ones(1), np.array(0) * np.ones(1))))
#             player_B.append(np.concatenate((np.array(LAST_HIT) * np.ones(1), np.array(LAST_TIME) * np.ones(1), np.array(0) * np.ones(1))))
#             self.pattern.append(np.concatenate((np.array(player_A), np.full((max_sequence - len(player_A), 3), (HIT_PAD, TIME_PAD, 0)))))
#             self.pattern.append(np.concatenate((np.array(player_B), np.full((max_sequence - len(player_B), 3), (HIT_PAD, TIME_PAD, 0)))))
            
#             if rally[i][1][0] == 'CHOU Tien Chen':
#                 self.label.append(np.array([0]))
#                 self.label.append(np.array([1]))
#             elif rally[i][2][0] == 'CHOU Tien Chen':
#                 self.label.append(np.array([1]))
#                 self.label.append(np.array([0]))
#             else:
#                 self.label.append(np.array([0]))
#                 self.label.append(np.array([0]))
            
#         self.pattern = np.array(self.pattern)
#         self.label = np.array(self.label)
# #         self.getpoint_player = np.array(self.getpoint_player)
#     def __len__(self):
#         return len(self.label)

#     def __getitem__(self, index):
#         action = []
#         time = []
#         for i in range(len(self.pattern[index])):
#             action.append(int(self.pattern[index][i][0] + BALL_TYPE * self.pattern[index][i][2]))
#             time.append(self.pattern[index][i][1])
#         return np.array(action), np.array(time), self.label[index], self.getpoint_player[index]

In [8]:
train_set = BadmintonDataset(training_data, max_sequence=MAX_SEQUENCE)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

In [9]:
class FFN(nn.Module):
    def __init__(self, state_size=128):
        super(FFN, self).__init__()

        self.lr1 = nn.Linear(state_size, state_size)
        self.relu = nn.ReLU()
        self.lr2 = nn.Linear(state_size, state_size)
        self.dropout = nn.Dropout(0.2)
        self.pred = nn.Linear(state_size, 1)     
    
    def forward(self, x):
        x = self.lr1(x)
        x = self.relu(x)
        x = self.lr2(x)
        x = self.dropout(x)
        x = self.pred(x)
        return x

class TimeAware(nn.Module):
    def __init__(self, ball_type_num=10, embed_dim=128, contingency_tags=None, dropout_rate=0.2):
        super(TimeAware, self).__init__()
        self.time_embedding = nn.Embedding(101 + 2, embed_dim)
        self.ball_type_embedding = nn.Embedding.from_pretrained(contingency_tags, freeze=False)
        
        self.multi_att = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=8, dropout=dropout_rate)
        self.lstm = nn.LSTM(embed_dim, embed_dim)
        self.fnn = FFN(embed_dim + 1)
        
    def forward(self, a, t, w):
        padding_idx = a == LAST_HIT
        a = self.ball_type_embedding(a)
        t = self.time_embedding(t)
        att_output, _ = self.multi_att(a, t, a)
        out, _ = self.lstm(att_output)
        mask = padding_idx.unsqueeze(-1).expand_as(out)
        out = torch.masked_select(out, mask).view(-1, 128)
        out = torch.cat((out, w), 1)
        out = self.fnn(out)
        return out

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# "cuda" if torch.cuda.is_available() else 
timeaware_model = TimeAware(ball_type_num=BALL_TYPE, embed_dim=EMBED_DIM, contingency_tags=ball_type_embedding, dropout_rate=DROPOUT_RATE)
optimizer = torch.optim.Adam(timeaware_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=MAX_LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=EPOCHS
)
loss_function = nn.BCEWithLogitsLoss()

timeaware_model.to(device)
loss_function.to(device)
timeaware_model.train()
for epoch in range(EPOCHS):
    num_corrects = 0
    num_total = 0
    for item in train_loader:
        action = item[0].to(device).long()
        time = item[1].to(device).long()
        label = item[2].to(device).float()
        getpoint_player = item[3].to(device).long()
        output = timeaware_model(action, time, getpoint_player)

        optimizer.zero_grad()
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        scheduler.step()
        pred = (torch.sigmoid(output) >= 0.5).long()
        
        num_corrects += (pred == label).sum().item()
        num_total += len(label)

    print(num_corrects / num_total) 
    print('[{}/{}] Loss:'.format(epoch+1, EPOCHS), loss.item())
    print()


torch.save(timeaware_model.state_dict(), 'timeaware.pt')

0.495787700084246
[1/200] Loss: 0.6929510831832886

0.502106149957877
[2/200] Loss: 0.6927691102027893

0.508424599831508
[3/200] Loss: 0.6933482885360718

0.523167649536647
[4/200] Loss: 0.6922197937965393

0.4797809604043808
[5/200] Loss: 0.6893211007118225

0.4764111204717776
[6/200] Loss: 0.7029282450675964

0.4945240101095198
[7/200] Loss: 0.699658989906311

0.5033698399326032
[8/200] Loss: 0.6935940384864807

0.5071609098567817
[9/200] Loss: 0.6899312138557434

0.5004212299915753
[10/200] Loss: 0.6953129768371582

0.493681550126369
[11/200] Loss: 0.6968020796775818

0.493681550126369
[12/200] Loss: 0.691404402256012

0.4898904802021904
[13/200] Loss: 0.6973329782485962

0.4953664700926706
[14/200] Loss: 0.6911776661872864

0.5155855096882898
[15/200] Loss: 0.6928145289421082

0.5029486099410277
[16/200] Loss: 0.6848073601722717

0.5008424599831508
[17/200] Loss: 0.6921272277832031

0.4907329401853412
[18/200] Loss: 0.6890856027603149

0.4856781802864364
[19/200] Loss: 0.695147275

0.5139005897219882
[155/200] Loss: 0.6929774880409241

0.4991575400168492
[156/200] Loss: 0.6930496096611023

0.4898904802021904
[157/200] Loss: 0.6937145590782166

0.5122156697556866
[158/200] Loss: 0.6928026676177979

0.5214827295703454
[159/200] Loss: 0.6936178207397461

0.5058972198820556
[160/200] Loss: 0.6930298209190369

0.495787700084246
[161/200] Loss: 0.6937595009803772

0.5046335299073293
[162/200] Loss: 0.6942552328109741

0.5008424599831508
[163/200] Loss: 0.6934347152709961

0.4991575400168492
[164/200] Loss: 0.694239616394043

0.510530749789385
[165/200] Loss: 0.6921265721321106

0.5092670598146588
[166/200] Loss: 0.6937854886054993

0.5164279696714406
[167/200] Loss: 0.6926397085189819

0.5092670598146588
[168/200] Loss: 0.6930026412010193

0.4995787700084246
[169/200] Loss: 0.6930544376373291

0.4945240101095198
[170/200] Loss: 0.6932340264320374

0.504212299915754
[171/200] Loss: 0.6930599212646484

0.5029486099410277
[172/200] Loss: 0.6937621235847473

0.505475989890

In [11]:
test_set = BadmintonDataset(testing_data)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
timeaware_model = TimeAware(ball_type_num=BALL_TYPE, embed_dim=EMBED_DIM, contingency_tags=ball_type_embedding, dropout_rate=DROPOUT_RATE)

try:
    timeaware_model.load_state_dict(torch.load('timeaware.pt'))
except:
    timeaware_model.load_state_dict(torch.load('timeaware.pt', map_location='cpu'))
timeaware_model.to(device)
timeaware_model.eval()

num_corrects = 0
num_total = 0
predict = []
with torch.no_grad():
    for item in test_loader:
        action = item[0].to(device).long()
        time = item[1].to(device).long()
        label = item[2].to(device).float()
        getpoint_player = item[3].to(device).long()
        
        output = timeaware_model(action, time, getpoint_player)
        
        pred = (torch.sigmoid(output) >= 0.5).long()
        
        predict.extend(pred.view(-1).data.cpu().numpy())
        
        num_corrects += (pred == label).sum().item()
        num_total += len(label)

print(num_corrects / num_total)  

0.5540540540540541


# Output Test

In [13]:
count = 0
for i in range(len(test_set)):
    if test_set[i][2] != 0:
        count += 1
print('0: ', len(test_set) - count)
print('1: ', count)

0:  148
1:  148


In [14]:
PLAYER = badminton_data.player.unique()

In [15]:
badminton_data.player.unique()

array(['Kento MOMOTA', 'CHOU Tien Chen', 'Anthony Sinisuka GINTING',
       'CHEN Long', 'CHEN Yufei', 'TAI Tzu Ying', 'Viktor AXELSEN',
       'Anders ANTONSEN', 'PUSARLA V. Sindhu', 'WANG Tzu Wei',
       'Khosit PHETPRADAB', 'Jonatan CHRISTIE', 'NG Ka Long Angus',
       'SHI Yuqi'], dtype=object)

In [16]:
p = np.where(PLAYER == 'Kento MOMOTA')

In [17]:
p[0][0]

0

In [14]:
train_set[0]

(array([14, 15, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21]),
 array([ 33., 100., 101., 102., 102., 102., 102., 102., 102., 102., 102.,
        102., 102., 102., 102., 102., 102., 102., 102., 102., 102., 102.,
        102., 102., 102., 102., 102., 102., 102., 102., 102., 102., 102.,
        102., 102., 102., 102., 102., 102., 102., 102., 102., 102., 102.,
        102., 102., 102., 102., 102., 102.]),
 array([0]),
 array([0]))