In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
import torch.nn.utils.rnn as rnn_utils
from torch.utils.data import Dataset, DataLoader

In [2]:
EPOCHS = 200
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
EMBED_DIM = 128
DROPOUT_RATE = 0.2
MAX_SEQUENCE = 50
MAX_LEARNING_RATE = 2e-3
LAST_HIT = 20
LAST_TIME = 101
HIT_PAD = 21
TIME_PAD = 102
GROUP = 3
RATIO = 1.5
NUMBER_OF_FEATURE = 11

In [3]:
seed_value = 42
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)

# Data Preprocessing

## Used Feature

In [4]:
"""
rally
player
type
aroundhead
backhand
hit_height
hit_area
hit_x
hit_y
landing_height
landing_area
landing_x
landing_y
getpoint_player
"""

'\nrally\nplayer\ntype\naroundhead\nbackhand\nhit_height\nhit_area\nhit_x\nhit_y\nlanding_height\nlanding_area\nlanding_x\nlanding_y\ngetpoint_player\n'

In [5]:
badminton_data = pd.read_csv('./dataset_test.csv')
badminton_data = badminton_data[['rally_id', 'player', 'type', 'aroundhead', 'backhand', 'hit_height', 'hit_area', 'hit_x', 'hit_y', 'landing_height', 'landing_area', 'landing_x', 'landing_y']]
badminton_data = badminton_data.fillna(0)
le = LabelEncoder()
badminton_data['type'] = le.fit_transform(badminton_data['type'])

In [6]:
PLAYERS_LIST = badminton_data['player'].unique()
print(PLAYERS_LIST)
print('# of PLAYER:', len(PLAYERS_LIST))

['Kento MOMOTA' 'CHOU Tien Chen' 'Anthony Sinisuka GINTING' 'CHEN Long'
 'CHEN Yufei' 'TAI Tzu Ying' 'Viktor AXELSEN' 'Anders ANTONSEN'
 'PUSARLA V. Sindhu' 'WANG Tzu Wei' 'Khosit PHETPRADAB' 'Jonatan CHRISTIE'
 'NG Ka Long Angus' 'SHI Yuqi']
# of PLAYER: 14


In [7]:
training_data = badminton_data[:18132].reset_index(drop=True)
testing_data = badminton_data[18132:].reset_index(drop=True)

In [15]:
class BadmintonDataset(Dataset):
    def __init__(self, data, players_list=None, max_sequence=25):
        super(BadmintonDataset, self).__init__()
        self.label = []
        self.pattern = []
        players_in_data = data['player'].unique()
        players_group = data.groupby('player')
        for player_label, player_name in enumerate(players_list):
            if player_name in players_in_data:
                player_pattern = players_group.get_group(player_name).reset_index(drop=True)
                player_pattern_by_rally = player_pattern.groupby('rally_id')
                rallys_id = list(player_pattern_by_rally.groups.keys())
                for rally_id in rallys_id:
                    tmp_pattern = player_pattern_by_rally.get_group(rally_id).iloc[:, 2:].to_numpy().astype('int')# remvoe first two column('player', 'rally')
                    tmp_pattern = np.concatenate((np.array(tmp_pattern), np.full((max_sequence - len(tmp_pattern), 11), 1)))
                    self.pattern.append(tmp_pattern)
                    one_hot = np.zeros(len(players_list))
                    one_hot[player_label] = 1
                    self.label.append(one_hot)
        self.pattern = np.array(self.pattern).astype('int')
        self.label = np.array(self.label).astype('int')
        
            
    def __len__(self):
        return len(self.label)

    def __getitem__(self, index):
        return self.pattern[index], self.label[index]

In [16]:
train_set = BadmintonDataset(training_data, PLAYERS_LIST, max_sequence=MAX_SEQUENCE)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

# Model

In [25]:
def random_dimension_permutation():
    print('hi')
class TapNet(nn.Module):
    def __init__(self, embed_dim=128, number_of_feature=11, dropout_rate=0.2):
        super(TapNet, self).__init__()
        self.lstm = nn.LSTM(number_of_feature, embed_dim)
#         self.conv_1 = nn.Conv1d(self.channel, filters[0], kernel_size=kernels[0], dilation=dilation, stride=1, padding=paddings[0])
        
    def forward(self, pattern):
        pattern = pattern.type(torch.LongTensor)
        lstm_out, _ = self.lstm(pattern)
#         lstm_out = lstm_out.mean(1)
#         lstm_out = lstm_out.view(pattern.size(0), -1)
        return lstm_out

# Training

In [26]:
device = torch.device("cpu")
# "cuda" if torch.cuda.is_available() else 
tapnet_model = TapNet(embed_dim=EMBED_DIM, number_of_feature=NUMBER_OF_FEATURE, dropout_rate=DROPOUT_RATE)
optimizer = torch.optim.Adam(tapnet_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=MAX_LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=EPOCHS
)
loss_function = nn.CrossEntropyLoss()

tapnet_model.to(device)
loss_function.to(device)
tapnet_model.train()
for epoch in range(EPOCHS):
    num_corrects = 0
    num_total = 0
    for item in train_loader:
        pattern = item[0].to(device).long()
        label = item[1].to(device).long()
        output = tapnet_model(pattern)

        optimizer.zero_grad()
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        scheduler.step()
        pred = (torch.sigmoid(output) >= 0.5).long()
        
        num_corrects += (pred == label).sum().item()
        num_total += len(label)

    print(num_corrects / num_total) 
    print('[{}/{}] Loss:'.format(epoch+1, EPOCHS), loss.item())
    print()


torch.save(tapnet_model.state_dict(), 'tapnet.pt')

RuntimeError: expected scalar type Long but found Float

# Testing

In [None]:
test_set = BadmintonDataset(testing_data)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tapnet_model = TapNet(ball_type_num=BALL_TYPE, embed_dim=EMBED_DIM, contingency_tags=ball_type_embedding, dropout_rate=DROPOUT_RATE)

try:
    tapnet_model.load_state_dict(torch.load('tapnet.pt'))
except:
    tapnet_model.load_state_dict(torch.load('tapnet.pt', map_location='cpu'))
tapnet_model.to(device)
tapnet_model.eval()

num_corrects = 0
num_total = 0
predict = []
with torch.no_grad():
    for item in test_loader:
        action = item[0].to(device).long()
        time = item[1].to(device).long()
        label = item[2].to(device).float()
        getpoint_player = item[3].to(device).long()
        
        output = tapnet_model(action, time, getpoint_player)
        
        pred = (torch.sigmoid(output) >= 0.5).long()
        
        predict.extend(pred.view(-1).data.cpu().numpy())
        
        num_corrects += (pred == label).sum().item()
        num_total += len(label)


# Ouput Test

In [None]:
count = 0
for i in range(len(test_set)):
    if test_set[i][2] != 0:
        count += 1
print('0: ', len(test_set) - count)
print('1: ', count)

In [None]:
PLAYER = badminton_data.player.unique()

In [None]:
badminton_data.player.unique()

In [None]:
p = np.where(PLAYER == 'Kento MOMOTA')

In [9]:
badminton_data

Unnamed: 0,rally,player,type,aroundhead,backhand,hit_height,hit_area,hit_x,hit_y,landing_height,landing_area,landing_x,landing_y,getpoint_player
0,1,Kento MOMOTA,6,0.0,1,2,7,0.0,0.0,2.0,8,662,575,Kento MOMOTA
1,1,CHOU Tien Chen,4,0.0,0,2,8,662.0,575.0,1.0,5,513,332,Kento MOMOTA
2,1,Kento MOMOTA,9,1.0,0,2,5,513.0,332.0,1.0,3,833,641,Kento MOMOTA
3,1,CHOU Tien Chen,5,0.0,0,2,3,833.0,641.0,1.0,12,849,326,Kento MOMOTA
4,2,Kento MOMOTA,6,0.0,1,2,7,0.0,0.0,2.0,7,529,519,CHOU Tien Chen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25896,34,NG Ka Long Angus,1,0.0,1,1,6,835.0,383.0,1.0,8,788,567,Viktor AXELSEN
25897,34,Viktor AXELSEN,1,0.0,0,1,8,788.0,567.0,2.0,5,492,364,Viktor AXELSEN
25898,34,NG Ka Long Angus,3,0.0,0,2,5,492.0,364.0,1.0,1,464,479,Viktor AXELSEN
25899,34,Viktor AXELSEN,4,0.0,1,1,1,464.0,479.0,2.0,4,789,334,Viktor AXELSEN


In [15]:
data = badminton_data.copy()
rally = []
mem = data['rally'][0]
tmp = []
for i in range(len(data)):
    if i == len(data)-1:
        rally.append(tmp)
    if data['rally'][i] != mem:
        mem = data['rally'][i]
        rally.append(tmp)
        tmp = []
    tmp.append([data['player'][i], data['type'][i], data['aroundhead'][i], data['backhand'][i], data['hit_height'][i] - 1, data['hit_area'][i], data['hit_x'][i], data['hit_y'][i], data['landing_height'][i] - 1, data['landing_area'][i], data['landing_x'][i], data['landing_y'][i], data['getpoint_player'][i]])

pattern = []
getpoint_player = []
label = []
for i in range(len(rally)):
    A = []
    B = []
    for j in range(1, len(rally[i])):
        if rally[i][j][0] == 'CHOU Tien Chen':
            A.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1), np.array(rally[i][j][4]) * np.ones(1), np.array(rally[i][j][5]) * np.ones(1), np.array(rally[i][j][6]) * np.ones(1), np.array(rally[i][j][7]) * np.ones(1), np.array(rally[i][j][8]) * np.ones(1), np.array(rally[i][j][9]) * np.ones(1), np.array(rally[i][j][10]) * np.ones(1), np.array(rally[i][j][11]) * np.ones(1))))
        else:
            B.append(np.concatenate((np.array(rally[i][j][1]) * np.ones(1), np.array(rally[i][j][2]) * np.ones(1), np.array(rally[i][j][3]) * np.ones(1), np.array(rally[i][j][4]) * np.ones(1), np.array(rally[i][j][5]) * np.ones(1), np.array(rally[i][j][6]) * np.ones(1), np.array(rally[i][j][7]) * np.ones(1), np.array(rally[i][j][8]) * np.ones(1), np.array(rally[i][j][9]) * np.ones(1), np.array(rally[i][j][10]) * np.ones(1), np.array(rally[i][j][11]) * np.ones(1))))
    if len(A) == 0 or len(B) == 0:
        continue
    pattern.append(np.concatenate((np.array(A), np.full((50 - len(A), 3), (HIT_PAD, TIME_PAD, 0)))))

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 11 and the array at index 1 has size 3

In [12]:
tmp = badminton_data.groupby('player')

In [16]:
len(tmp.groups)

14

In [22]:
tmp.get_group('CHOU Tien Chen')

Unnamed: 0,rally,player,type,aroundhead,backhand,hit_height,hit_area,hit_x,hit_y,landing_height,landing_area,landing_x,landing_y,getpoint_player
1,1,CHOU Tien Chen,4,0.0,0,2,8,662.0,575.0,1.0,5,513,332,Kento MOMOTA
3,1,CHOU Tien Chen,5,0.0,0,2,3,833.0,641.0,1.0,12,849,326,Kento MOMOTA
5,2,CHOU Tien Chen,8,0.0,0,1,7,529.0,519.0,2.0,1,482,426,CHOU Tien Chen
7,2,CHOU Tien Chen,5,0.0,0,1,8,651.0,620.0,2.0,8,695,342,CHOU Tien Chen
8,3,CHOU Tien Chen,6,0.0,1,2,7,0.0,0.0,2.0,7,735,399,CHOU Tien Chen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19732,33,CHOU Tien Chen,6,0.0,1,2,7,0.0,0.0,2.0,1,404,372,Viktor AXELSEN
19734,33,CHOU Tien Chen,2,0.0,1,2,1,399.0,466.0,1.0,9,701,269,Viktor AXELSEN
19736,33,CHOU Tien Chen,0,0.0,0,1,8,482.0,628.0,2.0,2,800,373,Viktor AXELSEN
19738,33,CHOU Tien Chen,8,0.0,0,2,2,775.0,492.0,2.0,2,760,382,Viktor AXELSEN
