In [1]:
import numpy as np
import pandas as pd
import lxml
import itertools
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# File names of the nine weekly result pages: 'week1.html' through 'week9.html'.
filelist = [f'week{week_no}.html' for week_no in range(1, 10)]

def read_data(filename, directory='results'):
    """Load one weekly results page and return its matches as a tidy frame.

    The first table found in the page is used. Its header is expected to have
    two levels: the top level carries the week label and the second level the
    real column names, which must include 'Team1', 'Team2' and 'Result'.

    Parameters
    ----------
    filename : str
        Name of the HTML file, e.g. 'week1.html'.
    directory : str, default 'results'
        Folder that contains the file.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ('Team1', 'Team2') pairing with the columns
        'Team1', 'Team2', 'Week', 'Home', 'Away'. 'Home' and 'Away' are the
        integer scores parsed from 'Result' ("<home> - <away>"); a bare '-'
        is read as 0 - 0. The row labels of the source table are kept.
        An empty table yields an empty frame with the same columns.
    """
    raw = pd.read_html(f'{directory}/{filename}')[0]

    # The week label sits in the top header level; the usable names in the second.
    week_label = raw.columns.get_level_values(0)[0]
    raw.columns = raw.columns.get_level_values(1)

    # Work on fresh frames instead of mutating a slice in place.
    matches = raw.drop_duplicates(subset=['Team1', 'Team2'])[['Team1', 'Team2', 'Result']]
    # A cell holding only '-' is rewritten so that it parses as a 0 - 0 score.
    matches = matches.replace(to_replace='-', value='0 - 0')

    # Split "<home> - <away>" once and convert both halves column-wise;
    # unlike zip(*rows) this also works when there are no rows at all.
    score_parts = matches['Result'].str.split(' - ')
    return matches.drop(columns='Result').assign(
        Week=week_label,
        Home=score_parts.str[0].astype(int),
        Away=score_parts.str[1].astype(int),
    )

# Pages for weeks 1-7 feed the results table; pages for weeks 8-9 feed the
# table of matches whose outcome is to be predicted.
played_pages = [f'week{n}.html' for n in range(1, 8)]
upcoming_pages = [f'week{n}.html' for n in range(8, 10)]

df_result = pd.concat(list(map(read_data, played_pages)))
df_predict = pd.concat(list(map(read_data, upcoming_pages)))

In [3]:
# Show the combined match table built from the week 1-7 pages.
df_result

Unnamed: 0,Team1,Team2,Week,Home,Away
0,GMedia Luxury,CERBERUS Esports,Week 1,0,2
2,SBTC Esports,Burst The Sky Esports,Week 1,2,1
5,Saigon Buffalo,Team Secret,Week 1,2,1
8,GAM Esports,Team Flash,Week 1,2,0
10,Burst The Sky Esports,GMedia Luxury,Week 1,1,2
13,CERBERUS Esports,Saigon Buffalo,Week 1,1,2
0,Saigon Buffalo,GAM Esports,Week 2,0,2
2,CERBERUS Esports,Team Secret,Week 2,2,1
5,GMedia Luxury,Team Flash,Week 2,2,1
8,SBTC Esports,GAM Esports,Week 2,0,2


In [4]:
def winner(row):
    """Return the team name on the higher-scoring side of a match row.

    `row` must support key lookup for 'Team1', 'Team2', 'Home' and 'Away'.
    'Team1' is returned only when 'Home' is strictly greater than 'Away';
    in every other case, equal scores included, 'Team2' is returned.
    """
    if row['Home'] > row['Away']:
        return row['Team1']
    return row['Team2']

# Add a 'Winner' column by evaluating winner() on every match row (axis=1 means row-wise).
df_result['Winner'] = df_result.apply(winner, axis = 1)

In [5]:
# Show the match table again, now including the derived 'Winner' column.
df_result

Unnamed: 0,Team1,Team2,Week,Home,Away,Winner
0,GMedia Luxury,CERBERUS Esports,Week 1,0,2,CERBERUS Esports
2,SBTC Esports,Burst The Sky Esports,Week 1,2,1,SBTC Esports
5,Saigon Buffalo,Team Secret,Week 1,2,1,Saigon Buffalo
8,GAM Esports,Team Flash,Week 1,2,0,GAM Esports
10,Burst The Sky Esports,GMedia Luxury,Week 1,1,2,GMedia Luxury
13,CERBERUS Esports,Saigon Buffalo,Week 1,1,2,Saigon Buffalo
0,Saigon Buffalo,GAM Esports,Week 2,0,2,GAM Esports
2,CERBERUS Esports,Team Secret,Week 2,2,1,CERBERUS Esports
5,GMedia Luxury,Team Flash,Week 2,2,1,GMedia Luxury
8,SBTC Esports,GAM Esports,Week 2,0,2,GAM Esports


In [6]:
# One row per team that appears in the results, whichever side it was listed on.
# 'Team1' names come first in order of first appearance, followed by any team
# seen only as 'Team2', so the order is unchanged when 'Team1' already covers
# every team.
_all_teams = pd.concat([df_result['Team1'], df_result['Team2']], ignore_index=True)
df_table = pd.DataFrame(_all_teams.drop_duplicates().to_numpy(), columns = ["Team"])

def calculate(row, df_result):
    """Summarise one team's record over a table of decided matches.

    Parameters
    ----------
    row : mapping
        Must provide the team name under the key 'Team'.
    df_result : pandas.DataFrame
        Matches with the columns 'Team1', 'Team2', 'Home', 'Away', 'Winner'.

    Returns
    -------
    pandas.Series
        Three positional values: number of matches the team won, number it
        did not win, and the summed absolute score margin of the wins minus
        the summed absolute score margin of the other matches.
    """
    team = row['Team']

    # Every match in which the team took part, on either side.
    involved = df_result['Team1'].eq(team) | df_result['Team2'].eq(team)
    played = df_result[involved]

    is_win = played['Winner'] == team
    victories = played[is_win]
    defeats = played[~is_win]

    margin_won = (victories['Home'] - victories['Away']).abs().sum()
    margin_lost = (defeats['Home'] - defeats['Away']).abs().sum()

    return pd.Series([len(victories), len(defeats), margin_won - margin_lost])

# Attach each team's wins, losses and score difference from the played matches.
team_stats = df_table.apply(calculate, axis=1, args=(df_result,))
df_table[['Win', 'Lose', 'Diff']] = team_stats

# Rank: most wins first, then fewest losses, then largest score difference;
# the index is renumbered so that it reflects the ranking order.
df_table = df_table.sort_values(
    by=['Win', 'Lose', 'Diff'],
    ascending=[False, True, False],
    ignore_index=True,
)

In [7]:
# Show the standings, ordered by wins (descending), losses (ascending), then score difference (descending).
df_table

Unnamed: 0,Team,Win,Lose,Diff
0,GAM Esports,10,1,15
1,CERBERUS Esports,8,3,9
2,Saigon Buffalo,6,4,2
3,SBTC Esports,6,5,1
4,Team Secret,5,5,-1
5,Burst The Sky Esports,3,7,-6
6,GMedia Luxury,2,8,-11
7,Team Flash,2,9,-9


In [8]:
# Show the matches loaded from the week 8-9 pages; a '-' result is read as 0 - 0 by read_data.
df_predict

Unnamed: 0,Team1,Team2,Week,Home,Away
0,Saigon Buffalo,Burst The Sky Esports,Week 8,0,0
3,GMedia Luxury,Team Secret,Week 8,0,0
6,Team Flash,Burst The Sky Esports,Week 8,0,0
9,Team Secret,CERBERUS Esports,Week 8,0,0
12,SBTC Esports,Saigon Buffalo,Week 8,0,0
15,GAM Esports,GMedia Luxury,Week 8,0,0
0,Burst The Sky Esports,Team Secret,Week 9,0,0
3,CERBERUS Esports,GAM Esports,Week 9,0,0
6,Saigon Buffalo,Team Flash,Week 9,0,0
9,GMedia Luxury,SBTC Esports,Week 9,0,0


In [12]:
# Scorelines considered for every match in df_predict, as [Home, Away] pairs.
possible_scores = [[2, 0], [0, 2], [2, 1], [1, 2]]

# One entry per row of df_predict, each holding the same option list, so the
# whole thing can be unpacked into itertools.product.
result_list = [possible_scores] * len(df_predict)

result_list

[[[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]],
 [[2, 0], [0, 2], [2, 1], [1, 2]]]

In [10]:
# Number of distinct scenarios: four candidate scorelines per row of df_predict.
total_per = 4**(len(df_predict))
# Lazy iterator over every combination of one scoreline per match.
p = itertools.product(*result_list)

# Tally of how often each team finishes in each place: one row per team
# (indexed by name) and one column per place, labelled '1', '2', ...
# The size follows df_table instead of assuming exactly eight teams.
n_teams = len(df_table)
ranking_predict = pd.DataFrame(
    np.zeros((n_teams, n_teams)),
    index=pd.Index(df_table['Team'], name='Team'),
    columns=[str(place) for place in range(1, n_teams + 1)],
)

def test():
    """Score one future scenario and record each team's finishing place.

    Takes the next combination of scorelines from the module-level iterator
    `p`, writes it into `df_predict`, adds the resulting win / loss /
    difference figures to the current standings in `df_table`, re-sorts, and
    increments the matching (team, place) cell of `ranking_predict`.

    Side effects: advances `p`, overwrites the 'Home', 'Away' and 'Winner'
    columns of `df_predict`, and mutates `ranking_predict`.

    Raises
    ------
    StopIteration
        When `p` has no scenarios left.
    """
    scenario = next(p)

    # Each element of the scenario is a [home, away] pair for one match.
    # (Transposing the scenario yields only two sequences, so the away scores
    # live at position 1; position 2 does not exist.)
    df_predict['Home'] = [int(home) for home, _ in scenario]
    df_predict['Away'] = [int(away) for _, away in scenario]
    df_predict['Winner'] = df_predict.apply(winner, axis = 1)

    # Record for the simulated matches only, then add the standings so far.
    df_table_temp = df_table.copy()
    df_table_temp[['Win', 'Lose', 'Diff']] = df_table_temp.apply(calculate, axis = 1, args = (df_predict,))
    for stat in ('Win', 'Lose', 'Diff'):
        df_table_temp[stat] += df_table[stat]

    df_table_temp.sort_values(['Win', 'Lose', 'Diff'], ascending = [False, True, False], ignore_index = True, inplace = True)

    # Single .loc[row, column] access: chained indexing (.loc[row][column])
    # may modify a temporary copy and leave ranking_predict unchanged.
    for place, team in enumerate(df_table_temp['Team'].values, start=1):
        ranking_predict.loc[team, str(place)] += 1

In [11]:
# NOTE(review): every timing iteration calls test(), which advances the shared
# iterator `p` and adds to `ranking_predict`, so this benchmark also changes
# the tallies used further down.
%timeit test()

15 ms ± 486 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# Total number of scenarios: four candidate scorelines per row of df_predict.
total_per = 4 ** len(df_predict)

# Turn the raw tallies into percentages of all scenarios.
# NOTE(review): this rebinds ranking_predict to a scaled version of itself, so
# running the cell twice scales twice. The percentages are only meaningful if
# test() has been called once for each of the total_per scenarios - confirm.
ranking_predict = ranking_predict.div(total_per).mul(100)

ranking_predict