In [None]:
import numpy as np
import pandas as pd
import lxml
import itertools
# Turn off pandas' chained-assignment warning (SettingWithCopyWarning) for the
# whole session; pandas' own default for this option is 'warn'.
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
# File names of the nine weekly result pages: week1.html ... week9.html.
filelist = ['week{}.html'.format(week_no) for week_no in range(1, 10)]

def read_data(filename):
    """Load one results page and return one row per Team1/Team2 pairing.

    Parameters
    ----------
    filename : str
        Name of an HTML file inside the ``results/`` directory. The first
        table on the page is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``Team1``, ``Team2``, ``Week``, ``Home`` and ``Away``.
        ``Week`` repeats the first top-level column label of the table, and
        ``Home`` / ``Away`` are the integers found on either side of
        ``' - '`` in the ``Result`` column.
    """
    raw = pd.read_html('results/' + filename)[0]

    # The table carries a two-level header: remember the first top-level
    # label as the week tag, then flatten the columns to the second level.
    week_label = raw.columns.get_level_values(0).unique().values[0]
    raw.columns = raw.columns.get_level_values(1)

    # Keep the first row of every Team1/Team2 pairing and tag it with the week.
    matches = raw.drop_duplicates(subset=['Team1', 'Team2']).assign(Week=week_label)
    matches = matches[['Team1', 'Team2', 'Result', 'Week']]

    # Any cell that is exactly '-' is rewritten so it parses as a 0-0 score.
    matches = matches.replace(to_replace='-', value='0 - 0')

    # Split "<home> - <away>" into two integer columns.
    scores = matches['Result'].str.split(' - ', expand=True)
    matches = matches.assign(
        Home=scores[0].astype('int64'),
        Away=scores[1].astype('int64'),
    )
    return matches.drop(columns='Result')

# filelist holds week1.html ... week9.html: the first seven files feed
# df_result, the last two (weeks 8 and 9) feed df_predict.
df_result = pd.concat([read_data(name) for name in filelist[:7]])
df_predict = pd.concat([read_data(name) for name in filelist[7:]])

In [None]:
# Display the combined table built from week1.html through week7.html.
df_result

In [None]:
def winner(row):
    """Return the winning team's name for one game row.

    ``row`` must provide ``Team1``, ``Team2``, ``Home`` and ``Away``.
    ``Team1`` is returned only when ``Home`` is strictly greater than
    ``Away``; in every other case (including a level score) ``Team2`` is
    returned.
    """
    if row['Home'] > row['Away']:
        return row['Team1']
    return row['Team2']

# Label every game in df_result with its winner by applying `winner` row by row.
df_result['Winner'] = df_result.apply(winner, axis = 1)

In [None]:
# Display the results again, now that the 'Winner' column has been added.
df_result

In [None]:
# One row per team. Teams are gathered from BOTH sides of the fixture list so
# that a team which only ever appears as Team2 is not silently left out of the
# standings. Team1 names come first, in order of first appearance, so the
# result is unchanged whenever every team already appears as Team1.
df_table = pd.DataFrame(pd.concat([df_result['Team1'], df_result['Team2']]).unique(), columns = ["Team"])

def calculate(row, df_result):
    """Summarise one team's record over the games in ``df_result``.

    Parameters
    ----------
    row : mapping
        Must provide the team name under ``'Team'``.
    df_result : pandas.DataFrame
        Games with ``Team1``, ``Team2``, ``Home``, ``Away`` and ``Winner``
        columns.

    Returns
    -------
    pandas.Series
        ``[wins, losses, diff]`` where ``diff`` is the total absolute score
        margin over the team's wins minus the total absolute score margin
        over its non-wins.
    """
    team = row['Team']

    # Games in which the team took part, on either side.
    took_part = (df_result['Team1'] == team) | (df_result['Team2'] == team)
    played = df_result[took_part]

    is_win = played['Winner'] == team
    wins = played[is_win]
    losses = played[~is_win]

    margin_won = (wins['Home'] - wins['Away']).abs().sum()
    margin_lost = (losses['Home'] - losses['Away']).abs().sum()
    return pd.Series([len(wins), len(losses), margin_won - margin_lost])

# Fill in each team's win count, loss count and score differential from the
# games in df_result.
df_table[['Win', 'Lose', 'Diff']] = df_table.apply(
    lambda team_row: calculate(team_row, df_result),
    axis=1,
)

# Standings order: most wins first, then fewest losses, then highest Diff.
df_table = df_table.sort_values(
    by=['Win', 'Lose', 'Diff'],
    ascending=[False, True, False],
    ignore_index=True,
)

In [None]:
# Display the standings: sorted by Win (descending), Lose (ascending), Diff (descending).
df_table

In [None]:
# Display the games loaded from week8.html and week9.html.
df_predict

In [None]:
# One entry per game in df_predict; every entry is the same list of the four
# scorelines considered for that game, written as "<home>-<away>".
result_list = [['2-0', '0-2', '2-1', '1-2']] * len(df_predict)

result_list

In [None]:
# Enumerate every combination of scorelines for the games in df_predict and
# count, for each team, how often it ends up at each position of the table.
total_per = 4**(len(df_predict))
p = itertools.product(*result_list)

# One row per team, one float column per final position ("1" = top of the
# table). The shape follows df_table instead of a hard-coded 8 teams.
ranking_predict = pd.DataFrame(
    0.0,
    index = pd.Index(df_table['Team'], name = 'Team'),
    columns = [str(position) for position in range(1, len(df_table) + 1)],
)

# Iterate the product directly: this visits every scenario exactly once and
# cannot run past the end of the iterator.
for result in p:
    # Each scoreline is "<home>-<away>"; split once and reuse both halves.
    scores = [scoreline.split('-') for scoreline in result]
    df_predict['Home'] = [int(home) for home, _ in scores]
    df_predict['Away'] = [int(away) for _, away in scores]
    df_predict['Winner'] = df_predict.apply(winner, axis = 1)

    # Record from the simulated games, added on top of the current standings.
    df_table_temp = df_table.copy()
    df_table_temp[['Win', 'Lose', 'Diff']] = df_table_temp.apply(calculate, axis = 1, args = (df_predict,))
    df_table_temp['Win'] += df_table['Win']
    df_table_temp['Lose'] += df_table['Lose']
    df_table_temp['Diff'] += df_table['Diff']
    df_table_temp.sort_values(['Win', 'Lose', 'Diff'], ascending = [False, True, False], ignore_index = True, inplace = True)

    for i, team in enumerate(df_table_temp['Team'].values):
        # Single .loc[row, col] access: the chained form .loc[team][col] += 1
        # writes to a temporary row under pandas Copy-on-Write and would leave
        # the counts at zero.
        ranking_predict.loc[team, str(i + 1)] += 1

In [None]:
# Turn the per-position counts into percentages of all enumerated scenarios
# (four scorelines per game in df_predict).
# NOTE: this reassigns ranking_predict from itself, so running the cell a
# second time without re-running the simulation rescales the values again.
total_per = 4 ** len(df_predict)

ranking_predict = ranking_predict.div(total_per).mul(100)

ranking_predict