In [1]:
import pandas as pd
from collections import defaultdict
import scipy as sp
import scipy.sparse as sparse
import numpy as np

In [2]:
def eval_markov_chain(P, x0, t_max):
    x = x0
    for t in range(t_max):
        x = P.T.dot(x)
    return x

In [14]:
def ufc_markov(weightclass):
    
    fights = pd.read_csv('fights.csv')
    
    if weightclass == 'all':
        all_fights = fights.copy()[['fighter_0', 'fighter_1', 'date']]
    else:
        all_fights = fights.copy()[fights['Weight class'] == weightclass][['fighter_0', 'fighter_1', 'date']]
        
    all_fights['date'] = pd.to_datetime(all_fights['date'])
    all_fights2 = all_fights.copy()
    all_fights2.columns = ['fighter_1', 'fighter_0', 'date']
    all_fights2 = all_fights2[['fighter_0', 'fighter_1', 'date']]
    all_fights = pd.concat([all_fights, all_fights2])
    all_fights = all_fights.sort_values(['date', 'fighter_0', 'fighter_1']).reset_index(drop = True)
    
    edge_df = pd.DataFrame(all_fights.groupby(['fighter_0']).agg(
        opponents = ('fighter_1', list)))
    edge_df = edge_df[edge_df['opponents'].map(len) > 1].reset_index()
    
    if len(edge_df) < 1:
        return None
    
    edge_dict = defaultdict(int)
    for edge_list in edge_df['opponents']:
        for i in range(len(edge_list) - 1):
            for j in range(min(2, len(edge_list[i+1:]) - 1)):
                f0 = edge_list[i]
                f1 = edge_list[i + j + 1]
                c = 1 / (j + 1) ** 2
                edge_dict[(f0, f1)] += c
                
    all_edges = pd.DataFrame()
    all_edges['fighter_0'] = [k[0] for k in edge_dict.keys()]
    all_edges['fighter_1'] = [k[1] for k in edge_dict.keys()]
    all_edges['count'] = [v for k,v in edge_dict.items()]

    outdegrees = all_edges[['fighter_0', 'fighter_1']].groupby(['fighter_0'], as_index = False).count()
    outdegrees.columns = ['fighter_0', 'outdegree']
    all_edges = all_edges.merge(outdegrees, on = 'fighter_0', how = 'left')

    all_edges['outdegree'] = 1 / all_edges['outdegree']
    all_edges['weight'] = all_edges['count'] + 999 * all_edges['outdegree']
    
    roster = sorted(list(set(all_edges['fighter_0'].to_list() + all_edges['fighter_1'].to_list())))
    roster_dict = {roster[i] : i for i in range(len(roster))}
    roster_trans = {i : roster[i] for i in range(len(roster))}
    roster_codes = pd.DataFrame(roster)
    roster_codes.columns = ['f']
    
    all_edges['f0'] = all_edges['fighter_0'].apply(lambda x : roster_dict[x])
    all_edges['f1'] = all_edges['fighter_1'].apply(lambda x : roster_dict[x])
    
    P = sparse.coo_matrix((all_edges['weight'], (all_edges['f0'], all_edges['f1'])), shape=(len(roster), len(roster)))
    x0 = np.array([1 / len(all_edges['f0'].unique())] * len(roster))
    
    outdegree_inverse = 1 / all_edges['outdegree']
    degree_median = int(round(outdegree_inverse.median()))
    T_MAX = degree_median
    x = eval_markov_chain(P, x0, T_MAX)
    ranks = np.argsort(-x)
    desc = roster_codes.iloc[ranks]['f']
    rating = x[ranks]
    top = pd.DataFrame({'Description': desc,
                          'Markov Chain Rating': rating}).reset_index(drop = True)
    return(top)

In [15]:
for w in pd.read_csv('fights.csv')['Weight class'].unique():
    x = ufc_markov(w)
    if x is not None:
        display(x.head())

Unnamed: 0,Description,Markov Chain Rating
0,Andrei Arlovski,3.051666e+22
1,Junior Dos Santos,2.540117e+22
2,Stefan Struve,2.513373e+22
3,Mark Hunt,2.223576e+22
4,Frank Mir,2.158065e+22


Unnamed: 0,Description,Markov Chain Rating
0,Angela Hill,3.90057e+22
1,Jessica Andrade,3.766349e+22
2,Carla Esparza,3.529013e+22
3,Cortney Casey,3.374199e+22
4,Joanna Jedrzejczyk,3.351061e+22


Unnamed: 0,Description,Markov Chain Rating
0,Jose Aldo,2.567886e+19
1,Ricardo Lamas,2.387138e+19
2,Darren Elkins,2.254735e+19
3,Cub Swanson,2.240605e+19
4,Dennis Bermudez,2.227284e+19


Unnamed: 0,Description,Markov Chain Rating
0,Tim Boetsch,1.660983e+25
1,Nate Marquardt,1.604457e+25
2,Thales Leites,1.517071e+25
3,Elias Theodorou,1.383696e+25
4,Derek Brunson,1.31623e+25


Unnamed: 0,Description,Markov Chain Rating
0,Thiago Alves,1.425123e+22
1,Demian Maia,1.334911e+22
2,Ben Saunders,1.323966e+22
3,Jake Ellenberger,1.251124e+22
4,Robbie Lawler,1.182277e+22


Unnamed: 0,Description,Markov Chain Rating
0,Jennifer Maia,115645.365174
1,Joanne Calderwood,96593.064302
2,Lucie Pudilova,68811.51314
3,Katlyn Chookagian,46579.488372
4,Sabina Mazo,34895.348837


Unnamed: 0,Description,Markov Chain Rating
0,Jimi Manuwa,2.69787e+25
1,Ovince Saint Preux,2.619209e+25
2,Jan Blachowicz,2.519182e+25
3,Glover Teixeira,2.465775e+25
4,Jon Jones,2.250491e+25


Unnamed: 0,Description,Markov Chain Rating
0,Jim Miller,1.56441e+28
1,Rafael Dos Anjos,1.12115e+28
2,Francisco Trinaldo,1.096382e+28
3,Kajan Johnson,1.066578e+28
4,Edson Barboza,1.065965e+28


Unnamed: 0,Description,Markov Chain Rating
0,Jimmie Rivera,3.210764e+19
1,Urijah Faber,2.352078e+19
2,Rani Yahya,1.901813e+19
3,Raphael Assuncao,1.74779e+19
4,Cody Garbrandt,1.580512e+19


Unnamed: 0,Description,Markov Chain Rating
0,Lina Lansberg,8.300988e+16
1,Sarah Moras,8.113581e+16
2,Marion Reneau,6.175925e+16
3,Ketlen Vieira,3.995675e+16
4,Miesha Tate,3.66182e+16


Unnamed: 0,Description,Markov Chain Rating
0,Joseph Benavidez,5.798945e+16
1,Jussier Formiga,4.846128e+16
2,Demetrious Johnson,4.224026e+16
3,Louis Smolka,3.904897e+16
4,Dustin Ortiz,3.43575e+16


Unnamed: 0,Description,Markov Chain Rating
0,Zarah Fairn,125116.044643
1,Amanda Nunes,125089.285714
2,Felicia Spencer,35803.625
3,Cat Zingano,17928.625
4,Yana Kunitskaya,17928.625


Unnamed: 0,Description,Markov Chain Rating
0,Vitor Belfort,1000.0
1,Wanderlei Silva,0.0


Unnamed: 0,Description,Markov Chain Rating
0,Dan Severn,106768100.0
1,David Abbott,61809850.0
2,Mark Hall,48792240.0
3,Oleg Taktarov,44388630.0
4,Keith Hackney,31864280.0
