In [1]:
import pandas as pd
from collections import defaultdict
import scipy as sp
import scipy.sparse as sparse
import numpy as np

In [2]:
def eval_markov_chain(P, x0, t_max):
    x = x0
    for t in range(t_max):
        x = P.T.dot(x)
    return x

In [3]:
def ufc_markov(weightclass):
    
    fights = pd.read_csv('fights.csv')
    
    if weightclass == 'all':
        all_fights = fights.copy()[['fighter_0', 'fighter_1', 'date']]
    else:
        all_fights = fights.copy()[fights['Weight class'] == weightclass][['fighter_0', 'fighter_1', 'date']]
        
    all_fights['date'] = pd.to_datetime(all_fights['date'])
    all_fights2 = all_fights.copy()
    all_fights2.columns = ['fighter_1', 'fighter_0', 'date']
    all_fights2 = all_fights2[['fighter_0', 'fighter_1', 'date']]
    all_fights = pd.concat([all_fights, all_fights2])
    all_fights = all_fights.sort_values(['date', 'fighter_0', 'fighter_1']).reset_index(drop = True)
    
    edge_df = pd.DataFrame(all_fights.groupby(['fighter_0']).agg(
        opponents = ('fighter_1', list)))
    edge_df = edge_df[edge_df['opponents'].map(len) > 1].reset_index()
    
    if len(edge_df) < 1:
        return None
    
    edge_dict = defaultdict(int)
    for edge_list in edge_df['opponents']:
        for i in range(len(edge_list) - 1):
            for j in range(min(2, len(edge_list[i+1:]) - 1)):
                f0 = edge_list[i]
                f1 = edge_list[i + j + 1]
                c = 1 / (j + 1) ** 2
                edge_dict[(f0, f1)] += c
                
    all_edges = pd.DataFrame()
    all_edges['fighter_0'] = [k[0] for k in edge_dict.keys()]
    all_edges['fighter_1'] = [k[1] for k in edge_dict.keys()]
    all_edges['count'] = [v for k,v in edge_dict.items()]

    outdegrees = all_edges[['fighter_0', 'fighter_1']].groupby(['fighter_0'], as_index = False).count()
    outdegrees.columns = ['fighter_0', 'outdegree']
    all_edges = all_edges.merge(outdegrees, on = 'fighter_0', how = 'left')

    all_edges['outdegree'] = 1 / all_edges['outdegree']
    all_edges['weight'] = all_edges['count'] + 999 * all_edges['outdegree']
    
    roster = sorted(list(set(all_edges['fighter_0'].to_list() + all_edges['fighter_1'].to_list())))
    roster_dict = {roster[i] : i for i in range(len(roster))}
    roster_trans = {i : roster[i] for i in range(len(roster))}
    roster_codes = pd.DataFrame(roster)
    roster_codes.columns = ['f']
    
    all_edges['f0'] = all_edges['fighter_0'].apply(lambda x : roster_dict[x])
    all_edges['f1'] = all_edges['fighter_1'].apply(lambda x : roster_dict[x])
    
    P = sparse.coo_matrix((all_edges['weight'], (all_edges['f0'], all_edges['f1'])), shape=(len(roster), len(roster)))
    x0 = np.array([1 / len(all_edges['f0'].unique())] * len(roster))
    
    outdegree_inverse = 1 / all_edges['outdegree']
    degree_median = int(round(outdegree_inverse.median()))
    T_MAX = degree_median
    x = eval_markov_chain(P, x0, T_MAX)
    ranks = np.argsort(-x)
    desc = roster_codes.iloc[ranks]['f']
    rating = x[ranks]
    top = pd.DataFrame({'Description': desc,
                          'Markov Chain Rating': rating}).reset_index(drop = True)
    return(top)

In [4]:
for w in pd.read_csv('fights.csv')['Weight class'].unique():
    x = ufc_markov(w)
    if x is not None:
        display(x.head())

Unnamed: 0,Description,Markov Chain Rating
0,Andrei Arlovski,3.207488e+22
1,Junior Dos Santos,2.495005e+22
2,Stefan Struve,2.473002e+22
3,Frank Mir,2.409613e+22
4,Mark Hunt,2.315715e+22


Unnamed: 0,Description,Markov Chain Rating
0,Angela Hill,4.027866e+22
1,Cortney Casey,3.990408e+22
2,Jessica Andrade,3.919852e+22
3,Carla Esparza,3.646265e+22
4,Joanna Jedrzejczyk,3.423294e+22


Unnamed: 0,Description,Markov Chain Rating
0,Jose Aldo,2.363765e+22
1,Dennis Bermudez,2.214847e+22
2,Ricardo Lamas,2.168073e+22
3,Darren Elkins,2.085897e+22
4,Cub Swanson,2.037299e+22


Unnamed: 0,Description,Markov Chain Rating
0,Tim Boetsch,1.686629e+25
1,Nate Marquardt,1.600274e+25
2,Thales Leites,1.503696e+25
3,Elias Theodorou,1.367816e+25
4,Gegard Mousasi,1.28999e+25


Unnamed: 0,Description,Markov Chain Rating
0,Thiago Alves,1.375852e+22
1,Demian Maia,1.301453e+22
2,Jake Ellenberger,1.270905e+22
3,Ben Saunders,1.266473e+22
4,Robbie Lawler,1.180349e+22


Unnamed: 0,Description,Markov Chain Rating
0,Jennifer Maia,115645.365174
1,Joanne Calderwood,96593.064302
2,Lucie Pudilova,68811.51314
3,Katlyn Chookagian,46579.488372
4,Sabina Mazo,34895.348837


Unnamed: 0,Description,Markov Chain Rating
0,Ovince Saint Preux,2.536849e+25
1,Jon Jones,2.420676e+25
2,Glover Teixeira,2.380752e+25
3,Jimi Manuwa,2.376717e+25
4,Jan Blachowicz,2.212232e+25


Unnamed: 0,Description,Markov Chain Rating
0,Jim Miller,1.582281e+28
1,Rafael Dos Anjos,1.118891e+28
2,Francisco Trinaldo,1.102335e+28
3,Gleison Tibau,1.100113e+28
4,Edson Barboza,1.053196e+28


Unnamed: 0,Description,Markov Chain Rating
0,Jimmie Rivera,3.126973e+19
1,Urijah Faber,2.223427e+19
2,Rani Yahya,1.90254e+19
3,Raphael Assuncao,1.693426e+19
4,TJ Dillashaw,1.524269e+19


Unnamed: 0,Description,Markov Chain Rating
0,Lina Lansberg,8.517404e+19
1,Sarah Moras,7.491707e+19
2,Marion Reneau,6.018635e+19
3,Ketlen Vieira,3.593564e+19
4,Sara McMann,3.550742e+19


Unnamed: 0,Description,Markov Chain Rating
0,Joseph Benavidez,5.579427e+16
1,Jussier Formiga,4.614106e+16
2,Demetrious Johnson,4.23817e+16
3,Louis Smolka,3.962141e+16
4,Dustin Ortiz,3.276991e+16


Unnamed: 0,Description,Markov Chain Rating
0,Zarah Fairn,125116.044643
1,Amanda Nunes,125089.285714
2,Felicia Spencer,35803.625
3,Cat Zingano,17928.625
4,Yana Kunitskaya,17928.625


Unnamed: 0,Description,Markov Chain Rating
0,Vitor Belfort,1000.0
1,Wanderlei Silva,0.0


Unnamed: 0,Description,Markov Chain Rating
0,Dan Severn,85790860.0
1,David Abbott,59380940.0
2,Mark Hall,47255530.0
3,Oleg Taktarov,45657870.0
4,Ken Shamrock,38940020.0
