In [25]:
import os
import warnings
from datetime import timedelta, time, datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from py2neo import Graph

%matplotlib inline
pd.set_option('display.max_columns', 500)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [26]:
#read the fight history and drop fights whose method is a disqualification
fights = pd.read_csv('fight_hist.csv')
not_dq = fights['method'] != 'DQ'
fights = fights.loc[not_dq].reset_index(drop=True)

#strip apostrophes from division names (they occur in the womens divisions);
#the division name is later interpolated into a single-quoted Cypher literal
fights['division'] = fights['division'].str.replace("'", '')

In [27]:
#connect to Neo4j graph (must have Neo4j instance up and running with default settings)
#the password is taken from the NEO4J_PASSWORD environment variable so it does not have to
#live in the notebook; when the variable is unset it falls back to the previous default, 123
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "123"))

In [309]:
#load data into graph: every won fight becomes a lose_to relationship that points
#from the opponent (loser) node to the fighter (winner) node
merge_loss_query = '''
       MERGE (a: fighter {name: $fighter})
       MERGE (b: fighter {name: $opponent})
       MERGE (b)-[r:lose_to {date: $date, division: $division, method: $method, winner: $winner, loser: $loser}]->(a)
    '''

#only rows recorded as wins are loaded, and all of them go through a single transaction
won_fights = fights[fights.result == 'W']
tx = graph.begin()
for _, fight in won_fights.iterrows():
    winner_name = fight['fighter']
    loser_name = fight['opponent']
    fight_params = {
        'fighter': winner_name,
        'opponent': loser_name,
        'date': fight['date'],
        'method': fight['method'],
        'division': fight['division'],
        'winner': winner_name,
        'loser': loser_name,
    }
    tx.evaluate(merge_loss_query, parameters = fight_params)
tx.commit()

In [28]:
#divisions that get a pagerank; the three weight classes listed here are left out
excluded_divisions = ['Open Weight', 'Catch Weight', 'Super Heavyweight']
divs = [division for division in fights.division.unique() if division not in excluded_divisions]

In [29]:
#run one algo.pageRank.stream call per division and keep the returned records in pr,
#keyed by division name
pr = {}

#the %s placeholder receives the division name; apostrophes were removed from division
#names when the data was loaded, so the single-quoted literal stays well-formed
#NOTE: `lose_to` in the relationship pattern has no leading colon, so Cypher treats it as a
#variable bound to the relationship, not as a relationship-type filter
pagerank_query = '''
CALL algo.pageRank.stream(
"MATCH (fighter) RETURN id(fighter) as id",
"MATCH (opponent) -[lose_to]-> (fighter) where lose_to.division = '%s' RETURN id(opponent) as source, id(fighter) as target",
{graph:'cypher',iterations:50, dampingFactor:0.85})
YIELD nodeId, score
RETURN algo.asNode(nodeId).name AS fighter,score
ORDER BY score DESC'''

tx = graph.begin()
try:
    for division in divs:
        #.data() materialises the result as a list of dicts with keys fighter and score
        pr[division] = tx.run(pagerank_query % division).data()
    #finish the transaction instead of leaving it open after the loop
    tx.commit()
except Exception:
    #a failed query must not leave the transaction dangling either
    tx.rollback()
    raise

In [30]:
#replace each division's query records with a dataframe whose columns are
#named fighter and pagerank (names are assigned by position)
for division, records in pr.items():
    ranked = pd.DataFrame(records)
    ranked.columns = ['fighter', 'pagerank']
    pr[division] = ranked

In [32]:
#for every ranked division, the unique fighter names found in that division's fight rows
div_fighters = {
    division: fights.loc[fights.division == division, 'fighter'].unique()
    for division in divs
}

In [33]:
#raw Lightweight pageranks, restricted to fighters who have fought at Lightweight
lightweight_pr = pr['Lightweight']
fought_at_lightweight = lightweight_pr.fighter.isin(div_fighters['Lightweight'])
ranks = lightweight_pr[fought_at_lightweight]

#copy the top 10 to the system clipboard and display them
top_ranks = ranks.head(10)
top_ranks.to_clipboard(index = False)
top_ranks

Unnamed: 0,fighter,pagerank
0,Donald Cerrone,9.96855
1,Michael Johnson,8.789021
2,Rafael Dos Anjos,7.213535
3,Khabib Nurmagomedov,6.44348
4,Tony Ferguson,6.264578
5,Edson Barboza,5.210198
6,Anthony Pettis,4.426724
7,Benson Henderson,4.408875
8,Nate Diaz,4.325772
9,Dustin Poirier,4.308176


In [294]:
#win ratio per (division, fighter): wins divided by total fights
#a constant helper column lets pivot_table count rows by summing it
fights['ct'] = 1
group_keys = ['division', 'fighter']
is_win = fights.result == 'W'
total_fights = fights.pivot_table(index = group_keys, values = ['ct'], aggfunc = 'sum')
wins = fights[is_win].pivot_table(index = group_keys, values = ['ct'], aggfunc = 'sum')

#the left merge keeps fighters without a single win; their missing wins and ratio become 0
wr = pd.merge(total_fights, wins, left_index = True, right_index = True, how = 'left', copy = False)
wr.columns = ['total_fights', 'wins']
wr = wr.assign(win_ratio = wr.wins / wr.total_fights).fillna(0).reset_index()

In [296]:
#weight each division's pageranks by the fighter's win ratio in that same division
prs = {}

for division, division_pr in pr.items():
    #win ratios limited to the current division
    division_wr = wr[wr.division == division]

    #attach the win ratio to every ranked fighter and scale the pagerank by it
    scaled = pd.merge(division_pr, division_wr, on = 'fighter', how = 'left', copy = False)
    scaled['pagerank'] = scaled.pagerank * scaled.win_ratio

    #keep only the two result columns, highest scaled pagerank first
    prs[division] = (
        scaled[['fighter', 'pagerank']]
        .sort_values('pagerank', ascending = False)
        .reset_index(drop = True)
    )

In [300]:
#win-ratio-scaled Lightweight pageranks, restricted to fighters who have fought at Lightweight
lightweight_prs = prs['Lightweight']
fought_at_lightweight = lightweight_prs.fighter.isin(div_fighters['Lightweight'])
ranks = lightweight_prs[fought_at_lightweight]

#copy the top 10 to the system clipboard and display them
top_ranks = ranks.head(10)
top_ranks.to_clipboard(index = False)
top_ranks

Unnamed: 0,fighter,pagerank
0,Donald Cerrone,7.715101
1,Khabib Nurmagomedov,6.420802
2,Tony Ferguson,5.828353
3,Rafael Dos Anjos,4.819398
4,Michael Johnson,4.658021
5,Benson Henderson,3.317794
6,Edson Barboza,3.311262
7,Dustin Poirier,3.23102
8,Kevin Lee,3.039748
9,Beneil Dariush,2.867465
