In [3]:
import pandas as pd
pd.set_option('display.max_columns', 500)
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
from datetime import timedelta, time, datetime

In [4]:
#get fight data
fights = pd.read_csv('fight_hist.csv')

#ignore DQs
fights = fights[fights.method != 'DQ']

fights = fights.reset_index(drop = True)

#remove apostrophes from womens divisions
fights['division'] = fights.division.str.replace("'", '')

#eliminate non conventional weight classes
divs = [d for d in fights.division.unique() if d not in ['Open Weight', 'Catch Weight', 'Super Heavyweight']]

fights = fights[fights.division.isin(divs)]

fights['fighter_gender'] = 'Male'
fights.loc[fights.division.str.contains('women', case = False), 'fighter_gender'] = 'Female'
fights['opponent_gender'] = 'Male'
fights.loc[fights.division.str.contains('women', case = False), 'opponent_gender'] = 'Female'

In [5]:
fights[fights.result == 'W'].head()

Unnamed: 0,date,fight_url,event_url,result,fighter,opponent,division,method,round,time,fighter_url,opponent_url,knockdowns,sub_attempts,pass,reversals,takedowns_landed,takedowns_attempts,sig_strikes_landed,sig_strikes_attempts,total_strikes_landed,total_strikes_attempts,head_strikes_landed,head_strikes_attempts,body_strikes_landed,body_strikes_attempts,leg_strikes_landed,leg_strikes_attempts,distance_strikes_landed,distance_strikes_attempts,clinch_strikes_landed,clinch_strikes_attempts,ground_strikes_landed,ground_strikes_attempts,fighter_gender,opponent_gender
0,"February 13, 2021",http://ufcstats.com/fight-details/be94e5b7a35c...,http://ufcstats.com/event-details/3f7c14c7eca7...,W,Kamaru Usman,Gilbert Burns,Welterweight,KO/TKO,3,0:34,http://ufcstats.com/fighter-details/f1b2aa7853...,http://ufcstats.com/fighter-details/23024fdfc9...,2,0,0,2:05,0,0,83,136,93,149,56,105,11,15,16,16,54,100,3,4,26,32,Male,Male
2,"February 13, 2021",http://ufcstats.com/fight-details/25b8c932da5e...,http://ufcstats.com/event-details/3f7c14c7eca7...,W,Alexa Grasso,Maycee Barber,Womens Flyweight,U-DEC,3,5:00,http://ufcstats.com/fighter-details/e8b731feff...,http://ufcstats.com/fighter-details/6a740daf1b...,0,1,1,5:10,0,0,38,81,95,162,24,57,9,14,5,10,25,61,13,20,0,0,Female,Female
4,"February 13, 2021",http://ufcstats.com/fight-details/26fbf026beb8...,http://ufcstats.com/event-details/3f7c14c7eca7...,W,Kelvin Gastelum,Ian Heinisch,Middleweight,U-DEC,3,5:00,http://ufcstats.com/fighter-details/8c0580d4ff...,http://ufcstats.com/fighter-details/2e585c701f...,0,0,1,6:50,6,14,38,84,51,115,20,62,10,13,8,9,28,69,8,11,2,4,Male,Male
6,"February 13, 2021",http://ufcstats.com/fight-details/54e2490959f6...,http://ufcstats.com/event-details/3f7c14c7eca7...,W,Ricky Simon,Brian Kelleher,Featherweight,U-DEC,3,5:00,http://ufcstats.com/fighter-details/5987b2458f...,http://ufcstats.com/fighter-details/7be14eaed4...,0,1,0,8:14,6,9,45,98,72,136,25,73,3,4,17,21,29,74,6,9,10,15,Male,Male
8,"February 13, 2021",http://ufcstats.com/fight-details/9201312dd860...,http://ufcstats.com/event-details/3f7c14c7eca7...,W,Julian Marquez,Maki Pitolo,Middleweight,SUB,3,4:17,http://ufcstats.com/fighter-details/d0e1b42d41...,http://ufcstats.com/fighter-details/c549573c0d...,0,4,1,1:54,0,1,41,100,61,129,37,94,3,4,1,2,18,72,15,16,8,12,Male,Male


In [6]:
#connect to Neo4j graph (must have Neo4j instance up and running with default settings, password set to 123)
from py2neo import Graph
graph = Graph(password="123")

In [7]:
#load data into graph
tx = graph.begin()
for index, row in fights[(fights.result == 'W')].iterrows():
    tx.evaluate('''
       MERGE (a: fighter {name: $fighter, gender: $fighter_gender})
       MERGE (b: fighter {name: $opponent, gender: $opponent_gender})
       MERGE (b)-[r:lose_to {date: $date, division: $division, method: $method}]->(a)
    ''', parameters = {'fighter': row['fighter'], 'opponent': row['opponent'], 'date':row['date'], 
                       'method':row['method'], 'division':row['division'],
                       'fighter_gender':row['fighter_gender'], 'opponent_gender':row['opponent_gender']})
tx.commit()

<py2neo.database.work.TransactionSummary at 0x7fded795c940>

In [8]:
#create male graph projection
tx = graph.begin()
cypher = '''
     CALL gds.graph.create.cypher(
    'male_fights',
    "MATCH (f:fighter) WHERE f.gender = 'Male' RETURN id(f) AS id",
    "MATCH (opponent) -[lose_to]-> (fighter) WHERE opponent.gender = 'Male' and fighter.gender = 'Male' RETURN id(opponent) as source, id(fighter) as target"
)'''
tx.run(cypher).data()

[{'nodeQuery': "MATCH (f:fighter) WHERE f.gender = 'Male' RETURN id(f) AS id",
  'relationshipQuery': "MATCH (opponent) -[lose_to]-> (fighter) WHERE opponent.gender = 'Male' and fighter.gender = 'Male' RETURN id(opponent) as source, id(fighter) as target",
  'graphName': 'male_fights',
  'nodeCount': 1872,
  'relationshipCount': 5234,
  'createMillis': 47}]

In [9]:
#create female graph projection
tx = graph.begin()
cypher = '''
CALL gds.graph.create.cypher(
    'female_fights',
    "MATCH (f:fighter) WHERE f.gender = 'Female' RETURN id(f) AS id",
    "MATCH (opponent) -[lose_to]-> (fighter) WHERE opponent.gender = 'Female' and fighter.gender = 'Female' RETURN id(opponent) as source, id(fighter) as target"
)'''
tx.run(cypher).data()

[{'nodeQuery': "MATCH (f:fighter) WHERE f.gender = 'Female' RETURN id(f) AS id",
  'relationshipQuery': "MATCH (opponent) -[lose_to]-> (fighter) WHERE opponent.gender = 'Female' and fighter.gender = 'Female' RETURN id(opponent) as source, id(fighter) as target",
  'graphName': 'female_fights',
  'nodeCount': 184,
  'relationshipCount': 455,
  'createMillis': 24}]

In [10]:
#male connected components
tx = graph.begin()

cypher = '''CALL gds.wcc.write('male_fights', { writeProperty: 'mf_component_id' })
YIELD nodePropertiesWritten, componentCount;
'''

%time tx.run(cypher).data()

CPU times: user 761 µs, sys: 73 µs, total: 834 µs
Wall time: 58.8 ms


[{'nodePropertiesWritten': 1872, 'componentCount': 27}]

In [11]:
#female connected components
tx = graph.begin()

cypher = '''CALL gds.wcc.write('female_fights', { writeProperty: 'ff_component_id' })
YIELD nodePropertiesWritten, componentCount;
'''

%time tx.run(cypher).data()

CPU times: user 538 µs, sys: 46 µs, total: 584 µs
Wall time: 40 ms


[{'nodePropertiesWritten': 184, 'componentCount': 3}]

In [14]:
tx = graph.begin()
cypher = '''
CALL gds.pageRank.stream('male_fights')
YIELD
  nodeId,
  score
RETURN gds.util.asNode(nodeId).name AS fighter, score
ORDER BY score DESC
'''
tx.run(cypher).data()

[{'fighter': 'Stipe Miocic', 'score': 11.215403877751669},
 {'fighter': 'Henry Cejudo', 'score': 11.042557013354962},
 {'fighter': 'Demetrious Johnson', 'score': 10.533453871184612},
 {'fighter': 'Dustin Poirier', 'score': 8.78761443226249},
 {'fighter': 'Junior Dos Santos', 'score': 8.564044779073447},
 {'fighter': 'Jon Jones', 'score': 8.470969679212429},
 {'fighter': 'Francis Ngannou', 'score': 8.453018058557062},
 {'fighter': 'Joseph Benavidez', 'score': 8.229673965024995},
 {'fighter': 'Max Holloway', 'score': 8.060076443999696},
 {'fighter': 'Donald Cerrone', 'score': 7.84987305267823},
 {'fighter': 'Kamaru Usman', 'score': 7.760013837600129},
 {'fighter': 'Daniel Cormier', 'score': 7.5704781895503395},
 {'fighter': 'Frankie Edgar', 'score': 7.111835626121319},
 {'fighter': 'Derrick Lewis', 'score': 6.9421872727747544},
 {'fighter': 'Aljamain Sterling', 'score': 6.724637681775493},
 {'fighter': 'Raphael Assuncao', 'score': 6.620791037604283},
 {'fighter': 'Georges St-Pierre', 'sc

In [12]:
#male pagerank
tx = graph.begin()
cypher = '''
CALL gds.pageRank.stream('male_fights')
YIELD
  nodeId,
  score
RETURN gds.util.asNode(nodeId).name AS fighter, score
ORDER BY score DESC
'''
male_pr = pd.DataFrame(tx.run(cypher).data())
male_pr.head(10).to_clipboard(index = False, header = False)
male_pr.head(10)

Unnamed: 0,fighter,score
0,Stipe Miocic,11.215404
1,Henry Cejudo,11.042557
2,Demetrious Johnson,10.533454
3,Dustin Poirier,8.787614
4,Junior Dos Santos,8.564045
5,Jon Jones,8.47097
6,Francis Ngannou,8.453018
7,Joseph Benavidez,8.229674
8,Max Holloway,8.060076
9,Donald Cerrone,7.849873


In [13]:
#female pagerank
tx = graph.begin()
cypher = '''
CALL gds.pageRank.stream('female_fights')
YIELD
  nodeId,
  score
RETURN gds.util.asNode(nodeId).name AS fighter, score
ORDER BY score DESC
'''
female_pr = pd.DataFrame(tx.run(cypher).data())
female_pr.head(10).to_clipboard(index = False, header = False)
female_pr.head(10)

Unnamed: 0,fighter,score
0,Amanda Nunes,11.747856
1,Cat Zingano,10.529791
2,Holly Holm,4.263861
3,Valentina Shevchenko,4.202499
4,Germaine de Randamie,3.47333
5,Ronda Rousey,3.436543
6,Carla Esparza,3.342
7,Julianna Pena,3.11453
8,Ketlen Vieira,2.84835
9,Irene Aldana,2.841481
