In [1]:
!pip install ufc_api
!pip install ufcstats

Collecting ufc_api
  Downloading ufc_api-0.0.1-py3-none-any.whl.metadata (3.4 kB)
Downloading ufc_api-0.0.1-py3-none-any.whl (5.3 kB)
Installing collected packages: ufc_api
Successfully installed ufc_api-0.0.1
Collecting ufcstats
  Downloading ufcstats-0.0.3-py3-none-any.whl.metadata (1.4 kB)
Downloading ufcstats-0.0.3-py3-none-any.whl (3.5 kB)
Installing collected packages: ufcstats
Successfully installed ufcstats-0.0.3


In [2]:
import requests as req
from lxml import html
import datetime as dt
import math
import re

def parse_sherdog_fighter(url):
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    htm = req.get(url, headers = headers)
    xml = html.document_fromstring(htm.content)


    wins_detailed = xml.xpath("//div[@class='wins']/div[@class='meter']/div[1]/text()")
    losses_detailed = xml.xpath("//div[@class='loses']/div[@class='meter']/div[1]/text()")
    bio = xml.xpath("//div[@class='fighter-info']")[0]

    try:
        other_wins = wins_detailed[3]
        other_losses = losses_detailed[3]
    except IndexError:
        other_wins = '0'
        other_losses = '0'

    fighter = {
        'name' : xml.xpath("//span[@class='fn']/text()")[0],
        #'nickname' : bio.xpath("//span[@class='nickname']/em/text()")[0],
        #'nationality' : bio.xpath("//strong[@itemprop='nationality']/text()")[0],
        #'birthplace' : xml.xpath("//span[@class='locality']/text()")[0],
        #'birthdate' : xml.xpath("//span[@itemprop='birthDate']/text()")[0],
        'age' : xml.xpath("//span[@itemprop='birthDate']/preceding-sibling::b/text()")[0],
        'height' : xml.xpath("//b[@itemprop='height']/text()")[0],
        'weight' : xml.xpath("//b[@itemprop='weight']/text()")[0],
        #'association' : xml.xpath("//span[@itemprop='memberOf']/a/span/text()")[0],
        #'weight_class' : xml.xpath("//div[@class='association-class']/a/text()")[0],

        'wins' : {
            'total': xml.xpath("//div[@class='winloses win']/span[2]/text()")[0],
            'ko/tko': wins_detailed[0],
            'submissions':wins_detailed[1],
            'decisions':wins_detailed[2],
            'others': other_wins
                },
        'losses' : {
            'total': xml.xpath("//div[@class='winloses lose']/span[2]/text()")[0],
            'ko/tko': losses_detailed[0],
            'submissions':losses_detailed[1],
            'decisions':losses_detailed[2],
            'others':other_losses
                },

        'fights' : []
    }

    fight_rows = xml.xpath("//table[@class='new_table fighter']/tr[not(@class='table_head')]")

    for row in fight_rows:
        try:
            referee =  row.xpath("td[4]/span/a/text()")[0]
        except IndexError:
            referee = ""

        fight = {
            'name': row.xpath("td[3]/a/descendant-or-self::*/text()")[0],
            'date': row.xpath("td[3]/span/text()")[0],
            'url': "https://www.sherdog.com" + row.xpath("td[3]/a/@href")[0],
            'result': row.xpath("td[1]/span/text()")[0],
            'method': row.xpath("td[4]/b/text()")[0],
            'referee': referee,
            'round': row.xpath("td[5]/text()")[0],
            'time': row.xpath("td[6]/text()")[0],
            'opponent': row.xpath("td[2]/a/text()")[0]
        }
        fighter['fights'].append(fight)
    return fighter

def get_ufc_stats(url):
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    htm = req.get(url, headers = headers)
    xml = html.document_fromstring(htm.content)

    distance = xml.xpath("//div[@class='c-stat-3bar__value']/text()")
    stats = xml.xpath("//div[@class='c-stat-compare__number']/text()")

    str_tds = []
    for item in xml.xpath("//dd"):
        if item.text is not None:
            str_tds.append(item.text)
        else:
            str_tds.append("0")

    fighter = {
        'strikes': {
            'attempted': str_tds[1],
            'landed': str_tds[0],
            'standing': distance[0].split(" ")[0],
            'clinch': distance[1].split(" ")[0],
            'ground': distance[2].split(" ")[0],
            'striking defense': stats[4].strip(),
            'strikes per minute': stats[0].strip()
        },
        'takedowns': {
            'attempted': str_tds[3],
            'landed': str_tds[2],
            'takedown defense':stats[5].strip(),
            'subs per 15min': stats[3].strip()
        }
    }
    return fighter

def search(query):
    url = 'https://www.google.com/search?q=' + query
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    htm = req.get(url, headers = headers)
    xml = html.document_fromstring(htm.content)
    return xml.xpath("//h3/parent::a/@href")

def get_sherdog_link(query):
    possible_urls = search(query+" Sherdog")
    for url in possible_urls:
        if ("sherdog.com/fighter/" in url) and (not "/news/" in url):
            return url
    raise BaseException("Sherdog link not found !")

def get_ufc_link(query):
    possible_urls = search(query+" UFC.com")
    for url in possible_urls:
        if ("ufc.com/athlete/" in url):
            return url
    raise BaseException("UFC link not found !")

def get_fighter(query):
    sherdog_link = get_sherdog_link(query)
    ufc_link = get_ufc_link(query)

    fighter = parse_sherdog_fighter(sherdog_link)
    fighter.update(get_ufc_stats(ufc_link))
    return fighter


def get_upcoming_event_links():
    url = 'https://www.ufc.com/events'
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    htm = req.get(url, headers = headers)
    xml = html.document_fromstring(htm.content)
    return ["https://www.ufc.com/"+x for x in xml.xpath("//details[@id='events-list-upcoming']/div/div/div/div/div/section/ul/li/article/div[1]/div/a/@href")]

def get_ufc_link_event(query):
    possible_urls = search(query+" UFC")
    for url in possible_urls:
        if ("ufc.com/event/" in url):
            return url
    raise BaseException("UFC link not found !")

def get_ranking(fight, corner):
    if corner == 'red':
        path = "div/div/div/div[2]/div[2]/div[2]/div[1]/span/text()"
    else:
        path = "div/div/div/div[2]/div[2]/div[2]/div[2]/span/text()"

    try:
        return fight.xpath(path)[0][1:]
    except IndexError:
        return "Unranked"

def get_name(fight, corner):
    if corner == 'red':
        path = "div/div/div/div[2]/div[2]/div[5]/div[1]/a/span/text()"
    else:
        path = "div/div/div/div[2]/div[2]/div[5]/div[3]/a/span/text()"

    name = " ".join(fight.xpath(path))

    if name == '':
        path = path.replace("/span", "")
        name = " ".join(fight.xpath(path)).strip()

    return name

def parse_event(url, past=True):

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    htm = req.get(url, headers = headers)
    xml = html.document_fromstring(htm.content)
    fights_html = xml.xpath("//div[@class='fight-card']/div/div/section/ul/li")

    prefix = xml.xpath("//div[@class='c-hero__header']/div[1]/div/h1/text()")[0].strip()
    names = xml.xpath("//div[@class='c-hero__header']/div[2]/span/span/text()")

    name = f"{prefix}: {names[0].strip()} vs. {names[-1].strip()}"

    date = dt.datetime.fromtimestamp(int(xml.xpath("//div[@class='c-hero__bottom-text']/div[1]/@data-timestamp")[0]))
    date = date.strftime("%Y-%m-%d")
    location = xml.xpath("//div[@class='c-hero__bottom-text']/div[2]/div/text()")[0].split(",")

    event = {
        'name': name,
        'date': date,
        'location': location[1].strip(),
        'venue': location[0].strip(),
        'fights': []
    }
    for fight in fights_html:
        this_fight = {
                'weightclass': fight.xpath("div/div/div/div[2]/div[2]/div[1]/div[2]/text()")[0][:-5],
                'red corner': {
                    'name': get_name(fight, 'red'),
                    'ranking': get_ranking(fight, 'red'),
                    'odds': fight.xpath("div/div/div/div[4]/div[2]/span[1]/span/text()")[0],
                    'link': fight.xpath("div/div/div/div[2]/div[2]/div[5]/div[1]/a/@href")[0]
                },
                'blue corner': {
                    'name': get_name(fight, 'blue'),
                    'ranking': get_ranking(fight, 'blue'),
                    'odds': fight.xpath("div/div/div/div[4]/div[2]/span[3]/span/text()")[0],
                    'link': fight.xpath("div/div/div/div[2]/div[2]/div[5]/div[3]/a/@href")[0]
                }
            }
        if past:
            result = fight.xpath("div/div/div/div[2]//div[@class='c-listing-fight__outcome-wrapper']/div/text()")
            method = fight.xpath("div//div[@class='c-listing-fight__result-text method']/text()")

            finished_round = fight.xpath("div//div[@class='c-listing-fight__result-text round']/text()")
            finished_time = fight.xpath("div//div[@class='c-listing-fight__result-text time']/text()")

            this_fight['round'] = finished_round[0]
            this_fight['time'] = finished_time[0]
            this_fight['method'] = method[0]
            this_fight['red corner']['result'] = result[0].strip()
            this_fight['blue corner']['result'] = result[1].strip()
        event['fights'].append(this_fight)
    return event

def get_upcoming_events():
    links = get_upcoming_event_links()

    results = {}

    for url in links:
        event = parse_event(url, False)
        results[event['name']] = event
    return results

def get_event(query):
    link = get_ufc_link_event(query)
    return parse_event(link)

import pandas as pd
fighter_dictionnary = get_fighter('Jon Jones')
df = pd.Series(fighter_dictionnary)
df


Unnamed: 0,0
name,Jon Jones
age,37
height,"6'4"""
weight,248 lbs
wins,"{'total': '27', 'ko/tko': '10', 'submissions':..."
losses,"{'total': '1', 'ko/tko': '0', 'submissions': '..."
fights,"[{'name': 'UFC 285 - Jones vs. Gane', 'date': ..."
strikes,"{'attempted': '2536', 'landed': '1468', 'stand..."
takedowns,"{'attempted': '97', 'landed': '36', 'takedown ..."


In [3]:
from ufc import get_event
import pandas as pd
fighter_dictionnary = get_fighter('Jon Jones')
df = pd.Series(fighter_dictionnary)
import ufcstats
stats = ufcstats.getStats("Jon Jones")
import ast

def str_to_dict(fighter_stats_str):
    try:
        # Convert the string to a dictionary
        fighter_stats_dict = ast.literal_eval(fighter_stats_str)
        return fighter_stats_dict
    except (ValueError, SyntaxError) as e:
        print(f"Error converting string to dictionary: {e}")
        return None  # Return None if conversion fails

print(str_to_dict(stats))

{'Name': 'Jon Jones', 'Nick': 'Bones', 'Height:': '6\' 4"', 'Weight:': '248 lbs.', 'Reach:': '84"', 'STANCE:': 'Orthodox', 'DOB:': 'Jul 19, 1987', 'SLpM:': '4.29', 'Str. Acc.:': '57%', 'SApM:': '2.22', 'Str. Def:': '64%', 'TD Avg.:': '1.93', 'TD Acc.:': '45%', 'TD Def.:': '95%', 'Sub. Avg.:': '0.5'}


In [4]:
dictionnary = get_event("UFC 200")
event_df = pd.Series(dictionnary)
event_df['fights']

[{'weightclass': "Women's Bantamweight Title",
  'red corner': {'name': 'Miesha Tate',
   'ranking': 'Unranked',
   'odds': '-',
   'link': 'https://www.ufc.com/athlete/miesha-tate',
   'result': 'Loss'},
  'blue corner': {'name': 'Amanda Nunes',
   'ranking': 'Unranked',
   'odds': '-',
   'link': 'https://www.ufc.com/athlete/amanda-nunes',
   'result': 'Win'},
  'round': '1',
  'time': '03:17',
  'method': 'SUB'},
 {'weightclass': 'Heavyweight',
  'red corner': {'name': 'Brock Lesnar',
   'ranking': 'Unranked',
   'odds': '-',
   'link': 'https://www.ufc.com/athlete/brock-lesnar',
   'result': 'Win'},
  'blue corner': {'name': 'Mark Hunt',
   'ranking': 'Unranked',
   'odds': '-',
   'link': 'https://www.ufc.com/athlete/mark-hunt',
   'result': 'Loss'},
  'round': '3',
  'time': '05:00',
  'method': 'DEC'},
 {'weightclass': 'Light Heavyweight',
  'red corner': {'name': 'Daniel Cormier',
   'ranking': 'Unranked',
   'odds': '-',
   'link': 'https://www.ufc.com/athlete/daniel-cormier',

In [5]:
print((get_fighter('Islam Makhachev')))

{'name': 'Islam Makhachev', 'age': '32', 'height': '5\'10"', 'weight': '155 lbs', 'wins': {'total': '26', 'ko/tko': '5', 'submissions': '12', 'decisions': '9', 'others': '0'}, 'losses': {'total': '1', 'ko/tko': '1', 'submissions': '0', 'decisions': '0', 'others': '0'}, 'fights': [{'name': 'UFC 302 - Makhachev vs. Poirier', 'date': 'Jun / 01 / 2024', 'url': 'https://www.sherdog.com/events/UFC-302-Makhachev-vs-Poirier-101617', 'result': 'win', 'method': 'Submission (Brabo Choke)', 'referee': 'Keith Peterson', 'round': '5', 'time': '2:42', 'opponent': 'Dustin Poirier'}, {'name': 'UFC 294 - Makhachev vs. Volkanovski 2', 'date': 'Oct / 21 / 2023', 'url': 'https://www.sherdog.com/events/UFC-294-Makhachev-vs-Volkanovski-2-97019', 'result': 'win', 'method': 'KO (Head Kick and Punches)', 'referee': 'Marc Goddard', 'round': '1', 'time': '3:06', 'opponent': 'Alexander Volkanovski'}, {'name': 'UFC 284 - Makhachev vs. Volkanovski', 'date': 'Feb / 11 / 2023', 'url': 'https://www.sherdog.com/events/U

In [6]:
column_list = ["WeightClass", "Fighter1 Name" ,"Fighter1 Odds", "Fighter1 Wins", "Fighter1 KO/TKO","Fighter1 submissions", "Fighter1 decisions","Fighter1 Losses", "Fighter1 KO/TKO losses", "Fighter1 submission losses", "Fighter1 decision losses", "Fighter1 Height", "Fighter1 Weight", "Fighter1 Age", "Fighter1 Reach", "Fighter1 Stance", "Fighter1 TD Avg", "Fighter1 TD Acc", "Fighter1 TD Def", "Fighter1 Str Acc", "Fighter1 Str Def", "Fighter1 Str Avg/minute", "Fighter1 Sub Avg", "Fighter2 Name","Fighter2 Odds", "Fighter2 Wins", "Fighter2 KO/TKO","Fighter2 submissions", "Fighter2 decisions","Fighter2 Losses", "Fighter2 KO/TKO losses", "Fighter2 submission losses", "Fighter2 decision losses", "Fighter2 Height", "Fighter2 Weight", "Fighter2 Age", "Fighter2 Reach", "Fighter2 Stance", "Fighter2 TD Avg", "Fighter2 TD Acc", "Fighter2 TD Def", "Fighter2 Str Acc", "Fighter2 Str Def", "Fighter2 Str Avg/minute", "Fighter2 Sub Avg", "Winner", "Method"]
df = pd.DataFrame(columns=column_list)
df

Unnamed: 0,WeightClass,Fighter1 Name,Fighter1 Odds,Fighter1 Wins,Fighter1 KO/TKO,Fighter1 submissions,Fighter1 decisions,Fighter1 Losses,Fighter1 KO/TKO losses,Fighter1 submission losses,...,Fighter2 Stance,Fighter2 TD Avg,Fighter2 TD Acc,Fighter2 TD Def,Fighter2 Str Acc,Fighter2 Str Def,Fighter2 Str Avg/minute,Fighter2 Sub Avg,Winner,Method


In [8]:
import ast
import time
def str_to_dict(fighter_stats_str):
    try:
        # Convert the string to a dictionary
        fighter_stats_dict = ast.literal_eval(fighter_stats_str)
        return fighter_stats_dict
    except (ValueError, SyntaxError) as e:
        print(f"Error converting string to dictionary: {e}")
        return None  # Return None if conversion fails

ufc_event_start,ufc_event_end = 290, 300 #minimum value for ufc_event_start is 56 idk why the sherdog links are not available

e = 0
while ufc_event_start<ufc_event_end:
    time.sleep(60)
    try:
        event_series = pd.Series(get_event(f"UFC {ufc_event_start}"))
        for f in event_series['fights']:
            series1 = pd.Series(f['blue corner'])
            series2 = pd.Series(f['red corner'])
            try:
              fighter1_stats , fighter2_stats = str_to_dict(ufcstats.getStats(series1['name'])),str_to_dict(ufcstats.getStats(series2['name']))
            except:
              continue
            try:
              fighter1_info, fighter2_info = get_fighter(series1['name']), get_fighter(series2['name'])
              fighter1_wins, fighter2_wins = fighter1_info['wins'], fighter2_info['wins']
              fighter1_losses, fighter2_losses = fighter1_info['losses'], fighter2_info['losses']
            except:
              fighter1_wins, fighter2_wins = {'total': 0, 'ko/tko': 0, 'submissions': 0, 'decisions': 0, 'others': 0}, {'total': 0, 'ko/tko': 0, 'submissions': 0, 'decisions': 0, 'others': 0}
              fighter1_losses, fighter2_losses = {'total': 0, 'ko/tko': 0, 'submissions': 0, 'decisions': 0, 'others': 0}, {'total': 0, 'ko/tko': 0, 'submissions': 0, 'decisions': 0, 'others': 0}
            new_row = {
                'WeightClass': f['weightclass'],
                'Fighter1 Name': series1['name'],
                'Fighter1 Odds': series1['odds'],
                'Fighter1 Wins': fighter1_wins['total'],
                'Fighter1 KO/TKO': fighter1_wins['ko/tko'],
                'Fighter1 submissions': fighter1_wins['submissions'],
                'Fighter1 decisions': fighter1_wins['decisions'],
                'Fighter1 Losses': fighter1_losses['total'], #fix record by showingt the record of the fighter at that point in time
                'Fighter1 KO/TKO losses': fighter1_losses['ko/tko'],
                'Fighter1 submission losses': fighter1_losses['submissions'],
                'Fighter1 decision losses': fighter1_losses['decisions'],
                'Fighter1 Height': fighter1_stats['Height:'],
                'Fighter1 Weight': fighter1_stats['Weight:'],
                'Fighter1 Age': fighter1_stats['DOB:'],
                'Fighter1 Reach': fighter1_stats['Reach:'],
                'Fighter1 Stance': fighter1_stats['STANCE:'],
                'Fighter1 TD Avg': fighter1_stats['TD Avg.:'],
                'Fighter1 TD Acc': fighter1_stats['TD Acc.:'],
                'Fighter1 TD Def': fighter1_stats['TD Def.:'],
                'Fighter1 Str Acc': fighter1_stats['Str. Acc.:'],
                'Fighter1 Str Def': fighter1_stats['Str. Def:'],
                'Fighter1 Str Avg/minute': fighter1_stats['SLpM:'],
                'Fighter1 Sub Avg': fighter1_stats['Sub. Avg.:'],

                'Fighter2 Name': series2['name'],
                'Fighter2 Odds': series2['odds'],
                'Fighter2 Wins': fighter2_wins['total'],
                'Fighter2 KO/TKO': fighter2_wins['ko/tko'],
                'Fighter2 submissions': fighter2_wins['submissions'],
                'Fighter2 decisions': fighter2_wins['decisions'],
                'Fighter2 Losses': fighter2_losses['total'],
                'Fighter2 KO/TKO losses': fighter2_losses['ko/tko'],
                'Fighter2 submission losses': fighter2_losses['submissions'],
                'Fighter2 decision losses': fighter2_losses['decisions'],
                'Fighter2 Height': fighter2_stats['Height:'],
                'Fighter2 Weight': fighter2_stats['Weight:'],
                'Fighter2 Age': fighter2_stats['DOB:'],
                'Fighter2 Reach': fighter2_stats['Reach:'],
                'Fighter2 Stance': fighter2_stats['STANCE:'],
                'Fighter2 TD Avg': fighter2_stats['TD Avg.:'],
                'Fighter2 TD Acc': fighter2_stats['TD Acc.:'],
                'Fighter2 TD Def': fighter2_stats['TD Def.:'],
                'Fighter2 Str Acc': fighter2_stats['Str. Acc.:'],
                'Fighter2 Str Def': fighter2_stats['Str. Def:'],
                'Fighter2 Str Avg/minute': fighter2_stats['SLpM:'],
                'Fighter2 Sub Avg': fighter2_stats['Sub. Avg.:'],

                'Winner': 0 if series1['result'] == 'Win' else 1,
                'Method': f['method']
                }

            # Convert the new row to a DataFrame and concatenate it with the existing DataFrame
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)


    except Exception as ex:
        print(f"Error processing UFC {ufc_event_start}: {ex}")
        e += 1
    # except BaseException:
    #     e += 1

    ufc_event_start += 1
    print(f"Processed UFC {ufc_event_start}")
    #print(df.head())
    if e > 1000:
        break


Processed UFC 291
Processed UFC 292
Processed UFC 293
Processed UFC 294
Processed UFC 295
Processed UFC 296
Processed UFC 297
Processed UFC 298
Processed UFC 299
Processed UFC 300


In [9]:
df

Unnamed: 0,WeightClass,Fighter1 Name,Fighter1 Odds,Fighter1 Wins,Fighter1 KO/TKO,Fighter1 submissions,Fighter1 decisions,Fighter1 Losses,Fighter1 KO/TKO losses,Fighter1 submission losses,...,Fighter2 Stance,Fighter2 TD Avg,Fighter2 TD Acc,Fighter2 TD Def,Fighter2 Str Acc,Fighter2 Str Def,Fighter2 Str Avg/minute,Fighter2 Sub Avg,Winner,Method
0,Featherweight Title,Yair Rodriguez,+290,19,8,5,6,5,3,1,...,Orthodox,1.78,37%,70%,56%,58%,6.16,0.2,1,KO/TKO
1,Flyweight Title,Alexandre Pantoja,+175,28,8,10,10,5,0,0,...,Orthodox,1.74,46%,63%,44%,59%,3.87,0.5,0,Decision - Split
2,Middleweight,Dricus Du Plessis,+280,22,9,11,2,2,1,1,...,Orthodox,0.80,38%,82%,43%,59%,4.58,0.0,0,KO/TKO
3,Lightweight,Dan Hooker,+235,24,11,7,6,12,3,3,...,Southpaw,0.79,55%,74%,48%,41%,5.60,1.1,0,Decision - Split
4,Middleweight,Val Woodburn,+1100,7,5,0,2,2,1,0,...,Southpaw,7.46,50%,0%,62%,70%,1.64,7.5,1,KO/TKO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,Light Heavyweight,Philipe Lins,-,18,9,4,5,5,4,0,...,Southpaw,4.04,52%,75%,43%,46%,4.29,0.0,0,Decision - Unanimous
133,Middleweight,Michal Oleksiejczuk,+124,19,14,1,4,9,1,6,...,Orthodox,1.53,55%,94%,54%,58%,5.16,0.8,1,Submission
134,Heavyweight,Josh Parisian,-,0,0,0,0,0,0,0,...,Orthodox,0.00,0%,25%,54%,49%,2.16,0.0,1,KO/TKO
135,Flyweight,Asu Almabayev,-375,0,0,0,0,0,0,0,...,Orthodox,0.00,0%,60%,57%,52%,5.24,0.2,0,Decision - Unanimous


In [18]:
for i in range(len(df['WeightClass'])):
  if 'Title' in df['WeightClass'][i]: #removes TITLE from weightclass strings so that even title fights are considered same weightclass in the eyes of ML model
    df['WeightClass'][i] = df['WeightClass'][i][:-6]
  if df['Fighter1 Odds'][i] == '-': #replaces Nan odds with pick'em odds -110 (pick'em is when the odds are about similar on both ends)
    df['Fighter1 Odds'][i] = -110
    df['Fighter2 Odds'][i] = -110
  if type(df['Fighter1 Odds'][i]) == str:
     df['Fighter1 Odds'][i] = int(df['Fighter1 Odds'][i])
     df['Fighter2 Odds'][i] = int(df['Fighter2 Odds'][i])
  for c in range(len(df.columns)): #convert percentages strings to integers between 0 and 1 just to scale this and make it faster for ML model
    if type(df.iloc[i,c]) == str:
      if '%' in df.iloc[i,c]:
        df.iloc[i,c] = int(df.iloc[i,c][:-1])
        df.iloc[i,c] = df.iloc[i,c]/100
df = pd.get_dummies(df, columns=['WeightClass', 'Fighter1 Stance', 'Fighter2 Stance'])
df['Method']

KeyError: 'WeightClass'

In [11]:
method_mapping = {
    'KO/TKO': 1,     # Knockout
    'Submission': 2,    # Submission
    'Decision - Split': 3,    # Decision
    'Decision - Unanimous': 4      # Disqualification (example)
}

# Apply the mapping to the Method column
df['Method'] = df['Method'].map(method_mapping)
for r in range(len(df.columns[0])):
  for c in range(len(df.columns)):
    if df.iloc[r,c] == True:
      df.iloc[r,c] = 1
    elif df.iloc[r,c] == False:
      df.iloc[r,c] = 0
df['Method']

  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 1
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 1
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0
  df.iloc[r,c] = 1
  df.iloc[r,c] = 0
  df.iloc[r,c] = 0


Unnamed: 0,Method
0,1.0
1,3.0
2,1.0
3,3.0
4,1.0
...,...
132,4.0
133,2.0
134,1.0
135,4.0


In [12]:
df.drop(['Fighter1 Name', 'Fighter2 Name'], axis=1, inplace=True)
df

Unnamed: 0,Fighter1 Odds,Fighter1 Wins,Fighter1 KO/TKO,Fighter1 submissions,Fighter1 decisions,Fighter1 Losses,Fighter1 KO/TKO losses,Fighter1 submission losses,Fighter1 decision losses,Fighter1 Height,...,WeightClass_Welterweight,WeightClass_Women's Bantamweight,WeightClass_Women's Flyweight,WeightClass_Women's Strawweight,Fighter1 Stance_Orthodox,Fighter1 Stance_Southpaw,Fighter1 Stance_Switch,Fighter2 Stance_Orthodox,Fighter2 Stance_Southpaw,Fighter2 Stance_Switch
0,+290,19,8,5,6,5,3,1,1,"5' 11""",...,0,0,0,0,1,0,0,1,0,0
1,+175,28,8,10,10,5,0,0,5,"5' 5""",...,0,0,0,0,1,0,0,1,0,0
2,+280,22,9,11,2,2,1,1,0,"6' 1""",...,0,0,0,0,0,0,1,1,0,0
3,+235,24,11,7,6,12,3,3,6,"6' 0""",...,0,0,0,0,0,0,1,0,1,0
4,+1100,7,5,0,2,2,1,0,1,"5' 8""",...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,-110,18,9,4,5,5,4,0,1,"6' 2""",...,False,False,False,False,True,False,False,False,True,False
133,+124,19,14,1,4,9,1,6,2,"6' 0""",...,False,False,False,False,False,True,False,True,False,False
134,-110,0,0,0,0,0,0,0,0,"6' 4""",...,False,False,False,False,True,False,False,True,False,False
135,-375,0,0,0,0,0,0,0,0,"5' 4""",...,False,False,False,False,True,False,False,True,False,False


In [13]:
for r in range(len(df.columns[0])):
    height1, height2 = df['Fighter1 Height'][r].split("\' "),df['Fighter2 Height'][r].split("\' ")
    feet1, inches1, feet2, inches2 = int(height1[0]), int(height1[1][:-1]),int(height2[0]), int(height2[1][:-1])
    meters1, meters2 = 0.3048*feet1 + inches1*0.0254,0.3048*feet2 + inches2*0.0254
    df['Fighter1 Height'][r] = meters1
    df['Fighter2 Height'][r] = meters2
    df['Fighter1 Reach'][r] = int(df['Fighter1 Reach'][r][:-1])
    df['Fighter2 Reach'][r] = int(df['Fighter2 Reach'][r][:-1])

df['Fighter2 Height']

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['Fighter1 Height'][r] = meters1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Fighter1 Height'][r] = m

Unnamed: 0,Fighter2 Height
0,1.6764
1,1.7018
2,1.8288
3,1.905
4,1.8542
...,...
132,"6' 1"""
133,"6' 1"""
134,"6' 7"""
135,"5' 6"""


In [14]:
df['Fighter1 Weight'] = df['Fighter1 Weight'].str.replace(' lbs.', '')
df['Fighter2 Weight'] = df['Fighter2 Weight'].str.replace(' lbs.', '')
for c in range(len(df.columns[0])):
  df['Fighter1 Weight'][c] = int(df['Fighter1 Weight'][c])
  df['Fighter2 Weight'][c] = int(df['Fighter2 Weight'][c])
df['Fighter1 Weight']

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['Fighter1 Weight'][c] = int(df['Fighter1 Weight'][c])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Fi

Unnamed: 0,Fighter1 Weight
0,145
1,125
2,185
3,155
4,170
...,...
132,205
133,185
134,265
135,125


In [22]:
# for r in range(len(df.columns)):
#     for c in range(len(df.columns[0])):
#         if type(df.iloc[r,c]) == str:
#             print(df.iloc[r,c])
#             print(r, c)
for i in range(len(df['Fighter1 Odds'])):
 if type(df['Fighter1 Odds'][i]) == str:
     df['Fighter1 Odds'][i] = float(df['Fighter1 Odds'][i])
     df['Fighter2 Odds'][i] = float(df['Fighter2 Odds'][i])

In [15]:
from datetime import datetime
for c in range(len(df.columns[0])):
  birthyear1 = int(df['Fighter1 Age'][c][-4:])
  birthyear2 = int(df['Fighter2 Age'][c][-4:])
  current_year = datetime.now().year
  df['Fighter1 Age'][c] = current_year - birthyear1
  df['Fighter2 Age'][c] = current_year - birthyear2
df['Fighter1 Age']

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['Fighter1 Age'][c] = current_year - birthyear1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Fighter1 

Unnamed: 0,Fighter1 Age
0,32
1,34
2,30
3,34
4,31
...,...
132,"Aug 17, 1985"
133,"Feb 22, 1995"
134,"Jun 28, 1989"
135,"Jan 25, 1994"


In [28]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

X = df.drop(['Winner', 'Method'], axis=1)
y = df['Winner']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.astype(int)
y_test = y_test.astype(int)
X_train = X_train.apply(pd.to_numeric, errors='coerce')
X_train.fillna(-110, inplace=True)  # or drop NaNs as needed
X_test = X_test.apply(pd.to_numeric, errors='coerce')
X_test.fillna(-110, inplace=True)
y_train = y_train.apply(pd.to_numeric, errors='coerce')
y_train.fillna(-110, inplace=True)
y_test = y_test.apply(pd.to_numeric, errors='coerce')
y_test.fillna(-110, inplace=True)
model = RandomForestClassifier(n_estimators=100, random_state=42)

model.fit(X_train, y_train.ravel())

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


  model.fit(X_train, y_train.ravel())


Accuracy: 60.71%
Precision: 0.60
Recall: 0.61
F1 Score: 0.58


In [29]:
# Step 1: Import necessary libraries
import pandas as pd
from google.colab import drive

# Step 2: Mount Google Drive
drive.mount('/content/drive')

# Step 4: Define the file path where the Excel file will be saved
file_path = '/content/drive/My Drive/Colab Notebooks/MMA Fight Predictor/fight_data.xlsx'  # Replace with your directory/filename

# Step 5: Write the DataFrame to an Excel file
df.to_excel(file_path, index=False)

print(f"Excel file saved to: {file_path}")


Mounted at /content/drive
Excel file saved to: /content/drive/My Drive/Colab Notebooks/MMA Fight Predictor/fight_data.xlsx


In [24]:
df

Unnamed: 0,Fighter1 Odds,Fighter1 Wins,Fighter1 KO/TKO,Fighter1 submissions,Fighter1 decisions,Fighter1 Losses,Fighter1 KO/TKO losses,Fighter1 submission losses,Fighter1 decision losses,Fighter1 Height,...,WeightClass_Welterweight,WeightClass_Women's Bantamweight,WeightClass_Women's Flyweight,WeightClass_Women's Strawweight,Fighter1 Stance_Orthodox,Fighter1 Stance_Southpaw,Fighter1 Stance_Switch,Fighter2 Stance_Orthodox,Fighter2 Stance_Southpaw,Fighter2 Stance_Switch
0,+290,19,8,5,6,5,3,1,1,1.8034,...,0,0,0,0,1,0,0,1,0,0
1,+175,28,8,10,10,5,0,0,5,1.651,...,0,0,0,0,1,0,0,1,0,0
2,+280,22,9,11,2,2,1,1,0,1.8542,...,0,0,0,0,0,0,1,1,0,0
3,+235,24,11,7,6,12,3,3,6,1.8288,...,0,0,0,0,0,0,1,0,1,0
4,+1100,7,5,0,2,2,1,0,1,1.7272,...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,-110,18,9,4,5,5,4,0,1,"6' 2""",...,False,False,False,False,True,False,False,False,True,False
133,+124,19,14,1,4,9,1,6,2,"6' 0""",...,False,False,False,False,False,True,False,True,False,False
134,-110,0,0,0,0,0,0,0,0,"6' 4""",...,False,False,False,False,True,False,False,True,False,False
135,-375,0,0,0,0,0,0,0,0,"5' 4""",...,False,False,False,False,True,False,False,True,False,False
