In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/merged-stats-n-scorecards-trimmed/merged_stats_n_scorecards_trimmed.csv


# Feature Transformation

In [2]:
# The CSV is semicolon-delimited, so the separator has to be given explicitly.
csv_path = "/kaggle/input/merged-stats-n-scorecards-trimmed/merged_stats_n_scorecards_trimmed.csv"
df = pd.read_csv(csv_path, sep=';')
df.head()

Unnamed: 0,red_fighter_name,blue_fighter_name,event_date,red_fighter_result,blue_fighter_result,method,bout_type,bonus,red_fighter_total_pts,blue_fighter_total_pts
0,ILIA TOPURIA,MAX HOLLOWAY,26/10/2024,W,L,KO/TKO,UFC Featherweight Title Bout,belt,20 20 19,18 18 19
1,ROBERT WHITTAKER,KHAMZAT CHIMAEV,26/10/2024,L,W,Submission,Middleweight Bout,perf,- - -,- - -
2,MAGOMED ANKALAEV,ALEKSANDAR RAKIC,26/10/2024,W,L,Decision - Unanimous,Light Heavyweight Bout,-,29 29 29,28 28 28
3,LERONE MURPHY,DAN IGE,26/10/2024,W,L,Decision - Unanimous,Featherweight Bout,-,29 29 29,28 28 28
4,SHARA MAGOMEDOV,ARMEN PETROSYAN,26/10/2024,W,L,KO/TKO,Middleweight Bout,perf,10 10 10,9 9 9


In [3]:
# Label each bout by the winning corner: "red" when the red fighter's result is 'W',
# "blue" for every other value.
df['winner'] = df['red_fighter_result'].eq('W').map({True: "red", False: "blue"})

# Remove the columns that are not used from here on.
dropped_columns = ['event_date', 'bonus', 'red_fighter_result', 'blue_fighter_result']
df.drop(columns=dropped_columns, inplace=True)

# Reorder the remaining columns.
column_order = [
    'red_fighter_name',
    'blue_fighter_name',
    'winner',
    'method',
    'bout_type',
    'red_fighter_total_pts',
    'blue_fighter_total_pts',
]
df = df[column_order]
df.head()

Unnamed: 0,red_fighter_name,blue_fighter_name,winner,method,bout_type,red_fighter_total_pts,blue_fighter_total_pts
0,ILIA TOPURIA,MAX HOLLOWAY,red,KO/TKO,UFC Featherweight Title Bout,20 20 19,18 18 19
1,ROBERT WHITTAKER,KHAMZAT CHIMAEV,blue,Submission,Middleweight Bout,- - -,- - -
2,MAGOMED ANKALAEV,ALEKSANDAR RAKIC,red,Decision - Unanimous,Light Heavyweight Bout,29 29 29,28 28 28
3,LERONE MURPHY,DAN IGE,red,Decision - Unanimous,Featherweight Bout,29 29 29,28 28 28
4,SHARA MAGOMEDOV,ARMEN PETROSYAN,red,KO/TKO,Middleweight Bout,10 10 10,9 9 9


## Extracting only the lightweight division

In [4]:
# Keep only the bouts whose bout type mentions "Lightweight".
# A vectorised, literal (non-regex) substring test replaces the row-wise apply;
# `na=False` treats a missing bout type as "not lightweight" instead of raising.
lightweight_division_filter = df['bout_type'].str.contains('Lightweight', regex=False, na=False)

# Take an explicit copy so that later column assignments modify an independent
# frame rather than a slice of `df` (avoids pandas' SettingWithCopyWarning).
df_lightweight_division = df[lightweight_division_filter].copy()
df_lightweight_division.head()

Unnamed: 0,red_fighter_name,blue_fighter_name,winner,method,bout_type,red_fighter_total_pts,blue_fighter_total_pts
27,GRANT DAWSON,RAFA GARCIA,red,KO/TKO,Lightweight Bout,10 10 10,9 9 9
43,AUSTIN HUBBARD,ALEXANDER HERNANDEZ,blue,Decision - Split,Lightweight Bout,30 28 28,27 29 29
48,RENATO MOICANO,BENOIT SAINT DENIS,red,TKO - Doctor's Stoppage,Lightweight Bout,19 19 19,18 18 18
53,FARES ZIAM,MATT FREVOLA,red,KO/TKO,Lightweight Bout,20 20 20,18 18 17
56,LUDOVIT KLEIN,ROOSEVELT ROBERTS,red,Decision - Unanimous,Lightweight Bout,30 29 29,27 28 28


## Summing the points up

In [5]:
import re

def sum_points(row):
    """Collapse a scorecard value into the total number of points.

    Parameters
    ----------
    row : str or int
        Whitespace-separated scores such as ``"30 28 28"``, or a placeholder
        such as ``"- - -"``. An integer is treated as an already-computed total.

    Returns
    -------
    int or str
        The sum of the scores, or ``"-"`` when the value holds no usable set
        of scores (placeholder, missing value, or partially filled scorecards).
    """
    # An integer is an already-summed total: hand it back untouched so that
    # applying this function a second time neither fails nor discards the value.
    if isinstance(row, (int, np.integer)):
        return row
    # Missing values (NaN / None) are not strings and cannot be parsed.
    if not isinstance(row, str):
        return "-"
    # The value must start with a digit; leading whitespace is tolerated.
    if not re.match(r"\s*\d", row):
        return "-"
    try:
        return sum(int(num) for num in row.split())
    except ValueError:
        # A mix of scores and placeholders (e.g. "29 - 29") has no meaningful total.
        return "-"

# Replace the raw scorecard strings of both corners with their totals.
# `assign` builds a new frame (columns keep their positions) instead of writing
# into a frame obtained by filtering `df`, which is what triggers pandas'
# SettingWithCopyWarning.
df_lightweight_division = df_lightweight_division.assign(
    red_fighter_total_pts=df_lightweight_division['red_fighter_total_pts'].apply(sum_points),
    blue_fighter_total_pts=df_lightweight_division['blue_fighter_total_pts'].apply(sum_points),
)

df_lightweight_division.head()

Unnamed: 0,red_fighter_name,blue_fighter_name,winner,method,bout_type,red_fighter_total_pts,blue_fighter_total_pts
27,GRANT DAWSON,RAFA GARCIA,red,KO/TKO,Lightweight Bout,30,27
43,AUSTIN HUBBARD,ALEXANDER HERNANDEZ,blue,Decision - Split,Lightweight Bout,86,85
48,RENATO MOICANO,BENOIT SAINT DENIS,red,TKO - Doctor's Stoppage,Lightweight Bout,57,54
53,FARES ZIAM,MATT FREVOLA,red,KO/TKO,Lightweight Bout,60,53
56,LUDOVIT KLEIN,ROOSEVELT ROBERTS,red,Decision - Unanimous,Lightweight Bout,88,83


# ELO Algorithm

## Creating the leaderboard

In [6]:
# Collect every fighter that appears in either corner of a lightweight bout.
all_fighters = set()
for corner_column in ('red_fighter_name', 'blue_fighter_name'):
    all_fighters.update(df_lightweight_division[corner_column])
print(f"Total number of unique fighters in the lightweight division: {len(all_fighters)}")

Total number of unique fighters in the lightweight division: 164


## Every fighter starts with a base Elo rating of 1500

In [7]:
# Seed the leaderboard: every fighter starts from the same rating of 1500.
lightweight_leaderboard = dict.fromkeys(all_fighters, 1500)

# Preview the first five entries.
preview_names = list(lightweight_leaderboard)[:5]
{name: lightweight_leaderboard[name] for name in preview_names}

{'MICHAL FIGLAK': 1500,
 'CARL DEATON': 1500,
 'UROS MEDIC': 1500,
 'MATT WIMAN': 1500,
 'ELVES BRENER': 1500}

In [8]:
# Distinct finishing methods present in the lightweight bouts; each one needs an
# entry in the K-value table defined below.
set(df_lightweight_division['method'].tolist())

{'Decision - Majority',
 'Decision - Split',
 'Decision - Unanimous',
 'KO/TKO',
 'Submission',
 "TKO - Doctor's Stoppage"}

In [117]:
def change_ELO(fighter, change, leaderboard=None):
    """Add ``change`` to a fighter's rating, modifying the leaderboard in place.

    Parameters
    ----------
    fighter : hashable
        Key of the fighter in the leaderboard.
    change : number
        Amount added to the current rating (negative to subtract).
    leaderboard : dict, optional
        Mapping of fighter -> rating to update. When omitted, the notebook-level
        ``lightweight_leaderboard`` is used, which is the original behaviour.

    Raises
    ------
    KeyError
        If ``fighter`` is not present in the leaderboard.
    """
    if leaderboard is None:
        # Fall back to the global leaderboard so existing calls keep working.
        leaderboard = lightweight_leaderboard
    leaderboard[fighter] += change

def expected_probability(elo_fighter_A, elo_fighter_B):
    """Return the expected win probabilities of two fighters from their ratings.

    Uses the logistic Elo formula with a scale of 400 rating points:
    ``P(A) = 1 / (1 + 10 ** ((elo_B - elo_A) / 400))``.

    Returns
    -------
    tuple of float
        ``(probability_A, probability_B)``; the second is ``1 - probability_A``.
    """
    rating_gap = elo_fighter_B - elo_fighter_A
    odds_against_A = 10 ** (rating_gap / 400)
    win_chance_A = 1 / (1 + odds_against_A)
    win_chance_B = 1 - win_chance_A
    return win_chance_A, win_chance_B

def base_K_value(outcome):
    """Look up the base K-factor associated with a bout outcome.

    Parameters
    ----------
    outcome : str
        One of the outcome labels listed in the table below.

    Returns
    -------
    int
        The K-factor for that outcome.

    Raises
    ------
    KeyError
        If ``outcome`` is not one of the listed labels.
    """
    k_by_outcome = {
        # Finishes
        'KO/TKO': 40,
        'Submission': 40,
        "TKO - Doctor's Stoppage": 35,
        # Decisions
        'Decision - Unanimous': 30,
        'Decision - Majority': 25,
        'Decision - Split': 20,
        # Bouts without a winner
        "Draw": 10,
        "No Contest": 5,
    }
    k_value = k_by_outcome[outcome]
    return k_value



In [118]:
# Sanity check: expected win probabilities when fighter A is rated 1500 and fighter B 1700.
expected_probability(1500, 1700)

0.2402530733520421
0.759746926647958
