**Goal of analysis: Which fight stats of a fighter are correlated with wins?**

Physical stats considered: knockdowns, takedowns landed, significant strikes, and submission attempts. 
The data covers fights from 2020 onwards, because I am more familiar with the modern trends of the sport (the data ends on 30 August). 

In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [58]:
# Source files: fight-level event data and fighter profiles.
EVENT_CSV, FIGHTER_CSV = "ufc_event_data.csv", "ufc_fighters.csv"

event_df = pd.read_csv(EVENT_CSV)
fighter_df = pd.read_csv(FIGHTER_CSV)

In [59]:
# Check how pandas typed each column: only Round is numeric; the per-fight
# stat columns (KD, Strikes, TD, Sub) are loaded as object, e.g. "50-20".
event_df.dtypes

Event Name      object
Event Date      object
Result          object
Fighter1        object
Fighter2        object
KD              object
Strikes         object
TD              object
Sub             object
Weight Class    object
Method          object
Round            int64
Time            object
dtype: object

In [60]:
# Height, Weight and Reach are object columns (values such as `155 lbs.` or
# `--`), so only Wins / Losses / Draws are numeric at this point.
fighter_df.dtypes

First Name    object
Last Name     object
Nickname      object
Height        object
Weight        object
Reach         object
Stance        object
Wins           int64
Losses         int64
Draws          int64
dtype: object

In [61]:
# describe() summarises numeric columns only, so Round is the only column reported here.
event_df.describe()

Unnamed: 0,Round
count,2863.0
mean,2.462801
std,1.035328
min,1.0
25%,2.0
50%,3.0
75%,3.0
max,5.0


In [62]:
# Summary of the numeric record columns (Wins, Losses, Draws).
# NOTE(review): the reported max Wins (253) is far above the 75th percentile (17) - check for an outlier.
fighter_df.describe()

Unnamed: 0,Wins,Losses,Draws
count,4429.0,4429.0,4429.0
mean,12.395575,5.648002,0.251976
std,9.206908,5.038161,0.798145
min,0.0,0.0,0.0
25%,7.0,2.0,0.0
50%,11.0,4.0,0.0
75%,17.0,8.0,0.0
max,253.0,83.0,11.0


In [63]:
# Build a single "Fighter Name" key from the two name parts, then drop the
# columns that are no longer needed (Nickname and the separate name parts).
#
# `str.cat(..., na_rep="")` treats a missing first or last name as an empty
# string, so fighters with only one name part keep their name; with plain `+`
# a missing part would turn the whole name into NaN. The strip removes the
# stray separator space left behind in that case.
full_name = (
    fighter_df["First Name"]
    .str.cat(fighter_df["Last Name"], sep=" ", na_rep="")
    .str.strip()
)

fighter_df = (
    fighter_df
    .drop(columns=["Nickname", "First Name", "Last Name"])
    # Rows where both name parts are missing stay NaN, as before.
    .assign(**{"Fighter Name": full_name.where(full_name != "")})
)

fighter_df.head(5)

Unnamed: 0,Height,Weight,Reach,Stance,Wins,Losses,Draws,Fighter Name
0,--,155 lbs.,--,,5,3,0,Tom Aaron
1,"5' 11""",155 lbs.,--,Orthodox,4,6,0,Danny Abbadi
2,"5' 8""",155 lbs.,"66.0""",Orthodox,28,4,0,Nariman Abbasov
3,"6' 2""",265 lbs.,"80.0""",Orthodox,9,5,0,Darion Abbey
4,"6' 0""",265 lbs.,--,Switch,10,15,0,David Abbott


In [64]:
# Parse the date strings so the column becomes a datetime type instead of object.
parsed_dates = pd.to_datetime(event_df["Event Date"])
event_df["Event Date"] = parsed_dates

In [65]:
# Preview the first 10 fights; KD / Strikes / TD / Sub hold one value per
# fighter joined by a dash (e.g. 50-20), which is split apart further down.
event_df.head(10)

Unnamed: 0,Event Name,Event Date,Result,Fighter1,Fighter2,KD,Strikes,TD,Sub,Weight Class,Method,Round,Time
0,UFC Fight Night: Walker vs. Zhang,2025-08-23,Johnny Walker,Johnny Walker,Zhang Mingyang,1-0,50-20,0-0,0-0,Light Heavyweight,KO/TKO-Punches,2,2:37
1,UFC Fight Night: Walker vs. Zhang,2025-08-23,Aljamain Sterling,Aljamain Sterling,Brian Ortega,0-0,124-55,3-0,0-0,Catch Weight,U-DEC,5,5:00
2,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sergei Pavlovich,Sergei Pavlovich,Waldo Cortes-Acosta,0-0,61-45,0-0,0-0,Heavyweight,U-DEC,3,5:00
3,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sumudaerji,Sumudaerji,Kevin Borjas,0-0,73-16,0-0,0-0,Flyweight,U-DEC,3,5:00
4,UFC Fight Night: Walker vs. Zhang,2025-08-23,Taiyilake Nueraji,Taiyilake Nueraji,Kiefer Crosbie,0-0,27-2,1-0,0-0,Welterweight,KO/TKO-Elbows,1,3:33
5,UFC Fight Night: Walker vs. Zhang,2025-08-23,Gauge Young,Gauge Young,Maheshate,0-0,112-66,3-0,0-0,Lightweight,U-DEC,3,5:00
6,UFC Fight Night: Walker vs. Zhang,2025-08-23,Charles Johnson,Charles Johnson,Lone'er Kavanagh,1-0,44-59,1-1,0-1,Flyweight,KO/TKO-Punch,2,4:35
7,UFC Fight Night: Walker vs. Zhang,2025-08-23,Rongzhu,Rongzhu,Austin Hubbard,0-0,101-75,0-2,0-0,Lightweight,U-DEC,3,5:00
8,UFC Fight Night: Walker vs. Zhang,2025-08-23,Kyle Daukaus,Kyle Daukaus,Michel Pereira,1-0,5-2,0-0,0-0,Middleweight,KO/TKO-Elbows,1,0:43
9,UFC Fight Night: Walker vs. Zhang,2025-08-23,Yizha,Yizha,Westin Wilson,2-0,13-0,0-0,0-0,Featherweight,KO/TKO-Punches,1,0:37


In [66]:
# Replace the abbreviated column headers with descriptive names.
column_renames = {
    "Event Name": "Name",
    "Event Date": "Date",
    "KD": "Knockdowns",
    "Sub": "Submission Attempts",
    "TD": "Takedowns Landed",  # landed, not attempted
    "Strikes": "Sig Strikes",  # significant strikes, not total strikes
}
event_df = event_df.rename(columns=column_renames)

In [67]:
# Show the frame with its renamed columns (pandas abbreviates the display to the first and last rows).
event_df

Unnamed: 0,Name,Date,Result,Fighter1,Fighter2,Knockdowns,Sig Strikes,Takedowns Landed,Submission Attempts,Weight Class,Method,Round,Time
0,UFC Fight Night: Walker vs. Zhang,2025-08-23,Johnny Walker,Johnny Walker,Zhang Mingyang,1-0,50-20,0-0,0-0,Light Heavyweight,KO/TKO-Punches,2,2:37
1,UFC Fight Night: Walker vs. Zhang,2025-08-23,Aljamain Sterling,Aljamain Sterling,Brian Ortega,0-0,124-55,3-0,0-0,Catch Weight,U-DEC,5,5:00
2,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sergei Pavlovich,Sergei Pavlovich,Waldo Cortes-Acosta,0-0,61-45,0-0,0-0,Heavyweight,U-DEC,3,5:00
3,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sumudaerji,Sumudaerji,Kevin Borjas,0-0,73-16,0-0,0-0,Flyweight,U-DEC,3,5:00
4,UFC Fight Night: Walker vs. Zhang,2025-08-23,Taiyilake Nueraji,Taiyilake Nueraji,Kiefer Crosbie,0-0,27-2,1-0,0-0,Welterweight,KO/TKO-Elbows,1,3:33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2858,UFC 246: McGregor vs. Cowboy,2020-01-18,Sodiq Yusuff,Sodiq Yusuff,Andre Fili,0-0,73-49,0-3,1-0,Featherweight,U-DEC,3,5:00
2859,UFC 246: McGregor vs. Cowboy,2020-01-18,Askar Askarov,Askar Askarov,Tim Elliott,0-0,73-47,0-3,0-0,Flyweight,U-DEC,3,5:00
2860,UFC 246: McGregor vs. Cowboy,2020-01-18,Drew Dober,Drew Dober,Nasrat Haqparast,1-0,13-7,0-0,0-0,Lightweight,KO/TKO-Punch,1,1:10
2861,UFC 246: McGregor vs. Cowboy,2020-01-18,Aleksa Camur,Aleksa Camur,Justin Ledet,0-0,80-44,1-0,0-0,Light Heavyweight,U-DEC,3,5:00


In [68]:
# Split each dash-joined stat (e.g. "50-20") into one column per fighter,
# named "Fighter1 <stat>" and "Fighter2 <stat>", then drop the combined columns.
# NOTE(review): project-local helper; consider moving this import to the imports cell.
from separate_endash import separate_endash

# Single source of truth for the columns to split, so the loop and the drop cannot drift apart.
STAT_COLUMNS = ["Sig Strikes", "Takedowns Landed", "Submission Attempts", "Knockdowns"]

for col in STAT_COLUMNS:
    # Parse every value once and reuse the result for both fighters
    # (element 0 belongs to Fighter1, element 1 to Fighter2).
    parsed = event_df[col].apply(separate_endash)
    event_df[f"Fighter1 {col}"] = parsed.apply(lambda pair: pair[0])
    event_df[f"Fighter2 {col}"] = parsed.apply(lambda pair: pair[1])

event_df = event_df.drop(columns=STAT_COLUMNS)

event_df.dtypes

Name                                    object
Date                            datetime64[ns]
Result                                  object
Fighter1                                object
Fighter2                                object
Weight Class                            object
Method                                  object
Round                                    int64
Time                                    object
Fighter1 Sig Strikes                     int64
Fighter2 Sig Strikes                     int64
Fighter1 Takedowns Landed                int64
Fighter2 Takedowns Landed                int64
Fighter1 Submission Attempts             int64
Fighter2 Submission Attempts             int64
Fighter1 Knockdowns                      int64
Fighter2 Knockdowns                      int64
dtype: object

In [70]:
# Display the frame without Weight Class, Round and Time.
# NOTE(review): the result is not assigned, so event_df itself still contains
# these columns - assign it back if the drop is meant to persist.
event_df.drop(columns=["Weight Class", "Round", "Time"])

Unnamed: 0,Name,Date,Result,Fighter1,Fighter2,Method,Fighter1 Sig Strikes,Fighter2 Sig Strikes,Fighter1 Takedowns Landed,Fighter2 Takedowns Landed,Fighter1 Submission Attempts,Fighter2 Submission Attempts,Fighter1 Knockdowns,Fighter2 Knockdowns
0,UFC Fight Night: Walker vs. Zhang,2025-08-23,Johnny Walker,Johnny Walker,Zhang Mingyang,KO/TKO-Punches,50,20,0,0,0,0,1,0
1,UFC Fight Night: Walker vs. Zhang,2025-08-23,Aljamain Sterling,Aljamain Sterling,Brian Ortega,U-DEC,124,55,3,0,0,0,0,0
2,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sergei Pavlovich,Sergei Pavlovich,Waldo Cortes-Acosta,U-DEC,61,45,0,0,0,0,0,0
3,UFC Fight Night: Walker vs. Zhang,2025-08-23,Sumudaerji,Sumudaerji,Kevin Borjas,U-DEC,73,16,0,0,0,0,0,0
4,UFC Fight Night: Walker vs. Zhang,2025-08-23,Taiyilake Nueraji,Taiyilake Nueraji,Kiefer Crosbie,KO/TKO-Elbows,27,2,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2858,UFC 246: McGregor vs. Cowboy,2020-01-18,Sodiq Yusuff,Sodiq Yusuff,Andre Fili,U-DEC,73,49,0,3,1,0,0,0
2859,UFC 246: McGregor vs. Cowboy,2020-01-18,Askar Askarov,Askar Askarov,Tim Elliott,U-DEC,73,47,0,3,0,0,0,0
2860,UFC 246: McGregor vs. Cowboy,2020-01-18,Drew Dober,Drew Dober,Nasrat Haqparast,KO/TKO-Punch,13,7,0,0,0,0,1,0
2861,UFC 246: McGregor vs. Cowboy,2020-01-18,Aleksa Camur,Aleksa Camur,Justin Ledet,U-DEC,80,44,1,0,0,0,0,0
