# UFC ML Model Predictor

## Import Libraries

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Read and Clean Data for Use

In [5]:
# Read the fight records; the path is relative to the current working
# directory. The last line previews the first rows of the parsed frame.
csv_path = 'ufc-master.csv'
raw = pd.read_csv(csv_path)
raw.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,Alexandre Pantoja,Kai Asakura,-250.0,215.0,40.0,215.0,2024-12-07,"Las Vegas, Nevada, USA",USA,Red,...,Rear Naked Choke,2.0,2:05,425.0,300.0,800.0,150.0,2500.0,400.0,350.0
1,Shavkat Rakhmonov,Ian Machado Garry,-210.0,295.0,47.619,295.0,2024-12-07,"Las Vegas, Nevada, USA",USA,Red,...,,5.0,5:00,1500.0,250.0,650.0,180.0,3000.0,240.0,700.0
2,Ciryl Gane,Alexander Volkov,-380.0,300.0,26.3158,300.0,2024-12-07,"Las Vegas, Nevada, USA",USA,Red,...,,3.0,5:00,900.0,-160.0,450.0,1100.0,3000.0,350.0,1100.0
3,Bryce Mitchell,Kron Gracie,-950.0,625.0,10.5263,625.0,2024-12-07,"Las Vegas, Nevada, USA",USA,Red,...,Elbows,3.0,0:39,639.0,-200.0,1100.0,380.0,1400.0,500.0,4000.0
4,Nate Landwehr,Dooho Choi,-130.0,110.0,76.9231,110.0,2024-12-07,"Las Vegas, Nevada, USA",USA,Blue,...,Elbows,3.0,3:21,801.0,275.0,550.0,500.0,700.0,300.0,250.0


In [10]:
# Names of the columns that pandas holds with `object` dtype, i.e. the
# non-numeric columns that still need dropping or encoding.
not_stat = raw.select_dtypes(include=['object']).columns
not_stat

Index(['RedFighter', 'BlueFighter', 'Date', 'Location', 'Country', 'Winner',
       'WeightClass', 'Gender', 'BlueStance', 'RedStance', 'BetterRank',
       'Finish', 'FinishDetails', 'FinishRoundTime'],
      dtype='object')

In [17]:
# Remove the text columns that are not encoded into features below.
# `raw` is rebound to the result (no inplace=True) and errors='ignore' is
# passed so that re-running this cell is a no-op instead of raising KeyError
# on columns that were already removed by a previous run.
unused_text_cols = [
    'RedFighter', 'BlueFighter', 'Date', 'Location', 'Country',
    'Gender', 'BetterRank', 'Finish', 'FinishDetails', 'FinishRoundTime',
]
raw = raw.drop(columns=unused_text_cols, errors='ignore')

In [18]:
# Show which object-dtype columns are still present in `raw`.
raw.select_dtypes(include='object').columns

Index(['Winner', 'WeightClass', 'BlueStance', 'RedStance'], dtype='object')

In [19]:
# Integer code for each stance label; labels not listed here are left as-is
# by `replace`.
stance_map = {'Orthodox': 0, 'Southpaw': 1, 'Switch': 2, 'Open Stance': 3}
raw['RedStance'] = raw['RedStance'].replace(stance_map)
raw['BlueStance'] = raw['BlueStance'].replace(stance_map)

# Target encoding: 1 when the red corner won, 0 when the blue corner won.
win_map = {'Red': 1, 'Blue': 0}

# Rows with a missing winner get the sentinel value 2. The result is assigned
# back to the column: calling `.replace(..., inplace=True)` on `raw['Winner']`
# operates on an intermediate object and, per the pandas warning, never
# updates `raw` under pandas 3.0.
raw['Winner'] = raw['Winner'].replace(win_map).fillna(2)

  raw['RedStance'] = raw['RedStance'].replace(stance_map)
  raw['Winner'] = raw['Winner'].replace(win_map)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  raw['Winner'].replace(np.nan, 2, inplace = True)


In [21]:
# Class balance of the encoded target. A bare `raw['Winner'].unique()` placed
# before the last line is never displayed by the notebook, so it is folded
# into this single call: dropna=False makes any remaining NaN show up as its
# own row, which covers what `unique()` would have revealed.
raw['Winner'].value_counts(dropna=False)

Winner
1    3787
0    2741
Name: count, dtype: int64

In [22]:
# Separate the label from the features: `y` is the encoded Winner column and
# `x` is every other column of `raw`.
target_col = 'Winner'
y = raw[target_col]
x = raw.drop(columns=[target_col])

In [24]:
def fill_nan_by_weight(df):
    """Impute missing numeric values using per-weight-class means.

    Each numeric column is filled with the mean of that column within the
    row's 'WeightClass' group. Values that are still missing afterwards (for
    example when a whole group has no data for a column) are filled with the
    column's overall mean computed after the group-wise pass.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a 'WeightClass' column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding ONLY the numeric columns of `df` (non-numeric
        columns, including 'WeightClass', are not carried over), with the
        same index as `df`.
    """
    frame = df.copy()
    num_cols = frame.select_dtypes(include=[np.number]).columns

    def _fill_with_own_mean(values):
        # Applied per weight-class group: replace NaN with that group's mean.
        return values.fillna(values.mean())

    frame = frame.groupby('WeightClass')[num_cols].transform(_fill_with_own_mean)

    # Fallback for anything the group-wise pass could not fill.
    if frame[num_cols].isna().any().any():
        overall_means = frame[num_cols].mean()
        frame[num_cols] = frame[num_cols].fillna(overall_means)

    return frame

# Impute per weight class, then set anything that is still NaN to 0.
x_filled = fill_nan_by_weight(x).fillna(0)

# Keep only the numeric columns as the model's feature matrix.
x = x_filled.select_dtypes(include=[np.number])

In [25]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Shapes of the four split pieces, in order: x_train, x_test, y_train, y_test.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

(5222, 104) (1306, 104) (5222,) (1306,)


In [27]:
# Seed NumPy's legacy global random generator so later code that draws from
# `np.random` is repeatable.
# NOTE(review): the train/test split earlier in the notebook passes its own
# random_state=42, so it does not depend on this seed — confirm that having
# two different seeds is intentional.
seed = 360
np.random.seed(seed)

In [None]:
# Standardise the features. The scaler is fitted on the training rows only
# and the same fitted parameters are then applied to the test rows.
#
# `fit_transform` / `transform` return new arrays and leave their inputs
# unchanged, so the results must be stored; calling them without keeping the
# return value would leave no scaled data for later cells to use.
# (StandardScaler is imported with the other imports at the top.)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Display the scaled test matrix, as the final expression of this cell did before.
x_test_scaled