# WELCOME TO THE UFC FIGHT PREDICTION APP!
## This app utilizes a specially trained machine learning model to predict fight outcomes... Enjoy!
## Simply:
## 1. Select a weight class and two fighters from the dropdown menus
## 2. Click the predict button
## 3. The 'Fighter' column shows the predicted winner and the 'win_proba' column shows the probability of the outcome according to the model


### *This app should not be used as a betting tool. Its predictions and probabilities are model estimates, not guarantees.






In [None]:
# Data Prep Imports 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import sys
import datetime 
import seaborn as sns
import datetime
from pytz import utc, timezone
import time
import re
from functools import reduce
import pickle as pkl
import joblib
from ipywidgets import interact, interactive, fixed, interact_manual, Dropdown
import ipywidgets as widgets


In [104]:
# ML Modeling Imports

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, confusion_matrix, accuracy_score, make_scorer
from math import sqrt

# !pip install eli5
# import eli5
# from eli5.sklearn import PermutationImportance
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn.model_selection import cross_val_score, KFold, train_test_split, RepeatedStratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, ElasticNet, SGDRegressor, Lasso, RidgeCV, LassoCV, ElasticNetCV
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.neural_network import MLPClassifier
# from lightgbm import LGBMRegressor
# from xgboost import XGBClassifier

In [105]:
# Load the cleaned fight-level dataset; the first CSV column is used as the index.
ufc_df = pd.read_csv(
    '/Users/joshhawkins/Desktop/Data Science/UFC_Project/ufc_clean_future_data.csv',
    index_col=0,
    low_memory=False,
)

In [106]:
# Keep only rows where both recent-form flags are present.
ufc_df = ufc_df.dropna(subset=['Won_Last_Fight', 'Won_Fight_Before_Last'])


# Build the opponent-side view of the same rows: drop the outcome/sex columns,
# prefix everything with 'Opponent_', then swap the Fighter/Opponent names so
# the frame can be joined back onto the fighter-side rows.
opponent_ufc_df = ufc_df.drop(columns=['Win', 'Winner', 'Male', 'Female'])
cols = opponent_ufc_df.columns.tolist()
opponent_ufc_df = (
    opponent_ufc_df
    .add_prefix('Opponent_')
    .rename(columns={
        'Opponent_Date': 'Date',
        'Opponent_Fighter': 'Opponent',
        'Opponent_Opponent': 'Fighter',
        'Opponent_Fighter_Age': 'Opponent_Age',
    })
    .drop_duplicates()
    .reset_index(drop=True)
)


# Join fighter-side and opponent-side rows, discard rows whose opponent side
# did not match (no Opponent_DOB), and strip stray whitespace from column names.
full_df = (
    ufc_df
    .merge(opponent_ufc_df, how='left', on=['Date', 'Opponent', 'Fighter'])
    .drop(columns='Winner')
    .dropna(subset=['Opponent_DOB'])
)
full_df.columns = [name.strip() for name in full_df.columns]


# Average level of opponents faced: a per-group running mean of the other
# side's win percentage. The window is as long as the whole frame, so every
# row averages all rows of its group seen so far (in current row order).
full_df['avg_level_of_opponents_faced'] = (
    full_df
    .groupby('Fighter')['Opponent_win_percentage']
    .transform(lambda s: s.rolling(len(full_df), min_periods=0).mean())
    .astype(float)
)
full_df['Opponent_avg_level_of_opponents_faced'] = (
    full_df
    .groupby('Opponent')['win_percentage']
    .transform(lambda s: s.rolling(len(full_df), min_periods=0).mean())
    .astype(float)
)


# Keep only the most recent fight per fighter.
future_df = (
    full_df
    .sort_values(by=['Date', 'Fighter'])
    .drop_duplicates(subset='Fighter', keep='last')
)


In [107]:
# Take a df and two fighters, and return the model-ready rows for that matchup
# (one row per fighter perspective) with fighter-minus-opponent 'Diff_' features.

def create_matchup(df, fighter1, fighter2):
    """Build feature rows for a hypothetical fight between two fighters today.

    A placeholder fight dated today is added for each fighter. Each fighter's
    previous row in ``df`` is shifted onto that placeholder, so the placeholder
    carries the stats "as of last fight" (``*_AOLF``) plus the bio columns.
    Time since last fight and age are recomputed for today's date. The two
    placeholders are then paired as fighter/opponent and numeric columns are
    replaced by ``Diff_<col>`` = fighter value - opponent value.

    Args:
        df: Fight-level frame. Must contain ``Date``, ``Fighter``, ``Opponent``,
            ``Win`` and every bio column (``Male``, ``Female``, ``DOB``,
            ``Height``, ``Reach``, ``STANCE``, ``Weight``, ``Weight_Class``).
            Every other non-``Opponent`` column is treated as a stat and must
            be convertible to float.
        fighter1: Exact name of the first fighter as it appears in ``df``.
        fighter2: Exact name of the second fighter as it appears in ``df``.

    Returns:
        A DataFrame with one row per perspective (fighter1 vs fighter2 and
        fighter2 vs fighter1). It is empty when a fighter has no prior row in
        ``df`` or when both names are the same.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    # Exact-name match. ``str.contains`` treated the names as regular
    # expressions and substrings, so names with characters such as '(' or '+'
    # failed to match (or raised) and partial names pulled in other fighters.
    history = df.loc[df['Fighter'].isin([fighter1, fighter2])].drop(columns='Win')

    # Placeholder rows for today's fight, one per fighter. They go first so the
    # resulting column order starts with Date, Fighter, Opponent.
    today = datetime.date.today()
    upcoming = pd.DataFrame({
        'Date': [today, today],
        'Fighter': [fighter2, fighter1],
        'Opponent': [fighter1, fighter2],
    })
    data = pd.concat([upcoming, history], ignore_index=True)
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.sort_values('Date')
    data['Year'] = data['Date'].dt.year

    # SHIFT STATS DOWN BY ONE FIGHT so each row only sees information that was
    # available before that fight took place.
    keep_list = ['Date', 'Fighter', 'Opponent', 'Date_of_Last_Fight',
                 'Months_Since_Last_Fight', 'Year', 'Fighter_Age']
    bio_list = ['Male', 'Female', 'DOB', 'Height', 'Reach', 'STANCE',
                'Weight', 'Weight_Class']
    shift_cols = [
        col for col in data.columns
        if col not in keep_list and col not in bio_list and 'Opponent' not in col
    ]
    # Stats as of last fight (AOLF).
    for col in shift_cols:
        data[col + '_AOLF'] = data.groupby('Fighter')[col].shift(1)
    # Bio columns are carried forward from the previous fight row.
    for col in bio_list:
        data[col] = data.groupby('Fighter')[col].shift(1)
    data['Date_of_Last_Fight'] = data.groupby('Fighter')['Date'].shift(1)
    data = data.drop(columns=shift_cols)

    # RECALCULATE MONTHS SINCE LAST FIGHT (0 when there is no previous fight).
    gap_days = (data['Date'] - data['Date_of_Last_Fight']).dt.days
    data['Months_Since_Last_Fight'] = (gap_days.fillna(0) / 30.5).round(2)

    # RECALCULATE AGE in years. Rows without a date of birth are dropped, which
    # also removes the historical rows whose bio was shifted away.
    data['DOB'] = pd.to_datetime(data['DOB'])
    data['Fighter_Age'] = data['Date'] - data['DOB']
    data = data[data['Fighter_Age'].notna()].copy()
    data['Fighter_Age'] = (data['Fighter_Age'].dt.days / 365).round(2)

    # MERGE AS FIGHTER/OPPONENT: pair every remaining row with every other row
    # that shares the same date, sex flags and year.
    data = data[[col for col in data.columns if 'Opponent' not in col]]
    merge_keys = ['Date', 'Male', 'Female', 'Year']
    stat_cols = [
        col for col in data.columns
        if col not in ('Fighter', 'Date', 'Male', 'Female', 'Year', 'Fighter_Age')
    ]
    opponent_data = data[merge_keys + stat_cols].rename(
        columns={col: 'Opponent_' + col for col in stat_cols}
    )
    # Name and age do not follow the plain 'Opponent_' prefix rule.
    opponent_data['Opponent'] = data['Fighter']
    opponent_data['Opponent_Age'] = data['Fighter_Age']

    data = data.merge(opponent_data, how='inner', on=merge_keys)
    data = data[data['Fighter'] != data['Opponent']]

    # CALCULATE DIFFS for every fighter-side column that is not an identifier,
    # date or categorical; the opponent side is handled through its prefix.
    non_diff_cols = {
        'Date', 'Fighter', 'Opponent', 'Win', 'Date_of_Last_Fight', 'Male',
        'Female', 'DOB', 'STANCE', 'Weight_Class', 'Opponent_Weight_Class',
        'Opponent_Date_of_Last_Fight', 'Opponent_DOB', 'Opponent_STANCE',
        'Year', 'Month', 'Fighter_Age', 'Opponent_Age',
    }
    columns_to_change = [
        col for col in data.columns
        if col not in non_diff_cols and 'Opponent' not in col
    ]

    diff_df = data.copy()
    for col in columns_to_change:
        opponent_col = 'Opponent_' + col
        diff_df['Diff_' + col] = (
            diff_df[col].astype(float) - diff_df[opponent_col].astype(float)
        )
        diff_df = diff_df.drop(columns=[col, opponent_col])
    diff_df['Diff_age'] = diff_df['Fighter_Age'] - diff_df['Opponent_Age']
    diff_df = diff_df.drop(columns=['Fighter_Age', 'Opponent_Age'])

    return diff_df
 	

In [108]:
# Bring in the pickled models

# Load the pre-trained stacked models used for prediction.
# NOTE: these files are pickle-based, and loading a pickle can execute
# arbitrary code, so only load model files from a trusted source.
# The path is handed to joblib directly so joblib opens and closes the file
# itself; the previous `open(...)` handle was never closed.
mens_filepath = '/Users/joshhawkins/Desktop/Data Science/UFC_Project/mens_stack_model.pkl'
mens_model = joblib.load(mens_filepath)

womens_filepath = '/Users/joshhawkins/Desktop/Data Science/UFC_Project/womens_stack_model.pkl'
womens_model = joblib.load(womens_filepath)


In [109]:
# Features

# Columns that are NOT passed to the models: identifiers, dates, categorical
# columns, targets/outputs and a set of excluded 'Diff_' stats. Every other
# column of the matchup frame is treated as a model feature.
# (Duplicate entries for 'base_pred' and 'Opponent_STANCE' were removed;
# membership is unchanged.)
non_feature_list = [
    'Career_Stage', 'Male', 'Female', 'base_pred', 'Diff_Weight', 'Month', 'AGE',
    'Win', 'Date', 'Date_of_Last_Fight', 'Opponent_Date_of_Last_Fight',
    'Fighter', 'Opponent', 'DOB', 'Opponent_DOB', 'Opponent_STANCE', 'STANCE',
    'Weight_Class', 'Opponent_Weight_Class',
    'Diff_wins_last_5_AOLF', 'Diff_Fight_Time_in_Sec_AOLF',
    'Diff_Fight_Time_in_Min_AOLF', 'Diff_Fight_Time_in_Min_avg_AOLF',
    'Diff_win_total_AOLF', 'Diff_DQ_total_AOLF', 'Diff_KO/TKO_total_AOLF',
    'Diff_M-DEC_total_AOLF', 'Diff_S-DEC_total_AOLF', 'Diff_SUB_total_AOLF',
    'Diff_U-DEC_total_AOLF', 'win_proba',
]


In [113]:

# Dropdowns start out listing every weight class and every fighter in future_df.
Weight_Class = Dropdown(options=future_df['Weight_Class'].unique().tolist())
Fighter_1 = Dropdown(options=future_df['Fighter'].sort_values().unique().tolist())
Fighter_2 = Dropdown(options=future_df['Fighter'].sort_values().unique().tolist())


@interact(WeightClass=Weight_Class, Fighter1=Fighter_1, Fighter2=Fighter_2)
def print_city(WeightClass, Fighter1, Fighter2):
    """Restrict both fighter dropdowns to the selected weight class.

    Called by ``interact`` whenever one of the three dropdowns changes.
    ``Fighter1`` and ``Fighter2`` are received but not used here.
    """
    in_class = future_df['Weight_Class'] == WeightClass
    names = future_df.loc[in_class, 'Fighter'].sort_values().tolist()
    for dropdown in (Fighter_1, Fighter_2):
        dropdown.options = names

interactive(children=(Dropdown(description='WeightClass', options=('Open Weight', 'Middleweight', 'Heavyweight…

In [None]:
button = widgets.Button(description='Predict!')
output = widgets.Output()

display(button, output)


def click(b):
    """Predict the selected matchup and show the favoured row in ``output``.

    Builds the matchup rows for the two selected fighters, scores each row with
    the men's or women's model depending on its ``Male`` flag, and displays the
    row(s) with the highest ``win_proba``.

    Args:
        b: The button instance passed in by ``on_click``; unused.
    """
    # Run everything inside the Output widget so messages and tracebacks are
    # shown to the user, and clear the previous result so outputs don't pile up.
    with output:
        output.clear_output(wait=True)

        if Fighter_1.value == Fighter_2.value:
            print('Please select two different fighters.')
            return

        # Create matchup DF and feature list
        match_up_df = create_matchup(future_df, Fighter_1.value, Fighter_2.value)
        if match_up_df.empty:
            print('No usable fight history was found for this matchup.')
            return
        feature_list = [col for col in match_up_df.columns.tolist()
                        if col not in non_feature_list]
        features = match_up_df[feature_list]

        # Create predictions: rows with Male > 0 use the men's model, all other
        # rows use the women's model. Each model is only run on its own rows.
        is_male = match_up_df['Male'] > 0
        win_proba = pd.Series(np.nan, index=match_up_df.index, dtype=float)
        for mask, model in ((is_male, mens_model), (~is_male, womens_model)):
            if mask.any():
                win_proba[mask] = model.predict_proba(features[mask])[:, 1]
        match_up_df['win_proba'] = win_proba

        # Take the higher probability: the matchup holds one row per fighter
        # perspective, so keep the row(s) with the highest win probability.
        results = match_up_df[match_up_df['win_proba'] == match_up_df['win_proba'].max()]

        # Reindex and simplify results
        results = results.set_index('Date')
        display(results[['Fighter', 'Opponent', 'win_proba']])


button.on_click(click)

