In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime


In [2]:
# CREATING FUNCTION to get each fight urls from a fighter's URL
def get_fighter_fight_urls(fighter_url):
    page = requests.get(fighter_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    
    fight_urls = []
    
    fight_rows = soup.find_all('tr', class_='b-fight-details__table-row')
    for fight_row in fight_rows:
        onclick_value = fight_row.get('onclick')
        if onclick_value:
            match = re.search(r"'(http://www.ufcstats.com/fight-details/.+?)'", onclick_value)
            if match:
                fight_link = match.group(1)
                fight_urls.append(fight_link)
    
    return fight_urls



In [3]:
# CREATING FUNCTION to get each fight urls from a fighter's URL
def get_fighter_fight_urls(fighter_url):
    page = requests.get(fighter_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    
    fight_urls = []
    
    fight_rows = soup.find_all('tr', class_='b-fight-details__table-row')
    for fight_row in fight_rows:
        onclick_value = fight_row.get('onclick')
        if onclick_value:
            match = re.search(r"'(http://www.ufcstats.com/fight-details/.+?)'", onclick_value)
            if match:
                fight_link = match.group(1)
                fight_urls.append(fight_link)
    
    return fight_urls


In [4]:
 # FUNCTION TO SCRAPE DATA FROM A FIGHT URL

def scrape_fight_data(fight_url):
    fight_page = requests.get(fight_url)
    fight_soup = BeautifulSoup(fight_page.content, 'html.parser')

    data = []
    fight_table = fight_soup.find('tbody', class_='b-fight-details__table-body')
    
    fighter_divs = fight_soup.find_all('div', class_='b-fight-details__person')
    outcomes = [outcome_tag.get_text(strip=True) if (outcome_tag := fighter_div.find('i', class_='b-fight-details__person-status')) else None for fighter_div in fighter_divs]

    for row in fight_table.find_all('tr', class_='b-fight-details__table-row'):
        fighter_names = row.find_all('a', class_='b-link_style_black')
        fighters = [name.get_text(strip=True) for name in fighter_names]

        kd = [col.get_text(strip=True) for col in row.find_all('td')[1].find_all('p')]
        sig_str = [col.get_text(strip=True) for col in row.find_all('td')[2].find_all('p')]
        sig_str_percent = [col.get_text(strip=True) for col in row.find_all('td')[3].find_all('p')]
        total_str = [col.get_text(strip=True) for col in row.find_all('td')[4].find_all('p')]
        td = [col.get_text(strip=True) for col in row.find_all('td')[5].find_all('p')]
        td_percent = [col.get_text(strip=True) for col in row.find_all('td')[6].find_all('p')]
        sub_att = [col.get_text(strip=True) for col in row.find_all('td')[7].find_all('p')]
        rev = [col.get_text(strip=True) for col in row.find_all('td')[8].find_all('p')]
        ctrl = [col.get_text(strip=True) for col in row.find_all('td')[9].find_all('p')]

        for i in range(len(fighters)):
            fight_info = {
                'Fighter': fighters[i],
                'KD': kd[i],
                'Sig. str.': sig_str[i],
                'Sig. str. %': sig_str_percent[i],
                'Total str.': total_str[i],
                'Td': td[i],
                'Td %': td_percent[i],
                'Sub. att': sub_att[i],
                'Rev.': rev[i],
                'Ctrl': ctrl[i],
                'Fight URL': fight_url,
                'Outcome': outcomes[i] 
            }
            data.append(fight_info)

    df = pd.DataFrame(data)

    new_data = []
    new_columns = [
        'Fighter', 'Opponent', 'KD', 'Opponent KD', 'Sig. str.', 'Opponent Sig. str.',
        'Sig. str. %', 'Opponent Sig. str. %', 'Total str.', 'Opponent Total str.',
        'Td', 'Opponent Td', 'Td %', 'Opponent Td %', 'Sub. att', 'Opponent Sub. att',
        'Rev.', 'Opponent Rev.', 'Ctrl', 'Opponent Ctrl', 'Fight URL', 'Outcome'
    ]

    for i in range(0, len(df), 2):
        fighter_row = df.iloc[i]
        opponent_row = df.iloc[i + 1]

        new_row = [
            fighter_row['Fighter'], opponent_row['Fighter'],
            fighter_row['KD'], opponent_row['KD'],
            fighter_row['Sig. str.'], opponent_row['Sig. str.'],
            fighter_row['Sig. str. %'], opponent_row['Sig. str. %'],
            fighter_row['Total str.'], opponent_row['Total str.'],
            fighter_row['Td'], opponent_row['Td'],
            fighter_row['Td %'], opponent_row['Td %'],
            fighter_row['Sub. att'], opponent_row['Sub. att'],
            fighter_row['Rev.'], opponent_row['Rev.'],
            fighter_row['Ctrl'], opponent_row['Ctrl'],
            fighter_row['Fight URL'], fighter_row['Outcome']
        ]

        new_data.append(new_row)

    new_df = pd.DataFrame(new_data, columns=new_columns)
    return new_df


In [5]:
#CREATE FUNCTIONS TO CONVERT STRING DATA TO NUMERIC TO LATER USE FOR MODELING AND ANALYSIS

def get_numeric_value(value_str):
    try:
        return float(value_str)
    except ValueError:
        return None

def convert_height(height_str):
    height_str = height_str.replace('"', '')
    feet, inches = map(int, height_str.split("'"))
    total_inches = feet * 12 + inches
    return total_inches

def convert_weight(weight_str):
    return float(weight_str.replace('lbs.', '').strip())

def convert_reach(reach_str):
    if reach_str == '--':
        return None
    return float(reach_str.replace('"', '').strip())

def fighter_stats(fighter_url):
    fighter_page = requests.get(fighter_url)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    fighter_data = {}
    fighter_physical_stats = fighter_soup.find('div', class_='b-list__info-box')
    if fighter_physical_stats:
        physical_details = fighter_physical_stats.find_all('li', class_='b-list__box-list-item')
        for detail in physical_details:
            label = detail.find('i', class_='b-list__box-item-title').get_text(strip=True)
            value = detail.get_text(strip=True).replace(label, '').strip()

            if label == 'Height:':
                fighter_data[label] = convert_height(value)
            elif label == 'Weight:':
                fighter_data[label] = convert_weight(value)
            elif label == 'Reach:':
                fighter_data[label] = convert_reach(value)
            else:
                fighter_data[label] = value

    fighter_career_stats = fighter_soup.find('div', class_='b-list__info-box-left')
    if fighter_career_stats:
        career_stats = fighter_career_stats.find_all('li', class_='b-list__box-list-item')
        for stat in career_stats:
            label_element = stat.find('i', class_='b-list__box-item-title')
            value = stat.get_text(strip=True).replace(label_element.get_text(strip=True), '').strip()
            label = label_element.get_text(strip=True).rstrip(':')
            fighter_data[label] = value

    fighter_df = pd.DataFrame([fighter_data])
    
    return fighter_df

In [6]:
# Define a function to get opponent stats
def get_opponent_stats(opponent_urls):
    opponent_stats_list = []

    for opponent_url in opponent_urls:
        opponent_stats_df = fighter_stats(opponent_url)
        opponent_stats_df = (opponent_stats_df, "Opponent's ") 
        opponent_stats_list.append(opponent_stats_df)  

    # Combine all opponent_stats DataFrames into a single DataFrame
    all_opponent_stats_df = pd.concat(opponent_stats_list, ignore_index=True)
    return all_opponent_stats_df

In [7]:
#CREATE FUNCTION TO SCRAPE PAST FIGHT RESULST FROM A FIGHTER'S URL PAGE
def past_fights(fighter_url):
    fighter_page = requests.get(fighter_url)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    fighter_name = fighter_soup.find('span', class_='b-content__title-highlight').get_text(strip=True)
    fight_urls = get_fighter_fight_urls(fighter_url)
    all_fight_dfs = []

    for fight_url in fight_urls:
        fight_df = scrape_fight_data(fight_url)
        all_fight_dfs.append(fight_df)

    combined_df = pd.concat(all_fight_dfs, ignore_index=True)


    fight_table = fighter_soup.find('tbody', class_='b-fight-details__table-body')

    each_fight_details = fight_table.find_all('tr')

    for idx, fight in enumerate(each_fight_details[1:]):
        event_info_elements = fight.find_all('td', class_='b-fight-details__table-col l-page_align_left')
        round_time_elements = fight.find_all('td', class_='b-fight-details__table-col')

        event_name = event_info_elements[1].find_all('p')[0].text.strip()
        event_date = event_info_elements[1].find_all('p')[1].text.strip()
        method_of_victory = event_info_elements[2].find('p').text.strip()
        rounds = round_time_elements[-2].find('p').text.strip()
        time = round_time_elements[-1].find('p').text.strip()

        combined_df.at[idx, 'Event Name'] = event_name
        combined_df.at[idx, 'Event Date'] = event_date
        combined_df.at[idx, 'Method of Victory'] = method_of_victory
        combined_df.at[idx, 'Rounds'] = rounds
        combined_df.at[idx, 'Time'] = time

    '''
    for index, row in combined_df.iterrows():
        if row['Opponent'] == fighter_name:
           combined_df.at[index, ['Fighter', 'Opponent']] = row['Opponent'], row['Fighter']
            combined_df.at[index, ['KD', 'Opponent KD']] = row['Opponent KD'], row['KD']
            combined_df.at[index, ['Sig. str.', 'Opponent Sig. str.']] = row['Opponent Sig. str.'], row['Sig. str.']
            combined_df.at[index, ['Sig. str. %', 'Opponent Sig. str. %']] = row['Opponent Sig. str. %'], row['Sig. str. %']
            combined_df.at[index, ['Total str.', 'Opponent Total str.']] = row['Opponent Total str.'], row['Total str.']
            combined_df.at[index, ['Td', 'Opponent Td']] = row['Opponent Td'], row['Td']
            combined_df.at[index, ['Td %', 'Opponent Td %']] = row['Opponent Td %'], row['Td %']
            combined_df.at[index, ['Sub. att', 'Opponent Sub. att']] = row['Opponent Sub. att'], row['Sub. att']
            combined_df.at[index, ['Rev.', 'Opponent Rev.']] = row['Opponent Rev.'], row['Rev.']
            combined_df.at[index, ['Ctrl', 'Opponent Ctrl']] = row['Opponent Ctrl'], row['Ctrl']'''
    
    for index, row in combined_df.iterrows():
        if row['Opponent'] == fighter_name:
            combined_df.at[index, 'Fighter'] = row['Opponent']
            combined_df.at[index, 'Opponent'] = row['Fighter']
            combined_df.at[index, 'KD'] = row['Opponent KD']
            combined_df.at[index, 'Opponent KD'] = row['KD']

            combined_df.at[index, 'Sig. str.'] = row['Opponent Sig. str.']
            combined_df.at[index, 'Opponent Sig. str.'] = row['Sig. str.']

            combined_df.at[index, 'Sig. str. %'] = row['Opponent Sig. str. %']
            combined_df.at[index, 'Opponent Sig. str. %'] = row['Sig. str. %']

            combined_df.at[index, 'Total str.'] = row['Opponent Total str.']
            combined_df.at[index, 'Opponent Total str.'] = row['Total str.']

            combined_df.at[index, 'Td'] = row['Opponent Td']
            combined_df.at[index, 'Opponent Td'] = row['Td']

            combined_df.at[index, 'Td %'] = row['Opponent Td %']
            combined_df.at[index, 'Opponent Td %'] = row['Td %']

            combined_df.at[index, 'Sub. att'] = row['Opponent Sub. att']
            combined_df.at[index, 'Opponent Sub. att'] = row['Sub. att']

            combined_df.at[index, 'Rev.'] = row['Opponent Rev.']
            combined_df.at[index, 'Opponent Rev.'] = row['Rev.']

            combined_df.at[index, 'Ctrl'] = row['Opponent Ctrl']
            combined_df.at[index, 'Opponent Ctrl'] = row['Ctrl']


            if row['Outcome'] == 'W':
                combined_df.at[index, 'Outcome'] = 'L'
            elif row['Outcome'] == 'L':
                combined_df.at[index, 'Outcome'] = 'W'

    current_streak = []
    streak_type = []

    win_streak = 0
    loss_streak = 0

    for index, row in combined_df.iterrows():
        outcome = row['Outcome']

        if outcome == 'W':
            win_streak += 1
            loss_streak = 0
        elif outcome == 'L':
            loss_streak += 1
            win_streak = 0
        else:
            win_streak = 0
            loss_streak = 0

        if win_streak > 0:
            current_streak.append(win_streak)
            streak_type.append('Win')
        elif loss_streak > 0:
            current_streak.append(loss_streak)
            streak_type.append('Loss')
        else:
            current_streak.append(0)
            streak_type.append('None')


    combined_df['Current Streak'] = current_streak
    combined_df['Streak Type'] = streak_type

    combined_df['Event Date'] = pd.to_datetime(combined_df['Event Date'])

    combined_df = combined_df.sort_values(by='Event Date', ascending=True)

    combined_df['Days since last fight'] = (combined_df['Event Date'] - combined_df['Event Date'].shift(1)).dt.days

    combined_df['Days since last fight'].fillna(0, inplace=True)


    return combined_df



In [8]:
#CREATE A FUNCTION TO CLEAN THE DATAFRAME AND PREPARE FOR DATA ANALYSIS
def clean_past_fights(fighter_url):
    past_fights_df = past_fights(fighter_url)  
    
    fighter_page = requests.get(fighter_url)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    fight_table = fighter_soup.find('tbody', class_='b-fight-details__table-body')

    each_fight_details = fight_table.find_all('tr')

    opponent_urls = []

    for fight in each_fight_details[1:]:
        event_info_elements = fight.find_all('td', class_='b-fight-details__table-col l-page_align_left')

        fighters = event_info_elements[0].find_all('p')
        opponent_tag = fighters[1].find('a', class_='b-link b-link_style_black')  
        opponent_url = opponent_tag.get('href') 

        opponent_urls.append(opponent_url)

    opponent_stats_list = []

    for opponent_url in opponent_urls:
        opponent_stats_df = fighter_stats(opponent_url)  
        opponent_stats_list.append(opponent_stats_df) 


    all_opponent_stats_df = pd.concat(opponent_stats_list, ignore_index=True)

    all_opponent_stats_df = all_opponent_stats_df.drop([''], axis=1)
    all_opponent_stats_df.columns = ["Opponent's " + column for column in all_opponent_stats_df.columns]


    final_df = pd.concat([past_fights_df, all_opponent_stats_df], axis=1)
 
    final_df["Opponent's Age"] = (pd.to_datetime('today') - pd.to_datetime(final_df["Opponent's DOB:"])).astype('<m8[Y]')
    final_df[['Sig. Landed', 'Sig. Attempted']] = final_df['Sig. str.'].str.split(' of ', expand=True)
    final_df['Sig. Landed'] = final_df['Sig. Landed'].astype(int)
    final_df['Sig. Attempted'] = final_df['Sig. Attempted'].astype(int)
    final_df[["Opponent Sig. Landed", "Opponent Sig. Attempted"]] = final_df["Opponent Sig. str."].str.split(' of ', expand=True)
    final_df["Opponent Sig. Landed"] = final_df["Opponent Sig. Landed"].astype(int)
    final_df["Opponent Sig. Attempted"] = final_df["Opponent Sig. Attempted"].astype(int)
    
    final_df[['Total Landed', 'Total Attempted']] = final_df['Total str.'].str.split(' of ', expand=True)
    final_df['Total Landed'] = final_df['Total Landed'].astype(int)
    final_df['Total Attempted'] = final_df['Total Attempted'].astype(int)

    final_df[['Opponent Total Landed', 'Opponent Total Attempted']] = final_df['Opponent Total str.'].str.split(' of ', expand=True)
    final_df['Opponent Total Landed'] = final_df['Opponent Total Landed'].astype(int)
    final_df['Opponent Total Attempted'] = final_df['Opponent Total Attempted'].astype(int)

    final_df[['Td Landed', 'Td Attempted']] = final_df['Td'].str.split(' of ', expand=True)
    final_df['Td Landed'] = final_df['Td Landed'].astype(int)
    final_df['Td Attempted'] = final_df['Td Attempted'].astype(int)

    final_df['Sig. str. %'] = final_df['Sig. str. %'].replace('---', float('nan')).str.rstrip('%').astype(float)
    final_df['Opponent Sig. str. %'] = final_df['Opponent Sig. str. %'].replace('---', float('nan')).str.rstrip('%').astype(float)
    final_df['Td %'] = final_df['Td %'].replace('---', float('nan')).str.rstrip('%').astype(float)
    final_df['Opponent Td %'] = final_df['Opponent Td %'].replace('---', float('nan')).str.rstrip('%').astype(float)

    final_df["Opponent's Str. Def(%)"] = final_df["Opponent's Str. Def"].replace('---', float('nan')).str.rstrip('%').astype(float)    
    final_df["Opponent's TD Def.(%)"] = final_df["Opponent's TD Def."].replace('---', float('nan')).str.rstrip('%').astype(float)

    final_df["Opponent's Str. Acc.(%)"] = final_df["Opponent's Str. Acc."].replace('---', float('nan')).str.rstrip('%').astype(float)
    final_df["Opponent's TD Acc.(%)"] = final_df["Opponent's TD Acc."].replace('---', float('nan')).str.rstrip('%').astype(float)

    final_df.drop(["Opponent's Str. Def","Opponent's TD Def.", "Opponent's Str. Acc.", "Opponent's TD Acc."],axis=1, inplace=True)
    

    return final_df


In [9]:
def preprocessing_data(fighter_url):
    fighter_df = clean_past_fights(fighter_url)

    fighter_df.drop(['Fight URL', 'Event Name', 'Event Date', 'Fighter', 'Opponent', 'Sig. str.', 'Opponent Sig. str.','Total str.','Opponent Total str.', 'Td', 'Opponent Td', 'Ctrl', 'Time','Opponent Ctrl',"Opponent's DOB:", "Td %","Opponent Td %"], axis=1, inplace=True)

    fighter_encoded = pd.get_dummies(fighter_df, columns=['Method of Victory', "Opponent's STANCE:", "Streak Type"])

    return fighter_encoded

In [10]:
def predict_outcome(fighter_url):

    fighter_df = preprocessing_data(fighter_url)

    fighter_df.dropna(inplace=True)
    X = fighter_df.drop(['Outcome'], axis=1)

    y = fighter_df['Outcome']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)

    return model

In [11]:
Petr_Yan = 'http://www.ufcstats.com/fighter-details/d661ce4da776fc20'
Sean_Omalley = 'http://www.ufcstats.com/fighter-details/b50a426a33da0012'
Aljamain_Sterling = 'http://www.ufcstats.com/fighter-details/cb696ebfb6598724'

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix
predict_outcome(Petr_Yan)
predict_outcome(Sean_Omalley)
predict_outcome(Aljamain_Sterling)


Accuracy: 0.6666666666666666
Confusion Matrix:
[[1 1]
 [0 1]]
Accuracy: 0.3333333333333333
Confusion Matrix:
[[0 0 0]
 [0 0 1]
 [1 0 1]]
Accuracy: 0.5
Confusion Matrix:
[[0 1]
 [1 2]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

Poirier_df = preprocessing_data('http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0')


In [14]:
Poirier_df

Unnamed: 0,KD,Opponent KD,Sig. str. %,Opponent Sig. str. %,Sub. att,Opponent Sub. att,Rev.,Opponent Rev.,Outcome,Rounds,...,Method of Victory_KO/TKO,Method of Victory_M-DEC,Method of Victory_SUB,Method of Victory_U-DEC,Opponent's STANCE:_Orthodox,Opponent's STANCE:_Southpaw,Opponent's STANCE:_Switch,Streak Type_Loss,Streak Type_None,Streak Type_Win
30,0,0,44.0,44.0,5,5,0,0,L,3,...,0,0,0,1,1,0,0,1,0,0
29,0,0,59.0,59.0,0,0,0,0,W,1,...,1,0,0,0,0,1,0,0,0,1
28,0,0,52.0,37.0,0,2,1,0,W,3,...,0,0,0,1,1,0,0,0,0,1
27,0,0,52.0,35.0,0,0,0,0,W,3,...,0,0,0,1,1,0,0,0,0,1
26,0,0,36.0,41.0,1,1,0,0,W,2,...,0,0,1,0,1,0,0,0,0,1
25,0,0,44.0,34.0,3,0,0,0,W,1,...,0,0,1,0,1,0,0,0,0,1
24,0,0,44.0,44.0,0,0,2,2,L,4,...,0,0,1,0,1,0,0,1,0,0
23,0,0,47.0,48.0,1,0,0,0,W,1,...,0,0,1,0,0,1,0,0,0,1
22,0,0,41.0,41.0,0,0,0,0,L,3,...,0,0,0,1,1,0,0,1,0,0
21,2,2,58.0,58.0,2,2,0,0,W,3,...,0,0,0,1,0,1,0,0,0,1


In [15]:

Poirier_df['Outcome']


30     L
29     W
28     W
27     W
26     W
25     W
24     L
23     W
22     L
21     W
20     W
19     W
18     L
17     W
16     W
15     W
14     W
13     L
12     W
11    NC
10     W
9      W
8      W
7      W
6      L
5      W
4      W
3      W
2      L
1      W
0      L
Name: Outcome, dtype: object

In [16]:
Poirier_df.iloc[:,1:15]

Unnamed: 0,Opponent KD,Sig. str. %,Opponent Sig. str. %,Sub. att,Opponent Sub. att,Rev.,Opponent Rev.,Outcome,Rounds,Current Streak,Days since last fight,Opponent's Height:,Opponent's Weight:,Opponent's Reach:
30,0,44.0,44.0,5,5,0,0,L,3,1,0.0,69,155.0,71.0
29,0,59.0,59.0,0,0,0,0,W,1,5,85.0,72,155.0,74.0
28,0,52.0,37.0,0,2,1,0,W,3,4,51.0,71,145.0,74.0
27,0,52.0,35.0,0,0,0,0,W,3,3,161.0,69,145.0,73.0
26,0,36.0,41.0,1,1,0,0,W,2,2,154.0,73,145.0,73.0
25,0,44.0,34.0,3,0,0,0,W,1,1,84.0,71,145.0,69.0
24,0,44.0,44.0,0,0,2,2,L,4,1,101.0,67,145.0,72.0
23,0,47.0,48.0,1,0,0,0,W,1,1,214.0,72,145.0,74.0
22,0,41.0,41.0,0,0,0,0,L,3,1,63.0,68,145.0,70.0
21,2,58.0,58.0,2,2,0,0,W,3,3,196.0,70,170.0,71.0


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

X = Poirier_df.drop(['Outcome'], axis=1)

y = Poirier_df['Outcome']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Multinomial Logistic Regression model
model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.5714285714285714
Confusion Matrix:
[[1 1 0]
 [0 0 0]
 [1 1 3]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
#Merab Dvalishvili
predict_outcome('http://www.ufcstats.com/fighter-details/c03520b5c88ed6b4')

Accuracy: 0.6666666666666666
Confusion Matrix:
[[0 1]
 [0 2]]


In [19]:
#Henry
predict_outcome('http://www.ufcstats.com/fighter-details/056c493bbd76a918')

Accuracy: 0.6666666666666666
Confusion Matrix:
[[0 0]
 [1 2]]


In [20]:
#Sandhagen
predict_outcome('http://www.ufcstats.com/fighter-details/65f09bacd3957381')

Accuracy: 0.3333333333333333
Confusion Matrix:
[[0 1]
 [1 1]]


In [21]:
#CHito
predict_outcome('http://www.ufcstats.com/fighter-details/7c7332319c14094c')

Accuracy: 0.6
Confusion Matrix:
[[0 2]
 [0 3]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
#Yadong
predict_outcome('http://www.ufcstats.com/fighter-details/efb96bf3e9ada36f')

Accuracy: 0.6666666666666666
Confusion Matrix:
[[0 1]
 [0 2]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
#Rob Font
predict_outcome('http://www.ufcstats.com/fighter-details/05339613bf8e9808')

Accuracy: 0.5
Confusion Matrix:
[[0 1]
 [1 2]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [24]:
#Dom Cruz
predict_outcome('http://www.ufcstats.com/fighter-details/10f3ba6cd2f44a97')

ParserError: String does not contain a date: -- present at position 3

In [25]:
#predro munh
predict_outcome('http://www.ufcstats.com/fighter-details/6bd02119599741a4')

Accuracy: 0.5
Confusion Matrix:
[[1 0 1]
 [0 0 1]
 [0 0 1]]


In [26]:
predict_outcome('http://www.ufcstats.com/fighter-details/6d4b63c767106d3a')

Accuracy: 0.5
Confusion Matrix:
[[0 2]
 [0 2]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [27]:
from sklearn.ensemble import RandomForestClassifier

def predict_outcome_random_forest(fighter_url):
    fighter_df = preprocessing_data(fighter_url)

    fighter_df.dropna(inplace=True)
    X = fighter_df.drop(['Outcome'], axis=1)
    y = fighter_df['Outcome']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("Random Forest Classifier")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)

    return model, accuracy, conf_matrix


In [28]:
from sklearn.svm import SVC

def predict_outcome_svm(fighter_url):
    fighter_df = preprocessing_data(fighter_url)

    fighter_df.dropna(inplace=True)
    X = fighter_df.drop(['Outcome'], axis=1)
    y = fighter_df['Outcome']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = SVC(kernel='linear', random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print("SVM Classifier")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)

    return model, accuracy, conf_matrix


In [29]:
from keras.models import Sequential
from keras.layers import Dense

def predict_outcome_neural_network(fighter_url):
    fighter_df = preprocessing_data(fighter_url)

    fighter_df.dropna(inplace=True)
    X = fighter_df.drop(['Outcome'], axis=1)
    y = fighter_df['Outcome']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='softmax'))  # 3 classes: Win, Loss, No Contest (NC)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Convert outcome labels to numeric codes
    outcome_mapping = {'W': 0, 'L': 1, 'NC': 2}
    y_train_encoded = y_train.map(outcome_mapping)
    y_test_encoded = y_test.map(outcome_mapping)

    y_train_encoded = pd.get_dummies(y_train_encoded)
    y_test_encoded = pd.get_dummies(y_test_encoded)

    model.fit(X_train, y_train_encoded, epochs=10, batch_size=32, verbose=0)

    _, accuracy = model.evaluate(X_test, y_test_encoded)

    y_pred = model.predict(X_test)

    conf_matrix = confusion_matrix(y_test_encoded.values.argmax(axis=1), y_pred.argmax(axis=1))

    print("Neural Network Classifier")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)

    return model, accuracy, conf_matrix


In [30]:
predict_outcome_random_forest(Sean_Omalley)
predict_outcome_svm(Sean_Omalley)
predict_outcome_neural_network(Sean_Omalley)

Random Forest Classifier
Accuracy: 0.6666666666666666
Confusion Matrix:
[[0 1]
 [0 2]]
SVM Classifier
Accuracy: 0.3333333333333333
Confusion Matrix:
[[0 0 0]
 [0 0 1]
 [1 0 1]]


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).