In [1]:
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [2]:
# Function to collect the URL of every fight listed on a fighter's page
def get_fighter_fight_urls(fighter_url):
    """Return the fight-details links found on a fighter's page.

    The page at ``fighter_url`` is downloaded and every
    ``<tr class="b-fight-details__table-row">`` is inspected; when the row has
    an ``onclick`` attribute containing a quoted fight-details URL, that URL
    is collected.

    Note: an identical definition of this function appears again in the next
    notebook cell and replaces this one when both cells are run.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        list[str]: Fight-details URLs in the order the rows appear.
    """
    response = requests.get(fighter_url)
    parsed = BeautifulSoup(response.content, 'html.parser')

    link_pattern = r"'(http://www.ufcstats.com/fight-details/.+?)'"
    links = []
    for table_row in parsed.find_all('tr', class_='b-fight-details__table-row'):
        handler = table_row.get('onclick')
        if not handler:
            # Rows without an onclick handler carry no fight link.
            continue
        found = re.search(link_pattern, handler)
        if found:
            links.append(found.group(1))

    return links



In [3]:
# Function to collect the URL of every fight listed on a fighter's page
def get_fighter_fight_urls(fighter_url):
    """Return the fight-details links found on a fighter's page.

    Each ``<tr class="b-fight-details__table-row">`` on the page is inspected;
    when its ``onclick`` attribute contains a quoted fight-details URL, that
    URL is collected.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        list[str]: Fight-details URLs in the order the rows appear
        (empty when no row carries a link).

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including a timeout).
    """
    # A timeout keeps one stalled request from hanging the whole scrape.
    page = requests.get(fighter_url, timeout=30)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Dots are escaped so they only match literal dots; both http and https
    # links are accepted.
    fight_link_re = re.compile(r"'(https?://www\.ufcstats\.com/fight-details/.+?)'")

    fight_urls = []
    for fight_row in soup.find_all('tr', class_='b-fight-details__table-row'):
        onclick_value = fight_row.get('onclick')
        if not onclick_value:
            continue
        match = fight_link_re.search(onclick_value)
        if match:
            fight_urls.append(match.group(1))

    return fight_urls


In [4]:
 # FUNCTION TO SCRAPE DATA FROM A FIGHT URL

def scrape_fight_data(fight_url):
    """Scrape the per-fighter totals from a fight page into a one-row-per-pair frame.

    The first ``<tbody class="b-fight-details__table-body">`` on the page is
    read. Every table row lists the fighters (as ``b-link_style_black`` links)
    and, in cells 1-9, one ``<p>`` per fighter for each statistic. Consecutive
    fighters are then paired: the first becomes ``Fighter`` and the second
    ``Opponent``.

    Args:
        fight_url: URL of the fight-details page.

    Returns:
        pandas.DataFrame: Columns ``Fighter``, ``Opponent``, each statistic
        followed by its ``Opponent ...`` counterpart, ``Fight URL`` and
        ``Outcome`` (the status text of the fighter in the ``Fighter``
        column, or ``None`` when the page shows no status). All statistic
        values are the raw strings from the page.

    Raises:
        ValueError: If the page has no statistics table.
        requests.exceptions.RequestException: If the page cannot be fetched.
    """
    stat_names = [
        'KD', 'Sig. str.', 'Sig. str. %', 'Total str.', 'Td', 'Td %',
        'Sub. att', 'Rev.', 'Ctrl',
    ]
    # Output layout: Fighter, Opponent, <stat>, Opponent <stat>, ..., Fight URL, Outcome
    new_columns = ['Fighter', 'Opponent']
    for stat in stat_names:
        new_columns += [stat, f'Opponent {stat}']
    new_columns += ['Fight URL', 'Outcome']

    fight_page = requests.get(fight_url, timeout=30)
    fight_soup = BeautifulSoup(fight_page.content, 'html.parser')

    fight_table = fight_soup.find('tbody', class_='b-fight-details__table-body')
    if fight_table is None:
        # Fail with a message naming the page instead of an AttributeError on None.
        raise ValueError(f"No fight statistics table found at {fight_url}")

    # One status entry per fighter header on the page, in page order.
    fighter_divs = fight_soup.find_all('div', class_='b-fight-details__person')
    outcomes = [
        status_tag.get_text(strip=True)
        if (status_tag := fighter_div.find('i', class_='b-fight-details__person-status'))
        else None
        for fighter_div in fighter_divs
    ]

    records = []
    for row in fight_table.find_all('tr', class_='b-fight-details__table-row'):
        fighters = [
            link.get_text(strip=True)
            for link in row.find_all('a', class_='b-link_style_black')
        ]
        # Look the cells up once per row; cell 0 holds the names, 1-9 the stats.
        cells = row.find_all('td')
        stats = {
            stat: [p.get_text(strip=True) for p in cells[position].find_all('p')]
            for position, stat in enumerate(stat_names, start=1)
        }
        for i, fighter in enumerate(fighters):
            record = {'Fighter': fighter, 'Outcome': outcomes[i]}
            for stat in stat_names:
                record[stat] = stats[stat][i]
            records.append(record)

    # Pair consecutive fighters: even positions are "Fighter", odd are "Opponent".
    paired_rows = []
    for i in range(0, len(records), 2):
        fighter, opponent = records[i], records[i + 1]
        paired = [fighter['Fighter'], opponent['Fighter']]
        for stat in stat_names:
            paired += [fighter[stat], opponent[stat]]
        paired += [fight_url, fighter['Outcome']]
        paired_rows.append(paired)

    return pd.DataFrame(paired_rows, columns=new_columns)


In [5]:
#CREATE FUNCTIONS TO CONVERT STRING DATA TO NUMERIC TO LATER USE FOR MODELING AND ANALYSIS

def get_numeric_value(value_str):
    """Convert a value to ``float``, returning ``None`` when that is not possible.

    Args:
        value_str: Text (or any object) to convert.

    Returns:
        float | None: The parsed number, or ``None`` for non-numeric text and
        for inputs ``float()`` cannot accept at all, such as ``None``.
    """
    try:
        return float(value_str)
    except (TypeError, ValueError):
        # TypeError covers None and other non-string, non-numeric inputs.
        return None

def convert_height(height_str):
    """Convert a feet-and-inches height string to total inches.

    Args:
        height_str: Height such as ``5' 11"``, or the ``'--'`` placeholder
            that ``convert_reach`` also treats as "missing".

    Returns:
        int | None: Total inches, or ``None`` for the ``'--'`` placeholder.

    Raises:
        ValueError: If the text is neither the placeholder nor a
            ``feet'inches`` pair.
    """
    cleaned = height_str.replace('"', '').strip()
    if cleaned == '--':
        # Same missing-value convention as convert_reach.
        return None
    feet, inches = map(int, cleaned.split("'"))
    return feet * 12 + inches

def convert_weight(weight_str):
    """Convert a weight string such as ``135 lbs.`` to a float.

    Args:
        weight_str: Weight text, or the ``'--'`` placeholder that
            ``convert_reach`` also treats as "missing".

    Returns:
        float | None: The weight, or ``None`` for the ``'--'`` placeholder.

    Raises:
        ValueError: If the remaining text is not numeric.
    """
    cleaned = weight_str.replace('lbs.', '').strip()
    if cleaned == '--':
        # Same missing-value convention as convert_reach.
        return None
    return float(cleaned)

def convert_reach(reach_str):
    """Convert a reach string such as ``72"`` to a float.

    Args:
        reach_str: Reach text, or exactly ``'--'`` when the value is missing.

    Returns:
        float | None: The reach with the inch mark removed, or ``None`` when
        the input is exactly ``'--'``.
    """
    missing = reach_str == '--'
    return None if missing else float(reach_str.replace('"', '').strip())

def fighter_stats(fighter_url):
    """Scrape a fighter's physical details and career statistics.

    Two sections of the page are read:

    * the first ``div.b-list__info-box``: every list item is stored under its
      label exactly as shown (including the trailing colon); ``Height:``,
      ``Weight:`` and ``Reach:`` are converted to numbers;
    * ``div.b-list__info-box-left``: every list item is stored as raw text
      under its label with the trailing colon removed.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        pandas.DataFrame: A single-row frame with one column per label found.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
    """
    # A timeout keeps one stalled request from hanging the whole scrape.
    fighter_page = requests.get(fighter_url, timeout=30)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    # Labels whose values are converted from text to numbers.
    converters = {
        'Height:': convert_height,
        'Weight:': convert_weight,
        'Reach:': convert_reach,
    }

    fighter_data = {}
    fighter_physical_stats = fighter_soup.find('div', class_='b-list__info-box')
    if fighter_physical_stats:
        physical_details = fighter_physical_stats.find_all('li', class_='b-list__box-list-item')
        for detail in physical_details:
            label = detail.find('i', class_='b-list__box-item-title').get_text(strip=True)
            value = detail.get_text(strip=True).replace(label, '').strip()

            converter = converters.get(label)
            if converter is None:
                fighter_data[label] = value
            elif value == '--':
                # '--' is the placeholder convert_reach treats as missing; apply
                # it to height and weight too so one gap does not abort the scrape.
                fighter_data[label] = None
            else:
                fighter_data[label] = converter(value)

    fighter_career_stats = fighter_soup.find('div', class_='b-list__info-box-left')
    if fighter_career_stats:
        career_stats = fighter_career_stats.find_all('li', class_='b-list__box-list-item')
        for stat in career_stats:
            label_text = stat.find('i', class_='b-list__box-item-title').get_text(strip=True)
            value = stat.get_text(strip=True).replace(label_text, '').strip()
            # Career labels are stored without the trailing colon.
            fighter_data[label_text.rstrip(':')] = value

    return pd.DataFrame([fighter_data])

In [6]:
# Define a function to get opponent stats
def get_opponent_stats(opponent_urls):
    """Scrape the stats of several opponents into one prefixed DataFrame.

    Args:
        opponent_urls: Iterable of fighter-page URLs.

    Returns:
        pandas.DataFrame: One row per URL, in input order, with every column
        name prefixed by ``"Opponent's "``. An empty frame is returned when
        no URLs are given.
    """
    # add_prefix renames the columns; the previous code wrapped the frame in a
    # (frame, prefix) tuple, which pd.concat cannot combine.
    opponent_stats_list = [
        fighter_stats(opponent_url).add_prefix("Opponent's ")
        for opponent_url in opponent_urls
    ]

    if not opponent_stats_list:
        # pd.concat raises on an empty list, so return an empty frame instead.
        return pd.DataFrame()

    # Combine all opponent_stats DataFrames into a single DataFrame
    return pd.concat(opponent_stats_list, ignore_index=True)

In [7]:
#CREATE FUNCTION TO SCRAPE PAST FIGHT RESULTS FROM A FIGHTER'S URL PAGE
def past_fights(fighter_url):
    """Build a chronological fight-history frame for one fighter.

    Steps:

    1. Scrape every fight linked from the fighter's page with
       ``scrape_fight_data`` and stack the results.
    2. Attach event name, event date, method of victory, round and time from
       the fight table on the fighter's page (row ``idx`` of the table, after
       skipping its first ``<tr>``, is written to row ``idx`` of the frame).
    3. Where the fighter appears in the ``Opponent`` column, swap the two
       sides (names, all statistics) and flip ``W``/``L`` so every row is
       from this fighter's point of view.
    4. Sort by event date (oldest first), then add the running win/loss
       streak and the number of days since the previous fight.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        pandas.DataFrame: One row per fight, oldest first, keeping the
        original row labels.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
    """
    fighter_page = requests.get(fighter_url, timeout=30)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    fighter_name = fighter_soup.find('span', class_='b-content__title-highlight').get_text(strip=True)
    fight_urls = get_fighter_fight_urls(fighter_url)

    all_fight_dfs = [scrape_fight_data(fight_url) for fight_url in fight_urls]
    combined_df = pd.concat(all_fight_dfs, ignore_index=True)

    fight_table = fighter_soup.find('tbody', class_='b-fight-details__table-body')
    each_fight_details = fight_table.find_all('tr')

    # The first <tr> of the table body is skipped; the remaining rows are
    # matched to the scraped fights by position.
    for idx, fight in enumerate(each_fight_details[1:]):
        event_info_elements = fight.find_all('td', class_='b-fight-details__table-col l-page_align_left')
        round_time_elements = fight.find_all('td', class_='b-fight-details__table-col')

        event_paragraphs = event_info_elements[1].find_all('p')
        combined_df.at[idx, 'Event Name'] = event_paragraphs[0].text.strip()
        combined_df.at[idx, 'Event Date'] = event_paragraphs[1].text.strip()
        combined_df.at[idx, 'Method of Victory'] = event_info_elements[2].find('p').text.strip()
        combined_df.at[idx, 'Rounds'] = round_time_elements[-2].find('p').text.strip()
        combined_df.at[idx, 'Time'] = round_time_elements[-1].find('p').text.strip()

    # Statistics that exist as a "<stat>" / "Opponent <stat>" column pair.
    paired_stats = [
        'KD', 'Sig. str.', 'Sig. str. %', 'Total str.', 'Td', 'Td %',
        'Sub. att', 'Rev.', 'Ctrl',
    ]
    flipped_outcome = {'W': 'L', 'L': 'W'}

    # `row` is a snapshot of the original values, so reading from it while
    # writing to combined_df performs a true swap.
    for index, row in combined_df.iterrows():
        if row['Opponent'] != fighter_name:
            continue
        combined_df.at[index, 'Fighter'] = row['Opponent']
        combined_df.at[index, 'Opponent'] = row['Fighter']
        for stat in paired_stats:
            combined_df.at[index, stat] = row[f'Opponent {stat}']
            combined_df.at[index, f'Opponent {stat}'] = row[stat]
        if row['Outcome'] in flipped_outcome:
            combined_df.at[index, 'Outcome'] = flipped_outcome[row['Outcome']]

    # Sort chronologically BEFORE deriving streaks. The streak loop used to run
    # on the unsorted frame, so it ran in scrape order rather than in time order.
    combined_df['Event Date'] = pd.to_datetime(combined_df['Event Date'])
    combined_df = combined_df.sort_values(by='Event Date', ascending=True)

    # NOTE(review): the streak on each row includes that row's own outcome, so
    # it is directly tied to 'Outcome' — confirm whether it should be shifted by
    # one fight before being used as a model feature.
    current_streak = []
    streak_type = []
    streak_length = 0
    previous_kind = 'None'
    for outcome in combined_df['Outcome']:
        kind = {'W': 'Win', 'L': 'Loss'}.get(outcome, 'None')
        if kind == 'None':
            streak_length = 0  # draws / no-contests / missing results reset the streak
        elif kind == previous_kind:
            streak_length += 1
        else:
            streak_length = 1
        previous_kind = kind
        current_streak.append(streak_length)
        streak_type.append(kind)

    combined_df['Current Streak'] = current_streak
    combined_df['Streak Type'] = streak_type

    # Assign the filled column back instead of a chained in-place fillna, which
    # may act on a temporary copy and leave the frame unchanged.
    days_between = (combined_df['Event Date'] - combined_df['Event Date'].shift(1)).dt.days
    combined_df['Days since last fight'] = days_between.fillna(0)

    return combined_df



In [8]:
#CREATE A FUNCTION TO CLEAN THE DATAFRAME AND PREPARE FOR DATA ANALYSIS
def clean_past_fights(fighter_url):
    """Combine a fighter's history with opponent profiles and parse the text stats.

    The fight history from ``past_fights`` is joined column-wise (by row
    label) with one ``fighter_stats`` row per opponent, taken from the
    opponent links in the fight table on the fighter's page. Then:

    * ``Opponent's Age`` is derived from ``Opponent's DOB:`` as whole years
      up to today (missing or unparseable dates give NaN);
    * ``"X of Y"`` strike/takedown strings are split into integer
      ``... Landed`` / ``... Attempted`` columns;
    * percentage strings are converted to floats (``'---'`` becomes NaN).

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        pandas.DataFrame: The cleaned, combined frame.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
    """
    past_fights_df = past_fights(fighter_url)

    fighter_page = requests.get(fighter_url, timeout=30)
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')

    fight_table = fighter_soup.find('tbody', class_='b-fight-details__table-body')
    each_fight_details = fight_table.find_all('tr')

    # The second <p> of the first left-aligned cell holds the opponent's link.
    opponent_urls = []
    for fight in each_fight_details[1:]:
        event_info_elements = fight.find_all('td', class_='b-fight-details__table-col l-page_align_left')
        fighters = event_info_elements[0].find_all('p')
        opponent_tag = fighters[1].find('a', class_='b-link b-link_style_black')
        opponent_urls.append(opponent_tag.get('href'))

    opponent_stats_list = [fighter_stats(opponent_url) for opponent_url in opponent_urls]
    all_opponent_stats_df = pd.concat(opponent_stats_list, ignore_index=True)

    # Drop the unlabeled column when present; errors='ignore' avoids a
    # KeyError for pages that do not produce it.
    all_opponent_stats_df = all_opponent_stats_df.drop(columns=[''], errors='ignore')
    all_opponent_stats_df = all_opponent_stats_df.add_prefix("Opponent's ")

    final_df = pd.concat([past_fights_df, all_opponent_stats_df], axis=1)

    # Whole years between the date of birth and today. Computed from days because
    # recent pandas versions reject casting timedeltas to a year unit;
    # errors='coerce' turns placeholder dates into NaN ages.
    opponent_dob = pd.to_datetime(final_df["Opponent's DOB:"], errors='coerce')
    final_df["Opponent's Age"] = (pd.to_datetime('today') - opponent_dob).dt.days // 365.2425

    # "<landed> of <attempted>" columns -> two integer columns each.
    # NOTE(review): 'Opponent Td' is not split here although every other stat
    # has an opponent counterpart — confirm whether that omission is intended.
    landed_attempted_sources = [
        ('Sig. str.', 'Sig.'),
        ('Opponent Sig. str.', 'Opponent Sig.'),
        ('Total str.', 'Total'),
        ('Opponent Total str.', 'Opponent Total'),
        ('Td', 'Td'),
    ]
    for source_column, prefix in landed_attempted_sources:
        landed_column = f'{prefix} Landed'
        attempted_column = f'{prefix} Attempted'
        final_df[[landed_column, attempted_column]] = final_df[source_column].str.split(' of ', expand=True)
        final_df[landed_column] = final_df[landed_column].astype(int)
        final_df[attempted_column] = final_df[attempted_column].astype(int)

    def _percent_to_float(column):
        # '---' marks "no attempts"; everything else looks like '55%'.
        return column.replace('---', float('nan')).str.rstrip('%').astype(float)

    for percent_column in ['Sig. str. %', 'Opponent Sig. str. %', 'Td %', 'Opponent Td %']:
        final_df[percent_column] = _percent_to_float(final_df[percent_column])

    # Opponent career percentages get a "(%)"-suffixed numeric column and the
    # raw text column is removed afterwards.
    opponent_percent_columns = [
        "Opponent's Str. Def", "Opponent's TD Def.",
        "Opponent's Str. Acc.", "Opponent's TD Acc.",
    ]
    for raw_column in opponent_percent_columns:
        final_df[f"{raw_column}(%)"] = _percent_to_float(final_df[raw_column])

    final_df = final_df.drop(columns=opponent_percent_columns)

    return final_df


In [18]:
def preprocessing_data(fighter_url):
    """Return a model-ready frame for one fighter.

    Takes the output of ``clean_past_fights``, removes the identifier and
    raw-text columns listed below, and one-hot encodes the opponent's stance
    and the streak type.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        pandas.DataFrame: The remaining columns plus the dummy columns
        created for ``Opponent's STANCE:`` and ``Streak Type``.
    """
    unused_columns = [
        'Fight URL', 'Event Name', 'Event Date', 'Fighter', 'Opponent',
        'Sig. str.', 'Opponent Sig. str.', 'Total str.', 'Opponent Total str.',
        'Td', 'Opponent Td', 'Ctrl', 'Time', 'Opponent Ctrl',
        "Opponent's DOB:", "Td %", "Opponent Td %", 'Method of Victory',
    ]
    categorical_columns = ["Opponent's STANCE:", "Streak Type"]

    trimmed = clean_past_fights(fighter_url).drop(columns=unused_columns)
    return pd.get_dummies(trimmed, columns=categorical_columns)

In [14]:
# Build the model-ready frame for a single fighter and preview its first rows.
omalley_df = preprocessing_data(
    'http://www.ufcstats.com/fighter-details/b50a426a33da0012'
)
omalley_df.head(n=5)

Unnamed: 0,KD,Opponent KD,Sig. str. %,Opponent Sig. str. %,Sub. att,Opponent Sub. att,Rev.,Opponent Rev.,Outcome,Rounds,...,Opponent's Str. Def(%),Opponent's TD Def.(%),Opponent's Str. Acc.(%),Opponent's TD Acc.(%),Opponent's STANCE:_Orthodox,Opponent's STANCE:_Southpaw,Opponent's STANCE:_Switch,Streak Type_Loss,Streak Type_None,Streak Type_Win
11,2,0,60.0,42.0,0,0,0,0,W,1,...,40.0,0.0,42.0,25.0,1,0,0,0,0,1
10,0,0,55.0,30.0,0,0,0,0,W,3,...,50.0,44.0,32.0,22.0,1,0,0,0,0,1
9,0,0,56.0,41.0,3,0,1,0,W,3,...,49.0,58.0,44.0,53.0,1,0,0,0,0,1
8,0,0,81.0,0.0,0,0,0,0,W,1,...,64.0,42.0,45.0,47.0,0,1,0,0,0,1
7,1,1,65.0,65.0,0,0,0,0,W,1,...,68.0,86.0,29.0,33.0,1,0,0,0,0,1


In [19]:
def predict_outcome(fighter_url):
    """Fit a logistic-regression outcome model on one fighter's history.

    The frame from ``preprocessing_data`` is stripped of rows with missing
    values and split 80/20 (``random_state=42``). A multinomial logistic
    regression is fitted on the training part, and accuracy plus the
    confusion matrix on the held-out part are printed.

    ``LogisticRegression`` and ``confusion_matrix`` come from the notebook's
    import cell.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        sklearn.linear_model.LogisticRegression: The fitted model.
    """
    # dropna() returns a new frame, so the preprocessed data is not mutated.
    fighter_df = preprocessing_data(fighter_url).dropna()

    X = fighter_df.drop(columns=['Outcome'])
    y = fighter_df['Outcome']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)

    return model

In [20]:
# Top flyweights: one named URL per fighter
Pantoja = 'http://www.ufcstats.com/fighter-details/a0f0004aadf10b71'
Moreno = 'http://www.ufcstats.com/fighter-details/792be9a24df82ed6'
Albazi = 'http://www.ufcstats.com/fighter-details/6d35bf94f7d30241'
Kara_France = 'http://www.ufcstats.com/fighter-details/853eb0dd5c0e2149'
Royval = 'http://www.ufcstats.com/fighter-details/6e15f63b6c2e2c15'
Figgy = 'http://www.ufcstats.com/fighter-details/aa72b0f831d0bfe5'
Perez = 'http://www.ufcstats.com/fighter-details/ab2b4ff41d6ebe0f'
Schnell = 'http://www.ufcstats.com/fighter-details/67c1d46f4ed16f9e'
Elliot = 'http://www.ufcstats.com/fighter-details/c96d9178c9ed9e62'
Kape = 'http://www.ufcstats.com/fighter-details/5d1b7e3dd9e11074'

# Same URLs, in the same order, as the names defined above.
flyweight_urls = [
    Pantoja, Moreno, Albazi, Kara_France, Royval,
    Figgy, Perez, Schnell, Elliot, Kape,
]

# Preprocess every fighter's history; `fighter_df` keeps the last frame processed.
flyweight_dfs = []
for fighter_url in flyweight_urls:
    fighter_df = preprocessing_data(fighter_url)
    flyweight_dfs.append(fighter_df)

# Stack the per-fighter frames into one frame and preview it.
flyweight_df = pd.concat(flyweight_dfs, ignore_index=True)
flyweight_df.head()

AttributeError: 'NoneType' object has no attribute 'text'

In [None]:
# Top 10 bantamweight fighter URLs
bantamweight_urls = [
'http://www.ufcstats.com/fighter-details/d661ce4da776fc20',
'http://www.ufcstats.com/fighter-details/b50a426a33da0012',
'http://www.ufcstats.com/fighter-details/cb696ebfb6598724',
'http://www.ufcstats.com/fighter-details/c03520b5c88ed6b4',
'http://www.ufcstats.com/fighter-details/056c493bbd76a918',
'http://www.ufcstats.com/fighter-details/65f09bacd3957381',
'http://www.ufcstats.com/fighter-details/7c7332319c14094c',
'http://www.ufcstats.com/fighter-details/efb96bf3e9ada36f',
'http://www.ufcstats.com/fighter-details/05339613bf8e9808',
'http://www.ufcstats.com/fighter-details/6bd02119599741a4']

bantamweight_dfs = []

# Loop through the fighter URLs and preprocess the data
for fighter_url in bantamweight_urls:
    bantamweight_df = preprocessing_data(fighter_url)
    # Append the frame just computed. The previous code appended `fighter_df`,
    # a variable left over from the flyweight cell, so the list held the same
    # stale frame once per URL.
    bantamweight_dfs.append(bantamweight_df)

# Concatenate all fighter DataFrames into a single DataFrame
bantamweight_df = pd.concat(bantamweight_dfs, ignore_index=True)

# Display the combined DataFrame
bantamweight_df.head()


In [None]:
# Featherweights: one named URL per fighter
Volk = 'http://www.ufcstats.com/fighter-details/e1248941344b3288'
Rodriguez = 'http://www.ufcstats.com/fighter-details/cbf5e6f231b55443'
Emmett = 'http://www.ufcstats.com/fighter-details/fba03cd6cc28dc41'
Topuria = 'http://www.ufcstats.com/fighter-details/54f64b5e283b0ce7'
Kattar = 'http://www.ufcstats.com/fighter-details/751de04455cfaac0'
Chikadze = 'http://www.ufcstats.com/fighter-details/9560ff14eb3129f7'
Ortega = 'http://www.ufcstats.com/fighter-details/def8166ff24bd237'
Jung = 'http://www.ufcstats.com/fighter-details/c451d67c09c55418'
Holloway = 'http://www.ufcstats.com/fighter-details/150ff4cc642270b9'
Evloev = 'http://www.ufcstats.com/fighter-details/76e2870ffafbe38f'
Allen = 'http://www.ufcstats.com/fighter-details/040a74bb0a465c54'

# Same URLs, in the same order, as the names defined above.
featherweight_urls = [
    Volk, Rodriguez, Emmett, Topuria, Kattar, Chikadze,
    Ortega, Jung, Holloway, Evloev, Allen,
]

# Preprocess every fighter's history.
featherweight_dfs = [preprocessing_data(fighter_url) for fighter_url in featherweight_urls]

# Stack the per-fighter frames into one frame and preview it.
featherweight_df = pd.concat(featherweight_dfs, ignore_index=True)
featherweight_df.head()


In [None]:
# Lightweights: one named URL per fighter
Makhachev = 'http://www.ufcstats.com/fighter-details/275aca31f61ba28c'
Oliveira = 'http://www.ufcstats.com/fighter-details/07225ba28ae309b6'
Gaethje = 'http://www.ufcstats.com/fighter-details/9e8f6c728eb01124'
Poirier = 'http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0'
Fiziev = 'http://www.ufcstats.com/fighter-details/c814b4c899793af6'
Chandler = 'http://www.ufcstats.com/fighter-details/4b93a88f3b1de35b'
Dariush = 'http://www.ufcstats.com/fighter-details/08af939f41b5a57b'
Gamrot = 'http://www.ufcstats.com/fighter-details/72db2a14ffa73ece'
Tsarukyan = 'http://www.ufcstats.com/fighter-details/eae48ff31db420c2'
Dawson = 'http://www.ufcstats.com/fighter-details/99bd51917728c25d'
Hooker = 'http://www.ufcstats.com/fighter-details/193b9d1858bc4df3'

# Same URLs, in the same order, as the names defined above.
lightweight_urls = [
    Makhachev, Oliveira, Gaethje, Poirier, Fiziev, Chandler,
    Dariush, Gamrot, Tsarukyan, Dawson, Hooker,
]

# Preprocess every fighter's history.
lightweight_dfs = [preprocessing_data(fighter_url) for fighter_url in lightweight_urls]

# Stack the per-fighter frames into one frame and preview it.
lightweight_df = pd.concat(lightweight_dfs, ignore_index=True)
lightweight_df.head()


In [None]:
# Welterweights
Edwards = 'http://www.ufcstats.com/fighter-details/f1fac969a1d70b08'
Usman = 'http://www.ufcstats.com/fighter-details/f1b2aa7853d1ed6e'
Covington = 'http://www.ufcstats.com/fighter-details/dc9572dd6ec74859'
Thompson = 'http://www.ufcstats.com/fighter-details/4a28cb716c19157a'
Muhammad = 'http://www.ufcstats.com/fighter-details/b1b0729d27936f2f'
Rakhmanov = 'http://www.ufcstats.com/fighter-details/01afe0916a40c7c5'
Burns = 'http://www.ufcstats.com/fighter-details/23024fdfc966410a'
Neal = 'http://www.ufcstats.com/fighter-details/b997be68943010fc'
Brady = 'http://www.ufcstats.com/fighter-details/45f7cb591c3ab00b'
Luque = 'http://www.ufcstats.com/fighter-details/6d4b63c767106d3a'

welterweight_urls= [
'http://www.ufcstats.com/fighter-details/f1fac969a1d70b08',
'http://www.ufcstats.com/fighter-details/f1b2aa7853d1ed6e',
'http://www.ufcstats.com/fighter-details/dc9572dd6ec74859',
'http://www.ufcstats.com/fighter-details/4a28cb716c19157a',
'http://www.ufcstats.com/fighter-details/b1b0729d27936f2f',
'http://www.ufcstats.com/fighter-details/01afe0916a40c7c5',
'http://www.ufcstats.com/fighter-details/23024fdfc966410a',
'http://www.ufcstats.com/fighter-details/b997be68943010fc',
'http://www.ufcstats.com/fighter-details/45f7cb591c3ab00b',
'http://www.ufcstats.com/fighter-details/6d4b63c767106d3a']

welterweight_dfs = []

# Loop through the fighter URLs and preprocess the data
for fighter_url in welterweight_urls:
    welterweight_df = preprocessing_data(fighter_url)
    welterweight_dfs.append(welterweight_df)

# Concatenate all fighter DataFrames into a single DataFrame.
# The result was previously stored under the misspelled name
# `welterweightt_df`, leaving `welterweight_df` bound to the last fighter's
# frame only.
welterweight_df = pd.concat(welterweight_dfs, ignore_index=True)

# Display the combined DataFrame
welterweight_df.head()


In [None]:
# Middleweights: one named URL per fighter
Adesanya = 'http://www.ufcstats.com/fighter-details/1338e2c7480bdf9e'
Du_Plessis = 'http://www.ufcstats.com/fighter-details/0d7b51c9d2649a6e'
Whittaker = 'http://www.ufcstats.com/fighter-details/e1147d3d2dabe1ce'
VEttori = 'http://www.ufcstats.com/fighter-details/7acbb0972e75281a'
Costa = 'http://www.ufcstats.com/fighter-details/2e5c2aa8e4ab9d82'
Brunson = 'http://www.ufcstats.com/fighter-details/b1a3e0aca758b322'
Dolidze = 'http://www.ufcstats.com/fighter-details/327d5f279895110d'
Hermansson = 'http://www.ufcstats.com/fighter-details/0a1942069c9ad6b6'
# Rebinds `Allen`, which the featherweight cell also assigns.
Allen = 'http://www.ufcstats.com/fighter-details/2f181c0467965b98'
Canonnier = 'http://www.ufcstats.com/fighter-details/13a0275fa13c4d26'
Strickland = 'http://www.ufcstats.com/fighter-details/0d8011111be000b2'

# Same URLs, in the same order, as the names defined above.
middleweight_urls = [
    Adesanya, Du_Plessis, Whittaker, VEttori, Costa, Brunson,
    Dolidze, Hermansson, Allen, Canonnier, Strickland,
]

# Preprocess every fighter's history.
middleweight_dfs = [preprocessing_data(fighter_url) for fighter_url in middleweight_urls]

# Stack the per-fighter frames into one frame and preview it.
middleweight_df = pd.concat(middleweight_dfs, ignore_index=True)
middleweight_df.head()

In [None]:
# Light heavyweights: one named URL per fighter
Hill = 'http://www.ufcstats.com/fighter-details/5444c5a201d3ee5a'
Pereira = 'http://www.ufcstats.com/fighter-details/e5549c82bfb5582d'
Walker = 'http://www.ufcstats.com/fighter-details/c21f26bbde777573'
Blachowicz = 'http://www.ufcstats.com/fighter-details/99df7d0a2a08a8a8'
Rakic = 'http://www.ufcstats.com/fighter-details/333b9e5c723ac873'
Krylov = 'http://www.ufcstats.com/fighter-details/1091d4d957141094'
Smith = 'http://www.ufcstats.com/fighter-details/d4c9dcd330403612'
Ozdemir = 'http://www.ufcstats.com/fighter-details/0845c81e37d3bcb3'
Spann = 'http://www.ufcstats.com/fighter-details/a67f5afa8d6a1b80'
Prochazka = 'http://www.ufcstats.com/fighter-details/009341ed974bad72'
Ankalaev = 'http://www.ufcstats.com/fighter-details/d802174b0c0c1f4e'

# Same URLs, in the same order, as the names defined above.
light_heavyweight_urls = [
    Hill, Pereira, Walker, Blachowicz, Rakic, Krylov,
    Smith, Ozdemir, Spann, Prochazka, Ankalaev,
]

# Preprocess every fighter's history.
light_heavyweight_dfs = [
    preprocessing_data(fighter_url) for fighter_url in light_heavyweight_urls
]

# Stack the per-fighter frames into one frame and preview it.
light_heavyweight_df = pd.concat(light_heavyweight_dfs, ignore_index=True)
light_heavyweight_df.head()



In [None]:
# Heavyweights

Jones = 'http://www.ufcstats.com/fighter-details/07f72a2a7591b409'
Pavlovich = 'http://www.ufcstats.com/fighter-details/f14cf73e51b29254'
Gane = 'http://www.ufcstats.com/fighter-details/787bb1f087ccff8a'
Miocic = 'http://www.ufcstats.com/fighter-details/d28dee5c705991df'
Aspinall = 'http://www.ufcstats.com/fighter-details/399afbabc02376b5'
Blaydes = 'http://www.ufcstats.com/fighter-details/fa6796c55d6c5440'
Tuivasa = 'http://www.ufcstats.com/fighter-details/c62fbc117d57b943'
Spivac = 'http://www.ufcstats.com/fighter-details/e2f6b2769aaedd6c'
Volkov = 'http://www.ufcstats.com/fighter-details/279566840aa55bf2'
Almeida = 'http://www.ufcstats.com/fighter-details/41e83a89929d1327'
Lewis = 'http://www.ufcstats.com/fighter-details/d3df1add9d9a7efb'

# The list previously repeated the `Name = '...'` assignments inside the
# brackets, which is a SyntaxError; reference the names defined above instead.
heavyweight_urls = [
    Jones, Pavlovich, Gane, Miocic, Aspinall, Blaydes,
    Tuivasa, Spivac, Volkov, Almeida, Lewis,
]


heavyweight_dfs = []

# Loop through the fighter URLs and preprocess the data
for fighter_url in heavyweight_urls:
    heavyweight_df = preprocessing_data(fighter_url)
    heavyweight_dfs.append(heavyweight_df)

# Concatenate all fighter DataFrames into a single DataFrame
heavyweight_df = pd.concat(heavyweight_dfs, ignore_index=True)

# Display the combined DataFrame
heavyweight_df.head()

In [None]:
# Stack the per-division frames, lightest division first, into one frame.
weight_class_frames = [
    flyweight_df,
    bantamweight_df,
    featherweight_df,
    lightweight_df,
    welterweight_df,
    middleweight_df,
    light_heavyweight_df,
    heavyweight_df,
]
all_weight_df = pd.concat(weight_class_frames, ignore_index=True)

all_weight_df.head(n=10)

In [None]:
# Work on a copy so re-running this cell always starts from the same
# `bantamweight_df` (the previous in-place dropna changed it on every run).
bantamweight_model_df = bantamweight_df.copy()

# The one-hot columns are created per fighter, so a fighter whose fights never
# produced a given category has NaN in that column after the frames are
# concatenated. Treat those as 0 ("not this category") so dropna does not
# discard every row of that fighter.
indicator_columns = [
    column for column in bantamweight_model_df.columns
    if column.startswith(("Opponent's STANCE:_", "Streak Type_"))
]
if indicator_columns:
    bantamweight_model_df[indicator_columns] = (
        bantamweight_model_df[indicator_columns].fillna(0).astype(int)
    )

# Remaining NaNs are genuinely missing measurements; drop those rows.
bantamweight_model_df = bantamweight_model_df.dropna()

X = bantamweight_model_df.drop(columns=['Outcome'])
y = bantamweight_model_df['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LogisticRegression and confusion_matrix come from the notebook's import cell.
model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [None]:

def predict_outcome_random_forest(fighter_url):
    """Fit and evaluate a random-forest outcome model on one fighter's history.

    Rows with missing values are removed from the ``preprocessing_data``
    output, the data is split 80/20 with ``random_state=42``, and a
    ``RandomForestClassifier(random_state=42)`` is trained on the larger part.
    Accuracy, the confusion matrix and the classification report for the
    held-out part are printed.

    Args:
        fighter_url: URL of the fighter's page.

    Returns:
        tuple: ``(model, accuracy, conf_matrix, report)``.
    """
    samples = preprocessing_data(fighter_url).dropna()
    features = samples.drop(['Outcome'], axis=1)
    target = samples['Outcome']

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(random_state=42)
    model.fit(features_train, target_train)
    predicted = model.predict(features_test)

    accuracy = accuracy_score(target_test, predicted)
    conf_matrix = confusion_matrix(target_test, predicted)
    report = classification_report(target_test, predicted)

    for line in (
        "Random Forest Classifier",
        f"Accuracy: {accuracy}",
        "Confusion Matrix:",
        conf_matrix,
        report,
    ):
        print(line)

    return model, accuracy, conf_matrix, report


In [None]:
# `Sean_Omalley` was not defined by any earlier cell, so this call failed on a
# fresh kernel. It is bound here to the same fighter URL that the
# `omalley_df` cell passes to preprocessing_data.
Sean_Omalley = 'http://www.ufcstats.com/fighter-details/b50a426a33da0012'
predict_outcome_random_forest(Sean_Omalley)

Random Forest Classifier
Accuracy: 0.6666666666666666
Confusion Matrix:
[[0 1]
 [0 2]]
              precision    recall  f1-score   support

          NC       0.00      0.00      0.00         1
           W       0.67      1.00      0.80         2

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


(RandomForestClassifier(random_state=42),
 0.6666666666666666,
 array([[0, 1],
        [0, 2]], dtype=int64),
 '              precision    recall  f1-score   support\n\n          NC       0.00      0.00      0.00         1\n           W       0.67      1.00      0.80         2\n\n    accuracy                           0.67         3\n   macro avg       0.33      0.50      0.40         3\nweighted avg       0.44      0.67      0.53         3\n')