In [3]:
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [4]:
# Collect the fight-detail URLs linked from a fighter's profile page.
def get_fighter_fight_urls(fighter_url, timeout=30):
    """Return the fight-detail URLs referenced on a fighter's page.

    The page is downloaded, every ``<tr class="b-fight-details__table-row">``
    is inspected, and the quoted fight-details URL inside the row's
    ``onclick`` attribute (if any) is extracted.

    Parameters
    ----------
    fighter_url : str
        URL of the fighter page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on a stalled connection.

    Returns
    -------
    list of str
        Fight-detail URLs in the order their rows appear on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    page = requests.get(fighter_url, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no fights.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Dots are escaped so they only match a literal '.', and the scheme may be
    # either http or https.
    link_pattern = re.compile(r"'(https?://www\.ufcstats\.com/fight-details/.+?)'")

    fight_urls = []
    for fight_row in soup.find_all('tr', class_='b-fight-details__table-row'):
        onclick_value = fight_row.get('onclick')
        if not onclick_value:
            # Rows without an onclick handler carry no fight link.
            continue
        match = link_pattern.search(onclick_value)
        if match:
            fight_urls.append(match.group(1))

    return fight_urls


In [5]:
# Scrape one fight page into a DataFrame with one row per fighter/opponent pair.
def scrape_fight_data(fight_url, timeout=30):
    """Download a fight page and return its stats as a DataFrame.

    The first ``<tbody class="b-fight-details__table-body">`` on the page is
    read. For every table row, the fighter names come from the
    ``b-link_style_black`` anchors and each stat comes from the ``<p>``
    elements of cells 1-9 (cell 0 is skipped). Fighters are then paired two
    by two: the first of each pair fills the plain columns, the second fills
    the ``Opponent ...`` columns, and ``Outcome`` is the status text of the
    first fighter of the pair.

    Parameters
    ----------
    fight_url : str
        URL of the fight page; also stored in the ``Fight URL`` column.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``Fighter, Opponent, KD, Opponent KD, ..., Fight URL,
        Outcome``. The frame is empty (but has these columns) when the page
        contains no stats table. A trailing fighter without a partner is
        dropped.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    # Stat columns in the order of table cells 1..9.
    stat_names = [
        'KD', 'Sig. str.', 'Sig. str. %', 'Total str.',
        'Td', 'Td %', 'Sub. att', 'Rev.', 'Ctrl',
    ]
    result_columns = ['Fighter', 'Opponent']
    for stat_name in stat_names:
        result_columns += [stat_name, 'Opponent ' + stat_name]
    result_columns += ['Fight URL', 'Outcome']

    fight_page = requests.get(fight_url, timeout=timeout)
    fight_page.raise_for_status()
    fight_soup = BeautifulSoup(fight_page.content, 'html.parser')

    fight_table = fight_soup.find('tbody', class_='b-fight-details__table-body')
    if fight_table is None:
        # No stats table on the page: return an empty, correctly shaped frame
        # instead of failing with AttributeError on None.
        return pd.DataFrame(columns=result_columns)

    # Outcome text per fighter block, or None when the status tag is missing.
    outcomes = []
    for fighter_div in fight_soup.find_all('div', class_='b-fight-details__person'):
        outcome_tag = fighter_div.find('i', class_='b-fight-details__person-status')
        outcomes.append(outcome_tag.get_text(strip=True) if outcome_tag is not None else None)

    records = []
    for row in fight_table.find_all('tr', class_='b-fight-details__table-row'):
        fighters = [
            name_tag.get_text(strip=True)
            for name_tag in row.find_all('a', class_='b-link_style_black')
        ]
        # Look the cells up once per row rather than once per stat.
        cells = row.find_all('td')
        stats = {
            stat_name: [p.get_text(strip=True) for p in cells[position].find_all('p')]
            for position, stat_name in enumerate(stat_names, start=1)
        }

        for i, fighter in enumerate(fighters):
            record = {'Fighter': fighter}
            for stat_name in stat_names:
                record[stat_name] = stats[stat_name][i]
            # Tolerate a page that lists fewer status tags than fighters.
            record['Outcome'] = outcomes[i] if i < len(outcomes) else None
            records.append(record)

    # Pair consecutive fighters (0 with 1, 2 with 3, ...). zip() stops at the
    # shorter slice, so an unpaired trailing fighter is ignored.
    paired_rows = []
    for fighter_rec, opponent_rec in zip(records[0::2], records[1::2]):
        paired = [fighter_rec['Fighter'], opponent_rec['Fighter']]
        for stat_name in stat_names:
            paired += [fighter_rec[stat_name], opponent_rec[stat_name]]
        paired += [fight_url, fighter_rec['Outcome']]
        paired_rows.append(paired)

    return pd.DataFrame(paired_rows, columns=result_columns)


In [7]:
# Build a DataFrame of a fighter's fights, always from that fighter's perspective.
def past_fights(fighter_url, timeout=30):
    """Return every scraped fight of a fighter, oriented to that fighter.

    The fighter's name is read from the ``b-content__title-highlight`` span
    of the fighter page. Each URL returned by ``get_fighter_fight_urls`` is
    scraped with ``scrape_fight_data`` and the results are concatenated.
    Whenever the fighter appears in the ``Opponent`` column, every
    ``X`` / ``Opponent X`` column pair is swapped and a ``W`` / ``L`` outcome
    is flipped, so the fighter always ends up in the ``Fighter`` column.
    Other outcome values are left as they are.

    Parameters
    ----------
    fighter_url : str
        URL of the fighter page.
    timeout : float, optional
        Seconds to wait for the fighter page request made by this function.
        It is not forwarded to the helper functions.

    Returns
    -------
    pandas.DataFrame
        One row per scraped fight. An empty DataFrame is returned when no
        fight could be scraped.

    Raises
    ------
    requests.HTTPError
        If the fighter page answers with an error status (4xx/5xx).
    """
    fighter_page = requests.get(fighter_url, timeout=timeout)
    fighter_page.raise_for_status()
    fighter_soup = BeautifulSoup(fighter_page.content, 'html.parser')
    fighter_name = fighter_soup.find('span', class_='b-content__title-highlight').get_text(strip=True)

    all_fight_dfs = [
        scrape_fight_data(fight_url)
        for fight_url in get_fighter_fight_urls(fighter_url)
    ]
    # pd.concat raises ValueError on an empty list, so handle "nothing
    # scraped" explicitly; empty frames are skipped before concatenating.
    non_empty_dfs = [fight_df for fight_df in all_fight_dfs if not fight_df.empty]
    if not non_empty_dfs:
        return all_fight_dfs[0].copy() if all_fight_dfs else pd.DataFrame()

    combined_df = pd.concat(non_empty_dfs, ignore_index=True)

    column_pairs = [
        ('Fighter', 'Opponent'),
        ('KD', 'Opponent KD'),
        ('Sig. str.', 'Opponent Sig. str.'),
        ('Sig. str. %', 'Opponent Sig. str. %'),
        ('Total str.', 'Opponent Total str.'),
        ('Td', 'Opponent Td'),
        ('Td %', 'Opponent Td %'),
        ('Sub. att', 'Opponent Sub. att'),
        ('Rev.', 'Opponent Rev.'),
        ('Ctrl', 'Opponent Ctrl'),
    ]

    # Rows where the requested fighter was recorded as the opponent.
    needs_swap = combined_df['Opponent'] == fighter_name
    # Snapshot taken before any column is overwritten, so both halves of each
    # swap read the original values. `.at` only addresses a single cell and
    # cannot be used with a list of columns, hence the column-wise `where`.
    before = combined_df.copy()
    for own_col, opponent_col in column_pairs:
        combined_df[own_col] = before[own_col].where(~needs_swap, before[opponent_col])
        combined_df[opponent_col] = before[opponent_col].where(~needs_swap, before[own_col])

    flip = {'W': 'L', 'L': 'W'}
    flipped_outcome = before['Outcome'].map(lambda outcome: flip.get(outcome, outcome))
    combined_df['Outcome'] = before['Outcome'].where(~needs_swap, flipped_outcome)

    return combined_df


In [18]:
# Scrape all fights linked from this fighter page. past_fights issues one HTTP
# request per fight, so this cell needs network access and may take a while.
fighter_url = 'http://www.ufcstats.com/fighter-details/b50a426a33da0012'
combined_data = past_fights(fighter_url)


In [19]:
# Bare last expression: the notebook renders the scraped DataFrame as this cell's output.
combined_data

Unnamed: 0,Fighter,Opponent,KD,Opponent KD,Sig. str.,Opponent Sig. str.,Sig. str. %,Opponent Sig. str. %,Total str.,Opponent Total str.,...,Td %,Opponent Td %,Sub. att,Opponent Sub. att,Rev.,Opponent Rev.,Ctrl,Opponent Ctrl,Fight URL,Outcome
0,Sean O'Malley,Aljamain Sterling,1,0,25 of 35,17 of 35,71%,48%,26 of 36,24 of 46,...,---,0%,0,0,0,0,0:18,0:34,http://www.ufcstats.com/fight-details/ee796ecb...,W
1,Sean O'Malley,Petr Yan,0,0,84 of 163,58 of 96,51%,60%,91 of 171,97 of 139,...,0%,46%,0,0,0,0,0:02,5:44,http://www.ufcstats.com/fight-details/5c3c4bcc...,W
2,Sean O'Malley,Pedro Munhoz,0,0,25 of 47,26 of 41,53%,63%,25 of 47,26 of 41,...,---,---,0,0,0,0,0:00,0:00,http://www.ufcstats.com/fight-details/39dccca8...,NC
3,Sean O'Malley,Raulian Paiva,1,0,39 of 62,11 of 47,62%,23%,40 of 63,11 of 47,...,---,0%,0,0,0,0,0:02,0:00,http://www.ufcstats.com/fight-details/8c01b7dc...,W
4,Sean O'Malley,Kris Moutinho,1,0,230 of 318,70 of 218,72%,32%,230 of 318,70 of 218,...,---,---,0,0,0,0,0:04,0:09,http://www.ufcstats.com/fight-details/dbcc5b5d...,W
5,Sean O'Malley,Thomas Almeida,2,0,86 of 149,25 of 71,57%,35%,91 of 157,25 of 71,...,---,50%,0,0,0,0,0:05,0:22,http://www.ufcstats.com/fight-details/0aa335db...,W
6,Sean O'Malley,Marlon Vera,0,0,10 of 21,18 of 30,47%,60%,10 of 21,18 of 30,...,---,---,0,0,0,0,0:00,0:11,http://www.ufcstats.com/fight-details/7200343d...,L
7,Sean O'Malley,Eddie Wineland,1,0,13 of 20,4 of 7,65%,57%,13 of 20,4 of 7,...,---,---,0,0,0,0,0:00,0:00,http://www.ufcstats.com/fight-details/58584ccd...,W
8,Sean O'Malley,Jose Quinonez,0,0,18 of 22,0 of 11,81%,0%,18 of 22,0 of 11,...,---,---,0,0,0,0,0:02,0:00,http://www.ufcstats.com/fight-details/cd258394...,W
9,Sean O'Malley,Andre Soukhamthath,0,0,63 of 111,25 of 60,56%,41%,73 of 121,42 of 79,...,---,57%,3,0,1,0,0:55,6:01,http://www.ufcstats.com/fight-details/efe05585...,W
