This web scraper extracts stats of the fighters from future UFC events stored in the official UFC stats website and export the data to a .csv file, it can be used to create predictive models or other data analysis projects.

- Go to: http://ufcstats.com/statistics/events/upcoming
- Copy the link of the event you want to extract the data and paste it below:
- Run all the cells

In [1]:
URL = "http://ufcstats.com/event-details/ce7871949b0ed2bf"

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
def card_event_scraper(URL):
    """
        Receives a card event URL from the Stats UFC page, extracts the info of the card and the links of the matchups
        and return them as two lists.
    """
    # Load HTML file
    req = requests.get(URL)

    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(req.text, "html.parser")
    
    # Extract card info
    card_info = []
    card_info.append(soup.find("span", class_="b-content__title-highlight").text.strip())
    date_location = soup.find_all("li", class_="b-list__box-list-item")
    card_info.append(date_location[0].text.strip())
    card_info.append(date_location[1].text.strip())

    # Extract the list of matchups in the card
    matchups = soup.find_all("tr", class_="b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click")
    # Runs through the list of matchups and extract the third link of each (View matchup)
    links = []
    for matchup in matchups:
        matchup_link = matchup.find_all("a", class_="b-link_style_black")
        links.append(matchup_link[2]['data-link'])
    
    return card_info, links

In [4]:
def matchup_scraper(URL):
    """
        Receives a matchup URL from the Stats UFC page, extracts the info of red corner and blue corner and return it
        as a concatenated list
    """
    # Load HTML file
    req = requests.get(URL)

    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(req.text, "html.parser")

    rc_info = []
    bc_info = []

    # Extract the names (rc = red corner | bc = blue corner)
    names = soup.find_all("a", class_="b-fight-details__table-header-link")
    rc_info.append(names[0].text.strip())
    bc_info.append(names[1].text.strip())

    # Extract stats
    stats = soup.find_all("tr", class_="b-fight-details__table-row-preview") # stats rows
    for stat in stats:
        stats_texts = stat.find_all("td", class_="b-fight-details__table-col")
        rc_info.append(stats_texts[1].text.strip())
        bc_info.append(stats_texts[2].text.strip())

    # Concat rc_info and bc_info
    rc_info.extend(bc_info)
    
    return rc_info

In [5]:
# GENERATE DATAFRAME

columns = [
    # Columns red corner
    '1_Name', '1_Record', '1_Average_Fight_Time', '1_Height', '1_Weight', '1_Reach',
    '1_Style', '1_day_of_birth', '1_Strikes_Landed_p_min', '1_Striking_Accuracy', 
    '1_Strikes_Absorbed_p_min', '1_Defense_Strike', '1_Takedowns_avg_p_15min', 
    '1_Takedown_Accuracy', '1_Takedown_Defense', '1_Submission_avg_p_15min', 
    '1_Last_fight_1', '1_Last_fight_2', '1_Last_fight_3', '1_Last_fight_4', '1_Last_fight_5',
    # Columns blue corner
    '2_Name', '2_Record', '2_Average_Fight_Time', '2_Height', '2_Weight', '2_Reach',
    '2_Style', '2_day_of_birth', '2_Strikes_Landed_p_min', '2_Striking_Accuracy', 
    '2_Strikes_Absorbed_p_min', '2_Defense_Strike', '2_Takedowns_avg_p_15min', 
    '2_Takedown_Accuracy', '2_Takedown_Defense', '2_Submission_avg_p_15min', 
    '2_Last_fight_1', '2_Last_fight_2', '2_Last_fight_3', '2_Last_fight_4', '2_Last_fight_5',
]

# Extracting data
data = []
links = card_event_scraper(URL)[1]
for link in links:
    data.append(matchup_scraper(link))

# Creating dataframe
df_event = pd.DataFrame(data=data, columns=columns)

# Adding card info at the end
card_info = card_event_scraper(URL)[0]
df_event['card_name'] = card_info[0]
df_event['card_date'] = card_info[1]
df_event['card_location'] = card_info[2]

df_event

Unnamed: 0,1_Name,1_Record,1_Average_Fight_Time,1_Height,1_Weight,1_Reach,1_Style,1_day_of_birth,1_Strikes_Landed_p_min,1_Striking_Accuracy,...,2_Takedown_Defense,2_Submission_avg_p_15min,2_Last_fight_1,2_Last_fight_2,2_Last_fight_3,2_Last_fight_4,2_Last_fight_5,card_name,card_date,card_location
0,Jared Cannonier,17-8-0,13:46,"5' 11""",185 lbs.,"77""",Switch,"Mar 16, 1984",4.49,50%,...,100%,0.4,Win - Duncan,Win - Tavares,Win - Tiuliulin,Loss - Ferreira,Win - Njokuani,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
1,Calvin Kattar,23-8-0,14:45,"5' 11""",145 lbs.,"72""",Orthodox,"Mar 26, 1988",4.76,39%,...,59%,1.5,Win - Shore,Win - Errens,Win - Quarantillo,Draw - Blackshear,Loss - Woodson,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
2,Edmen Shahbazyan,13-5-0,7:29,"6' 2""",185 lbs.,"74""",Orthodox,"Nov 20, 1997",3.77,50%,...,76%,0.4,Loss - Petroski,Loss - Almeida,Win - Hanekom,,,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
3,Ismael Bonfim,20-4-0,10:31,"5' 8""",155 lbs.,"71""",Orthodox,"Dec 28, 1995",5.8,54%,...,75%,0.3,Draw - Borshchev,Win - McKinney,Win - Elder,Win - Hassanzada,,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
4,Rodolfo Vieira,10-2-0,8:12,"6' 0""",185 lbs.,"73""",Orthodox,"Sep 25, 1989",3.19,54%,...,80%,1.7,Win - Budka,Win - Fremd,Loss - Malkoun,Loss - Pereira,Win - Meerschaert,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
5,Connor Matthews,7-2-0,13:27,"5' 8""",145 lbs.,"71""",Switch,"May 31, 1992",5.3,44%,...,100%,0.0,Win - Juarez,,,,,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
6,Angela Hill,17-14-0,14:20,"5' 3""",115 lbs.,"64""",Orthodox,"Jan 12, 1985",5.46,49%,...,62%,0.8,Win - Jauregui,Win - Mann,Loss - Silva,,,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
7,Jared Gordon,20-7-0 (1 NC),11:16,"5' 9""",155 lbs.,"68""",Orthodox,"Sep 06, 1988",5.75,55%,...,0%,0.0,,,,,,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
8,Rafael Estevam,12-0-0,11:13,"5' 8""",125 lbs.,"69""",Orthodox,"Aug 10, 1996",2.94,64%,...,36%,1.8,Win - Nicoll,Win - Mendonca,Win - Ross,Loss - Taira,Win - Ferreira,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"
9,Gabriel Bonfim,16-1-0,6:10,"6' 1""",170 lbs.,"72""",Orthodox,"Aug 20, 1997",5.78,42%,...,80%,0.0,Win - Harris,Win - Bedoya,Loss - Brown,Win - Baeza,Win - Semelsberger,UFC Fight Night: Cannonier vs. Rodrigues,"Date:\n \n February 15, 2025","Location:\n \n\n Las Vegas, Nevada, USA"


In [6]:
# Create title file removing special characters
import re
title_file = re.sub(r'[\\/*?:"<>|.]', "", card_info[0]) + ".csv"
#Export the data
df_event.to_csv(title_file, index = False)
print("Data exported succesfully!")

Data exported succesfully!
