In [1]:
#   Importing necessary libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import random

Before we can make a request to the website, we need to set up the **url** and the **user agent**, otherwise Sherdog's server won't answer our request.

In [2]:
# define a list of random user-agents
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
]

# select a random user-agent from the list
headers = {
    "User-Agent": random.choice(user_agents)
}


# put here the fighter profile url from sherdog
url = "https://www.sherdog.com/fighter/Usman-Nurmagomedov-296291"

Making the request and capturing the html

In [3]:
# send a request to the website using the selected user-agent
response = requests.get(url, headers=headers)

# parse the HTML content of the website using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

print(soup)

<!DOCTYPE html>
<html class="light" lang="en">
<head>
<title>Usman Nurmagomedov MMA Stats, Pictures, News, Videos, Biography - Sherdog.com</title><meta charset="utf-8"/>
<meta content="Sherdog.com" name="author"/>
<meta content="Sherdog.com" name="publisher"/>
<meta content="2023 - Sherdog.com" name="copyright"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no" name="viewport"/>
<meta content="index, follow" name="robots"/>
<meta content="The industry pioneer in UFC, Bellator and all things MMA (aka Ultimate Fighting). MMA news, interviews, pictures, videos and more since 1997." name="description"/>
<meta content="!" name="fragment"/>
<meta content="" name="location"/>
<meta content="816654511" name="ir-site-verification-token"/><meta content="100000399936875" property="fb:admins"><meta content="The industry pioneer in UFC, Bellator and all things MMA (aka Ultimate Fighting). MMA news, interviews, pictures, videos and more since 1997." property="og

Now that the html is stored in the **soup** we can start scraping the info.

In [4]:
#   defining a dictionary for the info
fighter_data = {}



name = soup.find("h1", {"itemprop": "name"}).text
record = soup.find("span", {"class": "record"}).text.strip()
association = soup.find("a", {"class": "association"}).text.strip()
fighter_weight_class = soup.find('div', {'class': 'association-class'}).find('a', href=lambda href: href and 'weightclass' in href).text

#   information such as weight and height is inside the div 'bio-holder' so to facilitate, we will store it.
bio_holder = soup.find("div", {"class": "bio-holder"})
height = fighter_height = bio_holder.find('td', text='HEIGHT').find_next_sibling('td').contents[3]
weight = fighter_weight = bio_holder.find('td', text='WEIGHT').find_next_sibling('td').contents[3]

fighter_data["name"] = name
fighter_data["record"] = record
fighter_data["association"] = association
fighter_data["weight_class"] = fighter_weight_class
fighter_data["height"] = height
fighter_data["weight"] = weight

#   printing the dictionary
print(fighter_data)

AttributeError: 'NoneType' object has no attribute 'text'

Now, let's retrieve his fight records

In [5]:
#   defining a list to store the fights
data = []

#   using soup to search the page looking for the fight records
previous_fights_table = soup.find("div", {"class": "module fight_history"})

#   all fights are inside a table and from it we just need the rows
rows = previous_fights_table.findAll("tr")[1:]

#   for each row we will retrieve basic fight information
for row in rows:
    cols = row.findAll("td")
    result = cols[0].text.strip()
    opponent = cols[1].text.strip()
    event_date = cols[2].find("span", {"class": "sub_line"}).text.strip()
    method = cols[3].find("b").text.strip()
    round = cols[4].text.strip()
    time = cols[5].text.strip()
    
    data.append([result, opponent, event_date, method, round, time])

#   let's check the data
data

[['win',
  'Benson Henderson',
  'Mar / 10 / 2023',
  'Submission (Rear-Naked Choke)',
  '1',
  '2:37'],
 ['win',
  'Patricky Freire',
  'Nov / 18 / 2022',
  'Decision (Unanimous)',
  '5',
  '5:00'],
 ['win',
  'Christopher Gonzalez',
  'Jul / 22 / 2022',
  'Submission (Guillotine Choke)',
  '1',
  '2:54'],
 ['win',
  'Patrik Pietila',
  'Oct / 23 / 2021',
  'Submission (Rear-Naked Choke)',
  '1',
  '4:06'],
 ['win',
  'Luis Muro',
  'Jul / 31 / 2021',
  'TKO (Knee to the Body)',
  '1',
  '3:30'],
 ['win', 'Mike Hamel', 'Apr / 02 / 2021', 'Decision (Unanimous)', '3', '5:00'],
 ['win',
  'Svyatoslav Shabanov',
  'Sep / 09 / 2020',
  'TKO (Punches)',
  '2',
  '3:37'],
 ['win',
  'Jerry Kvarnstrom',
  'Jul / 31 / 2020',
  'TKO (Punches and Elbows)',
  '1',
  '2:39'],
 ['win', 'Ruslan Tuyakov', 'Feb / 09 / 2020', 'TKO (Knees)', '2', '2:03'],
 ['win', 'Roman Golovinov', 'Nov / 29 / 2019', 'TKO (Punches)', '1', '1:45'],
 ['win',
  'Kazim Zhakhangirov',
  'Sep / 27 / 2019',
  'Submission (Gui

Everything seems quite good so far, now, let's transform the records list into a dataset.

In [6]:
#   defining the dataframe using the data from the fights
df = pd.DataFrame(data, columns=["Result", "Opponent", "Event Date", "Method", "R", "Time"])
df

Unnamed: 0,Result,Opponent,Event Date,Method,R,Time
0,win,Benson Henderson,Mar / 10 / 2023,Submission (Rear-Naked Choke),1,2:37
1,win,Patricky Freire,Nov / 18 / 2022,Decision (Unanimous),5,5:00
2,win,Christopher Gonzalez,Jul / 22 / 2022,Submission (Guillotine Choke),1,2:54
3,win,Patrik Pietila,Oct / 23 / 2021,Submission (Rear-Naked Choke),1,4:06
4,win,Luis Muro,Jul / 31 / 2021,TKO (Knee to the Body),1,3:30
5,win,Mike Hamel,Apr / 02 / 2021,Decision (Unanimous),3,5:00
6,win,Svyatoslav Shabanov,Sep / 09 / 2020,TKO (Punches),2,3:37
7,win,Jerry Kvarnstrom,Jul / 31 / 2020,TKO (Punches and Elbows),1,2:39
8,win,Ruslan Tuyakov,Feb / 09 / 2020,TKO (Knees),2,2:03
9,win,Roman Golovinov,Nov / 29 / 2019,TKO (Punches),1,1:45


And in case we want to use the data to create a dashboard on PowerBI or Tableau, let's export it as a CSV file inside the folder 'fighter-data'.

In [7]:
#   transforming the dataframe into a csv file using the fight's name
df.to_csv(f'fighter-data/{name}-fight-record.csv', index=False)