# Web Scraping - UFC.com

## Notebook Setup

In [301]:
# Import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint
import tqdm

## Get Event URLs

In [255]:
# Listing page of completed UFC events on ufcstats.com; entry point for the scrape.
# NOTE(review): `page=all` presumably returns every event on a single page - confirm.
events_completed_url = "http://www.ufcstats.com/statistics/events/completed?page=all"

In [348]:
def get_table_body(url: str, timeout: float = 30):
    """Download a web page and locate the first table on it.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``(table_body, soup)`` tuple: the first ``<table>`` element found on
        the page (``None`` when the page contains no table) and the parsed
        document itself.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    page = requests.get(url, timeout=timeout)

    # Fail loudly on error responses instead of parsing an error page
    page.raise_for_status()

    # Parse the html text of the page
    soup = bs(page.text, 'lxml')

    # Find the table of data on the page
    table_body = soup.find('table')

    return table_body, soup

In [265]:
def get_event_links(events_completed_url):
    """Return the event page links found in the completed-events table.

    Every ``<td>`` of every ``<tr>`` in the first table of the page is
    inspected; when a cell contains an ``<a>`` tag, the ``href`` of its first
    anchor is collected. Links are returned in page order.
    """
    listing_table, _ = get_table_body(events_completed_url)

    # First anchor (or None) of each cell, walking the table row by row
    first_anchors = (
        cell.find('a')
        for table_row in listing_table.find_all('tr')
        for cell in table_row.find_all('td')
    )

    return [anchor['href'] for anchor in first_anchors if anchor]

In [266]:
# Gather the event links from the completed-events listing and display how many were collected
event_links = get_event_links(events_completed_url)
len(event_links)

641

## Get Event Details

In [267]:
def get_all_events_details(event_links):
    """Scrape the fight rows of every completed event page.

    The first entry of ``event_links`` is skipped because it is the next
    upcoming event rather than a completed one.

    Args:
        event_links: Event page URLs, e.g. as returned by ``get_event_links``.

    Returns:
        A dict mapping each event URL to a list with one dict per fight row.
        Each fight dict has the keys ``event_name``, ``event_details``
        (``date`` and ``location``), ``names``, ``result``, ``stats``,
        ``weight_class``, ``method``, ``round`` and ``time``. Events whose
        page yields no fight row are absent from the result. ``event_name``,
        ``date`` and ``location`` are ``None`` when the page does not
        provide them.
    """
    results = {}

    # Skip the first event because it is actually the next upcoming event
    for event_link in event_links[1:]:

        # Only the parsed document is needed; the table element is not used
        _, soup = get_table_body(event_link)

        # Event-level fields are the same for every fight row, so they are
        # read once per page instead of once per row.
        event_name_tag = soup.find('h2')
        event_name = (
            event_name_tag.get_text(strip=True)
            if event_name_tag is not None
            else None
        )

        # Reset for each event so a page lacking these items can neither
        # raise NameError nor inherit the previous event's values.
        date = None
        location = None
        for item in soup.find_all('li', {'class': 'b-list__box-list-item'}):
            title = item.find('i', {'class': 'b-list__box-item-title'})
            if title is None:
                continue
            title_text = title.get_text(strip=True).lower()
            if 'date' in title_text:
                date = item.get_text(strip=True).replace('Date:', '').strip()
            elif 'location' in title_text:
                location = item.get_text(strip=True).replace('Location:', '').strip()

        for row in soup.find_all('tr'):
            tds = row.find_all('td')

            # Rows with fewer than ten cells are not fight rows
            if len(tds) < 10:
                continue

            row_data = {
                'event_name': event_name,
                'event_details': {'date': date, 'location': location},
            }

            # Extract names
            names = [a.get_text(strip=True) for a in tds[1].find_all('a')]
            row_data['names'] = names

            # Extract Winner and Loser
            # NOTE(review): the text of the first cell is assigned to the
            # first listed fighter and the second is always labelled 'lose';
            # confirm how draws / no contests should be recorded.
            winner_text = [tds[0].text.strip(), 'lose']
            row_data['result'] = {}
            for i, name in enumerate(names):
                row_data['result'][name] = winner_text[i]

            # Extract stats: each of the four stat cells holds one <p> per fighter
            stats = [
                [p.get_text(strip=True) for p in td.find_all('p')]
                for td in tds[2:6]
            ]
            row_data['stats'] = {
                name: [stat[i] for stat in stats]
                for i, name in enumerate(names)
            }

            # Extract weight class and method
            row_data['weight_class'] = tds[6].get_text(strip=True)
            row_data['method'] = tds[7].get_text(strip=True)

            # Extract round and time. Indexed directly so that a row with
            # more than ten cells does not break tuple unpacking.
            row_data['round'] = tds[8].get_text(strip=True)
            row_data['time'] = tds[9].get_text(strip=True)

            # Group the fight rows under their event link
            results.setdefault(event_link, []).append(row_data)

    return results

In [246]:
# Scrape the fight rows of every event page (one request per event link, first link skipped)
all_event_details = get_all_events_details(event_links)

In [243]:
# Convert dictionary of all event details to a dataframe
def flatten_nested_dict(data):
    """Flatten the per-event mapping of fight rows into a list of records.

    Args:
        data: Dict mapping an event link to a list of fight dicts, each with
            the keys ``event_name``, ``event_details`` (holding ``date`` and
            ``location``), ``names``, ``result``, ``stats``, ``weight_class``,
            ``method``, ``round`` and ``time``.

    Returns:
        A list with one flat dict per fight. The event link is stored under
        ``event_link`` and the nested date/location are lifted to top-level
        ``date`` and ``location`` keys.
    """
    # Fields copied unchanged from the fight dict, in output order
    copied_keys = ('names', 'result', 'stats', 'weight_class',
                   'method', 'round', 'time')

    records = []
    for link, fights in data.items():
        for fight in fights:
            record = {'event_link': link, 'event_name': fight['event_name']}

            details = fight['event_details']
            record['date'] = details['date']
            record['location'] = details['location']

            for key in copied_keys:
                record[key] = fight[key]

            records.append(record)

    return records

# Flatten the scraped results and load them into a DataFrame (one row per fight)
flattened_data = flatten_nested_dict(all_event_details)
df = pd.DataFrame(flattened_data)

In [269]:
# Show the DataFrame dimensions and preview its first rows
print(df.shape)
df.head()

(7060, 11)


Unnamed: 0,event_link,event_name,date,location,names,result,stats,weight_class,method,round,time
0,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA","[Cory Sandhagen, Marlon Vera]","{'Cory Sandhagen': 'win', 'Marlon Vera': 'lose'}","{'Cory Sandhagen': ['0', '128', '3', '0'], 'Ma...",Bantamweight,S-DEC,5,5:00
1,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA","[Holly Holm, Yana Santos]","{'Holly Holm': 'win', 'Yana Santos': 'lose'}","{'Holly Holm': ['0', '32', '4', '0'], 'Yana Sa...",Women's Bantamweight,U-DEC,3,5:00
2,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA","[Nate Landwehr, Austin Lingo]","{'Nate Landwehr': 'win', 'Austin Lingo': 'lose'}","{'Nate Landwehr': ['0', '64', '1', '1'], 'Aust...",Featherweight,SUBRear Naked Choke,2,4:11
3,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA","[Maycee Barber, Andrea Lee]","{'Maycee Barber': 'win', 'Andrea Lee': 'lose'}","{'Maycee Barber': ['0', '48', '2', '0'], 'Andr...",Women's Flyweight,S-DEC,3,5:00
4,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA","[Albert Duraev, Chidi Njokuani]","{'Albert Duraev': 'win', 'Chidi Njokuani': 'lo...","{'Albert Duraev': ['0', '45', '2', '0'], 'Chid...",Middleweight,S-DEC,3,5:00


In [252]:
def split_rows(df):
    """Expand each fight row into one row per fighter.

    For every name in a row's ``names`` list a copy of the row is produced in
    which ``result`` and ``stats`` are replaced by that fighter's own entry,
    the fighter is stored under ``name`` and ``names`` is removed. The
    per-fighter ``stats`` list is then spread over the columns ``KD``,
    ``STR``, ``TD`` and ``SUB`` and the ``stats`` column is dropped.

    Args:
        df: DataFrame with a ``names`` column (list of fighter names) and
            ``result`` / ``stats`` columns (dicts keyed by fighter name).

    Returns:
        A new DataFrame with a fresh 0..n-1 index; ``df`` is not modified.
        When no per-fighter row is produced, an empty DataFrame with the
        expected columns is returned.
    """
    stat_columns = ['KD', 'STR', 'TD', 'SUB']

    # Collect plain dicts and build the frame once: concatenating inside the
    # loop copies the accumulated frame on every iteration (quadratic).
    records = []
    for _, row in df.iterrows():
        names = row['names']
        shared = row.drop('names').to_dict()
        for name in names:
            record = dict(shared)
            record['result'] = row['result'][name]
            record['stats'] = row['stats'][name]
            record['name'] = name
            records.append(record)

    if not records:
        # Nothing to expand: return the expected layout instead of failing
        # on the missing 'stats' column below.
        kept = [col for col in df.columns if col not in ('names', 'stats')]
        return pd.DataFrame(columns=kept + ['name'] + stat_columns)

    two_row_df = pd.DataFrame(records)

    # Split the 'stats' column into separate columns
    two_row_df[stat_columns] = pd.DataFrame(
        two_row_df['stats'].tolist(), index=two_row_df.index
    )

    # Drop the 'stats' column
    return two_row_df.drop(columns='stats')

In [270]:
# Expand each fight into one row per fighter and preview the result
two_row_df = split_rows(df)
two_row_df.head()

Unnamed: 0,event_link,event_name,date,location,result,weight_class,method,round,time,name,KD,STR,TD,SUB
0,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA",win,Bantamweight,S-DEC,5,5:00,Cory Sandhagen,0,128,3,0
1,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA",lose,Bantamweight,S-DEC,5,5:00,Marlon Vera,0,58,0,1
2,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA",win,Women's Bantamweight,U-DEC,3,5:00,Holly Holm,0,32,4,0
3,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA",lose,Women's Bantamweight,U-DEC,3,5:00,Yana Santos,0,21,0,0
4,http://www.ufcstats.com/event-details/aec273fc...,UFC Fight Night: Vera vs. Sandhagen,"March 25, 2023","San Antonio, Texas, USA",win,Featherweight,SUBRear Naked Choke,2,4:11,Nate Landwehr,0,64,1,1


## Get Event Fight Details

In [333]:
# Rebuild the list of event links (same call as in "Get Event URLs") and display its length
event_links = get_event_links(events_completed_url)
len(event_links)

641

In [334]:
def get_event_fight_details_link(event_link):
    """Return the fight-detail links listed on one event page.

    The event page is downloaded and every ``<tr>`` carrying the clickable
    fight-row classes is located; the value of each row's ``data-link``
    attribute is returned, in page order.
    """
    # Only the parsed document is needed here, not the table element
    _, event_soup = get_table_body(event_link)

    clickable_rows = event_soup.find_all(
        'tr',
        class_='b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click',
    )

    fight_links = []
    for table_row in clickable_rows:
        fight_links.append(table_row['data-link'])

    return fight_links

In [335]:
def get_all_event_fight_details_links(event_links):
    """Return the fight-detail links of all given events as one flat list.

    Each event page is queried through ``get_event_fight_details_link`` and
    the per-event lists are joined in the order of ``event_links``.
    """
    return [
        fight_link
        for event_link in event_links
        for fight_link in get_event_fight_details_link(event_link)
    ]

In [336]:
# Collect the fight-detail links of every event into one flat list
all_event_fight_details_links = get_all_event_fight_details_links(event_links)

In [467]:
def get_advanced_fight_details(url):
    """Placeholder for scraping the detailed statistics of one fight page.

    The extraction logic has not been written yet: the page is downloaded and
    checked for the expanded "collapse" link, but nothing is collected and
    the function always returns ``None``.

    Args:
        url: Address of a fight-detail page.
    """
    # Intended container for the scraped details (not filled in yet)
    all_fight_details = {}

    # Only the parsed document is needed here, not the table element
    _, soup = get_table_body(url)

    # Determine if it is not a completed event:
    # the 1st event on the UFC completed-events list is the next upcoming event.
    expanded_link = soup.find(
        'a',
        class_="b-fight-details__collapse-link b-fight-details__collapse-link_state_expanded",
    )
    if expanded_link is None:  # Not upcoming event
        # TODO: extract the fight details for this case and return them
        pass

In [591]:
# Try the function on a single fight page; it prints None because
# get_advanced_fight_details has no return statement yet
advanced_fight_details = get_advanced_fight_details(all_event_fight_details_links[14])
print(advanced_fight_details)

None


In [660]:
# Download and parse one fight-detail page; only the parsed document (`soup`) is kept,
# the table element returned alongside it is discarded.
_ , soup = get_table_body(all_event_fight_details_links[13])

In [662]:
# Save fight details
fight_details = {}

# Capture event name
event_name = soup.find('h2', class_="b-content__title").get_text(strip=True)

# Capture fight names
tds = soup.find_all('td')  # not read in this cell; kept in case another cell uses it
a_classes = soup.find_all('a', class_="b-link b-fight-details__person-link")
fighter1 = a_classes[0].get_text(strip=True)
fighter2 = a_classes[1].get_text(strip=True)

# Create Matchup
fight = "{} vs. {}".format(fighter1, fighter2)

# Set flag (1/0) if one of the fight detail items reads "Round:5"
fight_specifics = soup.find_all('i', class_="b-fight-details__text-item")
five_round_flag = int(any(
    item.get_text(strip=True) == "Round:5" for item in fight_specifics
))

# Find all p tags containing Totals and per-round figures
p_class = soup.find_all('p', class_="b-fight-details__table-text")

# Statistic labels in the order they are read from the page. The same key set
# is used for both fighters and for the "N/A" filler ('TOTAL STR.' used to be
# spelled 'TOTAL STR' for the second fighter).
STAT_LABELS = ('KD', 'SIG. STR', 'SIG. STR %', 'TOTAL STR.', 'TD',
               'TD %', 'SUB. ATT', 'REV.', 'CTRL')

# The figures come in blocks of 20 <p> tags: block 0 holds the totals and
# block k the figures of round k. Within a block the first two tags are
# skipped (presumably the fighter names - confirm) and the remaining tags
# alternate between the first and the second fighter.
P_TAGS_PER_BLOCK = 20
FIRST_STAT_OFFSET = 2
ROUNDS_IN_OUTPUT = 5

# string placeholder when metric not available
not_applicable = "N/A"


def _stat_block(p_tags, block_index, fighter_index):
    """Read one fighter's statistics from one 20-tag block of ``p_tags``.

    ``fighter_index`` is 0 for the first listed fighter and 1 for the second.
    """
    first = block_index * P_TAGS_PER_BLOCK + FIRST_STAT_OFFSET + fighter_index
    return {
        label: p_tags[first + 2 * position].get_text(strip=True)
        for position, label in enumerate(STAT_LABELS)
    }


def _corner_stats(p_tags, fighter_index, rounds_recorded):
    """Build the Totals/Round1..Round5 mapping for one fighter.

    Rounds beyond ``rounds_recorded`` are filled with the "N/A" placeholder.
    """
    stats = {'Totals': _stat_block(p_tags, 0, fighter_index)}
    for round_number in range(1, ROUNDS_IN_OUTPUT + 1):
        if round_number <= rounds_recorded:
            round_stats = _stat_block(p_tags, round_number, fighter_index)
        else:
            round_stats = dict.fromkeys(STAT_LABELS, not_applicable)
        stats['Round{}'.format(round_number)] = round_stats
    return stats


# Five rounds are read when the flag is set, otherwise three.
# NOTE(review): this assumes at least three per-round blocks are present;
# confirm what the page contains for fights recorded with fewer rounds.
rounds_recorded = 5 if five_round_flag == 1 else 3

# Update dict to store results and create structure
fight_details[event_name] = {
    'Matchup': {
        'Fighters': fight,
        'RedCorner': {
            fighter1: _corner_stats(p_class, 0, rounds_recorded)
        },
        'BlueCorner': {
            fighter2: _corner_stats(p_class, 1, rounds_recorded)
        }
    }
}

In [663]:
# Pretty-print the nested fight_details dictionary built in the previous cell
pprint(fight_details)

{'UFC Fight Night: Vera vs. Sandhagen': {'Matchup': {'BlueCorner': {'Cory Sandhagen': {'Round1': {'CTRL': '2:30',
                                                                                                  'KD': '0',
                                                                                                  'REV.': '0',
                                                                                                  'SIG. STR': '32 '
                                                                                                              'of '
                                                                                                              '69',
                                                                                                  'SIG. STR %': '46%',
                                                                                                  'SUB. ATT': '0',
                                                                                    

In [664]:
# Inspect the tags at indices 123-129, i.e. beyond the blocks read above (which end at 119)
p_class = soup.find_all('p', class_="b-fight-details__table-text")
print(p_class[123:130])

[<p class="b-fight-details__table-text">
      128 of 280
    </p>, <p class="b-fight-details__table-text">
      36%
    </p>, <p class="b-fight-details__table-text">
      45%
    </p>, <p class="b-fight-details__table-text">
      28 of 105
    </p>, <p class="b-fight-details__table-text">
      84 of 226
    </p>, <p class="b-fight-details__table-text">
      13 of 29
    </p>, <p class="b-fight-details__table-text">
      16 of 23
    </p>]


In [670]:
# Stat labels, in the order their cells are read from each table row.
_STAT_KEYS = (
    'KD',
    'SIG. STR',
    'SIG. STR %',
    'TOTAL STR.',
    'TD',
    'TD %',
    'SUB. ATT',
    'REV.',
    'CTRL',
)

# Every row (totals, then one per round) spans 20 <p> cells. The first two
# cells of a row are skipped (presumably the fighter names -- confirm against
# the page), then come the 9 stats, alternating fighter 1 / fighter 2.
_CELLS_PER_ROW = 20
_LEADING_CELLS = 2

# Number of per-round entries always present in the output.
_MAX_ROUNDS = 5


def _row_stats(p_tags, row, corner):
    """Return the nine stats for one fighter read from one table row.

    Parameters
    ----------
    p_tags : sequence of tags exposing ``get_text(strip=True)``.
    row : 0 for the totals row, 1..5 for the matching round.
    corner : 0 for fighter 1 (even cell offsets), 1 for fighter 2 (odd ones).

    Raises
    ------
    IndexError
        If ``p_tags`` holds fewer cells than the requested row needs.
    """
    first_cell = row * _CELLS_PER_ROW + _LEADING_CELLS + corner
    return {
        key: p_tags[first_cell + 2 * position].get_text(strip=True)
        for position, key in enumerate(_STAT_KEYS)
    }


def _placeholder_stats():
    """Return a fresh "N/A" stats dict for a round that was not scraped."""
    # A new dict per call so no two rounds/fighters share one mutable object.
    return dict.fromkeys(_STAT_KEYS, "N/A")


def get_round_by_round_totals(links):
    """Scrape total and per-round stats for both fighters of each fight page.

    Parameters
    ----------
    links : iterable of URLs. Each one is passed to ``get_table_body`` and
        the returned soup is parsed as a fight-details page.

    Returns
    -------
    dict
        ``{link: {event_name: {'Matchup': {'Fighters': 'A vs. B',
        'RedCorner': {A: stats}, 'BlueCorner': {B: stats}}}}}`` where
        ``stats`` has the keys 'Totals' and 'Round1'..'Round5'. Each of those
        maps the nine labels in ``_STAT_KEYS`` to the stripped cell text.
        Both fighters use identical labels. Rounds that are not scraped are
        filled with "N/A" values (an independent dict for every round).

    Raises
    ------
    AttributeError
        If the page has no ``h2.b-content__title`` element.
    IndexError
        If fewer than two fighter links, or fewer stat cells than expected,
        are found on the page.
    """
    # Results keyed by the link they were scraped from
    all_event_fight_details = {}

    for e in links:
        # Only the parsed page is needed here, not the first table
        _, soup = get_table_body(e)

        # Title shown at the top of the page
        event_name = soup.find('h2', class_="b-content__title").get_text(strip=True)

        # The first two person links are taken as fighter 1 and fighter 2
        person_links = soup.find_all('a', class_="b-link b-fight-details__person-link")
        fighter1 = person_links[0].get_text(strip=True)
        fighter2 = person_links[1].get_text(strip=True)
        fight = "{} vs. {}".format(fighter1, fighter2)

        # Five rounds are read only when a details item is exactly "Round:5";
        # otherwise three rounds are read.
        # NOTE(review): this item may be the round the fight ended in rather
        # than its scheduled length -- confirm. If so, fights stopped in
        # rounds 1, 2 or 4 are still read as three rounds.
        went_five_rounds = any(
            item.get_text(strip=True) == "Round:5"
            for item in soup.find_all('i', class_="b-fight-details__text-item")
        )
        rounds_scraped = _MAX_ROUNDS if went_five_rounds else 3

        # All stat cells of the page, in document order
        p_class = soup.find_all('p', class_="b-fight-details__table-text")

        per_fighter = []
        for corner in (0, 1):
            stats = {'Totals': _row_stats(p_class, 0, corner)}
            for round_no in range(1, _MAX_ROUNDS + 1):
                if round_no <= rounds_scraped:
                    round_stats = _row_stats(p_class, round_no, corner)
                else:
                    round_stats = _placeholder_stats()
                stats['Round{}'.format(round_no)] = round_stats
            per_fighter.append(stats)

        # Store the result for this link
        all_event_fight_details[e] = {
            event_name: {
                'Matchup': {
                    'Fighters': fight,
                    'RedCorner': {fighter1: per_fighter[0]},
                    'BlueCorner': {fighter2: per_fighter[1]},
                }
            }
        }

    return all_event_fight_details