In [2]:
import requests
from bs4 import BeautifulSoup


def get_fighter_fight_urls(fighter_url):
    page = requests.get(fighter_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    
    fight_urls = []
    
    fight_rows = soup.find_all('tr', class_='b-fight-details__table-row')
    for fight_row in fight_rows:
        fight_link_tag = fight_row.find('a', class_='b-flag b-flag_style_bordered')
        if fight_link_tag:
            fight_link = fight_link_tag['href']
            fight_urls.append(fight_link)
    
    return fight_urls



fighter_url = 'http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0'
get_fighter_fight_urls(fighter_url)

['http://www.ufcstats.com/fight-details/c27e33ee8ef53866',
 'http://www.ufcstats.com/fight-details/b22eab3aa1522f40',
 'http://www.ufcstats.com/fight-details/01a4827b3596d111',
 'http://www.ufcstats.com/fight-details/6fe9729cee57c680',
 'http://www.ufcstats.com/fight-details/692241119228cc82',
 'http://www.ufcstats.com/fight-details/6bcea00473e43a59',
 'http://www.ufcstats.com/fight-details/493b7a8e3253b133',
 'http://www.ufcstats.com/fight-details/a8ec9c513d28c25c',
 'http://www.ufcstats.com/fight-details/be17aaad7221e2d9']

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_fight_data(fight_url):
    page = requests.get(fight_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    
    table_body = soup.find('tbody', class_='b-fight-details__table-body')

    data = []

    # Extract fight outcome (W for win, L for loss)
    fight_outcome_tag = soup.find('i', class_='b-fight-details__person-status')
    if fight_outcome_tag:
        fight_outcome = fight_outcome_tag.get_text(strip=True)
    else:
        fight_outcome = None

    # Iterate through table rows
    for row in table_body.find_all('tr'):
        cols = row.find_all('td')
        fighter_names = cols[0].find_all('a', class_='b-link_style_black')
        fighters = [name.get_text(strip=True) for name in fighter_names]
        
        kd = [col.get_text(strip=True) for col in cols[1].find_all('p')]
        sig_str = [col.get_text(strip=True) for col in cols[2].find_all('p')]
        sig_str_percent = [col.get_text(strip=True) for col in cols[3].find_all('p')]
        total_str = [col.get_text(strip=True) for col in cols[4].find_all('p')]
        td = [col.get_text(strip=True) for col in cols[5].find_all('p')]
        td_percent = [col.get_text(strip=True) for col in cols[6].find_all('p')]
        sub_att = [col.get_text(strip=True) for col in cols[7].find_all('p')]
        rev = [col.get_text(strip=True) for col in cols[8].find_all('p')]
        ctrl = [col.get_text(strip=True) for col in cols[9].find_all('p')]
        
        for i in range(len(fighters)):
            fight_info = {
                'Fighter': fighters[i],
                'KD': kd[i],
                'Sig. str.': sig_str[i],
                'Sig. str. %': sig_str_percent[i],
                'Total str.': total_str[i],
                'Td': td[i],
                'Td %': td_percent[i],
                'Sub. att': sub_att[i],
                'Rev.': rev[i],
                'Ctrl': ctrl[i],
                'Fight URL': fight_url,
                'Outcome': fight_outcome  # Add fight outcome (W or L) to the data
            }
            data.append(fight_info)

    df = pd.DataFrame(data)

    return df

def main():
    fighter_url = 'http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0'
    fight_urls = get_fighter_fight_urls(fighter_url)

    all_fight_dfs = []
    for fight_url in fight_urls:
        fight_df = scrape_fight_data(fight_url)
        all_fight_dfs.append(fight_df)

    combined_df = pd.concat(all_fight_dfs, ignore_index=True)
    return combined_df

if __name__ == "__main__":
    combined_df = main()
    print(combined_df)


                Fighter KD  Sig. str. Sig. str. %  Total str.       Td  Td %  \
0        Dustin Poirier  0   27 of 52         51%    27 of 52   0 of 0   ---   
1        Justin Gaethje  1   41 of 66         62%    41 of 66   0 of 0   ---   
2      Charles Oliveira  0  73 of 131         55%   98 of 157   0 of 6    0%   
3        Dustin Poirier  1   58 of 94         61%   69 of 105   0 of 0   ---   
4   Khabib Nurmagomedov  0   22 of 41         53%    54 of 77   7 of 8   87%   
5        Dustin Poirier  0   12 of 39         30%    30 of 57   0 of 0   ---   
6         Eddie Alvarez  0  44 of 127         34%   44 of 128   1 of 4   25%   
7        Dustin Poirier  0  73 of 144         50%   79 of 150   0 of 0   ---   
8        Dustin Poirier  0    6 of 12         50%     6 of 12   0 of 0   ---   
9       Michael Johnson  1   14 of 37         37%    14 of 37   0 of 0   ---   
10       Dustin Poirier  0   10 of 19         52%    10 of 19   0 of 0   ---   
11       Conor McGregor  1    9 of 29   

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Existing code for get_fighter_fight_urls and scrape_fight_data functions...

def main():
    fighter_url = 'http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0'
    fight_urls = get_fighter_fight_urls(fighter_url)

    all_fight_dfs = []
    for fight_url in fight_urls:
        fight_df = scrape_fight_data(fight_url)
        all_fight_dfs.append(fight_df)

    combined_df = pd.concat(all_fight_dfs, ignore_index=True)
    
    # Create a new DataFrame to restructure the data for each fight
    new_columns = [
        'Fighter', 'Opponent', 'KD', 'Opponent KD', 'Sig. str.', 'Opponent Sig. str.',
        'Sig. str. %', 'Opponent Sig. str. %', 'Total str.', 'Opponent Total str.',
        'Td', 'Opponent Td', 'Td %', 'Opponent Td %', 'Sub. att', 'Opponent Sub. att',
        'Rev.', 'Opponent Rev.', 'Ctrl', 'Opponent Ctrl', 'Fight URL', 'Outcome'
    ]
    new_data = []
    
    # Iterate through the original DataFrame to extract fighter and opponent stats
    for i in range(0, len(combined_df), 2):
        fighter_row = combined_df.iloc[i]
        opponent_row = combined_df.iloc[i + 1]
        
        new_row = [
            fighter_row['Fighter'], opponent_row['Fighter'],
            fighter_row['KD'], opponent_row['KD'],
            fighter_row['Sig. str.'], opponent_row['Sig. str.'],
            fighter_row['Sig. str. %'], opponent_row['Sig. str. %'],
            fighter_row['Total str.'], opponent_row['Total str.'],
            fighter_row['Td'], opponent_row['Td'],
            fighter_row['Td %'], opponent_row['Td %'],
            fighter_row['Sub. att'], opponent_row['Sub. att'],
            fighter_row['Rev.'], opponent_row['Rev.'],
            fighter_row['Ctrl'], opponent_row['Ctrl'],
            fighter_row['Fight URL'], fighter_row['Outcome']
        ]
        
        new_data.append(new_row)
    
    new_df = pd.DataFrame(new_data, columns=new_columns)
    return new_df

if __name__ == "__main__":
    new_combined_df = main()
    print(new_combined_df)


               Fighter         Opponent KD Opponent KD  Sig. str.  \
0       Dustin Poirier   Justin Gaethje  0           1   27 of 52   
1     Charles Oliveira   Dustin Poirier  0           1  73 of 131   
2  Khabib Nurmagomedov   Dustin Poirier  0           0   22 of 41   
3        Eddie Alvarez   Dustin Poirier  0           0  44 of 127   
4       Dustin Poirier  Michael Johnson  0           1    6 of 12   
5       Dustin Poirier   Conor McGregor  0           1   10 of 19   
6          Cub Swanson   Dustin Poirier  0           0  58 of 126   
7       Chan Sung Jung   Dustin Poirier  0           0  74 of 154   
8       Danny Castillo   Dustin Poirier  0           0   32 of 43   

  Opponent Sig. str. Sig. str. % Opponent Sig. str. %  Total str.  \
0           41 of 66         51%                  62%    27 of 52   
1           58 of 94         55%                  61%   98 of 157   
2           12 of 39         53%                  30%    54 of 77   
3          73 of 144         34% 

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Existing code for get_fighter_fight_urls and scrape_fight_data functions...

def main():
    fighter_url = 'http://www.ufcstats.com/fighter-details/029eaff01e6bb8f0'
    fight_urls = get_fighter_fight_urls(fighter_url)

    all_fight_dfs = []
    for fight_url in fight_urls:
        fight_df = scrape_fight_data(fight_url)
        all_fight_dfs.append(fight_df)

    combined_df = pd.concat(all_fight_dfs, ignore_index=True)
    
    # Create a new DataFrame to restructure the data for each fight
    new_columns = [
        'Fighter', 'Opponent', 'KD', 'Opponent KD', 'Sig. str.', 'Opponent Sig. str.',
        'Sig. str. %', 'Opponent Sig. str. %', 'Total str.', 'Opponent Total str.',
        'Td', 'Opponent Td', 'Td %', 'Opponent Td %', 'Sub. att', 'Opponent Sub. att',
        'Rev.', 'Opponent Rev.', 'Ctrl', 'Opponent Ctrl', 'Fight URL', 'Outcome'
    ]
    new_data = []
    
    # Iterate through the original DataFrame to extract fighter and opponent stats
    for i in range(0, len(combined_df), 2):
        fighter_row = combined_df.iloc[i]
        opponent_row = combined_df.iloc[i + 1]
        
        new_row = [
            fighter_row['Fighter'], opponent_row['Fighter'],
            fighter_row['KD'], opponent_row['KD'],
            fighter_row['Sig. str.'], opponent_row['Sig. str.'],
            fighter_row['Sig. str. %'], opponent_row['Sig. str. %'],
            fighter_row['Total str.'], opponent_row['Total str.'],
            fighter_row['Td'], opponent_row['Td'],
            fighter_row['Td %'], opponent_row['Td %'],
            fighter_row['Sub. att'], opponent_row['Sub. att'],
            fighter_row['Rev.'], opponent_row['Rev.'],
            fighter_row['Ctrl'], opponent_row['Ctrl'],
            fighter_row['Fight URL'], fighter_row['Outcome']
        ]
        
        new_data.append(new_row)
    
    new_df = pd.DataFrame(new_data, columns=new_columns)
    return new_df

if __name__ == "__main__":
    new_combined_df = main()
    print(new_combined_df)


               Fighter         Opponent KD Opponent KD  Sig. str.  \
0       Dustin Poirier   Justin Gaethje  0           1   27 of 52   
1     Charles Oliveira   Dustin Poirier  0           1  73 of 131   
2  Khabib Nurmagomedov   Dustin Poirier  0           0   22 of 41   
3        Eddie Alvarez   Dustin Poirier  0           0  44 of 127   
4       Dustin Poirier  Michael Johnson  0           1    6 of 12   
5       Dustin Poirier   Conor McGregor  0           1   10 of 19   
6          Cub Swanson   Dustin Poirier  0           0  58 of 126   
7       Chan Sung Jung   Dustin Poirier  0           0  74 of 154   
8       Danny Castillo   Dustin Poirier  0           0   32 of 43   

  Opponent Sig. str. Sig. str. % Opponent Sig. str. %  Total str.  \
0           41 of 66         51%                  62%    27 of 52   
1           58 of 94         55%                  61%   98 of 157   
2           12 of 39         53%                  30%    54 of 77   
3          73 of 144         34% 

In [6]:

new_combined_df

Unnamed: 0,Fighter,Opponent,KD,Opponent KD,Sig. str.,Opponent Sig. str.,Sig. str. %,Opponent Sig. str. %,Total str.,Opponent Total str.,...,Td %,Opponent Td %,Sub. att,Opponent Sub. att,Rev.,Opponent Rev.,Ctrl,Opponent Ctrl,Fight URL,Outcome
0,Dustin Poirier,Justin Gaethje,0,1,27 of 52,41 of 66,51%,62%,27 of 52,41 of 66,...,---,---,0,0,0,0,0:00,0:00,http://www.ufcstats.com/fight-details/c27e33ee...,L
1,Charles Oliveira,Dustin Poirier,0,1,73 of 131,58 of 94,55%,61%,98 of 157,69 of 105,...,0%,---,3,0,1,0,5:41,0:35,http://www.ufcstats.com/fight-details/b22eab3a...,W
2,Khabib Nurmagomedov,Dustin Poirier,0,0,22 of 41,12 of 39,53%,30%,54 of 77,30 of 57,...,87%,---,2,1,0,0,8:52,0:00,http://www.ufcstats.com/fight-details/01a4827b...,W
3,Eddie Alvarez,Dustin Poirier,0,0,44 of 127,73 of 144,34%,50%,44 of 128,79 of 150,...,25%,---,0,2,0,0,1:16,0:00,http://www.ufcstats.com/fight-details/6fe9729c...,NC
4,Dustin Poirier,Michael Johnson,0,1,6 of 12,14 of 37,50%,37%,6 of 12,14 of 37,...,---,---,0,0,0,0,0:00,0:03,http://www.ufcstats.com/fight-details/69224111...,L
5,Dustin Poirier,Conor McGregor,0,1,10 of 19,9 of 29,52%,31%,10 of 19,9 of 29,...,---,---,0,0,0,0,0:00,0:03,http://www.ufcstats.com/fight-details/6bcea004...,L
6,Cub Swanson,Dustin Poirier,0,0,58 of 126,58 of 140,46%,41%,80 of 150,70 of 158,...,100%,22%,0,0,0,0,3:01,4:02,http://www.ufcstats.com/fight-details/493b7a8e...,W
7,Chan Sung Jung,Dustin Poirier,0,0,74 of 154,56 of 127,48%,44%,149 of 240,70 of 146,...,100%,0%,3,0,0,2,5:02,3:51,http://www.ufcstats.com/fight-details/a8ec9c51...,W
8,Danny Castillo,Dustin Poirier,0,0,32 of 43,8 of 18,74%,44%,50 of 61,15 of 25,...,27%,---,0,5,0,0,13:04,0:08,http://www.ufcstats.com/fight-details/be17aaad...,W
