# Scrape Upcoming Events

This script is dedicated to scraping and collecting upcoming Ultimate Fighting Championship (UFC) event and fighter data from the official UFC website (ufc.com) and UFC statistics site (ufcstats.com). Here is a summary of what each part of the code does:

1. The script starts with importing essential libraries like pandas, numpy, selenium, BeautifulSoup, etc.

2. The `get_next_events` function is designed to scrape upcoming events' data from the UFC website. It collects the title, URL, and date for each event.

3. The `get_event_fights` function gathers information about each fight happening in an event. It extracts details such as fighters' names, weight class, and odds.

4. The `secret_number` function uses Selenium and BeautifulSoup to automate browser actions to navigate the website and find a particular number, which seems to be specific to each event's matchup.

5. The `get_next_events2` function is similar to `get_next_events`, but it also removes events that lack fight data.

6. The `get_next_event_ufcstats` function scrapes data about upcoming events from the UFC Stats website.

7. The `get_fighter_urls` function collects fighter-related data such as name, URL of the profile from the UFC Stats website.

8. The `find_all_iframe_sources` function finds the source URLs for all iframes on a given UFC event page.

9. The `get_iframe_src_data_2` function uses the previously found iframe source URLs to gather detailed statistics about each fighter.

10. Each function gets applied iteratively to events and matchups, storing its collected information in pandas DataFrames for convenient manipulation and analysis.

11. Finally, the program saves the collated and cleaned data to a CSV file for further use.


In [1]:
# Load Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.ticker as mtick
import sqlite3
import seaborn as sns
from matplotlib.pyplot import figure
from bs4 import BeautifulSoup
import time
import requests     # to get images
import shutil       # to save files locally
import datetime
from scipy.stats import norm
import warnings
warnings.filterwarnings('ignore')
from random import randint
import  random
import os
os.chdir('/Users/travisroyce/Library/CloudStorage/OneDrive-Personal/Data Science/Personal_Projects/Sports/UFC_Prediction_V2/')

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from cmath import nan
import urllib
import urllib.request
import re
import time


In [2]:
def get_next_events(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    # get events
    event1 = soup.find('div', class_='c-card-event--result__info')
    event1_txt = soup.find('div', class_='c-card-event--result__info').text
    event1_url = event1.find('a')['href']
    event1_url = 'https://www.ufc.com' + event1_url
    event1_title = event1_txt.split('\n')[1]
    event1_time = event1_txt.split('/')[1]

    data = pd.DataFrame({'event_title': [event1_title], 'event_url': [event1_url], 'event_date': [event1_time]})

    event2 = soup.find('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info')
    event2_txt = soup.find('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info').text
    event2_url = event2.find('a')['href']
    event2_url = 'https://www.ufc.com' + event2_url
    event2_title = event2_txt.split('\n')[1]
    event2_time = event2_txt.split('/')[1]


    data = data.append({'event_title': event2_title, 'event_url': event2_url, 'event_date': event2_time}, ignore_index=True)
    
    event3 = soup.find('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info')
    event3_txt = soup.find('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info').find_next('div', class_='c-card-event--result__info').text
    event3_url = event3.find('a')['href']
    event3_url = 'https://www.ufc.com' + event3_url
    event3_title = event3_txt.split('\n')[1]
    event3_time = event3_txt.split('/')[1]

    data = data.append({'event_title': event3_title, 'event_url': event3_url, 'event_date': event3_time}, ignore_index=True)
    
    return data

# Function to get the fight card for a given event using BeautifulSoup
def get_event_fights(event_url):
    page = requests.get(event_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    # get main card, fight 1

    mcn = soup.find_all('li', class_='l-listing__item')
    # get num of mc
    num_mc = len(mcn)
    # for each mc, do the following
    data = pd.DataFrame()
    n = 0
    for i in mcn:
        mc = mcn[n]
        # fight 1
        fighter1= mc.find('div', class_ ='c-listing-fight__corner-name c-listing-fight__corner-name--red').text
        fighter1 = fighter1.replace('\n', ' ')
        fighter1 = fighter1.strip()
        fighter2 = mc.find('div', class_ ='c-listing-fight__corner-name c-listing-fight__corner-name--blue').text
        fighter2 = fighter2.replace('\n', ' ')
        fighter2 = fighter2.strip()
        weightclass = mc.find('div', class_='c-listing-fight__class-text').text
        fighter1_odds = mc.find('span', class_='c-listing-fight__odds').text
        fighter2_odds = mc.find('span', class_='c-listing-fight__odds').find_next('span', class_='c-listing-fight__odds').text
        fighter1_odds = fighter1_odds.replace('\n', '')
        fighter2_odds = fighter2_odds.replace('\n', '')
        # fighter odds to float
        if (fighter1_odds == '-') :
            fighter1_odds = nan
        if (fighter2_odds == '-') :
            fighter2_odds = nan

        data = data.append({'fighter1': fighter1, 'fighter2': fighter2, 'weightclass': weightclass, 
                            'fighter1_odds': fighter1_odds, 'fighter2_odds': fighter2_odds}, ignore_index=True)
        n = n + 1
    return data

# Find secret number in ufc events using BS & Selenium
def secret_number(event_url):
    # if no driver open, open one
    driver = None
    if (driver == None):
        driver = webdriver.Chrome('C:\\Users\\Travis\\OneDrive\\Data Science\\Personal_Projects\\Sports\\UFC_Prediction_V2\\chromedriver.exe')
    else:
        driver = driver
    
    driver.get(event_url)
    time.sleep(3)
    # click the first matchup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    pretty = soup.prettify()
    # find first data-fmid to get first matchup
    fmid_start = pretty.find('data-fmid')
    fmid = pretty[fmid_start+11:fmid_start+16]
    driver.get(event_url +'#' + fmid)
    time.sleep(6)
    # find all links within page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # find all iframe src
    iframe = soup.find_all('iframe')
    # find all links
    iframe_text = str(iframe)
    matchup = iframe_text.find('matchup')
    matchup_url = iframe_text[matchup+8:matchup+12]
    print('matchup_url: ' + matchup_url)
    secret_number = matchup_url
    return matchup

# get next events if event fighter data is not na
def get_next_events2(url):
    data = get_next_events(url)
    for i in range(0, len(data)):
        event_url = data['event_url'][i]
        event_fights = get_event_fights(event_url)
        if (len(event_fights) == 0):
            data = data.drop(i)
    return data

# get next events from UFCStats.com using BS
def get_next_event_ufcstats():
    url = 'http://www.ufcstats.com/statistics/events/upcoming'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    # get events
    event1 = soup.find('td', class_='b-statistics__table-col')
    event1_txt = soup.find('td', class_='b-statistics__table-col').text
    event_txt = event1_txt.replace('   ', '').replace('\n', '').strip()
    event_title = event_txt.split('  ')[0]
    event_date = event_txt.split('  ')[1]
    event1_url = event1.find('a')['href']
    data = pd.DataFrame({'event_title': [event_title], 'event_url': [event1_url], 'event_date': [event_date]})
    return data


# get fighter urls from UFCStats.com using BS
def get_fighter_urls(event_details_url):
    page = requests.get(event_details_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    # get events
    events = soup.find_all('tr', class_='b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click')
    n = 0
    next_event_data = pd.DataFrame()

    for event in events:
        fighters = events[n].find_all('p', class_='b-fight-details__table-text')
        fighter1 = fighters[0].text
        fighter1 = fighter1.replace('  ', '').replace('\n', '').strip()
        fighter2 = fighters[1].text
        fighter2 = fighter2.replace('  ', '').replace('\n', '').strip()
        fighter1_url = fighters[0].find('a')['href']
        fighter2_url = fighters[1].find('a')['href']
        next_event_data = next_event_data.append({'fighter1' :fighter1, 'fighter2:' : fighter2, 'fighter1_url': fighter1_url, 'fighter2_url':fighter2_url, 'fight#' : n+1}, ignore_index = True)
        n += 1

    return next_event_data



In [3]:
next_events = get_next_events2('https://www.ufc.com/events')
next_events

Unnamed: 0,event_title,event_url,event_date
0,Prochazka vs Pereira,https://www.ufc.com/event/ufc-295,10:00 PM EST
1,Allen vs Craig,https://www.ufc.com/event/ufc-fight-night-nove...,5:00 PM EST
2,Dariush vs Tsarukyan,https://www.ufc.com/event/ufc-fight-night-dece...,7:00 PM EST


In [4]:
event_url =  next_events['event_url'][0]

In [5]:
next_event_title = next_events['event_title'][0]

In [6]:
next_event = get_event_fights(event_url)

In [7]:
page = requests.get(event_url)
soup = BeautifulSoup(page.content, 'html.parser')
h = soup.find_all('div', class_='c-listing-fight')

data_fmid = []
for i in h:
    data_fmid.append(i['data-fmid'])

next_event['fight_number'] = data_fmid[:len(next_event)]
next_event['matchup_url'] = event_url +'#' + next_event['fight_number'].astype(str)

next_event

Unnamed: 0,fighter1,fighter2,weightclass,fighter1_odds,fighter2_odds,fight_number,matchup_url
0,Jiří Procházka,Alex Pereira,Light Heavyweight Title Bout,130.0,-155.0,10955,https://www.ufc.com/event/ufc-295#10955
1,Sergei Pavlovich,Tom Aspinall,Heavyweight Interim Title Bout,,,11012,https://www.ufc.com/event/ufc-295#11012
2,Jessica Andrade,Mackenzie Dern,Women's Strawweight Bout,,,10902,https://www.ufc.com/event/ufc-295#10902
3,Matt Frevola,Benoît Saint-Denis,Lightweight Bout,170.0,-205.0,10971,https://www.ufc.com/event/ufc-295#10971
4,Diego Lopes,Pat Sabatini,Featherweight Bout,114.0,-135.0,10970,https://www.ufc.com/event/ufc-295#10970
5,Steve Erceg,Alessandro Costa,Flyweight Bout,,,10905,https://www.ufc.com/event/ufc-295#10905
6,Tabatha Ricci,Loopy Godinez,Women's Strawweight Bout,136.0,-162.0,10972,https://www.ufc.com/event/ufc-295#10972
7,Mateusz Rębecki,Nurullo Aliev,Lightweight Bout,-185.0,154.0,11010,https://www.ufc.com/event/ufc-295#11010
8,Nazim Sadykhov,Viacheslav Borshchev,Lightweight Bout,-166.0,140.0,11009,https://www.ufc.com/event/ufc-295#11009
9,Jared Gordon,Mark Madsen,Lightweight Bout,-175.0,145.0,10953,https://www.ufc.com/event/ufc-295#10953


In [8]:
def find_all_iframe_sources(matchup_url):
    try:
        driver = webdriver.Chrome('/Users/travisroyce/Library/CloudStorage/OneDrive-Personal/Data Science/Personal_Projects/Sports/UFC_Prediction_V2/V2_Newer_Notebooks/chromedriver')
        driver.get(matchup_url)
        time.sleep(3)
        # get innerhtml
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        pretty = soup.prettify()
        # find all links within page
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # find all iframe src
        iframe = soup.find_all('iframe')
        # list all links
        iframe_text = str(iframe)
        # separate links
        iframe_text = iframe_text.split('src="')
        iframe_text = iframe_text[1:]
        iframe_text = [i.split('"')[0] for i in iframe_text]
        # only keep links that contain matchup
        iframe_text = [i for i in iframe_text if 'matchup' in i]
        # only keep top link
        iframe_text = iframe_text[0]
        
        return iframe_text
    except:
        return 'error'


In [9]:
find_all_iframe_sources(next_event['matchup_url'][1])

'/matchup/1164/11012/pre'

Add iframe Sources to next_event. This can take a few minutes. 

In [10]:
# add iframe sources to df  with itterrows and apply
next_event['iframe_src'] = next_event.apply(lambda x: find_all_iframe_sources(x['matchup_url']), axis=1)

In [11]:
next_event

Unnamed: 0,fighter1,fighter2,weightclass,fighter1_odds,fighter2_odds,fight_number,matchup_url,iframe_src
0,Jiří Procházka,Alex Pereira,Light Heavyweight Title Bout,130.0,-155.0,10955,https://www.ufc.com/event/ufc-295#10955,/matchup/1164/10955/pre
1,Sergei Pavlovich,Tom Aspinall,Heavyweight Interim Title Bout,,,11012,https://www.ufc.com/event/ufc-295#11012,/matchup/1164/11012/pre
2,Jessica Andrade,Mackenzie Dern,Women's Strawweight Bout,,,10902,https://www.ufc.com/event/ufc-295#10902,/matchup/1164/10902/pre
3,Matt Frevola,Benoît Saint-Denis,Lightweight Bout,170.0,-205.0,10971,https://www.ufc.com/event/ufc-295#10971,/matchup/1164/10971/pre
4,Diego Lopes,Pat Sabatini,Featherweight Bout,114.0,-135.0,10970,https://www.ufc.com/event/ufc-295#10970,/matchup/1164/10970/pre
5,Steve Erceg,Alessandro Costa,Flyweight Bout,,,10905,https://www.ufc.com/event/ufc-295#10905,error
6,Tabatha Ricci,Loopy Godinez,Women's Strawweight Bout,136.0,-162.0,10972,https://www.ufc.com/event/ufc-295#10972,/matchup/1164/10972/pre
7,Mateusz Rębecki,Nurullo Aliev,Lightweight Bout,-185.0,154.0,11010,https://www.ufc.com/event/ufc-295#11010,/matchup/1164/11010/pre
8,Nazim Sadykhov,Viacheslav Borshchev,Lightweight Bout,-166.0,140.0,11009,https://www.ufc.com/event/ufc-295#11009,/matchup/1164/11009/pre
9,Jared Gordon,Mark Madsen,Lightweight Bout,-175.0,145.0,10953,https://www.ufc.com/event/ufc-295#10953,/matchup/1164/10953/pre


In [12]:
next_event['full_iframe_src'] = 'https://www.ufc.com' + next_event['iframe_src']

In [13]:
# iframe_response = requests.get('https://www.ufc.com/matchup/1162/10862/pre')
# iframe_soup = BeautifulSoup(iframe_response.content, 'html.parser')
# red = iframe_soup.find_all('div', class_='c-stat-compare__group-1 red')
# red_text = [i.text for i in red]
# red_text

In [14]:
def get_iframe_src_data_2(iframe_src):
    iframe_response = requests.get(iframe_src)
    iframe_soup = BeautifulSoup(iframe_response.content, 'html.parser')
    # get all c-stat-compare__group-1 red
    red = iframe_soup.find_all('div', class_='c-stat-compare__group-1 red')
    # return all red texts
    red_text = [i.text for i in red]
    # assign to variables
    red_record = red_text[0]
    red_last_fight = red_text[1]
    red_country = red_text[2]
    red_height = red_text[3]
    red_weight = red_text[4]
    red_reach = red_text[5]
    red_legreach = red_text[6]
    red_win_by_ko_percent = red_text[7]
    red_win_by_sub_percent = red_text[8]
    red_win_by_dec_percent = red_text[9]
    red_avg_fight_time = red_text[10]
    red_knockdowns_per_15_min = red_text[11]
    # sig strikes
    red_sig_strikes_landed_per_min = red_text[12]
    red_sig_strikes_percent = red_text[13]
    red_sig_strikes_absorbed_per_min = red_text[14]
    red_sig_strikes_absorbed_percent = red_text[15]
    # grappling
    red_takedowns_landed_per_15_min = red_text[16]
    red_takedown_accuracy = red_text[17]
    red_takedown_defense = red_text[18]
    red_submissions_attempts_per_15_min = red_text[19]
    # odds
    red_moneyline = red_text[20]


    # make df for red
    red_df = pd.DataFrame({'red_record': red_record, 'red_last_fight': red_last_fight, 
                            'red_country': red_country, 'red_height': red_height, 'red_weight': red_weight, 
                            'red_reach': red_reach, 'red_legreach': red_legreach, 
                            'red_win_by_ko_percent': red_win_by_ko_percent, 
                            'red_win_by_sub_percent': red_win_by_sub_percent, 'red_win_by_dec_percent': red_win_by_dec_percent, 
                            'red_avg_fight_time': red_avg_fight_time, 'red_knockdowns_per_15_min': red_knockdowns_per_15_min, 
                            'red_sig_strikes_landed_per_min': red_sig_strikes_landed_per_min, 
                            'red_sig_strikes_percent': red_sig_strikes_percent, 
                            'red_sig_strikes_absorbed_per_min': red_sig_strikes_absorbed_per_min, 
                            'red_sig_strikes_absorbed_percent': red_sig_strikes_absorbed_percent, 
                            'red_takedowns_landed_per_15_min': red_takedowns_landed_per_15_min, 
                            'red_takedown_accuracy': red_takedown_accuracy, 'red_takedown_defense': red_takedown_defense, 
                            'red_submissions_attempts_per_15_min': red_submissions_attempts_per_15_min, 
                            'red_moneyline': red_moneyline}, index=[0])

    # clean all values in red_df, removing all \n 
    red_df = red_df.applymap(lambda x: x.replace('\n', ''))
    # strip all values in red_df
    red_df = red_df.applymap(lambda x: x.strip())

    # get all c-stat-compare__group-1 blue
    blue = iframe_soup.find_all('div', class_='c-stat-compare__group-2 blue')
    # return all blue texts
    blue_text = [i.text for i in blue]
    # assign to variables
    blue_record = blue_text[0]
    blue_last_fight = blue_text[1]
    blue_country = blue_text[2]
    blue_height = blue_text[3]
    blue_weight = blue_text[4]
    blue_reach = blue_text[5]
    blue_legreach = blue_text[6]
    blue_win_by_ko_percent = blue_text[7]
    blue_win_by_sub_percent = blue_text[8]
    blue_win_by_dec_percent = blue_text[9]
    blue_avg_fight_time = blue_text[10]
    blue_knockdowns_per_15_min = blue_text[11]
    # sig strikes
    blue_sig_strikes_landed_per_min = blue_text[12]
    blue_sig_strikes_percent = blue_text[13]
    blue_sig_strikes_absorbed_per_min = blue_text[14]
    blue_sig_strikes_absorbed_percent = blue_text[15]
    # grappling
    blue_takedowns_landed_per_15_min = blue_text[16]
    blue_takedown_accuracy = blue_text[17]
    blue_takedown_defense = blue_text[18]
    blue_submissions_attempts_per_15_min = blue_text[19]
    # odds
    blue_moneyline = blue_text[20]


    # make df for blue
    blue_df = pd.DataFrame({'blue_record': blue_record, 'blue_last_fight': blue_last_fight,
                            'blue_country': blue_country, 'blue_height': blue_height, 'blue_weight': blue_weight,
                            'blue_reach': blue_reach, 'blue_legreach': blue_legreach,
                            'blue_win_by_ko_percent': blue_win_by_ko_percent,
                            'blue_win_by_sub_percent': blue_win_by_sub_percent, 'blue_win_by_dec_percent': blue_win_by_dec_percent,
                            'blue_avg_fight_time': blue_avg_fight_time, 'blue_knockdowns_per_15_min': blue_knockdowns_per_15_min,
                            'blue_sig_strikes_landed_per_min': blue_sig_strikes_landed_per_min,
                            'blue_sig_strikes_percent': blue_sig_strikes_percent,
                            'blue_sig_strikes_absorbed_per_min': blue_sig_strikes_absorbed_per_min,
                            'blue_sig_strikes_absorbed_percent': blue_sig_strikes_absorbed_percent,
                            'blue_takedowns_landed_per_15_min': blue_takedowns_landed_per_15_min,
                            'blue_takedown_accuracy': blue_takedown_accuracy, 'blue_takedown_defense': blue_takedown_defense,
                            'blue_submissions_attempts_per_15_min': blue_submissions_attempts_per_15_min,
                            'blue_moneyline': blue_moneyline}, index=[0])

    # clean all values in blue_df, removing all \n
    blue_df = blue_df.applymap(lambda x: x.replace('\n', ''))
    # strip all values in blue_df
    blue_df = blue_df.applymap(lambda x: x.strip())


    # append blue_df to red_df by axis=1
    dfs = pd.concat([red_df, blue_df], axis=1)

    # return df
    return dfs





In [15]:
next_event

Unnamed: 0,fighter1,fighter2,weightclass,fighter1_odds,fighter2_odds,fight_number,matchup_url,iframe_src,full_iframe_src
0,Jiří Procházka,Alex Pereira,Light Heavyweight Title Bout,130.0,-155.0,10955,https://www.ufc.com/event/ufc-295#10955,/matchup/1164/10955/pre,https://www.ufc.com/matchup/1164/10955/pre
1,Sergei Pavlovich,Tom Aspinall,Heavyweight Interim Title Bout,,,11012,https://www.ufc.com/event/ufc-295#11012,/matchup/1164/11012/pre,https://www.ufc.com/matchup/1164/11012/pre
2,Jessica Andrade,Mackenzie Dern,Women's Strawweight Bout,,,10902,https://www.ufc.com/event/ufc-295#10902,/matchup/1164/10902/pre,https://www.ufc.com/matchup/1164/10902/pre
3,Matt Frevola,Benoît Saint-Denis,Lightweight Bout,170.0,-205.0,10971,https://www.ufc.com/event/ufc-295#10971,/matchup/1164/10971/pre,https://www.ufc.com/matchup/1164/10971/pre
4,Diego Lopes,Pat Sabatini,Featherweight Bout,114.0,-135.0,10970,https://www.ufc.com/event/ufc-295#10970,/matchup/1164/10970/pre,https://www.ufc.com/matchup/1164/10970/pre
5,Steve Erceg,Alessandro Costa,Flyweight Bout,,,10905,https://www.ufc.com/event/ufc-295#10905,error,https://www.ufc.comerror
6,Tabatha Ricci,Loopy Godinez,Women's Strawweight Bout,136.0,-162.0,10972,https://www.ufc.com/event/ufc-295#10972,/matchup/1164/10972/pre,https://www.ufc.com/matchup/1164/10972/pre
7,Mateusz Rębecki,Nurullo Aliev,Lightweight Bout,-185.0,154.0,11010,https://www.ufc.com/event/ufc-295#11010,/matchup/1164/11010/pre,https://www.ufc.com/matchup/1164/11010/pre
8,Nazim Sadykhov,Viacheslav Borshchev,Lightweight Bout,-166.0,140.0,11009,https://www.ufc.com/event/ufc-295#11009,/matchup/1164/11009/pre,https://www.ufc.com/matchup/1164/11009/pre
9,Jared Gordon,Mark Madsen,Lightweight Bout,-175.0,145.0,10953,https://www.ufc.com/event/ufc-295#10953,/matchup/1164/10953/pre,https://www.ufc.com/matchup/1164/10953/pre


In [16]:
rand_iframe_src = next_event['full_iframe_src'][0]
rand_iframe_src

'https://www.ufc.com/matchup/1164/10955/pre'

In [17]:
# get one iframe src data
get_iframe_src_data_2(rand_iframe_src)

Unnamed: 0,red_record,red_last_fight,red_country,red_height,red_weight,red_reach,red_legreach,red_win_by_ko_percent,red_win_by_sub_percent,red_win_by_dec_percent,...,blue_knockdowns_per_15_min,blue_sig_strikes_landed_per_min,blue_sig_strikes_percent,blue_sig_strikes_absorbed_per_min,blue_sig_strikes_absorbed_percent,blue_takedowns_landed_per_15_min,blue_takedown_accuracy,blue_takedown_defense,blue_submissions_attempts_per_15_min,blue_moneyline
0,29-3-1,Win,Czechia,"6' 3""",205.00 LB,80 in,45 in,86%,10%,3%,...,0.43,5.11,62%,3.7,51%,0.22,100%,70%,0.22,-155


In [18]:
next_event

Unnamed: 0,fighter1,fighter2,weightclass,fighter1_odds,fighter2_odds,fight_number,matchup_url,iframe_src,full_iframe_src
0,Jiří Procházka,Alex Pereira,Light Heavyweight Title Bout,130.0,-155.0,10955,https://www.ufc.com/event/ufc-295#10955,/matchup/1164/10955/pre,https://www.ufc.com/matchup/1164/10955/pre
1,Sergei Pavlovich,Tom Aspinall,Heavyweight Interim Title Bout,,,11012,https://www.ufc.com/event/ufc-295#11012,/matchup/1164/11012/pre,https://www.ufc.com/matchup/1164/11012/pre
2,Jessica Andrade,Mackenzie Dern,Women's Strawweight Bout,,,10902,https://www.ufc.com/event/ufc-295#10902,/matchup/1164/10902/pre,https://www.ufc.com/matchup/1164/10902/pre
3,Matt Frevola,Benoît Saint-Denis,Lightweight Bout,170.0,-205.0,10971,https://www.ufc.com/event/ufc-295#10971,/matchup/1164/10971/pre,https://www.ufc.com/matchup/1164/10971/pre
4,Diego Lopes,Pat Sabatini,Featherweight Bout,114.0,-135.0,10970,https://www.ufc.com/event/ufc-295#10970,/matchup/1164/10970/pre,https://www.ufc.com/matchup/1164/10970/pre
5,Steve Erceg,Alessandro Costa,Flyweight Bout,,,10905,https://www.ufc.com/event/ufc-295#10905,error,https://www.ufc.comerror
6,Tabatha Ricci,Loopy Godinez,Women's Strawweight Bout,136.0,-162.0,10972,https://www.ufc.com/event/ufc-295#10972,/matchup/1164/10972/pre,https://www.ufc.com/matchup/1164/10972/pre
7,Mateusz Rębecki,Nurullo Aliev,Lightweight Bout,-185.0,154.0,11010,https://www.ufc.com/event/ufc-295#11010,/matchup/1164/11010/pre,https://www.ufc.com/matchup/1164/11010/pre
8,Nazim Sadykhov,Viacheslav Borshchev,Lightweight Bout,-166.0,140.0,11009,https://www.ufc.com/event/ufc-295#11009,/matchup/1164/11009/pre,https://www.ufc.com/matchup/1164/11009/pre
9,Jared Gordon,Mark Madsen,Lightweight Bout,-175.0,145.0,10953,https://www.ufc.com/event/ufc-295#10953,/matchup/1164/10953/pre,https://www.ufc.com/matchup/1164/10953/pre


In [19]:
# get iframe src data for all events

event_data = []

for i in range(len(next_event['full_iframe_src'])):
    try:
        event_data.append(get_iframe_src_data_2(next_event['full_iframe_src'][i]))
    except:
        event_data.append(pd.DataFrame([{'error': 'error'}]))

# make df from event_data
event_data_df = pd.concat(event_data, axis=0)

event_data_df

Unnamed: 0,red_record,red_last_fight,red_country,red_height,red_weight,red_reach,red_legreach,red_win_by_ko_percent,red_win_by_sub_percent,red_win_by_dec_percent,...,blue_sig_strikes_landed_per_min,blue_sig_strikes_percent,blue_sig_strikes_absorbed_per_min,blue_sig_strikes_absorbed_percent,blue_takedowns_landed_per_15_min,blue_takedown_accuracy,blue_takedown_defense,blue_submissions_attempts_per_15_min,blue_moneyline,error
0,29-3-1,Win,Czechia,"6' 3""",205.00 LB,80 in,45 in,86%,10%,3%,...,5.11,62%,3.7,51%,0.22,100%,70%,0.22,-155.0,
0,18-1-0,Win,Russia,"6' 3""",260.50 LB,84 in,45 in,83%,0%,17%,...,7.65,66%,2.9,65%,3.7,100%,100%,1.85,,
0,24-12-0,Loss,Brazil,"5' 1""",132.00 LB,62 in,35 in,38%,33%,29%,...,3.36,40%,4.09,51%,0.81,15%,40%,1.51,,
0,11-3-1,Win,United States,"5' 9""",155.00 LB,71 in,40 in,36%,27%,36%,...,5.59,52%,5.27,44%,4.72,36%,80%,1.45,-205.0,
0,22-6-0,Win,Brazil,"5' 11""",161.00 LB,72.5 in,41 in,36%,55%,9%,...,1.95,59%,1.35,51%,3.83,45%,42%,1.91,-135.0,
0,,,,,,,,,,,...,,,,,,,,,,error
0,9-1-0,Win,Brazil,"5' 1""",121.00 LB,61 in,36 in,11%,33%,56%,...,4.03,50%,3.52,62%,3.82,47%,82%,1.27,-162.0,
0,18-1-0,Win,Poland,"5' 7""",171.20 LB,66 in,36.5 in,50%,33%,17%,...,2.5,51%,0.46,59%,3.06,44%,0%,0.0,154.0,
0,9-1-0,Win,Russia,"5' 10""",171.00 LB,69 in,41 in,67%,22%,11%,...,4.63,57%,3.44,56%,0.0,0%,35%,0.0,140.0,
0,"19-6-0, 1NC",No Contest,United States,"5' 9""",173.20 LB,68 in,38 in,33%,11%,56%,...,3.24,47%,3.67,53%,3.09,66%,62%,0.0,145.0,


In [20]:
# reindex both
next_event = next_event.reset_index(drop=True)
event_data_df = event_data_df.reset_index(drop=True)

# Append event_data_df to next_event
next_event = pd.concat([next_event, event_data_df], axis=1)
next_event

Unnamed: 0,fighter1,fighter2,weightclass,fighter1_odds,fighter2_odds,fight_number,matchup_url,iframe_src,full_iframe_src,red_record,...,blue_sig_strikes_landed_per_min,blue_sig_strikes_percent,blue_sig_strikes_absorbed_per_min,blue_sig_strikes_absorbed_percent,blue_takedowns_landed_per_15_min,blue_takedown_accuracy,blue_takedown_defense,blue_submissions_attempts_per_15_min,blue_moneyline,error
0,Jiří Procházka,Alex Pereira,Light Heavyweight Title Bout,130.0,-155.0,10955,https://www.ufc.com/event/ufc-295#10955,/matchup/1164/10955/pre,https://www.ufc.com/matchup/1164/10955/pre,29-3-1,...,5.11,62%,3.7,51%,0.22,100%,70%,0.22,-155.0,
1,Sergei Pavlovich,Tom Aspinall,Heavyweight Interim Title Bout,,,11012,https://www.ufc.com/event/ufc-295#11012,/matchup/1164/11012/pre,https://www.ufc.com/matchup/1164/11012/pre,18-1-0,...,7.65,66%,2.9,65%,3.7,100%,100%,1.85,,
2,Jessica Andrade,Mackenzie Dern,Women's Strawweight Bout,,,10902,https://www.ufc.com/event/ufc-295#10902,/matchup/1164/10902/pre,https://www.ufc.com/matchup/1164/10902/pre,24-12-0,...,3.36,40%,4.09,51%,0.81,15%,40%,1.51,,
3,Matt Frevola,Benoît Saint-Denis,Lightweight Bout,170.0,-205.0,10971,https://www.ufc.com/event/ufc-295#10971,/matchup/1164/10971/pre,https://www.ufc.com/matchup/1164/10971/pre,11-3-1,...,5.59,52%,5.27,44%,4.72,36%,80%,1.45,-205.0,
4,Diego Lopes,Pat Sabatini,Featherweight Bout,114.0,-135.0,10970,https://www.ufc.com/event/ufc-295#10970,/matchup/1164/10970/pre,https://www.ufc.com/matchup/1164/10970/pre,22-6-0,...,1.95,59%,1.35,51%,3.83,45%,42%,1.91,-135.0,
5,Steve Erceg,Alessandro Costa,Flyweight Bout,,,10905,https://www.ufc.com/event/ufc-295#10905,error,https://www.ufc.comerror,,...,,,,,,,,,,error
6,Tabatha Ricci,Loopy Godinez,Women's Strawweight Bout,136.0,-162.0,10972,https://www.ufc.com/event/ufc-295#10972,/matchup/1164/10972/pre,https://www.ufc.com/matchup/1164/10972/pre,9-1-0,...,4.03,50%,3.52,62%,3.82,47%,82%,1.27,-162.0,
7,Mateusz Rębecki,Nurullo Aliev,Lightweight Bout,-185.0,154.0,11010,https://www.ufc.com/event/ufc-295#11010,/matchup/1164/11010/pre,https://www.ufc.com/matchup/1164/11010/pre,18-1-0,...,2.5,51%,0.46,59%,3.06,44%,0%,0.0,154.0,
8,Nazim Sadykhov,Viacheslav Borshchev,Lightweight Bout,-166.0,140.0,11009,https://www.ufc.com/event/ufc-295#11009,/matchup/1164/11009/pre,https://www.ufc.com/matchup/1164/11009/pre,9-1-0,...,4.63,57%,3.44,56%,0.0,0%,35%,0.0,140.0,
9,Jared Gordon,Mark Madsen,Lightweight Bout,-175.0,145.0,10953,https://www.ufc.com/event/ufc-295#10953,/matchup/1164/10953/pre,https://www.ufc.com/matchup/1164/10953/pre,"19-6-0, 1NC",...,3.24,47%,3.67,53%,3.09,66%,62%,0.0,145.0,


In [21]:
# save to csv
next_event.to_csv('data/final/next_fights/'+ next_event_title +'_.csv', index=False)