# Given the data, can we predict whether an individual has the potential to win an Olympic medal?

In [1]:
import time
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Auxiliary Functions

# Auxiliary function to handle overlapping cookie elements
def handle_cookie_button(web_driver):
    try:
        # AFTER ONE FIRST MANUAL CLICK AT THE BEGINNING
        
        cookie_element = web_driver.find_element_by_css_selector("button[id='onetrust-accept-btn-handler']")
        web_driver.execute_script('arguments[0].click()', cookie_element)

        WebDriverWait(web_driver, 10).until(
            EC.frameToBeAvailableAndSwitchToIt(By.cssSelector("button[id='onetrust-accept-btn-handler']")))
        
        WebDriverWait(web_driver, 10).until(
            EC.elementToBeClickable(By.cssSelector("button[id='onetrust-accept-btn-handler']")))

        action = webdriver.ActionChains(driver)
        cookie_element = driver.find_element_by_xpath("//*[@id='onetrust-accept-btn-handler']")
        action.click(cookie_element).perform()

        web_driver.switch_to.default_content()

    except:
        print("NO COOKIE BUTTON")


# Auxiliary function to handle hidden dropdowns
def click_dropdown(web_driver, css_selector):
    handle_cookie_button(web_driver)
    web_driver.find_element_by_css_selector(css_selector).click()


# Auxiliary function to handle sex,height,weight, and team lists
def get_other_measures(web_driver):
    try:
        sex = web_driver.find_element_by_xpath("//div[contains(@data-cy, 'athlete-sex')].get_attribute('innerHTML')")
        height = web_driver.find_element_by_xpath(
            "//div[contains(@data-cy, 'athlete-height')].get_attribute('innerHTML')")
        weight = web_driver.find_element_by_xpath(
            "//div[contains(@data-cy, 'athlete-weight')].get_attribute('innerHTML')")
        team = web_driver.find_element_by_xpath("//div[contains(@data-cy, 'athlete-team')].get_attribute('innerHTML')")

        sexes.append(sex)
        heights.append(height)
        weights.append(weight)
        teams.append(team)
    except:
        sexes.append(np.nan)
        heights.append(np.nan)
        weights.append(np.nan)
        teams.append(np.nan)

In [3]:
# Preprocessing

driver = webdriver.Chrome(r"D:\Selenium Chrome Webdriver\chromedriver.exe")
url = 'https://olympics.com/en/olympic-games/athens-1896/results/athletics'
driver.maximize_window()
driver.get(url)

time.sleep(3.5)

action = webdriver.ActionChains(driver)

try:
    cookie_element = driver.find_element_by_xpath("//*[@id='onetrust-accept-btn-handler']")
    action.click(cookie_element).perform()
except:
    print("NO COOKIE BUTTON")

time.sleep(3)

game_select = driver.find_element_by_css_selector("button[data-cy='game-select']")
game_select.click()

buttons = driver.find_elements_by_xpath(".//section[contains(@class,'link-item')]")

games = []
seasons = []
for btn in buttons:
    game = btn.find_element_by_xpath("./p").text
    season = btn.find_element_by_xpath("./span").get_attribute('innerHTML')

    games.append(game)
    seasons.append(season)

seasons = seasons[::-1]
games = games[::-1]

years = [game[-4:] for game in games]
cities = [game[:-5] for game in games]

driver.quit()

In [4]:
sports = ['Alpine Skiing','Archery','Athletics',
                    'Badminton','Baseball','Basketball','Basque Pelota','Beach Volleyball','Biathlon','Bobsleigh','Boxing',
                    'Canoe Slalom','Canoe Sprint','Cricket','Croquet','Cross Country Skiing','Curling','Cycling BMX',
                    'Cycling Mountain Bike','Cycling Road','Cycling Track', 
                    'Diving',
                    'Equestrian Dressage','Equestrian Eventing','Equestrian Jumping','Equestrian Vaulting'
                    'Fencing','Figure skating','Football','Freestyle Skiing', 
                    'Golf','Gymnastics Artistic','Gymnastics Rhythmic',
                    'Handball','Hockey',
                    'Ice Hockey',
                    'Jeu De Paume','Judo',
                    'Lacrosse','Luge',
                    'Marathon Swimming','Military Patrol','Modern Pentathlon', 
                    'Nordic Combined',
                    'Polo'
                    'Rackets','Roque','Rowing','Rugby',
                    'Sailing','Shooting','Short Track Speed Skating','Skeleton','Ski Jumping','Snowboard', 'Softball'
                    'Speed Skating','Swimming','Synchronized Swimming','Short Track Speed Skating' 
                    'Table Tennis','Taekwondo','Tennis','Trampoline','Triathlon','Tug Of War'
                    'Volleyball',
                    'Water Motorsports','Water Polo','Weightlifting','Wrestling']

In [5]:
print(years)

['1896', '1900', '1904', '1908', '1912', '1920', '1924', '1924', '1928', '1928', '1932', '1932', '1936', '1936', '1948', '1948', '1952', '1952', '1956', '1956', '1960', '1960', '1964', '1964', '1968', '1968', '1972', '1972', '1976', '1976', '1980', '1980', '1984', '1984', '1988', '1988', '1992', '1992', '1994', '1996', '1998', '2000', '2002', '2004', '2006', '2008', '2010', '2012', '2014', '2016', '2018']


In [6]:
print(cities)

['Athens', 'Paris', 'St. Louis', 'London', 'Stockholm', 'Antwerp', 'Paris', 'Chamonix', 'Amsterdam', 'St. Moritz', 'Los Angeles', 'Lake Placid', 'Berlin', 'Garmisch-Partenkirchen', 'London', 'St. Moritz', 'Helsinki', 'Oslo', 'Melbourne', "Cortina d'Ampezzo", 'Rome', 'Squaw Valley', 'Tokyo', 'Innsbruck', 'Mexico City', 'Grenoble', 'Munich', 'Sapporo', 'Montreal', 'Innsbruck', 'Moscow', 'Lake Placid', 'Los Angeles', 'Sarajevo', 'Seoul', 'Calgary', 'Barcelona', 'Albertville', 'Lillehammer', 'Atlanta', 'Nagano', 'Sydney', 'Salt Lake City', 'Athens', 'Turin', 'Beijing', 'Vancouver', 'London', 'Sochi', 'Rio', 'PyeongChang']


In [7]:
print(seasons)

['Summer', 'Summer', 'Summer', 'Summer', 'Summer', 'Summer', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter', 'Summer', 'Winter']


In [8]:
print(games)

['Athens 1896', 'Paris 1900', 'St. Louis 1904', 'London 1908', 'Stockholm 1912', 'Antwerp 1920', 'Paris 1924', 'Chamonix 1924', 'Amsterdam 1928', 'St. Moritz 1928', 'Los Angeles 1932', 'Lake Placid 1932', 'Berlin 1936', 'Garmisch-Partenkirchen 1936', 'London 1948', 'St. Moritz 1948', 'Helsinki 1952', 'Oslo 1952', 'Melbourne 1956', "Cortina d'Ampezzo 1956", 'Rome 1960', 'Squaw Valley 1960', 'Tokyo 1964', 'Innsbruck 1964', 'Mexico City 1968', 'Grenoble 1968', 'Munich 1972', 'Sapporo 1972', 'Montreal 1976', 'Innsbruck 1976', 'Moscow 1980', 'Lake Placid 1980', 'Los Angeles 1984', 'Sarajevo 1984', 'Seoul 1988', 'Calgary 1988', 'Barcelona 1992', 'Albertville 1992', 'Lillehammer 1994', 'Atlanta 1996', 'Nagano 1998', 'Sydney 2000', 'Salt Lake City 2002', 'Athens 2004', 'Turin 2006', 'Beijing 2008', 'Vancouver 2010', 'London 2012', 'Sochi 2014', 'Rio 2016', 'PyeongChang 2018']


In [9]:
# ======================================================================================================================= #

In [10]:
# The code below is so heavy beacuse the website is a dyanamic one.
# It contains a lot of hidden dropdowns and nested html pages.
# almost 90% of the content cannot be found in a fixed location on the DOM. It changes according to the user actions.
# Therefore, a large number of extreme cases have been taken into account.

In [11]:
driver = webdriver.Chrome(r"D:\Selenium Chrome Webdriver\chromedriver.exe")
url = 'https://olympics.com/en/olympic-games/athens-1896/results/athletics'
driver.maximize_window()
driver.get(url)
time.sleep(4)
handle_cookie_button(driver)

# Auxiliary lists
names = []
sexes = []
ages = []
heights = []
weights = []
teams = []
NOCs = []
events = []
medals = []

# ====== Games ======
game_select = driver.find_element_by_css_selector("button[data-cy='game-select']")
game_select.click()
time.sleep(4)
game_div = driver.find_element_by_xpath("//div[contains(@data-cy,'game-list-wrapper')]")
games_buttons = game_div.find_elements_by_xpath("./button")
game_select.click()
time.sleep(4)

expand_flag = 2
for game in range(1, len(games_buttons) + 1):
    expand_flag = 0
    body_element = driver.find_element_by_xpath("//html/body")
    body_element.send_keys(Keys.CONTROL + Keys.HOME)
    game_select.click()
    time.sleep(4)
    name = cities[game - 1]
    year = years[game - 1]
    game_str = f"button[data-cy='{name.lower()}-{year}']"
    handled_game_str = game_str.strip("\'")
    game_button = driver.find_element_by_css_selector(handled_game_str)
    action = webdriver.ActionChains(driver)
    action.move_to_element(game_button).click().perform()
    #game_button.send_keys(Keys.CONTROL + Keys.HOME)
    #game_button.click()
    time.sleep(5)
    body_element = driver.find_element_by_xpath("//html/body")
    body_element.send_keys(Keys.CONTROL + Keys.HOME)
    # game_str = f"button[data-cy='{name.lower()}-{year}']"
    # handled_game_str = game_str.strip("\'")
    # game_button = driver.find_element_by_css_selector(handled_game_str)
    # game_button.send_keys(Keys.CONTROL + Keys.HOME)
    # game_button.click()
    # time.sleep(4)

    # ====== Disciplines ======
    discipline_select = driver.find_element_by_css_selector("button[data-cy='discipline-select']")
    # Scroll up to avoid another hidden self executed scripts which interrupt our code.. #
    #discipline_select.send_keys(Keys.CONTROL + Keys.HOME)
    time.sleep(3)
    discipline_select.click()
    time.sleep(4)
    discipline_section = driver.find_element_by_xpath("//section[contains(@data-cy,'disciplines-list')]")
    discipline_buttons = discipline_section.find_elements_by_xpath("./button")
    discipline_select.click()
    time.sleep(4)
    for discipline in range(1, len(discipline_buttons) + 1):
        body_element = driver.find_element_by_xpath("//html/body")
        body_element.send_keys(Keys.CONTROL + Keys.HOME)
        discipline_select = driver.find_element_by_css_selector("button[data-cy='discipline-select']")
        #discipline_select.send_keys(Keys.CONTROL + Keys.HOME)
        time.sleep(3)
        discipline_select.click()
        time.sleep(5)
        #body_element = driver.find_element_by_xpath("//html/body")
        #body_element.send_keys(Keys.CONTROL + Keys.HOME)
        discipline_button = driver.find_element_by_xpath(
            f"(//section[contains(@data-cy,'disciplines-list')]/button)[{discipline}]")
        #discipline_button.send_keys(Keys.CONTROL + Keys.HOME)
        #discipline_button.click()
        action = webdriver.ActionChains(driver)
        action.move_to_element(discipline_button).click().perform()
        # Go Button
        go_button = driver.find_element_by_xpath("//a[@data-cy='go-link']")
        go_button.click()
        time.sleep(3)
        if expand_flag == 0:
            expand_btn = driver.find_element_by_xpath("//button[@data-cy='collapse-button']")
            expand_btn.click()
        # ====== sports results page1 ======
        events_titles = driver.find_elements_by_xpath("//h2[contains(@class,'Title')]")

        #Result_row_names = []
        #Result_row_urls = []
        # expand_btn = driver.find_element_by_xpath("//button[@data-cy='collapse-button']")
        # expand_btn.click()
        # events loop
        expand_flag = 0
        for event in range(1, len(events_titles) + 1):
            if expand_flag == 0:
                expand_btn = driver.find_element_by_xpath("//button[@data-cy='collapse-button']")
                expand_btn.click()
            expand_flag = 1
            time.sleep(3)
            event_title = driver.find_element_by_xpath(f"(//h2[contains(@class,'Title')])[{event}]").text
            single_section = driver.find_element_by_xpath(f"(//section[contains(@class, 'event-row')])[{event}]")
            # name = single_section.text
            # names.append(name)
            # //div[contains(@class, 'Inline')]/a
            # a = single_section.find_element_by_xpath(".//a[contains(text(), 'See full results')]")
            a = single_section.find_element_by_xpath(".//div[contains(@class, 'Inline')]/a")
            a.click()
            time.sleep(4)
            handle_cookie_button(driver)

            # ====== sports results page2 rows ======
            # time.sleep(5)

            Result_rows_medal_gold = driver.find_elements_by_xpath(
                "//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='G']]")
            gold_medals_count = len(Result_rows_medal_gold)
            for gold_row in range(1, gold_medals_count + 1):
                print("gold_loop")
                Result_row_medal_gold = driver.find_element_by_xpath(
                    f"(//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='G']])[{gold_row}]")
                Result_row_country = Result_row_medal_gold.find_element_by_xpath(".//span[contains(@class, 'Name')]")
                expand_Result_row = Result_row_medal_gold.find_elements_by_xpath(
                    ".//span//i[contains(@class, 'icon-caret-down')]")
                if len(expand_Result_row) != 0:
                    expand_Result_row[0].click()
                    time.sleep(4)
                    print(f"49")
                    team_Result_row_member_button = Result_row_medal_gold.find_elements_by_xpath(
                        "//a[contains(@data-cy, 'team-member')]")
                    for gold_team_member in range(1, len(team_Result_row_member_button) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Gold')
                        member = driver.find_elements_by_xpath(f"(//a[contains(@data-cy, 'team-member')])[{gold_team_member}]")
                        member.click()
                        handle_cookie_button(driver)
                        time.sleep(4)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:

                        ## save the birth date
                        # else:
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

                else:  # no expand button
                    Result_row_names = Result_row_medal_gold.find_elements_by_xpath(".//h3[contains(@class, 'AthleteName')]")
                    Result_row_urls = Result_row_medal_gold.find_elements_by_xpath(
                        ".//a[.//h3[contains(@class, 'AthleteName')]]")
                    # for name in Result_row_names:
                    for gold_athlete in range(1, len(Result_row_urls) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Gold')
                        Result_row_url = Result_row_medal_gold.find_element_by_xpath(
                            f"(.//a[.//h3[contains(@class, 'AthleteName')]])[{gold_athlete}]")
                        Result_row_url.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

            Result_rows_medal_silver = driver.find_elements_by_xpath(
                "//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='S']]")
            silver_medals_count = len(Result_rows_medal_silver)
            for silver_row in range(1, silver_medals_count + 1):
                print("silver_loop")
                Result_row_medal_silver = driver.find_element_by_xpath(
                    f"(//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='S']])[{silver_row}]")
                Result_row_country = Result_row_medal_silver.find_element_by_xpath(".//span[contains(@class, 'Name')]")
                expand_Result_row = Result_row_medal_silver.find_elements_by_xpath(
                    ".//span//i[contains(@class, 'icon-caret-down')]")
                if len(expand_Result_row) != 0:
                    print("93")
                    expand_Result_row[0].click()
                    time.sleep(4)
                    team_Result_row_member_button = Result_row_medal_silver.find_elements_by_xpath(
                        "//a[contains(@data-cy, 'team-member')]")
                    for silver_team_member in range(1, len(team_Result_row_member_button) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Silver')
                        member = driver.find_elements_by_xpath(
                            f"(//a[contains(@data-cy, 'team-member')])[{silver_team_member}]")
                        member.click()
                        handle_cookie_button(driver)
                        time.sleep(4)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else:
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

                else:
                    Result_row_names = Result_row_medal_silver.find_elements_by_xpath(".//h3[contains(@class, 'AthleteName')]")
                    Result_row_urls = Result_row_medal_silver.find_elements_by_xpath(
                        ".//a[.//h3[contains(@class, 'AthleteName')]]")
                    # for name in Result_row_names:
                    for silver_athlete in range(1, len(Result_row_urls) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Silver')
                        Result_row_url = Result_row_medal_silver.find_element_by_xpath(
                            f"(.//a[.//h3[contains(@class, 'AthleteName')]])[{silver_athlete}]")
                        Result_row_url.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

            Result_rows_medal_bronze = driver.find_elements_by_xpath(
                "//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='B']]")
            bronze_medals_count = len(Result_rows_medal_bronze)
            for bronze_row in range(1, bronze_medals_count + 1):
                print("bronze_loop")
                Result_row_medal_bronze = driver.find_element_by_xpath(
                    f"(//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')][text()='B']])[{bronze_row}]")
                Result_row_country = Result_row_medal_bronze.find_element_by_xpath(".//span[contains(@class, 'Name')]")
                expand_Result_row = Result_row_medal_bronze.find_elements_by_xpath(
                    ".//span//i[contains(@class, 'icon-caret-down')]")
                if len(expand_Result_row) != 0:
                    expand_Result_row[0].click()
                    time.sleep(4)
                    team_Result_row_member_button = Result_row_medal_bronze.find_elements_by_xpath(
                        "//a[contains(@data-cy, 'team-member')]")
                    for bronze_team_member in range(1, len(team_Result_row_member_button) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Bronze')
                        member = driver.find_elements_by_xpath(
                            f"(//a[contains(@data-cy, 'team-member')])[{bronze_team_member}]")
                        member.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else:
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

                else:
                    Result_row_names = Result_row_medal_bronze.find_elements_by_xpath(".//h3[contains(@class, 'AthleteName')]")
                    Result_row_urls = Result_row_medal_bronze.find_elements_by_xpath(
                        ".//a[.//h3[contains(@class, 'AthleteName')]]")
                    # for name in Result_row_names:
                    for bronze_athlete in range(1, len(Result_row_urls) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('Bronze')
                        Result_row_url = Result_row_medal_bronze.find_element_by_xpath(
                            f"(.//a[.//h3[contains(@class, 'AthleteName')]])[{bronze_athlete}]")
                        Result_row_url.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

            Result_rows_medal_none = driver.find_elements_by_xpath(
                "//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')]/span]")
            none_medals_count = len(Result_rows_medal_none)
            for no_medal_row in range(1, none_medals_count + 1):
                print("none_loop")
                Result_row_medal_none = driver.find_element_by_xpath(
                    f"(//li[contains(@data-row-id, 'event-result-row')][.//div[contains(@data-cy, 'medal')]/span])[{no_medal_row}]")
                Result_row_country = Result_row_medal_none.find_element_by_xpath(".//span[contains(@class, 'Name')]")
                expand_Result_row = Result_row_medal_none.find_elements_by_xpath(
                    ".//span//i[contains(@class, 'icon-caret-down')]")
                if len(expand_Result_row) != 0:
                    expand_Result_row[0].click()
                    time.sleep(4)
                    team_Result_row_member_button = Result_row_medal_none.find_elements_by_xpath(
                        "//a[contains(@data-cy, 'team-member')]")
                    for no_medal_team_member in range(1, len(team_Result_row_member_button) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('No Medal')
                        member = driver.find_elements_by_xpath(
                            f"(//a[contains(@data-cy, 'team-member')])[{no_medal_team_member}]")
                        member.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        # ages.append(athlete_date_of_birth[1])
                        # else:
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)

                else:
                    Result_row_names = Result_row_medal_none.find_elements_by_xpath(".//h3[contains(@class, 'AthleteName')]")
                    Result_row_urls = Result_row_medal_none.find_elements_by_xpath(
                        ".//a[.//h3[contains(@class, 'AthleteName')]]")
                    for no_medal_athlete in range(1, len(Result_row_urls) + 1):
                        events.append(event_title)
                        NOCs.append(Result_row_country.text)
                        medals.append('No Medal')
                        Result_row_url = Result_row_medal_none.find_element_by_xpath(
                            f"(.//a[.//h3[contains(@class, 'AthleteName')]])[{no_medal_athlete}]")
                        Result_row_url.click()
                        time.sleep(4)
                        handle_cookie_button(driver)
                        athlete_name = driver.find_element_by_xpath("//h1[contains(@class, 'athlete-info')]").get_attribute(
                            'innerHTML')
                        names.append(athlete_name)
                        get_other_measures(driver)
                        athlete_date_of_birth = driver.find_elements_by_xpath(
                            "//li[.//div[contains(@class, 'col-left')][contains(text(), 'Year of Birth')]]//div[contains(@class, 'col-right')]")
                        # if len(athlete_date_of_birth) != 0:
                        ## save the birth date
                        # else
                        ## add blank birth date
                        driver.back()
                        time.sleep(4)
            driver.back()
            time.sleep(4)
            # at the end, initialize expand button due to back
            expand_btn = driver.find_element_by_xpath("//button[@data-cy='collapse-button']")
            driver.execute_script('arguments[0].click()', expand_btn)
            expand_flag = 1

driver.quit()

In [12]:
##### DATA VALIDATION #####

In [13]:
df = pd.DataFrame(list(zip(names, sexes,ages,heights,weights,teams,nocs,games,years,seasons,cities,sports,events,medals)),
                    columns = ['Name', 'Sex','Age','Height','Weight','Team','NOC','Games','Year','Season','City','Sport',
                               'Event','Medal'])

In [14]:
df.head(5)
#df = df.sort_values(by=['Year']).reset_index()
#df.head(5)

Unnamed: 0,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,Fritz Richard Gustav Schuft,M,19.0,,,Germany,GER,1896 Summer,1896,Summer,Athina,Gymnastics,Gymnastics Men's Pommelled Horse,
1,Pierre Alexandre Tuffri,M,19.0,,,France,FRA,1896 Summer,1896,Summer,Athina,Athletics,Athletics Men's Triple Jump,Silver
2,Pierre Alexandre Tuffri,M,19.0,,,France,FRA,1896 Summer,1896,Summer,Athina,Athletics,Athletics Men's Long Jump,
3,Conrad Helmut Fritz Bcker,M,25.0,,,Germany,GER,1896 Summer,1896,Summer,Athina,Gymnastics,Gymnastics Men's Horse Vault,
4,Conrad Helmut Fritz Bcker,M,25.0,,,Germany,GER,1896 Summer,1896,Summer,Athina,Gymnastics,Gymnastics Men's Parallel Bars,


In [15]:
df.to_csv("D:\Olympic Dataset\data.csv")

Here, the crawling process is done.