## Installing Important Components

In [None]:
# install google chrome
!wget https://dl.google.com/linux/linux_signing_key.pub
!sudo apt-key add linux_signing_key.pub
!echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' >> /etc/apt/sources.list.d/google-chrome.list
!sudo apt-get -y update
!sudo apt-get install -y google-chrome-stable

In [None]:
#install chromedriver
!apt-get install -y qq unzip
!wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
!unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/

In [None]:
# install selenium
!sudo apt install -y python3-selenium
!pip install selenium==3.141.0 > /dev/null

In [None]:
!pip install webdriver_manager

## Importing Libraries

In [None]:
#Basic Imports
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import time
from PIL import Image

# Text Preprocessing
import re
import xml

#Web Scraping
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## Medal Standings

In [None]:
# Page that hosts the overall medal table
url = 'https://results.birmingham2022.com/#/general-medals'

# Command-line flags for a headless Chrome session with a fixed window size
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--no-sandbox',
    '--headless',
    '--disable-gpu',
    '--disable-dev-shm-usage',
    "--window-size=1920,1080",
):
    options.add_argument(chrome_flag)

# Start the automated Chrome instance and load the page
driver = webdriver.Chrome(options=options)
driver.get(url)

In [None]:
# Dismiss the cookie prompt by clicking its button through JavaScript.
button = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[1]/div/div/div[1]/div[5]/button[1]')
driver.execute_script("arguments[0].click();", button)
# The page is driven by JavaScript, so pause before looking for the table rows.
time.sleep(5)

for i in range(1, 72):
    try:
        # Button in the 7th cell of table row i; clicking it requests that
        # row's detailed results.
        elem = driver.find_element(by=By.XPATH, value=f'/html/body/div[1]/app-root/div[5]/app-general-medals/div/div/div[4]/div/table/tbody/tr[{i}]/td[7]/button')
        driver.execute_script("arguments[0].click();", elem)
        # Pause again so the page can finish updating after the click.
        time.sleep(5)
    except Exception:
        # Best effort: rows without such a button are skipped. Catching
        # Exception instead of using a bare `except:` means Ctrl-C /
        # "interrupt kernel" can still stop this long-running loop.
        pass

### After opening the webpage, click all the plus-symbol buttons to get the detailed results

In [None]:
# Save what the headless browser currently renders and show it inline
screenshot_path = 'result.png'
driver.save_screenshot(screenshot_path)
img = Image.open(screenshot_path)
img

In [None]:
# Text Preprocessing Utils
def remove_tags(text):
    """Return the text content of an XML/HTML fragment with all markup removed.

    Args:
        text: A well-formed markup string with a single root element.

    Returns:
        The concatenated text of the root element and all of its descendants.

    Raises:
        xml.etree.ElementTree.ParseError: If ``text`` is not well-formed.
    """
    # A plain `import xml` does not load the `xml.etree` subpackage, so the
    # parser is imported explicitly here instead of relying on another
    # library having loaded it first.
    from xml.etree import ElementTree

    return ''.join(ElementTree.fromstring(text).itertext())
def remove_newlines(text):
    """Return ``text`` with every newline character deleted."""
    return text.replace("\n", '')
def clean(text):
    """Strip markup and newlines from ``text``, then delete every space character."""
    without_markup = remove_tags(text)
    single_line = remove_newlines(without_markup)
    return single_line.replace(' ', '')

In [None]:
# One list per output column; the DataFrame-building cell reads these names.
countries = []
men_gold = []
men_silver = []
men_bronze = []
women_gold = []
women_silver = []
women_bronze = []
mixed_gold = []
mixed_silver = []
mixed_bronze = []
total_gold = []
total_silver = []
total_bronze = []
total = []

# (target list, row, column) inside the nested detail table of an even row.
detail_cells = [
    (men_gold, 1, 1), (men_silver, 1, 2), (men_bronze, 1, 3),
    (women_gold, 2, 1), (women_silver, 2, 2), (women_bronze, 2, 3),
    (mixed_gold, 3, 1), (mixed_silver, 3, 2), (mixed_bronze, 3, 3),
    (total_gold, 4, 1), (total_silver, 4, 2), (total_bronze, 4, 3),
    (total, 4, 4),
]

for i in range(1, 144):
    row_xpath = f'/html/body/div[1]/app-root/div[5]/app-general-medals/div/div/div[4]/div/table/tbody/tr[{i}]'
    if i % 2 == 0:
        # Even rows: read the 13 medal counts from the nested table.
        # All values are fetched before any list is touched, so a failure
        # partway through cannot leave the medal lists with different lengths.
        try:
            values = [
                driver.find_elements(By.XPATH, f'{row_xpath}/td/table/tbody/tr[{r}]/td[{c}]')[0].text
                for _, r, c in detail_cells
            ]
        except Exception:
            # Best effort: a row whose cells cannot be read is skipped whole
            # (indexing [0] fails when find_elements finds no match).
            continue
        for (column, _, _), value in zip(detail_cells, values):
            column.append(value)
    else:
        # Odd rows: extract the name from the row's visible text.
        for row_elem in driver.find_elements(By.XPATH, row_xpath):
            try:
                row_text = remove_newlines(row_elem.text)
            except Exception:
                # Best effort: skip an element whose text cannot be read.
                continue
            # Keep everything up to the last 'CGA' marker; rows without the
            # marker contribute no name.
            found = re.search('.*CGA', row_text)
            if found is None:
                continue
            without_digits = re.sub(r'\d+', '', found.group(0))
            countries.append(re.sub('CGA', '', without_digits))
# Only rows containing 'CGA' yield a name above, so the label for the totals
# row is added by hand.
countries.append('Total')

In [None]:
# Assemble the scraped column lists into one table, in output column order
df = pd.DataFrame({
    'Country Name': countries,
    'Men Gold': men_gold,
    'Men Silver': men_silver,
    'Men Bronze': men_bronze,
    'Women Gold': women_gold,
    'Women Silver': women_silver,
    'Women Bronze': women_bronze,
    'Mixed Gold': mixed_gold,
    'Mixed Silver': mixed_silver,
    'Mixed Bronze': mixed_bronze,
    'Total Gold': total_gold,
    'Total Silver': total_silver,
    'Total Bronze': total_bronze,
    'Total': total,
})

In [None]:
# Empty cells become 0, then the table is written out without the index column
df = df.replace(to_replace='', value=0)
medal_csv_path = 'Medal_Standings.csv'
df.to_csv(medal_csv_path, index=False)

In [None]:
# Preview the copy stored under ../input (a different path from the
# Medal_Standings.csv written by the cell above)
medal_standings_preview = pd.read_csv('../input/commonwealth-games-2022/Medal Standings.csv')
medal_standings_preview.head()

## Athlete Count

In [None]:
# Parse every table on the Wikipedia article and keep the one at position 5
wiki_tables = pd.read_html('https://en.wikipedia.org/wiki/2022_Commonwealth_Games')
athlete_count = wiki_tables[5]
athlete_count.head()

In [None]:
# Write the athlete table to disk without the index column
athlete_csv_path = 'athlete_count.csv'
athlete_count.to_csv(athlete_csv_path, index=False)

## Event Schedule

In [None]:
# Table at position 7 of the Wikipedia article, with blanks instead of NaN
schedule_tables = pd.read_html('https://en.wikipedia.org/wiki/2022_Commonwealth_Games')
df = schedule_tables[7].fillna('').reset_index(drop=True)

# Replace the parsed header with flat column names
df.columns = [
    'Event Category', 'Event Subtype',
    '28thThu', '29thFri', '30thSat', '31stSun',
    '1stMon', '2ndTue', '3rdWed', '4thThu',
    '5thFri', '6thSat', '7thSun', '8thMon',
    'Events',
]

# Remove the last row, twice over (i.e. the final two rows of the table)
for _ in range(2):
    df = df.drop(df.index[-1])

In [None]:
# Write the schedule to disk, keeping the index as the first column
schedule_csv_path = 'Schedule.csv'
df.to_csv(schedule_csv_path, index=True)

In [None]:
def color_EC_GC(val):
    """Return the CSS background-colour rule for one schedule cell.

    Args:
        val: The cell value. Non-string values are converted with ``str()``
            before matching, so numeric cells no longer raise
            ``AttributeError``.

    Returns:
        ``'background-color: <color>'`` where the colour is ``blue`` for
        ``'●'``, ``gold`` for an all-digit value, ``green`` for ``'OC'``,
        ``red`` for ``'CC'`` and ``white`` for anything else.
    """
    text = str(val)
    if text.isdigit():
        color = 'gold'
    else:
        # None of the fixed markers is all digits, so checking them after
        # the digit test gives the same result as checking them first.
        color = {'●': 'blue', 'OC': 'green', 'CC': 'red'}.get(text, 'white')
    return 'background-color: %s' % color
# Colour every cell of the schedule with color_EC_GC.
# Styler.applymap is deprecated in newer pandas in favour of Styler.map, so
# use whichever method the installed version provides.
schedule_styler = df.style
apply_cellwise = schedule_styler.map if hasattr(schedule_styler, 'map') else schedule_styler.applymap
apply_cellwise(color_EC_GC)