### **Exportación de datos a un archivo CSV con Pandas**

Este es el código que realizamos con las clases para la versión **`selenium==3.141.0`** que instalamos:

In [1]:
from selenium import webdriver
import pandas as pd

# Scrape every table row of the "detailed overs" page with Selenium 3 and
# export the date, home team, score and away team columns to a CSV file.
website = 'https://www.adamchoi.co.uk/overs/detailed'
# Chrome is recommended, but Firefox, Safari, Edge, etc. could be used as well.
driver = webdriver.Chrome(executable_path=r'C:\Users\Alfonso\Downloads\Programas\chromedriver\chromedriver.exe')

# One list per output column; position i in every list belongs to the same row.
date = []
home_team = []
score = []
away_team = []

# try/finally guarantees that the browser opened above is closed even when a
# lookup or the click raises; otherwise the window would be left running.
try:
    driver.get(website)

    all_matches_button = driver.find_element_by_xpath('//label[@analytics-event="All matches"]')
    all_matches_button.click()

    matches = driver.find_elements_by_tag_name('tr')

    # Inside the loop the lookups are made on `match`, not on `driver`, so that
    # each row is the context of the XPath. The leading dot in './td[n]' is
    # what tells XPath to start from that current context node.
    for match in matches:
        date.append(match.find_element_by_xpath('./td[1]').text)
        home_team.append(match.find_element_by_xpath('./td[2]').text)
        score.append(match.find_element_by_xpath('./td[3]').text)
        away_team.append(match.find_element_by_xpath('./td[4]').text)
finally:
    driver.quit()  # Closes the browser window that was opened

# Create the DataFrame in pandas and export it to CSV (Excel)
df = pd.DataFrame({'date': date, 'home_team': home_team, 'score': score, 'away_team': away_team})
df.to_csv('football_data.csv', index=False)
print(df)

           date       home_team  score         away_team
0    12-08-2023         Arsenal  2 - 1     Nott'm Forest
1    21-08-2023  Crystal Palace  0 - 1           Arsenal
2    26-08-2023         Arsenal  2 - 2            Fulham
3    03-09-2023         Arsenal  3 - 1        Man United
4    17-09-2023         Everton  0 - 1           Arsenal
..          ...             ...    ...               ...
511  01-02-2024          Wolves  3 - 4        Man United
512  04-02-2024         Chelsea  2 - 4            Wolves
513  10-02-2024          Wolves  0 - 2         Brentford
514  17-02-2024       Tottenham  1 - 2            Wolves
515  25-02-2024          Wolves  ? - ?  Sheffield United

[516 rows x 4 columns]


In [2]:
df

Unnamed: 0,date,home_team,score,away_team
0,12-08-2023,Arsenal,2 - 1,Nott'm Forest
1,21-08-2023,Crystal Palace,0 - 1,Arsenal
2,26-08-2023,Arsenal,2 - 2,Fulham
3,03-09-2023,Arsenal,3 - 1,Man United
4,17-09-2023,Everton,0 - 1,Arsenal
...,...,...,...,...
511,01-02-2024,Wolves,3 - 4,Man United
512,04-02-2024,Chelsea,2 - 4,Wolves
513,10-02-2024,Wolves,0 - 2,Brentford
514,17-02-2024,Tottenham,1 - 2,Wolves


Este código nos sería útil para la versión 4 de Selenium:

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import pandas as pd

# Scrape every table row of the "detailed overs" page with Selenium 4 and
# export the date, home team, score and away team columns to a CSV file.

# define the website to scrape and the path where the chromedriver is located
website = 'https://www.adamchoi.co.uk/overs/detailed'
path = '/Users/frankandrade/Downloads/chromedriver'  # write your path here
service = Service(executable_path=path)  # selenium 4
driver = webdriver.Chrome(service=service)  # define 'driver' variable

# store the data in lists (one list per output column)
date = []
home_team = []
score = []
away_team = []

# try/finally guarantees that the browser opened above is closed even when a
# lookup or the click raises; otherwise it would be left running.
try:
    # open Google Chrome with chromedriver
    driver.get(website)

    # locate and click on a button
    all_matches_button = driver.find_element(by='xpath', value='//label[@analytics-event="All matches"]')
    all_matches_button.click()

    # select elements in the table
    matches = driver.find_elements(by='xpath', value='//tr')

    # loop through the matches list; the leading dot in './td[n]' makes each
    # XPath relative to the current row instead of the whole document
    for match in matches:
        date.append(match.find_element(by='xpath', value='./td[1]').text)
        home = match.find_element(by='xpath', value='./td[2]').text
        home_team.append(home)
        print(home)  # echo each home team as it is scraped
        score.append(match.find_element(by='xpath', value='./td[3]').text)
        away_team.append(match.find_element(by='xpath', value='./td[4]').text)
finally:
    # quit the driver we opened at the beginning
    driver.quit()

# Create Dataframe in Pandas and export to CSV (Excel)
df = pd.DataFrame({'date': date, 'home_team': home_team, 'score': score, 'away_team': away_team})
df.to_csv('football_data.csv', index=False)
print(df)