# Import Libraries 

In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Create Webdriver

In [None]:
# start a Firefox browser session that Selenium controls through `driver`
driver = webdriver.Firefox()

# Load the URL

In [None]:
# address of the page to scrape
url = 'https://live.euronext.com/en/products/fixed-income/esg-bonds'
# navigate the Selenium-controlled browser to that page
driver.get(url)

# Parse HTML

In [None]:
# parse the HTML currently held by the browser using Python's built-in 'html.parser'
# NOTE(review): page_source is read right after driver.get(); if the table is filled in
# by JavaScript after the initial load it may not be present yet - confirm
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Locate Table

In [None]:
# not sure which table you need, or what its attributes are?
# this cell lists every 'table' element on the page with its position and attributes

all_tables = soup.find_all('table')
for i, tbl in enumerate(all_tables):
    print(f"{i} - {tbl.attrs}")

# Scrape Table Data

In [None]:
# find the first 'table' element
table = soup.find('table')

# find the 3rd 'table' element
# tables = soup.find_all('table')
# table = tables[2]

# find a table with specific attributes
# table = soup.find('table', attrs={'class': 'dataTable'})

# soup.find returns None when nothing matches; fail early with a clear message
# instead of an AttributeError further down
if table is None:
    raise RuntimeError("no 'table' element found in the parsed page")

table_data = []

# tables typically use 'tr' for table row and 'td' for table data or cell
# each row becomes a list: the stripped text of every cell, followed by the
# href of every link found in that row

for row in table.find_all('tr'):
    cells = [c.text.strip() for c in row.find_all('td')]
    # href=True skips anchors that have no href attribute (they would raise KeyError)
    cells.extend(link['href'] for link in row.find_all('a', href=True))
    # rows that produced no cells and no links are skipped
    if cells:
        table_data.append(cells)

print(f"{len(table_data)} rows found in webpage")

# Create DataFrame

In [None]:
# build a DataFrame from the scraped rows (one list of values per table row)
df = pd.DataFrame(table_data)
# a bare name on the last line of a notebook cell displays its value
df

# Clean Up

In [None]:
# remove the row labelled 0 (the first scraped row), modifying df in place
df.drop(index=0, inplace=True)
df

In [None]:
# remove the column labelled 8, modifying df in place
df.drop(labels=8, axis='columns', inplace=True)
df

# List Columns

In [None]:
# read the column names from the table's header cells (usually 'th' elements)
header_cells = table.find_all('th')
cols = [header.text.strip() for header in header_cells]

# alternatively, supply your own list of column names
# cols = ['column_1', 'column_2', 'column_3']

cols

In [None]:
# keep the first seven header names and append names for the two extra scraped columns
cols = cols[:7] + ['Document Type', 'Document Link']
cols

# Add Columns to DataFrame

In [None]:
# check that we have the same number of column names as columns in the table,
# then add the column names to the dataframe
if len(cols) == len(df.columns):
    df.columns = cols
else:
    # say so explicitly: the later cells look columns up by name and would
    # otherwise fail with a KeyError that does not explain the real cause
    print(f"column names not applied: {len(cols)} names supplied for {len(df.columns)} columns")
df

# Formatting

In [None]:
# convert the listing dates to datetimes; values that cannot be parsed become NaT
listing_dates = pd.to_datetime(df['Listing Date'], errors='coerce')
df['Listing Date'] = listing_dates
df

In [None]:
# strip commas and any whitespace so the text can be parsed as a number;
# the pattern is a raw string because '\s' inside a normal string literal
# is an invalid escape sequence
df['Nom. (mm)'] = df['Nom. (mm)'].replace(r'[,\s]', '', regex=True)
df['Nom. (mm)'] = pd.to_numeric(df['Nom. (mm)'])
df

In [None]:
from urllib.parse import urljoin

# turn the scraped document links into absolute URLs, only for rows whose
# 'Document Type' is 'Documents'
# urljoin gives a well-formed URL whether the href is root-relative ('/en/...'),
# relative ('en/...') or already absolute; plain string concatenation would
# produce '//' or a doubled host in the first and last case
base_url = 'https://live.euronext.com/'
has_documents = df['Document Type'] == 'Documents'
df.loc[has_documents, 'Document Link'] = df.loc[has_documents, 'Document Link'].map(
    # leave missing (non-string) values untouched
    lambda href: urljoin(base_url, href) if isinstance(href, str) else href
)
df

In [None]:
# remove the 'Document Type' column, modifying df in place
df.drop(labels='Document Type', axis='columns', inplace=True)
df

# Save DataFrame

In [None]:
# a DataFrame can be written out in many formats
# (df.to_excel(), for instance, would produce an Excel spreadsheet);
# here the data is saved as a CSV file without the index column

file_name = 'Euronext ESG Webscraping Demo.csv'
df.to_csv(file_name, encoding='utf-8-sig', index=False)

# Closing Webdriver

In [None]:
# do not forget to shut the browser down!
# this could be done at any time after we've parsed the data we need from the webpage
# quit() closes every window and ends the WebDriver session,
# whereas close() only closes the current window

driver.quit()