In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape every page of the Euronext IPO showcase listing into `df`
# (columns: 'IPO Date', 'Company', 'Link').

# Listing URL; the zero-based page index is appended for each request
base_url = "https://live.euronext.com/en/ipo-showcase/all?field_iponi_ipo_date_value%5Bmin%5D=&field_iponi_ipo_date_value%5Bmax%5D=&page="
# Full class-attribute strings of the two table cells that are scraped
DATE_CELL_CLASS = 'views-field views-field-field-iponi-ipo-date'
TITLE_CELL_CLASS = 'views-field views-field-field-iponi-display-title'
# Seconds to wait for the server; without a timeout a stalled connection blocks the cell forever
REQUEST_TIMEOUT = 30

page_number = 0
ipo_dates = []
companies = []
links = []

# A single session is shared by all page requests so the connection can be reused
with requests.Session() as session:
    while True:
        # Construct the URL for the current page
        url = f"{base_url}{page_number}"

        # Network failures stop the pagination but keep everything collected so far
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f'Failed to retrieve page {page_number}: {exc}')
            break

        # Ensure the request was successful
        if response.status_code != 200:
            print(f'Failed to retrieve page {page_number} with status code: {response.status_code}')
            break  # Exit the loop if the request fails

        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # A page without any IPO date cell marks the end of the listing
        date_cells = soup.find_all('td', {'class': DATE_CELL_CLASS})
        if not date_cells:
            break

        for td_date in date_cells:
            # Look up the company cell inside the SAME table row, so a row that
            # lacks one of the two cells cannot shift the columns against each other
            row = td_date.find_parent('tr')
            td_company = row.find('td', {'class': TITLE_CELL_CLASS}) if row is not None else None

            # Extract IPO date
            ipo_date_tag = td_date.find('time')
            ipo_dates.append(ipo_date_tag.text if ipo_date_tag is not None else None)

            # Extract company name and link
            company_tag = td_company.find('a') if td_company is not None else None
            if company_tag is not None:
                companies.append(company_tag.text)
                # .get() yields None instead of raising when the anchor has no href
                links.append(company_tag.get('href'))
            else:
                companies.append(None)
                links.append(None)

        # Increment the page number for the next iteration
        page_number += 1

# Create a DataFrame to store the extracted data
df = pd.DataFrame({
    'IPO Date': ipo_dates,
    'Company': companies,
    'Link': links
})

# Print the DataFrame
print(df)


        IPO Date                         Company  \
0     05/10/2023                      BEERENBERG   
1     03/10/2023                             QEV   
2     02/10/2023                          AQUILA   
3     29/09/2023                     EMMA VILLAS   
4     29/09/2023                          VALICA   
...          ...                             ...   
2734  05/11/2003                  GRUPPO FORMULA   
2735  14/07/2003                       GESCARTAO   
2736  10/07/2003                          KARDAN   
2737  10/07/2003                          KARDAN   
2738  09/04/2003  Banco Comercial dos Açores, SA   

                                                 Link  
0                         /en/ipo-showcase/beerenberg  
1             /en/ipo-showcase/spear-investments-1-bv  
2     /en/ipo-showcase/aquila-european-renewables-plc  
3                        /en/ipo-showcase/emma-villas  
4                             /en/ipo-showcase/valica  
...                                    

In [2]:
# Turn the site-relative hrefs into absolute URLs.
# urljoin leaves an already-absolute URL unchanged, so re-running this cell does
# not prepend the host a second time; non-string entries (missing links) are kept as-is.
from urllib.parse import urljoin

df['Link'] = df['Link'].map(
    lambda href: urljoin('https://live.euronext.com', href) if isinstance(href, str) else href
)


In [3]:
# Display the DataFrame to inspect the 'Link' column after the host prefix was added
df


Unnamed: 0,IPO Date,Company,Link
0,05/10/2023,BEERENBERG,https://live.euronext.com/en/ipo-showcase/beer...
1,03/10/2023,QEV,https://live.euronext.com/en/ipo-showcase/spea...
2,02/10/2023,AQUILA,https://live.euronext.com/en/ipo-showcase/aqui...
3,29/09/2023,EMMA VILLAS,https://live.euronext.com/en/ipo-showcase/emma...
4,29/09/2023,VALICA,https://live.euronext.com/en/ipo-showcase/valica
...,...,...,...
2734,05/11/2003,GRUPPO FORMULA,https://live.euronext.com/fr/ipo-showcase/grup...
2735,14/07/2003,GESCARTAO,https://live.euronext.com/en/ipo-showcase/gesc...
2736,10/07/2003,KARDAN,https://live.euronext.com/en/ipo-showcase/kardan
2737,10/07/2003,KARDAN,https://live.euronext.com/nl/ipo-showcase/kardan


In [4]:
import requests
from bs4 import BeautifulSoup
import time
from tqdm import tqdm  # Import tqdm for the progress bar

# Visit every IPO page listed in df['Link'] and collect its ICB field into df['ICB'].
# Exactly one value (or None) is appended per link so the list always lines up with df.

# Seconds to wait for the server; without a timeout a stalled connection blocks the loop forever
REQUEST_TIMEOUT = 30
# Pause between consecutive requests, in seconds
REQUEST_DELAY = 0.5

# Initialize an empty list to store the ICB values
icb_values = []

# A single session is shared by all requests so the connection can be reused
with requests.Session() as session:
    # Create a tqdm progress bar to track the progress
    for link in tqdm(df['Link'], desc="Scraping Progress"):
        icb_text = None  # stays None for missing links, failed requests, or absent ICB field

        # Rows without a usable link (None/NaN) are skipped without issuing a request
        if isinstance(link, str):
            try:
                response = session.get(link, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # A single network failure must not abort the whole run
                print(f'Failed to retrieve page {link}: {exc}')
            else:
                if response.status_code != 200:
                    print(f'Failed to retrieve page {link} with status code: {response.status_code}')
                else:
                    # Parse the HTML content
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find the ICB field div, then the item holding its value
                    icb_div = soup.find('div', {'class': 'field--name-field-icb'})
                    icb_value = icb_div.find('div', {'class': 'field__item'}) if icb_div is not None else None
                    if icb_value is not None:
                        icb_text = icb_value.text.strip()

            # Wait after every attempted request, including failed ones, so error
            # responses are not followed by an immediate next hit on the server
            time.sleep(REQUEST_DELAY)

        icb_values.append(icb_text)

# Add a new column to the DataFrame with the ICB values
df['ICB'] = icb_values

# (Optional) Print the updated DataFrame to check the results
print(df)

Scraping Progress: 100%|██████████| 2739/2739 [1:43:32<00:00,  2.27s/it]

        IPO Date                         Company  \
0     05/10/2023                      BEERENBERG   
1     03/10/2023                             QEV   
2     02/10/2023                          AQUILA   
3     29/09/2023                     EMMA VILLAS   
4     29/09/2023                          VALICA   
...          ...                             ...   
2734  05/11/2003                  GRUPPO FORMULA   
2735  14/07/2003                       GESCARTAO   
2736  10/07/2003                          KARDAN   
2737  10/07/2003                          KARDAN   
2738  09/04/2003  Banco Comercial dos Açores, SA   

                                                   Link  \
0     https://live.euronext.com/en/ipo-showcase/beer...   
1     https://live.euronext.com/en/ipo-showcase/spea...   
2     https://live.euronext.com/en/ipo-showcase/aqui...   
3     https://live.euronext.com/en/ipo-showcase/emma...   
4      https://live.euronext.com/en/ipo-showcase/valica   
...                  


