Let's get the start date and end date of each project from the dashboard. Note that the code is semi-supervised and should be stopped by the user once all the projects have been collected.

In [None]:
import time
import chromedriver_binary
from tqdm import tqdm
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options

# Read the project list (semicolon-delimited file)
df = pd.read_csv("erc_data.csv", delimiter=';')

# Project dates collected so far, as a dict keyed by the project number (str)
# whose values are the remaining columns of the file as a list. The scraping
# loop uses it to skip projects that already have dates.
try:
    dates = pd.read_csv("erc_dates.csv")
except FileNotFoundError:
    # Nothing has been collected yet: start from an empty mapping
    dates = {}
else:
    # Accept both header spellings ("Project number" / "Project Number") so
    # the file loads whichever one it was written with
    dates = dates.rename(columns={"Project Number": "Project number"})
    dates = dates.set_index("Project number").T
    dates.columns = dates.columns.map(str)
    dates = dates.to_dict('list')

# Scrape the start/end date of every project that has no entry in `dates` yet.
# Each pass of the outer loop uses a fresh browser session: any unexpected
# error is printed, the browser is closed and the next pass resumes with the
# projects that still have no dates. The loop ends after a pass that went
# through every project without such an error; it can also be interrupted
# manually at any time.
while True:
    # Open web driver
    driver = webdriver.Chrome()

    try:
        # Get dashboard (inside the try so the browser is closed if loading fails)
        driver.get("https://dashboard.tech.ec.europa.eu/qs_digit_dashboard_mt/public/sense/app/afe00964-3272-45c4-b60c-b64ed20d98d1/sheet/61a0bd1d-cd6d-4ac8-8b55-80d8661e44c0/state/analysis")

        # Wait for table to know page loaded
        WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.CLASS_NAME, "qv-grid-object-data-first-row")))

        # Loop over each project (numbers are already converted to str here)
        for project_number in tqdm(df["Project number"].dropna().astype(int).astype(str).values):
            # Skip projects whose dates were already collected
            if dates.get(project_number) is not None:
                continue

            # Click search button
            driver.find_elements(By.CLASS_NAME, "qv-st-header-cell")[1].click()

            # Wait for the search box; if it did not show up, click the header
            # again and wait once more (a second timeout is handled by the
            # outer `except`, which restarts the browser)
            try:
                WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "lui-search__input")))
            except TimeoutException:
                driver.find_elements(By.CLASS_NAME, "qv-st-header-cell")[1].click()
                WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "lui-search__input")))

            # Insert project ID
            driver.find_elements(By.CLASS_NAME, "lui-search__input")[0].send_keys(project_number)

            # Wait for a highlighted search result; none within the timeout is
            # reported as a project missing from the dashboard
            try:
                WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.CLASS_NAME, "highlighted")))
            except TimeoutException:
                print(f"Project does not exist in dashboard: {project_number}")
                continue

            # Click the project to open panel
            driver.find_elements(By.CLASS_NAME, "highlighted")[0].click()

            # Get all info inside panel (short pause before reading it)
            time.sleep(1)
            children = driver.find_elements(By.CLASS_NAME, "qv-media-tool-html")[1].find_elements(By.XPATH, '*')

            # Get the project number from page and check if it is the same
            page_project_number = driver.find_elements(By.CLASS_NAME, "current-selection-item-text")[0].text.split()[-1]
            if page_project_number == project_number:
                # The date is the part after the first ":" of each element's text
                start_date = children[8].text.split(":")[1].strip()
                end_date = children[9].text.split(":")[1].strip()

                dates[project_number] = [start_date, end_date]
            else:
                # The page reports a different project: do not store its
                # dates under this project number
                print(f"Wrong selection project {project_number}, {page_project_number}")

            # Remove project selection
            driver.find_elements(By.CLASS_NAME, "current-selection-item-text")[1].click()

    except Exception as e:
        # Best-effort recovery: report the problem and retry with a new browser
        print(e)

    else:
        # Every project was visited without an unexpected error: stop looping
        break

    finally:
        driver.quit()

Let's save the dates:

In [17]:
# Write the collected dates back to disk, one row per project. The first
# header is "Project number" (lower-case "n") because that is the column name
# the loading code indexes on when this file is read again.
rows = [[number, *span] for number, span in dates.items()]
pd.DataFrame(rows, columns=["Project number", "Start Date", "End Date"]).to_csv("erc_dates.csv", index=False)