In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import time

# Start the Chrome session used by the scraping functions in this cell
driver = webdriver.Chrome()

# Load the fundraising listing, then pause a fixed 5 seconds so it can render
FUNDRAISING_URL = 'https://www.rootdata.com/Fundraising'
driver.get(FUNDRAISING_URL)
time.sleep(5)

# Region filters to scrape, listed in the order they are visited
regions = [
    'America',
    'Singapore',
    'Chinese Mainland',
    'India',
    'Britain',
    'Canada',
    'Korea',
    'France',
    'Hong Kong',
    'Vietnam',
]

# Accumulates one [region, project name, project link] row per scraped project
data = []

# Function to select a region
def select_region(region):
    """Activate the region filter whose label contains ``region``.

    Locates the ``<input>`` nested in the ``<span>`` that precedes the span
    holding the region text, clicks it through JavaScript, pauses two
    seconds, and prints which region was selected.

    Args:
        region: Text to look for inside the region label span.
    """
    input_xpath = f"//span[contains(text(), '{region}')]/preceding-sibling::span/input"
    radio_input = driver.find_element(By.XPATH, input_xpath)

    # Clicked via script rather than WebElement.click(); presumably the
    # native input is not directly clickable on this page -- confirm.
    driver.execute_script("arguments[0].click();", radio_input)

    # Fixed pause so the listing can refresh for the new filter
    time.sleep(2)
    print(f"Selected region: {region}")

# Function to get all project names and hyperlinks on the current page
def get_projects(region):
    """Append every project link on the current page to the global ``data``.

    Each matching anchor contributes one ``[region, text, href]`` row. Any
    exception raised while reading the page is printed rather than raised;
    rows appended before the failure are kept.

    Args:
        region: Region name stored in the first column of each row.
    """
    anchor_selector = 'a.list_name.animation_underline.el-popover__reference'
    try:
        for anchor in driver.find_elements(By.CSS_SELECTOR, anchor_selector):
            name = anchor.text
            href = anchor.get_attribute('href')
            data.append([region, name, href])
    except Exception as err:
        print(f"Error retrieving projects: {err}")

# Function to navigate through all pages of the current region
def navigate_pages(region):
    """Scrape every result page for the currently selected region.

    For each page, prints the page counter, collects the projects via
    ``get_projects``, then waits up to 10 seconds for the ``button.btn-next``
    element to become clickable and clicks it. The loop ends when that wait
    or click fails (for example because there is no further page).

    Only ``Exception`` subclasses end the loop. ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so interrupting the kernel actually stops the
    scrape instead of silently skipping ahead to the next region.

    Args:
        region: Region name forwarded to ``get_projects`` for each page.
    """
    # One wait object is enough; it holds no per-page state.
    wait = WebDriverWait(driver, 10)
    page_number = 1
    while True:
        print(f"Currently on page {page_number}")
        get_projects(region)
        try:
            # Check if the next button is present and can be clicked
            next_button = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.btn-next'))
            )
            next_button.click()
        except Exception:
            print("No more pages or an error occurred.")
            break
        # Kept outside the try block so an interrupt during the pause is not
        # mistaken for "no more pages".
        time.sleep(5)  # Wait for the next page to load
        page_number += 1

# Iterate over each region, select it, and navigate through its pages.
# The browser is closed in `finally` so that a failure in any region (for
# example a region label that cannot be found) does not leave a Chrome
# process running.
try:
    for region in regions:
        select_region(region)
        navigate_pages(region)
finally:
    # Close the WebDriver
    driver.quit()

# Write the data to a CSV file
with open('fundraising.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Region', 'Project Name', 'Project Hyperlink'])
    writer.writerows(data)

print("Data has been exported to fundraising.csv")


Selected region: America
Currently on page 1
Currently on page 2
Currently on page 3
Currently on page 4
Currently on page 5
Currently on page 6
Currently on page 7
Currently on page 8
Currently on page 9
Currently on page 10
Currently on page 11
Currently on page 12
Currently on page 13
Currently on page 14
Currently on page 15
Currently on page 16
Currently on page 17
Currently on page 18
Currently on page 19
Currently on page 20
Currently on page 21
Currently on page 22
Currently on page 23
Currently on page 24
Currently on page 25
Currently on page 26
Currently on page 27
Currently on page 28
Currently on page 29
Currently on page 30
Currently on page 31
Currently on page 32
Currently on page 33
Currently on page 34
Currently on page 35
Currently on page 36
Currently on page 37
Currently on page 38
Currently on page 39
Currently on page 40
Currently on page 41
Currently on page 42
Currently on page 43
Currently on page 44
No more pages or an error occurred.
Selected region: Singapo

In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv

# Function to extract data from the given URL
def extract_data(url):
    """Load a project page and return its funding-round table as rows.

    Opens ``url`` with the global ``driver``, clicks the first tab button
    whose text contains "Rounds" (if there is one), and reads every
    ``tbody tr`` of the table found under ``div.watermusk_table``.

    Args:
        url: Address of the project page to open.

    Returns:
        A list with one ``[round, amount, valuation, date, investors]`` list
        per table row. Empty cell text is replaced by ``"- -"``; investors
        are the link texts of column 5 joined with ``", "``.

    Raises:
        Whatever ``find_element`` raises when the container, the table, or
        an expected cell is missing; nothing is caught here.
    """

    def cell_text(tr, selector):
        # Stripped text of the matched cell, or the "- -" placeholder
        return tr.find_element(By.CSS_SELECTOR, selector).text.strip() or "- -"

    driver.get(url)
    time.sleep(2)  # fixed pause for the page to load

    # Reveal the rounds data: click the first tab labelled "Rounds"
    tab_buttons = driver.find_elements(By.CSS_SELECTOR, 'div.tabs button')
    rounds_tab = next((btn for btn in tab_buttons if "Rounds" in btn.text), None)
    if rounds_tab is not None:
        rounds_tab.click()

    time.sleep(2)  # fixed pause for the tab content to update

    # NOTE(review): the data-v-* attribute looks build-generated and may
    # change when the site is redeployed -- confirm before relying on it.
    container = driver.find_element(By.CSS_SELECTOR, 'div[data-v-2462a663]')
    rounds_table = container.find_element(By.CSS_SELECTOR, 'div.watermusk_table table')

    records = []
    for tr in rounds_table.find_elements(By.CSS_SELECTOR, 'tbody tr'):
        round_name = cell_text(tr, 'td[aria-colindex="1"]')
        amount = cell_text(tr, 'td[aria-colindex="2"] div')
        valuation = cell_text(tr, 'td[aria-colindex="3"]')
        date = cell_text(tr, 'td[aria-colindex="4"] span')

        investors_cell = tr.find_element(By.CSS_SELECTOR, 'td[aria-colindex="5"]')
        names = [link.text.strip() for link in investors_cell.find_elements(By.CSS_SELECTOR, 'a')]
        investors = ', '.join(names) if names else "- -"

        records.append([round_name, amount, valuation, date, investors])

    return records

# Initialize the WebDriver
driver = webdriver.Chrome()

# Read URLs from the CSV file and extract data.
# fundraising.csv is written as UTF-8, so it is read back as UTF-8 instead of
# the platform default encoding; newline='' is what the csv module expects.
all_data = []
try:
    with open('fundraising.csv', mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        header = next(reader, None)  # Skip the header row (None if the file is empty)

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to scrape
                continue

            region = row[0]
            project_name = row[1]
            project_url = row[2]

            # `round_row` rather than `data`, so the list named `data` built
            # by the listing-scrape cell is not overwritten.
            for round_row in extract_data(project_url):
                all_data.append([region, project_name] + round_row)
finally:
    # Close the WebDriver even when a project page fails to parse
    driver.quit()

# Write the data to a new CSV file
with open('fundraisingFinal.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Region', 'Project Name', 'Round', 'Amount', 'Valuation', 'Date', 'Investors'])
    writer.writerows(all_data)

# Message now names the file that is actually written
print("Data has been written to fundraisingFinal.csv")


Data has been written to fundrasingFinal.csv
