# In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape energy consumption data from a given URL
def scrape_energy_consumption(url, timeout=30):
    """Scrape the efficiency tables of one vehicle page into a one-row DataFrame.

    The page is downloaded, the model name is read from the ``<h1>`` inside
    ``<header class="sub-header">``, and every ``<h3>`` section inside
    ``<div class="data-table" id="efficiency">`` is flattened into columns
    named ``"<section title> - <row label>"``.

    Args:
        url: Address of the vehicle page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A pandas DataFrame with a single row. It always has a "Model Name"
        column; the remaining columns depend on what the page contains. When
        the efficiency div is absent, "Model Name" is the only column.

    Raises:
        requests.exceptions.RequestException: If the download fails, times
            out, or the server answers with an error status.
        AttributeError: If the page has no ``header.sub-header`` element or
            that header has no ``<h1>``.
    """
    response = requests.get(url, timeout=timeout)
    # Stop on 4xx/5xx instead of parsing an error page as vehicle data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    model_name = soup.find("header", class_="sub-header").find("h1").text.strip()

    # Created before the lookup so a page without the efficiency div still
    # produces a row (holding just the model name) rather than failing on an
    # unassigned variable further down.
    sections = {}
    efficiency_div = soup.find("div", class_="data-table", id="efficiency")
    if efficiency_div is not None:
        for heading in efficiency_div.find_all("h3"):
            # The tables of a section sit in the div that follows its heading.
            table_div = heading.find_next_sibling("div")
            if table_div is not None:
                rows = _read_label_value_rows(table_div)
            else:
                rows = {}
            sections[heading.text.strip()] = rows

    # Flatten {title: {label: value}} into single-element columns.
    flat_data = {"Model Name": [model_name]}
    for title, table_data in sections.items():
        for label, value in table_data.items():
            flat_data[f"{title} - {label}"] = [value]

    return pd.DataFrame(flat_data)


def _read_label_value_rows(container):
    """Map label to value for every two-cell row of every table in *container*."""
    pairs = {}
    for table in container.find_all("table"):
        for row in table.find_all("tr"):
            cells = row.find_all("td")
            # Rows that do not have exactly two <td> cells are skipped.
            if len(cells) == 2:
                pairs[cells[0].text.strip()] = cells[1].text.strip()
    return pairs

# Vehicle detail pages to scrape, one entry per electric-vehicle model.
# Append further page URLs to this list to include more vehicles.
urls = [
    "https://ev-database.org/car/1991/Tesla-Model-3",
    "https://ev-database.org/car/1717/Hyundai-IONIQ-6-Standard-Range-2WD",
    "https://ev-database.org/car/1718/Hyundai-IONIQ-6-Long-Range-2WD",
    "https://ev-database.org/car/2126/Dacia-Spring-Electric-45",
    "https://ev-database.org/car/1536/Renault-Megane-E-Tech-EV40-130hp",
    "https://ev-database.org/car/2135/Renault-5-E-Tech-52kWh-150hp",
    "https://ev-database.org/car/1998/Mini-Cooper-SE",
    "https://ev-database.org/car/1316/Lucid-Air-Grand-Touring",
    "https://ev-database.org/car/1947/Peugeot-e-2008-54-kWh",
    "https://ev-database.org/car/1707/MG-MG4-Electric-51-kWh",
    "https://ev-database.org/car/1701/BMW-iX1-xDrive30",
    "https://ev-database.org/car/1518/CUPRA-Born-170-kW---77-kWh",
    "https://ev-database.org/car/2100/Porsche-Taycan-4S",
    "https://ev-database.org/car/2016/Audi-Q4-Sportback-e-tron-45",
]

# Fetch the pages one after another, collecting one DataFrame per vehicle.
dfs = []
for url in urls:
    df = scrape_energy_consumption(url)
    dfs += [df]

# Stack the per-vehicle frames into one table with a fresh row index.
final_df = pd.concat(dfs, ignore_index=True)

# Save the combined table as an Excel workbook, leaving out the index column.
excel_file = "Projectdemo.xlsx"
final_df.to_excel(excel_file, index=False)

print("Excel file has been created successfully:", excel_file)


# Output: Excel file has been created successfully: Projectdemo.xlsx
