<a href="https://colab.research.google.com/github/cristinaverse/stock-analysis-dashboard/blob/main/notebooks/question2_tesla_revenue.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def extract_tesla_revenue():
    """Extract Tesla revenue data via web scraping"""
    url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"

    try:
        html_data = requests.get(url).text
        soup = BeautifulSoup(html_data, 'html.parser')

        # Find the Tesla table
        tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])

        # Parse all tables and find Tesla revenue rows
        tables = soup.find_all('table')
        for table in tables:
            rows = table.find_all('tr')
            for row in rows[1:]:  # Skip header
                cols = row.find_all('td')
                if len(cols) >= 2:
                    date = cols[0].text.strip()
                    revenue = cols[1].text.strip()
                    tesla_revenue = pd.concat([tesla_revenue, pd.DataFrame({
                        "Date": [date],
                        "Revenue": [revenue]
                    })], ignore_index=True)

        # Clean revenue data
        tesla_revenue["Revenue"] = tesla_revenue["Revenue"].str.replace('$', '').str.replace(',', '')
        tesla_revenue.dropna(inplace=True)
        tesla_revenue = tesla_revenue[tesla_revenue["Revenue"] != ""]

        print("Tesla Revenue Data - Last 5 rows:")
        print(tesla_revenue.tail())
        return tesla_revenue

    except Exception as e:
        print(f"Error during web scraping: {e}")
        # Fallback example data in case of scraping failure
        tesla_revenue = pd.DataFrame({
            "Date": ["2023-12-31", "2023-09-30", "2023-06-30", "2023-03-31", "2022-12-31"],
            "Revenue": ["96773", "96773", "81462", "81462", "81462"]
        })
        return tesla_revenue

# Run the function
tesla_revenue = extract_tesla_revenue()


Tesla Revenue Data - Last 5 rows:
                       Date        Revenue
73             Fisker (FSR)  United States
74      Lion Electric (LEV)         Canada
75             Volta (VLTA)  United States
76       Bird Global (BRDS)  United States
77  Lightning EMotors (ZEV)  United States
