In [1]:
!pip install yfinance --upgrade
!pip install pandas --upgrade
!pip install requests beautifulsoup4 pandas



In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Step 1: Use the `requests` library to fetch the HTML content of the webpage
url = "https://finance.yahoo.com/quote/CURLF/history?period1=1540771200&period2=1695513600&interval=1mo&filter=history&frequency=1mo&includeAdjustedClose=true"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}
html_data = requests.get(url, headers=headers).text

# Step 2: Parse the HTML content using `BeautifulSoup`
soup = BeautifulSoup(html_data, "html.parser")

# Step 3: Find the historical data table in the HTML content (it is the first table on the page)
table = soup.find('table', {"class": "W(100%) M(0)"})

# Step 4: Loop over each row in the table to extract the stock data values
data = []
for row in table.tbody.find_all('tr'):
    cols = row.find_all('td')
    if len(cols) == 7:  # Only process rows with data (skip empty rows)
        data.append([col.text for col in cols])

# Step 5: Create a pandas DataFrame to store this data
column_names = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
curlf_data = pd.DataFrame(data, columns=column_names)
print(curlf_data.head())  

           Date    Open    High     Low   Close Adj Close      Volume
0  Sep 22, 2023  3.9500  4.3500  3.9500  4.3000    4.3000     371,711
1  Sep 01, 2023  3.7400  5.8000  3.6200  4.0500    4.0500  19,938,600
2  Aug 01, 2023  3.7500  4.0200  2.5700  3.7400    3.7400  10,374,100
3  Jul 01, 2023  2.9500  4.2700  2.9500  3.7200    3.7200   7,779,900
4  Jun 01, 2023  2.8300  3.2500  2.6500  3.0920    3.0920   7,024,300


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the financial data
url = "https://finance.yahoo.com/quote/CURLF/financials?p=CURLF"

# Using a browser's User-Agent to fetch the page content
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
response = requests.get(url, headers=headers)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Extract the headers (dates)
headers_div = soup.find('div', class_='D(tbhg)')
if headers_div:
    headers = headers_div.find_all('div', class_='D(ib)')
    dates = [header.get_text() for header in headers][1:]  # Skip the first one ("Breakdown")

    # Extract the financial data
    financial_rows = soup.find_all('div', attrs={'data-test': 'fin-row'})
    data = []
    for row in financial_rows:
        cells = row.find_all('div', class_='D(tbc)')
        row_data = [cell.get_text() for cell in cells]
        data.append(row_data)

    # Create a DataFrame
    df = pd.DataFrame(data, columns=['Breakdown'] + dates)

    # Display the DataFrame
    print(df)
else:
    print("Unable to fetch the data. The page structure might have changed.")


                                            Breakdown        ttm 12/31/2022  \
0                                       Total Revenue  1,388,387  1,336,342   
1                                     Cost of Revenue    842,391    757,311   
2                                        Gross Profit    545,996    579,031   
3                                   Operating Expense    610,430    591,403   
4                                    Operating Income    -64,434    -12,372   
5           Net Non Operating Interest Income Expense    -99,705    -93,193   
6                                Other Income Expense   -134,798   -120,864   
7                                       Pretax Income   -298,937   -226,429   
8                                       Tax Provision    141,977    150,502   
9                      Net Income Common Stockholders   -437,342   -370,098   
10           Diluted NI Available to Com Stockholders   -437,342   -370,098   
11                                          Basic EP

In [4]:
# Extract the row corresponding to 'Total Revenue'
revenue_row = df[df['Breakdown'] == 'Total Revenue'].drop(columns='Breakdown')

# Transpose the data and reset the index
revenue_transposed = revenue_row.transpose().reset_index()

# Set column names to 'Date' and 'Revenue'
revenue_transposed.columns = ['Date', 'Revenue']

# Clean 'Revenue' column (remove commas)
revenue_transposed['Revenue'] = revenue_transposed['Revenue'].str.replace(',', '').astype(int)

# Rename the DataFrame
curlf_revenue = revenue_transposed

print(curlf_revenue)

         Date  Revenue
0         ttm  1388387
1  12/31/2022  1336342
2  12/31/2021  1209661
3  12/31/2020   626637
4  12/31/2019   221018


In [5]:
from datetime import datetime

# Update 'ttm' to the current date in the format "MM/DD/YYYY"
current_date = datetime.today().strftime('%m/%d/%Y')
curlf_revenue.loc[curlf_revenue['Date'] == 'ttm', 'Date'] = current_date

print(curlf_revenue)

         Date  Revenue
0  09/24/2023  1388387
1  12/31/2022  1336342
2  12/31/2021  1209661
3  12/31/2020   626637
4  12/31/2019   221018


In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def make_graph(curlf_data, curlf_revenue, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    
    # Convert the Date columns to datetime format for better plotting
    curlf_data['Date'] = pd.to_datetime(curlf_data['Date'], infer_datetime_format=True)
    curlf_revenue['Date'] = pd.to_datetime(curlf_revenue['Date'], infer_datetime_format=True)
    
    fig.add_trace(go.Scatter(x=curlf_data['Date'], y=curlf_data['Close'].astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=curlf_revenue['Date'], y=curlf_revenue['Revenue'].astype("float"), name="Revenue"), row=2, col=1)
    
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    
    fig.update_layout(showlegend=False,
                      height=900,
                      title=stock,
                      xaxis_rangeslider_visible=True)
    
    # Save the plot as an HTML file
    fig.write_html("CURLF_historical_data.html")

# Sample usage
make_graph(curlf_data, curlf_revenue, 'CURLF Historical Data')


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

