Peer review for data scraping and visualization file

In [None]:
import warnings
from io import StringIO

import pandas as pd
import yfinance as yf

In [None]:
# Suppress FutureWarning
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Ticker handle for Tesla (TSLA)
tesla = yf.Ticker("TSLA")
# Download the full price history, move the index into a regular column,
# and store 'Date' as timezone-naive datetimes.
tesla_data = (
    tesla.history(period="max")
    .reset_index()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']).dt.tz_localize(None))
)

In [None]:
# Display the first five rows
print("Tesla Data:")
print(tesla_data.head())

In [None]:
# Ticker handle for GameStop (GME)
gme = yf.Ticker("GME")
# Download the full price history, move the index into a regular column,
# and store 'Date' as timezone-naive datetimes.
gme_data = (
    gme.history(period="max")
    .reset_index()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']).dt.tz_localize(None))
)

In [None]:
# Display the first five rows
print("\nGameStop Data:")
print(gme_data.head())

In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
# URL of the Tesla revenue page
tesla_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
# Fetch the HTML data. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() stops here on an HTTP error instead of
# letting an error page fail later with an obscure IndexError.
tesla_response = requests.get(tesla_url, timeout=30)
tesla_response.raise_for_status()
tesla_html_data = tesla_response.text
# Parse the HTML data with BeautifulSoup
tesla_soup = BeautifulSoup(tesla_html_data, 'html.parser')
# Extract the table with Tesla Revenue
# NOTE(review): the GameStop cell reads table index 1 while this cell reads
# index 0 -- confirm that index 0 is the intended revenue table on this page.
tesla_table = tesla_soup.find_all('table')[0]
# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
# in recent pandas releases.
tesla_revenue = pd.read_html(StringIO(str(tesla_table)))[0]
# Rename the columns to 'Date' and 'Revenue'
tesla_revenue.columns = ['Date', 'Revenue']
# Strip '$' and ',' from Revenue. The explicit character class with regex=True
# behaves the same regardless of the pandas version's default for `regex`.
tesla_revenue['Revenue'] = (
    tesla_revenue['Revenue']
    .str.replace(r'[$,]', '', regex=True)
    .astype(float)
)
# Parse Date as timezone-naive datetimes. Going through str first means that a
# column parsed as integers (e.g. bare years) is read as calendar labels rather
# than as nanosecond offsets from the epoch.
tesla_revenue['Date'] = pd.to_datetime(
    tesla_revenue['Date'].astype(str), errors='coerce'
).dt.tz_localize(None)
# Drop incomplete rows last, so dates that failed to parse (NaT) are removed
# together with missing revenue values.
tesla_revenue = tesla_revenue.dropna()

In [None]:
# Display the last 5 rows of the tesla_revenue DataFrame
print("Tesla Revenue:")
print(tesla_revenue.tail())

In [None]:
# URL of the GameStop revenue page
gme_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"
# Fetch the HTML data. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() stops here on an HTTP error instead of
# letting an error page fail later with an obscure IndexError.
gme_response = requests.get(gme_url, timeout=30)
gme_response.raise_for_status()
gme_html_data = gme_response.text
# Parse the HTML data with BeautifulSoup
gme_soup = BeautifulSoup(gme_html_data, 'html.parser')
# Extract the table with GameStop Revenue (second table on the page)
gme_table = gme_soup.find_all('table')[1]
# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
# in recent pandas releases.
gme_revenue = pd.read_html(StringIO(str(gme_table)))[0]
# Rename the columns to 'Date' and 'Revenue'
gme_revenue.columns = ['Date', 'Revenue']
# Strip '$' and ',' from Revenue. The explicit character class with regex=True
# behaves the same regardless of the pandas version's default for `regex`.
gme_revenue['Revenue'] = (
    gme_revenue['Revenue']
    .str.replace(r'[$,]', '', regex=True)
    .astype(float)
)
# Parse Date as timezone-naive datetimes. Going through str first means that a
# column parsed as integers (e.g. bare years) is read as calendar labels rather
# than as nanosecond offsets from the epoch.
gme_revenue['Date'] = pd.to_datetime(
    gme_revenue['Date'].astype(str), errors='coerce'
).dt.tz_localize(None)
# Drop incomplete rows last, so dates that failed to parse (NaT) are removed
# together with missing revenue values.
gme_revenue = gme_revenue.dropna()

In [None]:
# Display the last 5 rows of the gme_revenue DataFrame
print("\nGameStop Revenue:")
print(gme_revenue.tail())

In [None]:
import matplotlib.pyplot as plt

In [None]:
def make_graph(stock_data, revenue_data, title):
    """Draw closing price and revenue against date on one figure with two y-axes.

    Parameters
    ----------
    stock_data : table-like with 'Date' and 'Close' columns
        Plotted as a blue line against the left-hand y-axis.
    revenue_data : table-like with 'Date' and 'Revenue' columns
        Plotted as a red line against a second y-axis that shares the x-axis.
    title : str
        Text passed to ``plt.title``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    _, price_axis = plt.subplots(figsize=(14, 8))

    # Left axis: closing price, labelled and ticked in blue
    price_axis.plot(stock_data['Date'], stock_data['Close'], 'b-')
    price_axis.set_xlabel('Date')
    price_axis.set_ylabel('Stock Price', color='b')
    price_axis.tick_params(axis='y', colors='b')

    # Right axis: revenue, labelled and ticked in red
    revenue_axis = price_axis.twinx()
    revenue_axis.plot(revenue_data['Date'], revenue_data['Revenue'], 'r-')
    revenue_axis.set_ylabel('Revenue', color='r')
    revenue_axis.tick_params(axis='y', colors='r')

    plt.title(title)
    plt.show()

In [None]:
# Keep only the Tesla price and revenue rows dated on or after 2010-01-01
tesla_data = tesla_data.loc[tesla_data['Date'].ge(pd.Timestamp('2010-01-01'))]
tesla_revenue = tesla_revenue.loc[tesla_revenue['Date'].ge(pd.Timestamp('2010-01-01'))]

In [None]:
# Plot Tesla price and revenue, restricted to rows dated up to 2021-06-30
make_graph(
    tesla_data.loc[tesla_data['Date'].le(pd.Timestamp('2021-06-30'))],
    tesla_revenue.loc[tesla_revenue['Date'].le(pd.Timestamp('2021-06-30'))],
    'Tesla',
)

In [None]:
# Plot GameStop price and revenue, restricted to rows dated up to 2021-06-30
make_graph(
    gme_data.loc[gme_data['Date'].le(pd.Timestamp('2021-06-30'))],
    gme_revenue.loc[gme_revenue['Date'].le(pd.Timestamp('2021-06-30'))],
    'GameStop',
)