In [1]:
# Install required packages (uncomment if needed)
# Run this cell only if packages are missing in your environment
!pip install yfinance bs4 nbformat --upgrade plotly




In [2]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Render Plotly figures through the "iframe" renderer by default.
pio.renderers.default = "iframe"

import warnings
# Silence every FutureWarning for the rest of the session.
# NOTE(review): this presumably also hides deprecation notices from the
# libraries used below — confirm that is intended.
warnings.filterwarnings("ignore", category=FutureWarning)

# Confirmation message so the reader can see that the setup cell has run.
print('Libraries imported — ready to run.')


Libraries imported — ready to run.


In [3]:

# --- Preprocessing: ensure Date and Revenue columns are correctly typed ---
import pandas as pd

# Convert stock Date columns (from yfinance) to pandas datetimes.
# Each frame is touched only if a global of that name already exists, so on a
# fresh kernel (before the data-loading cells have run) these lines do nothing.
# errors='coerce' turns unparseable values into NaT instead of raising.
if 'tesla_data' in globals():
    tesla_data['Date'] = pd.to_datetime(tesla_data['Date'], errors='coerce')
if 'gme_data' in globals():
    gme_data['Date'] = pd.to_datetime(gme_data['Date'], errors='coerce')

# Convert revenue Date columns and clean Revenue values
def clean_revenue_df(df):
    """Return a cleaned copy of a revenue table; the input is not modified.

    Steps:
      * string column labels are stripped of surrounding whitespace
        (non-string labels are kept as they are);
      * if a 'Date' column exists, it is parsed to datetimes, with
        unparseable values becoming NaT;
      * if a 'Revenue' column exists, '$' and ',' are removed, rows whose
        revenue is empty or not a number are dropped, and the column is
        converted to a numeric dtype.

    The returned frame has a fresh 0..n-1 index.
    """
    df = df.copy()
    # Only string labels can be stripped; integer labels are left untouched
    # instead of raising AttributeError.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'].astype(str).str.strip(), errors='coerce')
    if 'Revenue' in df.columns:
        revenue_text = (df['Revenue'].astype(str)
                        .str.replace(r'[\$,]', '', regex=True)
                        .str.strip())
        has_value = revenue_text != ''
        # .loc[...].copy() yields an independent frame, so the column
        # assignment below does not trigger SettingWithCopyWarning.
        df = df.loc[has_value].copy()
        df['Revenue'] = pd.to_numeric(revenue_text[has_value], errors='coerce')
        # Values that were not numeric were coerced to NaN above; drop them.
        df = df.dropna(subset=['Revenue'])
    return df.reset_index(drop=True)

# Clean whichever revenue tables already exist as globals and report how many
# rows survive; a table that has not been loaded yet is skipped silently.
if 'tesla_revenue' in globals():
    tesla_revenue = clean_revenue_df(tesla_revenue)
    print('tesla_revenue cleaned: rows =', len(tesla_revenue))
if 'gme_revenue' in globals():
    gme_revenue = clean_revenue_df(gme_revenue)
    print('gme_revenue cleaned: rows =', len(gme_revenue))


In [4]:

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd

def make_graph(stock_data, revenue_data, stock,
               stock_cutoff='2021-06-14', revenue_cutoff='2021-04-30'):
    """Show share price (top panel) and revenue (bottom panel) for one company.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Needs a 'Date' column and a 'Close' column.
    revenue_data : pandas.DataFrame
        Needs a 'Date' column and a 'Revenue' column.
    stock : str
        Used as the figure title.
    stock_cutoff, revenue_cutoff : str or datetime-like, optional
        Latest date (inclusive) plotted in the respective panel. The defaults
        are the cutoffs that were previously hard-coded.

    Neither input frame is modified. The figure is displayed with
    ``fig.show()``; nothing is returned.
    """
    def _naive_dates(column):
        """Parse a column to datetimes and drop any timezone, keeping wall-clock time."""
        dates = pd.to_datetime(column, errors='coerce')
        if isinstance(dates.dtype, pd.DatetimeTZDtype):
            dates = dates.dt.tz_localize(None)
        return dates

    def _naive_timestamp(value):
        """Turn a cutoff into a tz-naive Timestamp so it compares with naive dates."""
        stamp = pd.Timestamp(value)
        return stamp.tz_localize(None) if stamp.tzinfo is not None else stamp

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        subplot_titles=("Historical Share Price", "Historical Revenue"),
                        vertical_spacing=0.3)

    # Work on copies to avoid modifying the caller's DataFrames
    stock_df = stock_data.copy()
    revenue_df = revenue_data.copy()

    # Comparing tz-aware dates with a tz-naive Timestamp raises TypeError,
    # so both Date columns are made tz-naive before filtering.
    stock_df['Date'] = _naive_dates(stock_df['Date'])
    revenue_df['Date'] = _naive_dates(revenue_df['Date'])

    stock_df_specific = stock_df[stock_df['Date'] <= _naive_timestamp(stock_cutoff)]
    revenue_df_specific = revenue_df[revenue_df['Date'] <= _naive_timestamp(revenue_cutoff)]

    fig.add_trace(go.Scatter(x=stock_df_specific['Date'],
                             y=stock_df_specific['Close'].astype('float'),
                             name='Share Price'), row=1, col=1)

    fig.add_trace(go.Scatter(x=revenue_df_specific['Date'],
                             y=revenue_df_specific['Revenue'].astype('float'),
                             name='Revenue'), row=2, col=1)

    fig.update_xaxes(title_text='Date', row=2, col=1)
    fig.update_yaxes(title_text='Price ($US)', row=1, col=1)
    fig.update_yaxes(title_text='Revenue ($ Millions)', row=2, col=1)
    fig.update_layout(title_text=stock, showlegend=False)
    fig.show()

print('Updated make_graph loaded.')


Updated make_graph loaded.


## Tesla Stock Data

In [15]:
# Download the full TSLA price history and turn the date index into a
# regular column so it can be plotted and filtered later.
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max").reset_index()
tesla_data.head()


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


## Tesla Revenue Data

In [6]:
from io import StringIO

url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
# Fail fast on a hung connection or an HTTP error page instead of parsing it.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_data = response.text

soup = BeautifulSoup(html_data, 'html.parser')
# read_html expects a path, URL or file-like object; passing the markup as a
# bare string is deprecated, so it is wrapped in StringIO.
tables = pd.read_html(StringIO(str(soup.find_all('table'))))

# The first table on the page is used as the revenue table.
tesla_revenue = tables[0].copy()
tesla_revenue.columns = ["Date", "Revenue"]

# Strip thousands separators and the currency sign. regex=False keeps '$' a
# literal character regardless of the installed pandas version's default.
tesla_revenue['Revenue'] = (tesla_revenue['Revenue'].astype(str)
                            .str.replace(',', '', regex=False)
                            .str.replace('$', '', regex=False))
# Rows with an empty cell are removed; Revenue stays text until it is
# converted to a number.
tesla_revenue.replace('', pd.NA, inplace=True)
tesla_revenue.dropna(inplace=True)
tesla_revenue.tail()


Unnamed: 0,Date,Revenue
8,2013,2013
9,2012,413
10,2011,204
11,2010,117
12,2009,112


## Tesla Revenue Data: Convert Revenue to Numeric

In [7]:
# Convert Revenue to numeric (float).
# Unlike pd.to_numeric(..., errors='coerce'), astype(float) raises if a value
# cannot be parsed as a number.
tesla_revenue['Revenue'] = tesla_revenue['Revenue'].astype(float)
tesla_revenue.head()


Unnamed: 0,Date,Revenue
0,2021,53823.0
1,2020,31536.0
2,2019,24578.0
3,2018,21461.0
4,2017,11759.0


## GameStop Stock Data

In [8]:
# Download the full GME price history and turn the date index into a
# regular column so it can be plotted and filtered later.
gme = yf.Ticker("GME")
gme_data = gme.history(period="max").reset_index()
gme_data.head()


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620128,1.69335,1.603296,1.691666,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.683251,1.687459,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.615921,1.66221,1.603296,1.66221,6892800,0.0,0.0


## GME Revenue Data

In [9]:
from io import StringIO

url2 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"
# Fail fast on a hung connection or an HTTP error page instead of parsing it.
response2 = requests.get(url2, timeout=30)
response2.raise_for_status()
html_data_2 = response2.text

soup2 = BeautifulSoup(html_data_2, 'html.parser')
# read_html expects a path, URL or file-like object; passing the markup as a
# bare string is deprecated, so it is wrapped in StringIO.
tables2 = pd.read_html(StringIO(str(soup2.find_all('table'))))
# The first table on the page is used as the revenue table.
gme_revenue = tables2[0].copy()
gme_revenue.columns = ["Date", "Revenue"]

# Strip thousands separators and the currency sign. regex=False keeps '$' a
# literal character regardless of the installed pandas version's default.
gme_revenue['Revenue'] = (gme_revenue['Revenue'].astype(str)
                          .str.replace(',', '', regex=False)
                          .str.replace('$', '', regex=False))
# Rows with an empty cell are removed before the numeric conversion.
gme_revenue.replace('', pd.NA, inplace=True)
gme_revenue.dropna(inplace=True)
gme_revenue['Revenue'] = gme_revenue['Revenue'].astype(float)
gme_revenue.tail()


Unnamed: 0,Date,Revenue
11,2009,8806.0
12,2008,7094.0
13,2007,5319.0
14,2006,3092.0
15,2005,1843.0


In [10]:
# Robust timezone-fix: always return tz-naive datetime64[ns]
import pandas as pd

def to_naive_datetime(series):
    """Convert date-like values to a tz-naive ``DatetimeIndex``.

    * Integer input is treated as calendar years (2021 -> 2021-01-01).
      Without this, pandas reads bare integers as nanoseconds since
      1970-01-01, so a year column would collapse onto 1970.
    * Any other input is parsed as UTC and the timezone is then dropped, so
      tz-aware values come back as their UTC wall-clock time.

    Values that cannot be parsed become NaT. The result has the same length
    and order as the input.
    """
    values = pd.Series(series)

    if pd.api.types.is_integer_dtype(values.dtype):
        # Parse through text with an explicit year format; integers that are
        # not valid four-digit years are coerced to NaT.
        years = pd.to_datetime(values.astype(str), format='%Y', errors='coerce')
        return pd.to_datetime(years.to_numpy())

    # 1) parse to tz-aware UTC (coerce bad values)
    parsed = pd.to_datetime(values, errors='coerce', utc=True)
    # 2) convert the underlying ndarray to a tz-naive dtype
    #    (astype on the ndarray works even when Series.astype fails)
    naive = parsed.values.astype('datetime64[ns]')
    # 3) wrap back as a pandas tz-naive DatetimeIndex
    return pd.to_datetime(naive)

# Normalise the Date column of every frame that exists as a global.
# A frame whose name is not defined yet is skipped.
if 'tesla_data' in globals():
    tesla_data['Date'] = to_naive_datetime(tesla_data['Date'])
if 'gme_data' in globals():
    gme_data['Date'] = to_naive_datetime(gme_data['Date'])
if 'tesla_revenue' in globals():
    tesla_revenue['Date'] = to_naive_datetime(tesla_revenue['Date'])
if 'gme_revenue' in globals():
    gme_revenue['Date'] = to_naive_datetime(gme_revenue['Date'])

# Quick dtype check.
# NOTE(review): unlike the guarded assignments above, these prints access all
# four frames unconditionally and raise NameError if any of them is missing.
print('tesla_data.Date dtype =', tesla_data['Date'].dtype)
print('tesla_revenue.Date dtype =', tesla_revenue['Date'].dtype)
print('gme_data.Date dtype =', gme_data['Date'].dtype)
print('gme_revenue.Date dtype =', gme_revenue['Date'].dtype)


tesla_data.Date dtype = datetime64[ns]
tesla_revenue.Date dtype = datetime64[ns]
gme_data.Date dtype = datetime64[ns]
gme_revenue.Date dtype = datetime64[ns]


## Tesla Stock Graph

In [11]:
# Plot Tesla's share price and revenue; 'Tesla' becomes the figure title.
make_graph(tesla_data, tesla_revenue, 'Tesla')


## GameStop Stock Graph 

In [12]:
# Plot GameStop's share price and revenue; 'GameStop' becomes the figure title.
make_graph(gme_data, gme_revenue, 'GameStop')
