In [36]:
#!pip install yfinance==0.2.38
#!pip install pandas==2.2.2
#!pip install nbformat

In [37]:
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [38]:
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    stock_data_specific = stock_data[stock_data.Date <= '2021--06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2021-04-30']
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific.Date), y=stock_data_specific.Close.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data_specific.Date), y=revenue_data_specific.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
    height=900,
    title=stock,
    xaxis_rangeslider_visible=True)
    fig.show()

In [39]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Question 1 - Extracting Tesla Stock Data Using yfinance


Using the `Ticker` function enter the ticker symbol of the stock we want to extract data on to create a ticker object. The stock is Tesla and its ticker symbol is `TSLA`.


In [40]:

ticker_symbol = 'TSLA'
tesla_ticker = yf.Ticker(ticker_symbol)

In [41]:
tesla_data = tesla_ticker.history(period='max')  # Last 5 days of data
print(tesla_data)

                                 Open        High         Low       Close  \
Date                                                                        
2010-06-29 00:00:00-04:00    1.266667    1.666667    1.169333    1.592667   
2010-06-30 00:00:00-04:00    1.719333    2.028000    1.553333    1.588667   
2010-07-01 00:00:00-04:00    1.666667    1.728000    1.351333    1.464000   
2010-07-02 00:00:00-04:00    1.533333    1.540000    1.247333    1.280000   
2010-07-06 00:00:00-04:00    1.333333    1.333333    1.055333    1.074000   
...                               ...         ...         ...         ...   
2024-10-07 00:00:00-04:00  249.000000  249.830002  240.699997  240.830002   
2024-10-08 00:00:00-04:00  243.559998  246.210007  240.559998  244.500000   
2024-10-09 00:00:00-04:00  243.820007  247.429993  239.509995  241.050003   
2024-10-10 00:00:00-04:00  241.809998  242.789993  232.339996  238.770004   
2024-10-11 00:00:00-04:00  220.130005  223.339996  214.380005  217.800003   

## Question 2 - Extracting Tesla Revenue Data Using Webscraping


Use the `requests` library to download the webpage https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm Save the text of the response as a variable named `html_data`.


In [42]:
from bs4 import BeautifulSoup
import requests

# Example URL to fetch HTML content
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm'

# Fetch the HTML content from the URL
response = requests.get(url)
html_data = response.content  # This is a bytes object

# Create a BeautifulSoup object directly from the bytes object
soup = BeautifulSoup(html_data, 'html5lib')

# Extract all tables from the HTML content
tables = soup.find_all('table')

# Initialize a variable to store the index of the table containing 'Tesla Quarterly Revenue'
table_index = None

# Loop through the tables to find the one containing 'Tesla Quarterly Revenue'
for index, table in enumerate(tables):
    if 'Tesla Quarterly Revenue' in str(table):
        table_index = index
        break

# Check if the table was found and print the result
if table_index is not None:
    print(f"Table containing 'Tesla Quarterly Revenue' found at index {table_index}.")
else:
    print("Table containing 'Tesla Quarterly Revenue' not found.")


Table containing 'Tesla Quarterly Revenue' found at index 1.


Parse the html data using `beautiful_soup` using parser i.e `html5lib` or `html.parser`. Make sure to use the `html_data` with the content parameter as follow `html_data.content` .


Using `BeautifulSoup` or the `read_html` function extract the table with `Tesla Revenue` and store it into a dataframe named `tesla_revenue`. The dataframe should have columns `Date` and `Revenue`.


In [43]:
import pandas as pd
from bs4 import BeautifulSoup

# Assuming `table` is your BeautifulSoup object containing the HTML table

# Initialize an empty DataFrame
tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])
data = []

# Extract data from the HTML table
for row in table.find_all('tr')[1:]:  # Skip the header row
    columns = row.find_all('td')
    if len(columns) >= 2:
        date = columns[0].get_text(strip=True)
        revenue = columns[1].get_text(strip=True)
        data.append((date, revenue))

# Print the extracted data
for date, revenue in data:
    print(f"Date: {date}, Revenue: {revenue}")

# Create a DataFrame from the extracted data
tesla_revenue = pd.DataFrame(data, columns=["Date", "Revenue"])

# Check the DataFrame content
print("DataFrame before cleaning:")
print(tesla_revenue)

# Remove rows with empty strings or NaN in the Revenue column
tesla_revenue = tesla_revenue[tesla_revenue['Revenue'].notna() & (tesla_revenue['Revenue'].str.strip() != '')]

# Check the DataFrame after cleaning
print("DataFrame after cleaning:")
print(tesla_revenue)

# Check shape
print("Final shape:", tesla_revenue.shape)


Date: 2022-09-30, Revenue: $21,454
Date: 2022-06-30, Revenue: $16,934
Date: 2022-03-31, Revenue: $18,756
Date: 2021-12-31, Revenue: $17,719
Date: 2021-09-30, Revenue: $13,757
Date: 2021-06-30, Revenue: $11,958
Date: 2021-03-31, Revenue: $10,389
Date: 2020-12-31, Revenue: $10,744
Date: 2020-09-30, Revenue: $8,771
Date: 2020-06-30, Revenue: $6,036
Date: 2020-03-31, Revenue: $5,985
Date: 2019-12-31, Revenue: $7,384
Date: 2019-09-30, Revenue: $6,303
Date: 2019-06-30, Revenue: $6,350
Date: 2019-03-31, Revenue: $4,541
Date: 2018-12-31, Revenue: $7,226
Date: 2018-09-30, Revenue: $6,824
Date: 2018-06-30, Revenue: $4,002
Date: 2018-03-31, Revenue: $3,409
Date: 2017-12-31, Revenue: $3,288
Date: 2017-09-30, Revenue: $2,985
Date: 2017-06-30, Revenue: $2,790
Date: 2017-03-31, Revenue: $2,696
Date: 2016-12-31, Revenue: $2,285
Date: 2016-09-30, Revenue: $2,298
Date: 2016-06-30, Revenue: $1,270
Date: 2016-03-31, Revenue: $1,147
Date: 2015-12-31, Revenue: $1,214
Date: 2015-09-30, Revenue: $937
Date: 20

In [44]:
tesla_revenue["Revenue"] = tesla_revenue['Revenue'].str.replace(',|\$',"", regex=True)

In [45]:
tesla_revenue = tesla_revenue[tesla_revenue['Revenue'].notna() & (tesla_revenue['Revenue'].str.strip() != '')]


In [46]:
tesla_revenue.dropna(inplace=True)

tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""]
for date, revenue in data:
 print(f"Date: {date}, Revenue: {revenue}")

Date: 2022-09-30, Revenue: $21,454
Date: 2022-06-30, Revenue: $16,934
Date: 2022-03-31, Revenue: $18,756
Date: 2021-12-31, Revenue: $17,719
Date: 2021-09-30, Revenue: $13,757
Date: 2021-06-30, Revenue: $11,958
Date: 2021-03-31, Revenue: $10,389
Date: 2020-12-31, Revenue: $10,744
Date: 2020-09-30, Revenue: $8,771
Date: 2020-06-30, Revenue: $6,036
Date: 2020-03-31, Revenue: $5,985
Date: 2019-12-31, Revenue: $7,384
Date: 2019-09-30, Revenue: $6,303
Date: 2019-06-30, Revenue: $6,350
Date: 2019-03-31, Revenue: $4,541
Date: 2018-12-31, Revenue: $7,226
Date: 2018-09-30, Revenue: $6,824
Date: 2018-06-30, Revenue: $4,002
Date: 2018-03-31, Revenue: $3,409
Date: 2017-12-31, Revenue: $3,288
Date: 2017-09-30, Revenue: $2,985
Date: 2017-06-30, Revenue: $2,790
Date: 2017-03-31, Revenue: $2,696
Date: 2016-12-31, Revenue: $2,285
Date: 2016-09-30, Revenue: $2,298
Date: 2016-06-30, Revenue: $1,270
Date: 2016-03-31, Revenue: $1,147
Date: 2015-12-31, Revenue: $1,214
Date: 2015-09-30, Revenue: $937
Date: 20

In [47]:
# Remove duplicate rows from the DataFrame
tesla_revenue = tesla_revenue.drop_duplicates()

# Verify the shape of the DataFrame
print("Updated shape:", tesla_revenue.shape)


Updated shape: (53, 2)


In [48]:
print(tesla_revenue.tail())

          Date Revenue
48  2010-09-30      31
49  2010-06-30      28
50  2010-03-31      21
52  2009-09-30      46
53  2009-06-30      27


## Question 3 - Extracting GameStop Stock Data Using yfinance


In [49]:
ticker_symbol = 'GME'
gamestop_ticker = yf.Ticker(ticker_symbol)

In [50]:
gme_data = gamestop_ticker.history(period='max')  # Last 5 days of data
print(gme_data)

                                Open       High        Low      Close  \
Date                                                                    
2002-02-13 00:00:00-05:00   1.620128   1.693350   1.603296   1.691666   
2002-02-14 00:00:00-05:00   1.712707   1.716074   1.670626   1.683251   
2002-02-15 00:00:00-05:00   1.683250   1.687458   1.658002   1.674834   
2002-02-19 00:00:00-05:00   1.666418   1.666418   1.578047   1.607504   
2002-02-20 00:00:00-05:00   1.615920   1.662210   1.603296   1.662210   
...                              ...        ...        ...        ...   
2024-10-07 00:00:00-04:00  21.350000  21.530001  20.809999  20.900000   
2024-10-08 00:00:00-04:00  20.900000  21.270000  20.530001  20.709999   
2024-10-09 00:00:00-04:00  20.500000  20.740000  20.299999  20.500000   
2024-10-10 00:00:00-04:00  20.469999  21.049999  20.379999  20.910000   
2024-10-11 00:00:00-04:00  20.660000  21.219999  20.660000  20.830000   

                             Volume  Dividends  St

## Question 4: Use Webscraping to Extract GME Revenue Data


Use the `requests` library to download the webpage https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html. Save the text of the response as a variable named `html_data_2`.


In [51]:
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html'
response = requests.get(url)
html_data_2 = response.content
soup2 = BeautifulSoup(html_data_2, 'html.parser')
tables = soup2.find_all('table')
table_index = None
for index, table in enumerate(tables):
    if 'GameStop Quarterly Revenue' in str(table):
        table_index = index

# Check if the table was found and print the result
if table_index is not None:
    print(f"Table containing GameStop Quarterly Revenue' found at index {table_index}.")
else:
    print("Table containing GameStop Quarterly Revenue' not found.")
    

Table containing GameStop Quarterly Revenue' found at index 1.


In [52]:
# Access the specific table at index 1
table = tables[1]  # Assuming the second table is the one you want

# Initialize an empty DataFrame
gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])
data = []

# Extract data from the selected HTML table
for row in table.find_all('tr')[1:]:  # Skip the header row
    columns = row.find_all('td')
    
    # Debugging output to see each row's content
    print(f"Row: {row.get_text(strip=True)}")  # Print the full row
    
    if len(columns) >= 2:
        date = columns[0].get_text(strip=True)
        revenue = columns[1].get_text(strip=True)
        data.append((date, revenue))
    else:
        print("Row skipped due to insufficient columns")

# Print the extracted data
for date, revenue in data:
    print(f"Date: {date}, Revenue: {revenue}")

# Create a DataFrame from the extracted data
gme_revenue = pd.DataFrame(data, columns=["Date", "Revenue"])

# Check the DataFrame content
print("DataFrame before cleaning:")
print(gme_revenue)

# Convert Revenue to string to allow string operations
gme_revenue['Revenue'] = gme_revenue['Revenue'].astype(str)

# Clean the Revenue column by removing dollar signs and commas
gme_revenue['Revenue'] = gme_revenue['Revenue'].replace({'\$': '', ',': ''}, regex=True)

# Convert Revenue to numeric type (float)
gme_revenue['Revenue'] = pd.to_numeric(gme_revenue['Revenue'], errors='coerce')

# Check the DataFrame after cleaning
print("DataFrame after cleaning:")
print(gme_revenue)

# Check shape
print("Final shape:", gme_revenue.shape)

Row: 2020-04-30$1,021
Row: 2020-01-31$2,194
Row: 2019-10-31$1,439
Row: 2019-07-31$1,286
Row: 2019-04-30$1,548
Row: 2019-01-31$3,063
Row: 2018-10-31$1,935
Row: 2018-07-31$1,501
Row: 2018-04-30$1,786
Row: 2018-01-31$2,825
Row: 2017-10-31$1,989
Row: 2017-07-31$1,688
Row: 2017-04-30$2,046
Row: 2017-01-31$2,403
Row: 2016-10-31$1,959
Row: 2016-07-31$1,632
Row: 2016-04-30$1,972
Row: 2016-01-31$3,525
Row: 2015-10-31$2,016
Row: 2015-07-31$1,762
Row: 2015-04-30$2,061
Row: 2015-01-31$3,476
Row: 2014-10-31$2,092
Row: 2014-07-31$1,731
Row: 2014-04-30$1,996
Row: 2014-01-31$3,684
Row: 2013-10-31$2,107
Row: 2013-07-31$1,384
Row: 2013-04-30$1,865
Row: 2013-01-31$3,562
Row: 2012-10-31$1,773
Row: 2012-07-31$1,550
Row: 2012-04-30$2,002
Row: 2012-01-31$3,579
Row: 2011-10-31$1,947
Row: 2011-07-31$1,744
Row: 2011-04-30$2,281
Row: 2011-01-31$3,693
Row: 2010-10-31$1,899
Row: 2010-07-31$1,799
Row: 2010-04-30$2,083
Row: 2010-01-31$3,524
Row: 2009-10-31$1,835
Row: 2009-07-31$1,739
Row: 2009-04-30$1,981
Row: 2009-

In [53]:
print(gme_revenue.tail())

          Date  Revenue
57  2006-01-31     1667
58  2005-10-31      534
59  2005-07-31      416
60  2005-04-30      475
61  2005-01-31      709


## Question 5: Plot Tesla Stock Graph


In [54]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

tesla_data['Date'] = pd.to_datetime(tesla_data['Date'])  # Ensure 'Date' is in datetime format
filtered_tesla_data = tesla_data[tesla_data['Date'] <= '2021-06-30']


make_graph(filtered_tesla_data, tesla_revenue, 'Tesla')

## Question 6 - GameStop Stock and Revenue Dashboard


In [1]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots


gme_data['Date'] = pd.to_datetime(gme_data['Date'])  # Ensure 'Date' is in datetime format
filtered_gme_data = gme_data[gme_data['Date'] <= '2021-06-30']

make_graph(filtered_gme_data, gme_revenue, 'GameStop')

<h2>About the Authors:</h2> 

<a href="https://www.linkedin.com/in/mekebib-tadesse-assefa/">Mekebib Tadesse Assefa</a> I am a certified Data Scientist and Engineer with a BSc in Engineering and a Master's in Electoral Policy and Administration. My passion lies at the intersection of technology, data, and governance, particularly in developing innovative solutions to enhance electoral processes and governance in the Global South. With expertise in data analytics, I have a proven track record of leveraging insights to support democracy-building efforts and foster sustainable development. As an advocate for digital rights, I work closely with non-profit organizations, promoting cooperation across borders to empower Global South communities.

## <h3 align="center"> © Mekebib Tadesse Assefa, Engineer, Data Scientist & Electoral Policy Expert 2024. All rights reserved. <h3/>

```toggle ## Change Log
```
```toggle | Date (YYYY-MM-DD) | Version | Changed By    | Change Description        |
```
```toggle | ----------------- | ------- | ------------- | ------------------------- |
```
```toggle | 2022-02-28        | 1.2     | Lakshmi Holla | Changed the URL of GameStop |
```
```toggle | 2020-11-10        | 1.1     | Malika Singla | Deleted the Optional part |
```
```toggle | 2020-08-27        | 1.0     | Malika Singla | Added lab to GitLab       |
```
