## FINAL PROJECT PYTHON FOR DATA SCIENCE

In [24]:
import matplotlib.pyplot as plt
import plotly.express as px
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [25]:
# download tesla stock data
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max")

# reset index for better handling
tesla_data.reset_index(inplace=True)
print(tesla_data.head())  # Display first few rows

                       Date      Open      High       Low     Close  \
0 2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667   
1 2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667   
2 2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000   
3 2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   
4 2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000   

      Volume  Dividends  Stock Splits  
0  281494500        0.0           0.0  
1  257806500        0.0           0.0  
2  123282000        0.0           0.0  
3   77097000        0.0           0.0  
4  103003500        0.0           0.0  


In [26]:
# URL for tesla revenue data
url = "https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue"

# add a User-Agent header to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# fetch the webpage content
response = requests.get(url, headers=headers)

# check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, "html.parser")

    # extract table data
    tables = soup.find_all("table")

    # extracting revenue table
    if tables: # check if tables were found
        tesla_revenue = pd.read_html(str(tables[0]))[0] # assuming the first table is the revenue table

        # rename columns
        tesla_revenue.columns = ["Date", "Revenue"]
        tesla_revenue = tesla_revenue.dropna()  # remove empty values

        # convert revenue to numeric
        tesla_revenue["Revenue"] = tesla_revenue["Revenue"].replace('[\$,]', '', regex=True).astype(float)

        print(tesla_revenue.head())  # display first few rows
    else:
        print("No tables found on the page.")
else:
    print(f"Request failed with status code: {response.status_code}")
    print("The website might be blocking your request.")
    print("Consider adding delays between requests or using a different IP address.")


   Date  Revenue
0  2024  97690.0
1  2023  96773.0
2  2022  81462.0
3  2021  53823.0
4  2020  31536.0



Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



In [27]:
# download GameStop stock data
gamestop = yf.Ticker("GME")
gamestop_data = gamestop.history(period="max")

# reset index
gamestop_data.reset_index(inplace=True)
print(gamestop_data.head())  # display first few rows


                       Date      Open      High       Low     Close    Volume  \
0 2002-02-13 00:00:00-05:00  1.620129  1.693350  1.603296  1.691667  76216000   
1 2002-02-14 00:00:00-05:00  1.712707  1.716074  1.670626  1.683250  11021600   
2 2002-02-15 00:00:00-05:00  1.683250  1.687458  1.658002  1.674834   8389600   
3 2002-02-19 00:00:00-05:00  1.666418  1.666418  1.578048  1.607504   7410400   
4 2002-02-20 00:00:00-05:00  1.615920  1.662210  1.603296  1.662210   6892800   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


In [28]:
# URL for GameStop revenue data
url = "https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue"

# add a User-Agent header to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# fetch the webpage content with headers
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# extract revenue table
tables = soup.find_all("table")

# check if any tables were found before proceeding
if tables:
    gamestop_revenue = pd.read_html(str(tables[0]))[0]  # assuming the first table is the revenue table

    # rename columns
    gamestop_revenue.columns = ["Date", "Revenue"]
    gamestop_revenue = gamestop_revenue.dropna()

    # convert revenue to numeric
    gamestop_revenue["Revenue"] = gamestop_revenue["Revenue"].replace('[\$,]', '', regex=True).astype(float)

    print(gamestop_revenue.head())  # display first few rows
else:
    print("No tables found on the page. The website might be blocking the request.")

   Date  Revenue
0  2024   5273.0
1  2023   5927.0
2  2022   6011.0
3  2021   5090.0
4  2020   6466.0



Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



In [29]:
# Tesla Stock Price Plot
fig1 = px.line(tesla_data, x="Date", y="Close", title="Tesla Stock Price Over Time")
fig1.show()

# Tesla Revenue Plot
fig2 = px.line(tesla_revenue, x="Date", y="Revenue", title="Tesla Revenue Over Time")
fig2.show()

# GameStop Stock Price Plot
fig3 = px.line(gamestop_data, x="Date", y="Close", title="GameStop Stock Price Over Time")
fig3.show()

# GameStop Revenue Plot
fig4 = px.line(gamestop_revenue, x="Date", y="Revenue", title="GameStop Revenue Over Time")
fig4.show()
