# Extracting and Visualizing Stock Data

In [1]:
import yfinance as yf
import pandas as pd 
import requests 
from bs4 import BeautifulSoup
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
import plotly.io as pio
from io import StringIO

In [2]:
pio.renderers.default = 'iframe' # os gráficos serão renderizados dentro de um iframe HTML

## Define Graphing Function

In [6]:
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(
        rows=2, cols=1, shared_xaxes=True,
        subplot_titles=("Historical Revenue", "Historical Revenue"),
        vertical_spacing=0.3
    )

    # Convertendo Date para datetime
    stock_data["Date"] = pd.to_datetime(stock_data["Date"])
    revenue_data["Date"] = pd.to_datetime(revenue_data["Date"])

    # Filtrando datas
    stock_data_specific = stock_data[stock_data["Date"] <= "2021-06-14"]
    revenue_data_specific = revenue_data[revenue_data["Date"] <= "2021-04-30"]

    # Plotando os dados de revenue duas vezes, já que Close não existe
    fig.add_trace(
        go.Scatter(
            x=stock_data_specific["Date"],
            y=stock_data_specific["Revenue"].astype("float"),
            name="Revenue"
        ), row=1, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=revenue_data_specific["Date"],
            y=revenue_data_specific["Revenue"].astype("float"),
            name="Revenue"
        ), row=2, col=1
    )

    # Configuração dos eixos
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)

    # Layout final
    fig.update_layout(
        showlegend=False,
        height=900,
        title=stock,
        xaxis_rangeslider_visible=True
    )

    # Exibição
    fig.write_html("grafico_tesla.html")
    import webbrowser
    webbrowser.open("grafico_tesla.html")

In [228]:
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    
    # Filtrando datas
    stock_data_specific = stock_data[stock_data.Date <= '2021-06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2021-04-30']

    
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific.Date), y=stock_data_specific.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data_specific.Date), y=revenue_data_specific.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
    height=900,
    title=stock,
    xaxis_rangeslider_visible=True)
    fig.show()
    from IPython.display import display, HTML
    fig_html = fig.to_html()
    display(HTML(fig_html))

## Question 1: Use yfinance to Extract Stock Data

Using the Ticker function enter the ticker symbol of the stock we want to extract data on to create a ticker object. The stock is Tesla and its ticker symbol is TSLA.

In [8]:
ticker = yf.Ticker('TSLA')

Using the ticker object and the function history extract stock information and save it in a dataframe named tesla_data. Set the period parameter to "max" so we get information for the maximum amount of time.

In [11]:
tesla_data = ticker.history(period = 'max') # Retorna um DataFrame
type(tesla_data)

pandas.core.frame.DataFrame


Reset the index using the reset_index(inplace=True) function on the tesla_data DataFrame and display the first five rows of the tesla_data dataframe using the head function. Take a screenshot of the results and code from the beginning of Question 1 to the results below.

In [13]:
tesla_data.reset_index(inplace = True)
tesla_data.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


## Question 2: Use Webscraping to Extract Tesla Revenue Data

In [15]:
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm'
html_data = requests.get(url).text


Parse the html data using beautiful_soup using parser i.e html5lib or html.parser.

In [17]:
soup = BeautifulSoup(html_data, 'html5lib')

Using BeautifulSoup or the read_html function extract the table with Tesla Revenue and store it into a dataframe named tesla_revenue. The dataframe should have columns Date and Revenue.

In [21]:
pd.read_html(str(soup)) # Utilizando read_html a partir do soup


Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



[    Tesla Annual Revenue (Millions of US $)  \
 0                                      2021   
 1                                      2020   
 2                                      2019   
 3                                      2018   
 4                                      2017   
 5                                      2016   
 6                                      2015   
 7                                      2014   
 8                                      2013   
 9                                      2012   
 10                                     2011   
 11                                     2010   
 12                                     2009   
 
    Tesla Annual Revenue (Millions of US $).1  
 0                                    $53,823  
 1                                    $31,536  
 2                                    $24,578  
 3                                    $21,461  
 4                                    $11,759  
 5                                    

In [22]:
data = pd.read_html(StringIO(html_data)) # Utilizando html_data sem o soup 
type(data)

list

In [23]:
len(data)

6

In [24]:
tesla_revenue = pd.DataFrame(data[1])

In [25]:
tesla_revenue.rename(columns ={'Tesla Quarterly Revenue (Millions of US $)': 'Date', 'Tesla Quarterly Revenue (Millions of US $).1':'Revenue'}, inplace=True)

In [26]:
# Remover virgulas e cifrões da coluna Revenue

tesla_revenue['Revenue'] = tesla_revenue['Revenue'].replace(',|\$', "", regex=True) 
tesla_revenue.head()

Unnamed: 0,Date,Revenue
0,2022-09-30,21454
1,2022-06-30,16934
2,2022-03-31,18756
3,2021-12-31,17719
4,2021-09-30,13757


In [27]:
# Remover valores vazios 

tesla_revenue.dropna(inplace=True) # Remove colunas vazias

tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""]

In [28]:
tesla_revenue.tail()

Unnamed: 0,Date,Revenue
48,2010-09-30,31
49,2010-06-30,28
50,2010-03-31,21
52,2009-09-30,46
53,2009-06-30,27


## Question 3: Use yfinance to Extract Stock Data

In [39]:
ticker = yf.Ticker('GME')
ticker

yfinance.Ticker object <GME>

In [41]:
gme_data = ticker.history(period = 'max')

In [43]:
gme_data.reset_index(inplace=True)
gme_data.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620128,1.69335,1.603296,1.691666,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.683251,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.68325,1.687458,1.658001,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.61592,1.662209,1.603296,1.662209,6892800,0.0,0.0


## Question 4: Use Webscraping to Extract GME Revenue Data

In [45]:
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html'

In [46]:
html_data_2 = requests.get(url).text

In [47]:
soup = BeautifulSoup(html_data_2, 'html5lib')

In [48]:
data = pd.read_html(StringIO(html_data_2))

In [52]:
data[1]

Unnamed: 0,GameStop Quarterly Revenue (Millions of US $),GameStop Quarterly Revenue (Millions of US $).1
0,2020-04-30,"$1,021"
1,2020-01-31,"$2,194"
2,2019-10-31,"$1,439"
3,2019-07-31,"$1,286"
4,2019-04-30,"$1,548"
...,...,...
57,2006-01-31,"$1,667"
58,2005-10-31,$534
59,2005-07-31,$416
60,2005-04-30,$475


In [53]:
gme_revenue = pd.DataFrame(data[1])
gme_revenue.head()

Unnamed: 0,GameStop Quarterly Revenue (Millions of US $),GameStop Quarterly Revenue (Millions of US $).1
0,2020-04-30,"$1,021"
1,2020-01-31,"$2,194"
2,2019-10-31,"$1,439"
3,2019-07-31,"$1,286"
4,2019-04-30,"$1,548"


In [54]:
gme_revenue = gme_revenue.rename(columns = {'GameStop Quarterly Revenue (Millions of US $)': 'Date', 'GameStop Quarterly Revenue (Millions of US $).1':'Revenue'})

In [55]:
gme_revenue['Revenue'] = gme_revenue['Revenue'].replace(',|\$','', regex=True)

In [56]:
gme_revenue.head()

Unnamed: 0,Date,Revenue
0,2020-04-30,1021
1,2020-01-31,2194
2,2019-10-31,1439
3,2019-07-31,1286
4,2019-04-30,1548


## Question 5: Plot Tesla Stock Graph


Use the make_graph function to graph the Tesla Stock Data, also provide a title for the graph. Note the graph will only show data upto June 2021.

In [67]:
make_graph(tesla_revenue, tesla_revenue, 'Tesla')

## Question 5: Plot Tesla Stock Graph

In [68]:
make_graph(gme_revenue, gme_revenue, 'Game Stop')