# 4.4 Getting Data from Yahoo Finance


- [**1. Retrieve Stock Data**](#1.-Retrieve-Stock-Data)
  - [a. Set Start and End dates for Stocks to Retrieve](#a.-Set-Start-and-End-dates-for-Stocks-to-Retrieve)  
  - [b. Retrieve Data](#b.-Retrieve-Data)  
  
  
- [**2. Extract Closing Prices from Retrieved Stock Data**](#2.-Extract-Closing-Prices-from-Retrieved-Stock-Data)  


- [**3. melt() dataframe to get Tall Shape**](#3.-melt()-dataframe-to-get-Tall-Shape)  


- [**4. Plot Data**](#4.-Plot-Data)  
  




In [1]:
from IPython.display import display, HTML
import pandas as pd
import math

import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from scipy import special

In [2]:
import datetime as dt
import pandas_datareader.data as web

# 1. Retrieve Stock Data

### a. Set Start and End dates for Stocks to Retrieve

In [3]:
# Date window for the series to retrieve.
# First day of the requested range
start = dt.datetime(year=2005, month=1, day=1)

# Last day of the requested range
end = dt.datetime(year=2018, month=12, day=31)

### b. Retrieve Data

In [4]:
# Ticker symbols to download.
stocks_to_retrieve = ['IBM', 'AAPL', 'NFLX', 'ORCL']

# Download the data for every ticker between `start` and `end` from the
# 'yahoo' data source.
# NOTE(review): this call needs network access and a working 'yahoo' reader in
# the installed pandas-datareader version; confirm before relying on it.
df = web.DataReader(
    name=stocks_to_retrieve,
    data_source='yahoo',
    start=start,
    end=end,
)

# Preview the first rows of the downloaded frame.
df.head(15)

Attributes,High,High,High,High,Low,Low,Low,Low,Open,Open,...,Close,Close,Volume,Volume,Volume,Volume,Adj Close,Adj Close,Adj Close,Adj Close
Symbols,AAPL,IBM,NFLX,ORCL,AAPL,IBM,NFLX,ORCL,AAPL,IBM,...,NFLX,ORCL,AAPL,IBM,NFLX,ORCL,AAPL,IBM,NFLX,ORCL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2005-01-03,4.650714,99.099998,1.8,13.89,4.471428,97.25,1.645714,13.39,4.627143,98.970001,...,1.702857,13.41,172998000.0,5295200.0,11334400.0,60319300.0,3.924165,65.806778,1.702857,11.676086
2005-01-04,4.676429,98.419998,1.707143,13.48,4.497857,96.519997,1.607143,12.92,4.556428,97.739998,...,1.665714,13.06,274202600.0,5711000.0,17352300.0,80906400.0,3.964467,65.099892,1.665714,11.371344
2005-01-05,4.660714,97.830002,1.677143,13.26,4.575,96.400002,1.584286,13.01,4.604286,96.599998,...,1.6,13.1,170108400.0,5646700.0,12732300.0,42548400.0,3.999188,64.965256,1.6,11.40617
2005-01-06,4.636428,96.980003,1.624286,13.35,4.523571,96.050003,1.572857,13.08,4.619286,96.540001,...,1.578571,13.22,176388800.0,4561700.0,8273300.0,55580100.0,4.002288,64.76326,1.578571,11.510653
2005-01-07,4.973571,96.800003,1.65,13.45,4.625,95.470001,1.571429,13.15,4.642857,96.5,...,1.588571,13.33,556862600.0,6200700.0,7490700.0,45685800.0,4.293701,64.480545,1.588571,11.606432
2005-01-10,5.05,96.089996,1.677143,13.45,4.848571,95.239998,1.585714,13.17,4.987857,95.779999,...,1.61,13.19,431327400.0,4625100.0,9934400.0,47571800.0,4.275722,64.413223,1.61,11.484532
2005-01-11,4.939286,95.790001,1.634286,13.39,4.581429,94.709999,1.572857,13.06,4.875,95.68,...,1.572857,13.2,652906800.0,4746400.0,7842100.0,63973000.0,4.002909,63.955448,1.572857,11.493244
2005-01-12,4.707143,95.279999,1.614286,13.49,4.521429,94.059998,1.504286,13.24,4.675,95.0,...,1.535714,13.48,479925600.0,5828600.0,12446000.0,53420800.0,4.058711,64.096817,1.535714,11.737035
2005-01-13,5.315714,96.199997,1.578571,13.67,4.980714,93.699997,1.534286,13.34,5.265,95.389999,...,1.551429,13.48,791179200.0,5339400.0,9585800.0,56987700.0,4.327803,63.585167,1.551429,11.737035
2005-01-14,5.122857,94.25,1.634286,13.76,4.942143,93.550003,1.54,13.49,5.017857,94.010002,...,1.618571,13.63,442685600.0,5520800.0,14502600.0,42509100.0,4.352605,63.349541,1.618571,11.86764


In [5]:
# Inspect the column labels: the output shows a two-level MultiIndex
# named ('Attributes', 'Symbols').
df.columns

MultiIndex(levels=[['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], ['AAPL', 'IBM', 'NFLX', 'ORCL']],
           labels=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5], [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]],
           names=['Attributes', 'Symbols'])

In [6]:
# Print a summary of the frame: index range, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3523 entries, 2005-01-03 to 2018-12-31
Data columns (total 24 columns):
(High, AAPL)         3523 non-null float64
(High, IBM)          3523 non-null float64
(High, NFLX)         3523 non-null float64
(High, ORCL)         3523 non-null float64
(Low, AAPL)          3523 non-null float64
(Low, IBM)           3523 non-null float64
(Low, NFLX)          3523 non-null float64
(Low, ORCL)          3523 non-null float64
(Open, AAPL)         3523 non-null float64
(Open, IBM)          3523 non-null float64
(Open, NFLX)         3523 non-null float64
(Open, ORCL)         3523 non-null float64
(Close, AAPL)        3523 non-null float64
(Close, IBM)         3523 non-null float64
(Close, NFLX)        3523 non-null float64
(Close, ORCL)        3523 non-null float64
(Volume, AAPL)       3523 non-null float64
(Volume, IBM)        3523 non-null float64
(Volume, NFLX)       3523 non-null float64
(Volume, ORCL)       3523 non-null float64
(Adj Close, AAP

# 2. Extract *Closing* Prices from Retrieved Stock Data  
- Other attributes that can be extracted the same way:  
  - Low  
  - High  
  - Open
  - Volume  
  - Adj Close

In [7]:
# Select every column under the top-level 'Close' attribute; the result has
# one column per stock symbol.
df_closing = df.loc[:, 'Close']

# Preview the first rows of the closing prices.
df_closing.head()

Symbols,AAPL,IBM,NFLX,ORCL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-03,4.520714,97.75,1.702857,13.41
2005-01-04,4.567143,96.699997,1.665714,13.06
2005-01-05,4.607143,96.5,1.6,13.1
2005-01-06,4.610714,96.199997,1.578571,13.22
2005-01-07,4.946429,95.779999,1.588571,13.33


# 3. melt() dataframe to get Tall Shape  
To reshape the dataframe from a Wide Shape to a Tall/Long Shape:
1. Move the dataframe index into a regular column (using the `reset_index()` function)
2. Use the `melt()` function


In [8]:
# Move the Date index into a regular column.
# The frame is rebuilt from `df` with the non-mutating form of reset_index(),
# so re-running this cell always produces the same result. An in-place reset
# applied to an already-reset frame would insert a spurious 'index' column.
df_closing = df['Close'].reset_index()

# Preview: 'Date' is now an ordinary column next to the stock symbols.
df_closing.head()

Symbols,Date,AAPL,IBM,NFLX,ORCL
0,2005-01-03,4.520714,97.75,1.702857,13.41
1,2005-01-04,4.567143,96.699997,1.665714,13.06
2,2005-01-05,4.607143,96.5,1.6,13.1
3,2005-01-06,4.610714,96.199997,1.578571,13.22
4,2005-01-07,4.946429,95.779999,1.588571,13.33


In [9]:
# melt() to change shape from Wide to Tall: one row per (Date, Stock) pair.
# The symbols to melt are taken from the downloaded 'Close' columns instead of
# a second hard-coded ticker list, so this cell keeps working when the set of
# retrieved stocks changes.
df_melt = df_closing.melt(id_vars='Date',
                          value_vars=list(df['Close'].columns),
                          var_name='Stock',
                          value_name='Closing Values')

# Preview only the first rows rather than displaying the whole tall frame.
df_melt.head(10)



Unnamed: 0,Date,Stock,Closing Values
0,2005-01-03,AAPL,4.520714
1,2005-01-04,AAPL,4.567143
2,2005-01-05,AAPL,4.607143
3,2005-01-06,AAPL,4.610714
4,2005-01-07,AAPL,4.946429
5,2005-01-10,AAPL,4.925714
6,2005-01-11,AAPL,4.611429
7,2005-01-12,AAPL,4.675714
8,2005-01-13,AAPL,4.985714
9,2005-01-14,AAPL,5.014286


# 4. Plot Data

In [12]:
# Line chart of the closing values over time, one coloured trace per stock.
fig = px.line(
    data_frame=df_melt,
    x='Date',
    y='Closing Values',
    color='Stock',
    title='Stock Closing Values',
    template='plotly_dark',
)

# Show a range slider on the x-axis.
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True)))

fig.show()