<a href="https://colab.research.google.com/github/tleitch/Machine-Learning-for-Algorithmic-Trading-Second-Edition/blob/master/02_market_and_fundamental_data/03_data_providers/02_yfinance_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading Market and Fundamental Data with `yfinance`

## Imports & Settings

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
!pip install yfinance
import pandas as pd
import yfinance as yf

Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/a7/ee/315752b9ef281ba83c62aa7ec2e2074f85223da6e7e74efb4d3e11c0f510/yfinance-0.1.59.tar.gz
Collecting lxml>=4.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/30/c0/d0526314971fc661b083ab135747dc68446a3022686da8c16d25fcf6ef07/lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3MB)
[K     |████████████████████████████████| 6.3MB 8.0MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.59-py2.py3-none-any.whl size=23442 sha256=f8edc87eacecef4980a43abd9b192ef41a82c2f0cea1083dba1466b0c9dbcf7e
  Stored in directory: /root/.cache/pip/wheels/f8/2a/0f/4b5a86e1d52e451757eb6bc17fd899629f0925c777741b6d04
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfu

## How to work with a Ticker object

In [4]:
symbol = 'FB'
ticker = yf.Ticker(symbol)

### Show ticker info

In [5]:
pd.Series(ticker.info).head(20)

zip                                                                        94025
sector                                                    Communication Services
fullTimeEmployees                                                          60654
longBusinessSummary            Facebook, Inc. develops products that enable p...
city                                                                  Menlo Park
phone                                                               650-543-4800
state                                                                         CA
country                                                            United States
companyOfficers                                                               []
website                                                   http://investor.fb.com
maxAge                                                                         1
address1                                                        1601 Willow Road
industry                    

### Get market data

In [7]:
data = ticker.history(period='5d',
                      interval='1m',
                      start=None,
                      end=None,
                      actions=True,
                      auto_adjust=True,
                      back_adjust=False)
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1949 entries, 2021-05-24 09:30:00-04:00 to 2021-05-28 15:59:00-04:00
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          1949 non-null   float64
 1   High          1949 non-null   float64
 2   Low           1949 non-null   float64
 3   Close         1949 non-null   float64
 4   Volume        1949 non-null   int64  
 5   Dividends     1949 non-null   int64  
 6   Stock Splits  1949 non-null   int64  
dtypes: float64(4), int64(3)
memory usage: 121.8 KB


### View company actions

In [8]:
# show actions (dividends, splits)
ticker.actions

Unnamed: 0_level_0,Dividends,Stock Splits
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1


In [9]:
ticker.dividends

Series([], Name: Dividends, dtype: int64)

In [10]:
ticker.splits

Series([], Name: Stock Splits, dtype: int64)

### Annual and Quarterly Financial Statement Summary

In [11]:
ticker.financials

Unnamed: 0,2020-12-31,2019-12-31,2018-12-31,2017-12-31
Research Development,18447000000.0,13600000000.0,10273000000.0,7754000000.0
Effect Of Accounting Charges,,,,
Income Before Tax,33180000000.0,24812000000.0,25361000000.0,20594000000.0
Minority Interest,,,,
Net Income,29146000000.0,18485000000.0,22112000000.0,15934000000.0
Selling General Administrative,18155000000.0,15341000000.0,11297000000.0,7242000000.0
Gross Profit,69273000000.0,57927000000.0,46483000000.0,35199000000.0
Ebit,32671000000.0,28986000000.0,24913000000.0,20203000000.0
Operating Income,32671000000.0,28986000000.0,24913000000.0,20203000000.0
Other Operating Expenses,,,,


In [12]:
ticker.quarterly_financials

Unnamed: 0,2021-03-31,2020-12-31,2020-09-30,2020-06-30
Research Development,5197000000.0,5207000000.0,4763000000.0,4462000000.0
Effect Of Accounting Charges,,,,
Income Before Tax,11503000000.0,13055000000.0,8133000000.0,6131000000.0
Minority Interest,,,,
Net Income,9497000000.0,11219000000.0,7846000000.0,5178000000.0
Selling General Administrative,4465000000.0,4880000000.0,4473000000.0,4433000000.0
Gross Profit,21040000000.0,22862000000.0,17276000000.0,14858000000.0
Ebit,11378000000.0,12775000000.0,8040000000.0,5963000000.0
Operating Income,11378000000.0,12775000000.0,8040000000.0,5963000000.0
Other Operating Expenses,,,,


### Annual and Quarterly Balance Sheet

In [13]:
ticker.balance_sheet

Unnamed: 0,2020-12-31,2019-12-31,2018-12-31,2017-12-31
Intangible Assets,623000000.0,894000000.0,1294000000.0,1884000000.0
Capital Surplus,50018000000.0,45851000000.0,42906000000.0,40584000000.0
Total Liab,31026000000.0,32322000000.0,13207000000.0,10177000000.0
Total Stockholder Equity,128290000000.0,101054000000.0,84127000000.0,74347000000.0
Other Current Liab,9964000000.0,11186000000.0,4494000000.0,2590000000.0
Total Assets,159316000000.0,133376000000.0,97334000000.0,84524000000.0
Other Current Assets,241000000.0,8000000.0,10000000.0,18000000.0
Retained Earnings,77345000000.0,55692000000.0,41981000000.0,33990000000.0
Other Liab,5945000000.0,7745000000.0,6190000000.0,6417000000.0
Good Will,19050000000.0,18715000000.0,18301000000.0,18221000000.0


In [14]:
ticker.quarterly_balance_sheet

Unnamed: 0,2021-03-31,2020-12-31,2020-09-30,2020-06-30
Intangible Assets,505000000.0,623000000.0,744000000.0,859000000.0
Capital Surplus,51160000000.0,50018000000.0,48910000000.0,47805000000.0
Total Liab,29866000000.0,31026000000.0,28706000000.0,29244000000.0
Total Stockholder Equity,133657000000.0,128290000000.0,117731000000.0,110447000000.0
Other Current Liab,1388000000.0,9964000000.0,1829000000.0,993000000.0
Total Assets,163523000000.0,159316000000.0,146437000000.0,139691000000.0
Other Current Assets,257000000.0,241000000.0,222000000.0,308000000.0
Retained Earnings,82343000000.0,77345000000.0,68513000000.0,62784000000.0
Other Liab,6101000000.0,5945000000.0,6655000000.0,7879000000.0
Good Will,19056000000.0,19050000000.0,19031000000.0,19029000000.0


### Annual and Quarterly Cashflow Statement

In [15]:
ticker.cashflow

Unnamed: 0,2020-12-31,2019-12-31,2018-12-31,2017-12-31
Investments,-14520000000.0,-4254000000.0,2449000000.0,-13250000000.0
Change To Liabilities,91000000.0,236000000.0,274000000.0,47000000.0
Total Cashflows From Investing Activities,-30059000000.0,-19864000000.0,-11603000000.0,-20118000000.0
Net Borrowings,-580000000.0,-775000000.0,500000000.0,500000000.0
Total Cash From Financing Activities,-10292000000.0,-7299000000.0,-15572000000.0,-5235000000.0
Change To Operating Activities,-1302000000.0,8975000000.0,91000000.0,3449000000.0
Net Income,29146000000.0,18485000000.0,22112000000.0,15934000000.0
Change In Cash,-1325000000.0,9155000000.0,1920000000.0,-905000000.0
Repurchase Of Stock,-9836000000.0,-6539000000.0,-16087000000.0,-5222000000.0
Effect Of Exchange Rate,279000000.0,4000000.0,-179000000.0,232000000.0


In [16]:
ticker.quarterly_cashflow

Unnamed: 0,2021-03-31,2020-12-31,2020-09-30,2020-06-30
Investments,-600000000.0,-529000000.0,-13177000000.0,-338000000.0
Change To Liabilities,-244000000.0,-59000000.0,251000000.0,-41000000.0
Total Cashflows From Investing Activities,-4874000000.0,-5173000000.0,-16599000000.0,-4178000000.0
Net Borrowings,-201000000.0,-158000000.0,-196000000.0,-46000000.0
Total Cash From Financing Activities,-5185000000.0,-3207000000.0,-2911000000.0,-2152000000.0
Change To Operating Activities,-2014000000.0,2606000000.0,188000000.0,-4794000000.0
Net Income,9497000000.0,11219000000.0,7846000000.0,5178000000.0
Change In Cash,1937000000.0,5975000000.0,-9590000000.0,-2358000000.0
Repurchase Of Stock,-5016000000.0,-3049000000.0,-2725000000.0,-2122000000.0
Effect Of Exchange Rate,-246000000.0,315000000.0,91000000.0,95000000.0


In [17]:
ticker.earnings

Unnamed: 0_level_0,Revenue,Earnings
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2017,40653000000,15934000000
2018,55838000000,22112000000
2019,70697000000,18485000000
2020,85965000000,29146000000


In [18]:
ticker.quarterly_earnings

Unnamed: 0_level_0,Revenue,Earnings
Quarter,Unnamed: 1_level_1,Unnamed: 2_level_1
2Q2020,18687000000,5178000000
3Q2020,21470000000,7846000000
4Q2020,28072000000,11219000000
1Q2021,26171000000,9497000000


### Sustainability: Environmental, Social and Governance (ESG)

In [19]:
ticker.sustainability

Unnamed: 0_level_0,Value
2021-5,Unnamed: 1_level_1
palmOil,False
controversialWeapons,False
gambling,False
socialScore,13.51
nuclear,False
furLeather,False
alcoholic,False
gmo,False
catholic,False
socialPercentile,


### Analyst Recommendations

In [20]:
ticker.recommendations.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 551 entries, 2012-06-22 07:56:00 to 2021-05-25 12:27:57
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Firm        551 non-null    object
 1   To Grade    551 non-null    object
 2   From Grade  551 non-null    object
 3   Action      551 non-null    object
dtypes: object(4)
memory usage: 21.5+ KB


In [21]:
ticker.recommendations.tail(10)

Unnamed: 0_level_0,Firm,To Grade,From Grade,Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-04-29 12:35:13,Jefferies,Buy,,main
2021-04-29 13:23:20,Canaccord Genuity,Buy,,main
2021-04-29 13:26:38,Credit Suisse,Outperform,,main
2021-04-29 13:42:23,"Monness, Crespi, Hardt",Buy,,main
2021-04-29 13:43:43,MKM Partners,Buy,,main
2021-04-29 13:45:05,Evercore ISI Group,Outperform,,main
2021-04-29 13:56:47,Barclays,Overweight,,main
2021-04-29 16:38:54,Rosenblatt,Buy,,main
2021-05-10 12:30:42,Citigroup,Neutral,Buy,down
2021-05-25 12:27:57,HSBC,Reduce,,main


### Upcoming Events

In [22]:
ticker.calendar

Unnamed: 0,0,1
Earnings Date,2021-07-28 00:00:00,2021-08-02 00:00:00
Earnings Average,3.02,3.02
Earnings Low,2.36,2.36
Earnings High,3.38,3.38
Revenue Average,27818100000,27818100000
Revenue Low,27046000000,27046000000
Revenue High,28399100000,28399100000


### Option Expiration Dates

In [23]:
ticker.options

('2021-06-04',
 '2021-06-11',
 '2021-06-18',
 '2021-06-25',
 '2021-07-02',
 '2021-07-09',
 '2021-07-16',
 '2021-08-20',
 '2021-09-17',
 '2021-10-15',
 '2021-11-19',
 '2021-12-17',
 '2022-01-21',
 '2022-02-18',
 '2022-03-18',
 '2022-06-17',
 '2022-09-16',
 '2023-01-20',
 '2023-06-16',
 '2026-02-21')

In [24]:
expiration = ticker.options[0]

In [25]:
options = ticker.option_chain(expiration)

In [26]:
options.calls.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   contractSymbol     60 non-null     object        
 1   lastTradeDate      60 non-null     datetime64[ns]
 2   strike             60 non-null     float64       
 3   lastPrice          60 non-null     float64       
 4   bid                60 non-null     float64       
 5   ask                60 non-null     float64       
 6   change             60 non-null     float64       
 7   percentChange      60 non-null     float64       
 8   volume             53 non-null     float64       
 9   openInterest       60 non-null     int64         
 10  impliedVolatility  60 non-null     float64       
 11  inTheMoney         60 non-null     bool          
 12  contractSize       60 non-null     object        
 13  currency           60 non-null     object        
dtypes: bool(1), 

In [27]:
options.calls.head()

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,FB210604C00185000,2021-05-20 16:18:27,185.0,132.4,141.7,146.25,0.0,0.0,2.0,0,2.414066,True,REGULAR,USD
1,FB210604C00195000,2021-05-17 04:14:02,195.0,109.9,131.7,136.25,0.0,0.0,,2,2.214848,True,REGULAR,USD
2,FB210604C00200000,2021-05-27 14:01:38,200.0,126.6,126.7,131.25,0.0,0.0,2.0,8,2.117192,True,REGULAR,USD
3,FB210604C00215000,2021-05-17 04:14:02,215.0,98.13,111.7,116.25,0.0,0.0,,7,1.837891,True,REGULAR,USD
4,FB210604C00220000,2021-05-20 15:41:30,220.0,97.52,106.7,111.25,0.0,0.0,1.0,0,1.750001,True,REGULAR,USD


In [28]:
options.puts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65 entries, 0 to 64
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   contractSymbol     65 non-null     object        
 1   lastTradeDate      65 non-null     datetime64[ns]
 2   strike             65 non-null     float64       
 3   lastPrice          65 non-null     float64       
 4   bid                63 non-null     float64       
 5   ask                65 non-null     float64       
 6   change             65 non-null     float64       
 7   percentChange      65 non-null     float64       
 8   volume             54 non-null     float64       
 9   openInterest       64 non-null     float64       
 10  impliedVolatility  65 non-null     float64       
 11  inTheMoney         65 non-null     bool          
 12  contractSize       65 non-null     object        
 13  currency           65 non-null     object        
dtypes: bool(1), 

## Data Download with proxy server

You can use a proxy server to avoid having your IP blacklisted as illustrated below (but need an actual PROXY_SERVER).

In [29]:
PROXY_SERVER = 'PROXY_SERVER'

The following will only work with proper PROXY_SERVER...

In [30]:
# msft = yf.Ticker("MSFT")

# msft.history(proxy=PROXY_SERVER)
# msft.get_actions(proxy=PROXY_SERVER)
# msft.get_dividends(proxy=PROXY_SERVER)
# msft.get_splits(proxy=PROXY_SERVER)
# msft.get_balance_sheet(proxy=PROXY_SERVER)
# msft.get_cashflow(proxy=PROXY_SERVER)
# msgt.option_chain(proxy=PROXY_SERVER)

## Downloading multiple symbols

In [37]:
tickers = yf.Tickers('msft aapl goog')

In [38]:
tickers

yfinance.Tickers object <MSFT,AAPL,GOOG>

In [36]:
#pd.Series(tickers.tickers.MSFT.info)

AttributeError: ignored

In [39]:
#tickers.tickers.AAPL.history(period="1mo")

AttributeError: ignored

In [40]:
tickers.history(period='1mo').stack(-1)

[*********************100%***********************]  3 of 3 completed


Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Dividends,High,Low,Open,Stock Splits,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-04-29,AAPL,133.253662,0.0,136.837586,132.225410,136.238598,0,151101000
2021-04-29,GOOG,2429.889893,0.0,2436.520020,2402.280029,2410.330078,0,1977700
2021-04-29,MSFT,251.928268,0.0,255.510009,248.426360,254.871484,0,40589000
2021-04-30,AAPL,131.237091,0.0,133.333521,130.847753,131.556541,0,109713200
2021-04-30,GOOG,2410.120117,0.0,2427.139893,2402.159912,2404.489990,0,1956700
...,...,...,...,...,...,...,...,...
2021-05-27,GOOG,2402.510010,0.0,2440.000000,2402.000000,2436.939941,0,1948100
2021-05-27,MSFT,249.309998,0.0,251.479996,249.250000,251.169998,0,24426200
2021-05-28,AAPL,124.610001,0.0,125.800003,124.550003,125.570000,0,71232700
2021-05-28,GOOG,2411.560059,0.0,2428.139893,2407.689941,2421.959961,0,1205000


In [41]:
data = yf.download("SPY AAPL", start="2020-01-01", end="2020-01-05")

[*********************100%***********************]  2 of 2 completed


In [43]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2 entries, 2020-01-02 to 2020-01-03
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   (Adj Close, AAPL)  2 non-null      float64
 1   (Adj Close, SPY)   2 non-null      float64
 2   (Close, AAPL)      2 non-null      float64
 3   (Close, SPY)       2 non-null      float64
 4   (High, AAPL)       2 non-null      float64
 5   (High, SPY)        2 non-null      float64
 6   (Low, AAPL)        2 non-null      float64
 7   (Low, SPY)         2 non-null      float64
 8   (Open, AAPL)       2 non-null      float64
 9   (Open, SPY)        2 non-null      float64
 10  (Volume, AAPL)     2 non-null      int64  
 11  (Volume, SPY)      2 non-null      int64  
dtypes: float64(10), int64(2)
memory usage: 208.0 bytes


In [44]:
data = yf.download(
        tickers = "SPY AAPL MSFT", # list or string

        # use "period" instead of start/end
        # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
        # (optional, default is '1mo')
        period = "5d",

        # fetch data by interval (including intraday if period < 60 days)
        # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
        # (optional, default is '1d')
        interval = "1m",

        # group by ticker (to access via data['SPY'])
        # (optional, default is 'column')
        group_by = 'ticker',

        # adjust all OHLC automatically
        # (optional, default is False)
        auto_adjust = True,

        # download pre/post regular market hours data
        # (optional, default is False)
        prepost = True,

        # use threads for mass downloading? (True/False/Integer)
        # (optional, default is True)
        threads = True,

        # proxy URL scheme use use when downloading?
        # (optional, default is None)
        proxy = None
    )

[*********************100%***********************]  3 of 3 completed


In [45]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4742 entries, 2021-05-24 04:00:00-04:00 to 2021-05-28 19:59:00-04:00
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   (AAPL, Open)    4709 non-null   float64
 1   (AAPL, High)    4709 non-null   float64
 2   (AAPL, Low)     4709 non-null   float64
 3   (AAPL, Close)   4709 non-null   float64
 4   (AAPL, Volume)  4709 non-null   float64
 5   (SPY, Open)     4448 non-null   float64
 6   (SPY, High)     4448 non-null   float64
 7   (SPY, Low)      4448 non-null   float64
 8   (SPY, Close)    4448 non-null   float64
 9   (SPY, Volume)   4448 non-null   float64
 10  (MSFT, Open)    4085 non-null   float64
 11  (MSFT, High)    4085 non-null   float64
 12  (MSFT, Low)     4085 non-null   float64
 13  (MSFT, Close)   4085 non-null   float64
 14  (MSFT, Volume)  4085 non-null   float64
dtypes: float64(15)
memory usage: 592.8 KB


In [46]:
from pandas_datareader import data as pdr

import yfinance as yf
yf.pdr_override()

# download dataframe
data = pdr.get_data_yahoo('SPY',
                          start='2017-01-01',
                          end='2019-04-30',
                          auto_adjust=False)

[*********************100%***********************]  1 of 1 completed


In [47]:
# auto_adjust = True
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-04-23,290.679993,293.140015,290.420013,292.880005,282.485352,52246600
2019-04-24,292.790009,293.160004,292.070007,292.230011,281.858429,50392900
2019-04-25,292.119995,292.779999,290.730011,292.049988,281.684845,57770900
2019-04-26,292.100006,293.48999,291.23999,293.410004,282.996582,50916400
2019-04-29,293.51001,294.450012,293.410004,293.869995,283.440216,57197700


In [48]:
# auto_adjust = False
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-04-23,290.679993,293.140015,290.420013,292.880005,282.485352,52246600
2019-04-24,292.790009,293.160004,292.070007,292.230011,281.858429,50392900
2019-04-25,292.119995,292.779999,290.730011,292.049988,281.684845,57770900
2019-04-26,292.100006,293.48999,291.23999,293.410004,282.996582,50916400
2019-04-29,293.51001,294.450012,293.410004,293.869995,283.440216,57197700
