In [1]:
# install main library YFinance
!pip install yfinance



In [2]:
# IMPORTS
import numpy as np
import pandas as pd

#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.express as px

import time
from datetime import date

**Question 1: [Macro] Average growth of GDP in 2023**

What is the average growth (in %) of GDP in 2023?

In [3]:
end = date.today()
print(f'Year = {end.year}; month= {end.month}; day={end.day}')

start = date(year=end.year-70, month=end.month, day=end.day)
print(f'Period for indexes: {start} to {end} ')

Year = 2024; month= 4; day=22
Period for indexes: 1954-04-22 to 2024-04-22 


In [4]:
# Real Gross Domestic Product (GDPC1) from FRED
# https://fred.stlouisfed.org/series/GDPC1
gdpc = pdr.DataReader("GDPC1", "fred", start=start)

In [5]:
gdpc.head()

Unnamed: 0_level_0,GDPC1
DATE,Unnamed: 1_level_1
1954-07-01,2880.482
1954-10-01,2936.852
1955-01-01,3020.746
1955-04-01,3069.91
1955-07-01,3111.379


In [6]:
gdpc.tail(8)

Unnamed: 0_level_0,GDPC1
DATE,Unnamed: 1_level_1
2022-01-01,21738.871
2022-04-01,21708.16
2022-07-01,21851.134
2022-10-01,21989.981
2023-01-01,22112.329
2023-04-01,22225.35
2023-07-01,22490.692
2023-10-01,22679.255


In [7]:
gdpc['gdpc_us_yoy'] = gdpc.GDPC1/gdpc.GDPC1.shift(4)-1

gdpc.tail(8)

Unnamed: 0_level_0,GDPC1,gdpc_us_yoy
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-01,21738.871,0.035651
2022-04-01,21708.16,0.018706
2022-07-01,21851.134,0.017132
2022-10-01,21989.981,0.006517
2023-01-01,22112.329,0.017179
2023-04-01,22225.35,0.023825
2023-07-01,22490.692,0.029269
2023-10-01,22679.255,0.031345


In [8]:
avg_gdp_growth_2023 = gdpc['gdpc_us_yoy'][-4:].mean()*100
avg_gdp_growth_2023

2.5404433928676253

In [9]:
round(avg_gdp_growth_2023, 1)

2.5

**Question 2. [Macro] Inverse "Treasury Yield"**

Find the min value of (dgs10-dgs2) after since year 2000 (2000-01-01) and write it down as an answer, round to 1 digit after the decimal point.

Download DGS2 and DGS10 interest rates series (https://fred.stlouisfed.org/series/DGS2, https://fred.stlouisfed.org/series/DGS10). Join them together to one dataframe on date (you might need to read about pandas.DataFrame.join()), calculate the difference dgs10-dgs2 daily.

(Additional: think about what does the "inverted yield curve" mean for the market and investors? do you see the same thing in your country/market of interest? Do you think it can be a good predictive feature for the models?)



In [10]:
# https://fred.stlouisfed.org/series/DGS2
dgs2 = pdr.DataReader("DGS2", "fred", start=start)
dgs2.tail()

Unnamed: 0_level_0,DGS2
DATE,Unnamed: 1_level_1
2024-04-12,4.88
2024-04-15,4.93
2024-04-16,4.97
2024-04-17,4.93
2024-04-18,4.98


In [11]:
# https://fred.stlouisfed.org/series/DGS10
dgs10 = pdr.DataReader("DGS10", "fred", start=start)
dgs10.tail()

Unnamed: 0_level_0,DGS10
DATE,Unnamed: 1_level_1
2024-04-12,4.5
2024-04-15,4.63
2024-04-16,4.67
2024-04-17,4.59
2024-04-18,4.64


In [12]:
dgs10.join(dgs2, on='DATE', how='left').tail()

Unnamed: 0_level_0,DGS10,DGS2
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-12,4.5,4.88
2024-04-15,4.63,4.93
2024-04-16,4.67,4.97
2024-04-17,4.59,4.93
2024-04-18,4.64,4.98


In [13]:
dgs_joint = dgs10.join(dgs2, how='inner')
dgs_joint.tail()

Unnamed: 0_level_0,DGS10,DGS2
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-12,4.5,4.88
2024-04-15,4.63,4.93
2024-04-16,4.67,4.97
2024-04-17,4.59,4.93
2024-04-18,4.64,4.98


In [14]:
dgs_joint.columns

Index(['DGS10', 'DGS2'], dtype='object')

In [15]:
dgs2.columns

Index(['DGS2'], dtype='object')

In [16]:
print('dgs2_rows', len(dgs2))
print('dgs10_rows', len(dgs10))
print('dgs_joint_rows', len(dgs_joint))

dgs2_rows 12493
dgs10_rows 16253
dgs_joint_rows 12493


In [17]:
# compute the difference dgs10-dgs2
dgs_joint['diff'] = dgs_joint['DGS10'] - dgs_joint['DGS2']

In [18]:
dgs_joint.loc['20000101':]

Unnamed: 0_level_0,DGS10,DGS2,diff
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-03,6.58,6.38,0.20
2000-01-04,6.49,6.30,0.19
2000-01-05,6.62,6.38,0.24
2000-01-06,6.57,6.35,0.22
2000-01-07,6.52,6.31,0.21
...,...,...,...
2024-04-12,4.50,4.88,-0.38
2024-04-15,4.63,4.93,-0.30
2024-04-16,4.67,4.97,-0.30
2024-04-17,4.59,4.93,-0.34


In [19]:
dgs_joint.loc['20000101':]['diff'].min()

-1.0800000000000005

In [20]:
round(dgs_joint.loc['20000101':]['diff'].min(), 1)

-1.1

**Question 3. [Index] Which Index is better recently?**

Compare S&P 500 and IPC Mexico indexes by the 5 year growth and write down the largest value as an answer (%)

Download on Yahoo Finance two daily index prices for S&P 500 (^GSPC, https://finance.yahoo.com/quote/%5EGSPC/) and IPC Mexico (^MXX, https://finance.yahoo.com/quote/%5EMXX/). Compare 5Y growth for both (between 2019-04-09 and 2024-04-09). Select the higher growing index and write down the growth in % (closest integer %). E.g. if ratio end/start was 2.0925 (or growth of 109.25%), you need to write down 109 as your answer.

In [21]:
# INDEXES from Yahoo Finance
# S&P500 index
# WEB: https://finance.yahoo.com/quote/%5EGSPC
# WEB: https://finance.yahoo.com/quote/%5EMXX/
sp500_daily = yf.download(tickers = "^GSPC",
                     period = "max",
                     interval = "1d")
sp500_daily.tail()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-15,5149.669922,5168.430176,5052.470215,5061.819824,5061.819824,3950210000
2024-04-16,5064.589844,5079.839844,5039.830078,5051.410156,5051.410156,4006200000
2024-04-17,5068.970215,5077.959961,5007.25,5022.209961,5022.209961,3596130000
2024-04-18,5031.52002,5056.660156,5001.890137,5011.120117,5011.120117,3619760000
2024-04-19,5005.439941,5019.02002,4953.560059,4967.22998,4967.22998,3878750000


In [22]:
ipc_mex_daily = yf.download(tickers = "^MXX",
                     period = "max",
                     interval = "1d")
ipc_mex_daily.tail()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-15,56536.46875,56709.121094,55924.101562,55984.0,55984.0,170645700
2024-04-16,56009.730469,56148.0,55626.75,55797.25,55797.25,225862900
2024-04-17,55788.738281,56090.320312,55383.25,55415.691406,55415.691406,244258900
2024-04-18,55597.789062,55941.621094,55585.820312,55739.429688,55739.429688,208671900
2024-04-19,55811.191406,56036.71875,55579.691406,55862.851562,55862.851562,138617600


In [23]:
sp_5yr_growth = (sp500_daily.loc['2024-04-09']['Close'])/(sp500_daily.loc['2019-04-09']['Close']) - 1

round(sp_5yr_growth*100)

81

In [24]:
ipc_mex_5yr_growth = (ipc_mex_daily.loc['2024-04-09']['Close'])/(ipc_mex_daily.loc['2019-04-09']['Close']) - 1

round(ipc_mex_5yr_growth*100)

28

**Question 4. [Stocks OHLCV] 52-weeks range ratio (2023) for the selected stocks**

Find the largest range ratio [=(max-min)/max] of Adj.Close prices in 2023

Download the 2023 daily OHLCV data on Yahoo Finance for top6 stocks on earnings (https://companiesmarketcap.com/most-profitable-companies/): 2222.SR,BRK-B, AAPL, MSFT, GOOG, JPM.

Here is the example data you should see in Pandas for "2222.SR": https://finance.yahoo.com/quote/2222.SR/history

Calculate maximum-minimim "Adj.Close" price for each stock and divide it by the maximum "Adj.Close" value. Round the result to two decimal places (e.g. 0.1575 will be 0.16)

(Additional: why this may be important for your research?)

In [25]:
stocks = ['2222.SR', 'BRK-B', 'AAPL', 'MSFT', 'GOOG', 'JPM']
year_end = '2023-12-31'
year_start = '2023-01-01'
ranges = []

for stock in stocks:
  stock_etf = yf.download(tickers = stock,
                      period = "max",
                      interval = "1d")
  stock_etf_2023 = stock_etf[ (stock_etf.index <= year_end) & (stock_etf.index >= year_start) ]['Adj Close']
  adj_close_range = (max(stock_etf_2023) - min(stock_etf_2023))/ max(stock_etf_2023)
  ranges.append( round(adj_close_range, 2) )

ranges

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


[0.21, 0.21, 0.37, 0.42, 0.39, 0.28]

In [26]:
stocks[ ranges.index( max(ranges) ) ]

'MSFT'

**Question 5. [Stocks] Dividend Yield**

Find the largest dividend yield for the same set of stocks

Use the same list of companies (2222.SR,BRK-B, AAPL, MSFT, GOOG, JPM) and download all dividends paid in 2023. You can use get_actions() method or .dividends field in yfinance library (https://github.com/ranaroussi/yfinance?tab=readme-ov-file#quick-start)

Sum up all dividends paid in 2023 per company and divide each value by the closing price (Adj.Close) at the last trading day of the year.

Find the maximm value in % and round to 1 digit after the decimal point. (E.g., if you obtained \$1.25 dividends paid and the end year stock price is $100, the dividend yield is 1.25% -- and your answer should be equal to 1.3)


In [27]:
import yfinance as yf

stocks = ['2222.SR', 'BRK-B', 'AAPL', 'MSFT', 'GOOG', 'JPM']
div_yields = []

for stock in stocks:
    msft = yf.Ticker(stock)

    # get historical market data
    hist = msft.history(period="24mo")

    # get dividends paid in 2023
    dividends_23 = msft.dividends[ (msft.dividends.index < '2024-01-01') & (msft.dividends.index >= '2023-01-01')]
    sum_dividends = sum(dividends_23)

    #hist.loc[ hist[hist.index <= "2024-01-01 00:00:00-04:00"].tail(1).index ]['Close']
    # Note: the 'Close' price in hist is actually the Adjusted Closing price
    last_close_price = hist[hist.index <= "2024-01-01 00:00:00-04:00"].tail(1)['Close']

    div_yields.append( round( 100*sum_dividends/last_close_price, 1) )

type( div_yields[0] )
type(sum_dividends)
type(last_close_price)
#type( hist.loc[ hist[hist.index <= "2024-01-01 00:00:00-04:00"].tail(1) ].Close )
div_yields

[Date
 2024-01-01 00:00:00+03:00    2.8
 Name: Close, dtype: float64,
 Date
 2023-12-29 00:00:00-05:00    0.0
 Name: Close, dtype: float64,
 Date
 2023-12-29 00:00:00-05:00    0.5
 Name: Close, dtype: float64,
 Date
 2023-12-29 00:00:00-05:00    0.7
 Name: Close, dtype: float64,
 Date
 2023-12-29 00:00:00-05:00    0.0
 Name: Close, dtype: float64,
 Date
 2023-12-29 00:00:00-05:00    2.4
 Name: Close, dtype: float64]

**Question 6. [Exploratory] Investigate new metrics**

Free text answer

Download and explore a few additional metrics or time series that might be valuable for your project and write down why (briefly).



**Question 7. [Exploratory] Time-driven strategy description around earnings releases**

Free text answer

Explore earning dates for the whole month of April - e.g. using YahooFinance earnings calendar (https://finance.yahoo.com/calendar/earnings?from=2024-04-21&to=2024-04-27&day=2024-04-23). Compare with the previous closed earnings (e.g., recent dates with full data https://finance.yahoo.com/calendar/earnings?from=2024-04-07&to=2024-04-13&day=2024-04-08).

Describe an analytical strategy/idea (you're not required to implement it) to select a subset companies of interest based on the future events data.