# McKinney Chapter 10 - Practice - Sec 03

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import yfinance as yf

In [2]:
%precision 4
pd.options.display.float_format = '{:.4f}'.format
%config InlineBackend.figure_format = 'retina'

## Announcements

1. Please join your project group today. Otherwise, I will randomly assign you to a group tonight.
2. Please vote for students' choice topics.
3. Please complete a week-5 survey; see the announcement on Canvas.

## Five-Minute Review

We will focus on 3 topics from chapter 10 of McKinney:

1. *GroupBy Mechanics:* We will use the `.groupby()` method to perform "split-apply-combine" calculations in pandas, which let us aggregate data by one of more columns or indexes.
2. *Data Aggregation:* We will combine optimized methods, like `.count()`, `.sum()`, `.mean()`, etc., with `.groupby()` to quickly aggregate data. We will combine the `.agg()` or `.aggregate()` method with `.groupby()` when we want to apply more than one aggregation function.
3. *Pivot Tables:* We can use the `.pivot_table()` method to aggregate data with a syntax similar to Excel’s pivot tables. We can almost always get the same output with the `.groupby()`, `.agg()`, and `.unstack()` methods.

## Practice

### Replicate the following `.pivot_table()` output with `.groupby()`

In [3]:
ind = (
    yf.download(
        tickers='^GSPC ^DJI ^IXIC ^FTSE ^N225 ^HSI',
        auto_adjust=False,
        progress=False
    )
    .iloc[:-1]
    .stack(future_stack=True)
)

[**********************83%***************        ]  5 of 6 completed

[*********************100%***********************]  6 of 6 completed




In [4]:
ind

Unnamed: 0_level_0,Price,Adj Close,Close,High,Low,Open,Volume
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1927-12-30,^DJI,,,,,,
1927-12-30,^FTSE,,,,,,
1927-12-30,^GSPC,17.6600,17.6600,17.6600,17.6600,17.6600,0.0000
1927-12-30,^HSI,,,,,,
1927-12-30,^IXIC,,,,,,
...,...,...,...,...,...,...,...
2025-02-11,^FTSE,8777.4004,8777.4004,8789.5996,8750.5000,8767.7998,900729100.0000
2025-02-11,^GSPC,6068.5000,6068.5000,6076.2798,6042.3398,6049.3198,4324880000.0000
2025-02-11,^HSI,21294.8594,21294.8594,21682.4102,21279.3691,21656.8203,3583400000.0000
2025-02-11,^IXIC,19643.8594,19643.8594,19731.9297,19579.7695,19602.1094,9269380000.0000


In [5]:
a = (
    ind
    .loc['2015':]
    .reset_index()
    .pivot_table(
        values='Close',
        index=pd.Grouper(key='Date', freq='YE'),
        columns='Ticker',
        aggfunc=['min', 'max']
    )
)

In [6]:
a.head()

Unnamed: 0_level_0,min,min,min,min,min,min,max,max,max,max,max,max
Ticker,^DJI,^FTSE,^GSPC,^HSI,^IXIC,^N225,^DJI,^FTSE,^GSPC,^HSI,^IXIC,^N225
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2015-12-31,15666.4404,5874.1001,1867.61,20556.5996,4506.4902,16795.9609,18312.3906,7104.0,2130.8201,28442.75,5218.8599,20868.0293
2016-12-31,15660.1797,5537.0,1829.08,18319.5801,4266.8398,14952.0195,19974.6191,7142.7998,2271.72,24099.6992,5487.4399,19494.5293
2017-12-31,19732.4004,7099.2002,2257.8301,22134.4707,5429.0801,18335.6309,24837.5098,7687.7998,2690.1599,30003.4902,6994.7598,22939.1797
2018-12-31,21792.1992,6584.7002,2351.1001,24585.5293,6192.9199,19155.7402,26828.3906,7877.5,2930.75,33154.1211,8109.6899,24270.6191
2019-12-31,22686.2207,6692.7002,2447.8899,25064.3594,6463.5,19561.9609,28645.2598,7686.6001,3240.02,30157.4902,9022.3896,24066.1191


In [7]:
b = (
    ind
    .loc['2015':]
    .reset_index()
    .groupby([pd.Grouper(key='Date', freq='YE'), 'Ticker'])
    ['Close']
    .agg(['min', 'max'])
    .unstack()
)

In [8]:
b.head()

Unnamed: 0_level_0,min,min,min,min,min,min,max,max,max,max,max,max
Ticker,^DJI,^FTSE,^GSPC,^HSI,^IXIC,^N225,^DJI,^FTSE,^GSPC,^HSI,^IXIC,^N225
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2015-12-31,15666.4404,5874.1001,1867.61,20556.5996,4506.4902,16795.9609,18312.3906,7104.0,2130.8201,28442.75,5218.8599,20868.0293
2016-12-31,15660.1797,5537.0,1829.08,18319.5801,4266.8398,14952.0195,19974.6191,7142.7998,2271.72,24099.6992,5487.4399,19494.5293
2017-12-31,19732.4004,7099.2002,2257.8301,22134.4707,5429.0801,18335.6309,24837.5098,7687.7998,2690.1599,30003.4902,6994.7598,22939.1797
2018-12-31,21792.1992,6584.7002,2351.1001,24585.5293,6192.9199,19155.7402,26828.3906,7877.5,2930.75,33154.1211,8109.6899,24270.6191
2019-12-31,22686.2207,6692.7002,2447.8899,25064.3594,6463.5,19561.9609,28645.2598,7686.6001,3240.02,30157.4902,9022.3896,24066.1191


In [9]:
a.equals(b)

True

In [10]:
np.allclose(a, b)

True

In [11]:
(a == b).all().all()

np.True_

### Calulate the mean and standard deviation of returns by ticker for the MATANA (MSFT, AAPL, TSLA, AMZN, NVDA, and GOOG) stocks

Consider only dates with complete returns data.
Try this calculation with wide and long data frames, and confirm your results are the same.

In [12]:
matana = (
    yf.download(
        tickers='MSFT AAPL TSLA AMZN NVDA GOOG',
        auto_adjust=False,
        progress=False
    )
    .iloc[:-1]
)

[**********************83%***************        ]  5 of 6 completed

[*********************100%***********************]  6 of 6 completed




In [13]:
columns = pd.MultiIndex.from_product([['Return'], matana['Adj Close'].columns])
matana[columns] = matana['Adj Close'].pct_change()

In [14]:
matana['Return'].dropna().agg(['mean', 'std'])

Ticker,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
mean,0.0011,0.0012,0.0009,0.001,0.0021,0.0021
std,0.0175,0.0205,0.0173,0.0162,0.0288,0.0361


In [15]:
matana['Return'].dropna().agg([lambda x: 252 * x.mean(), lambda x: np.sqrt(252) * x.std()])

Ticker,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
<lambda>,0.272,0.3105,0.2299,0.2485,0.5365,0.5289
<lambda>,0.2775,0.3258,0.2739,0.2564,0.4571,0.5738


### Calculate the mean and standard deviation of returns and the maximum of closing prices by ticker for the MATANA stocks

### Calculate monthly means and volatilities for SPY and GOOG returns

### Plot the monthly means and volatilities from the previous exercise

### Assign the Dow Jones stocks to five portfolios based on the *preceding* month's volatility

### Plot the time-series volatilities of these five portfolios

### Calculate the *mean* monthly correlation between the Dow Jones stocks

### Is market volatility higher during wars?

Here is some guidance:

1. Download the daily factor data from Ken French's website
1. Calculate daily market returns by summing the market risk premium and risk-free rates (`Mkt-RF` and `RF`, respectively)
1. Calculate the volatility (standard deviation) of daily returns *every month* by combining `pd.Grouper()` and `.groupby()`)
1. Multiply by $\sqrt{252}$ to annualize these volatilities of daily returns
1. Plot these annualized volatilities

Is market volatility higher during wars?
Consider the following dates:

1. WWII: December 1941 to September 1945
1. Korean War: 1950 to 1953
1. Viet Nam War: 1959 to 1975
1. Gulf War: 1990 to 1991
1. War in Afghanistan: 2001 to 2021