# Chapter 1 - Sample Based Methods

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import riskfolio as rp 
import yfinance as yf 
import warnings 
warnings.filterwarnings("ignore") 
print('YFinance version: ', yf.version.version)

YFinance version:  0.2.65


In [2]:
pd.options.display.float_format = '{:.4%}'.format 
# Date range 
start = '2019-01-01' 
end = '2023-12-30' 
# Tickers of assets 
assets = ['APA', 'BA', 'BAX', 'BMY', 'CMCSA', 'CNP', 'CPB', 'DE', 'HPQ', 'JCI'] 
assets.sort() 

In [4]:
# Downloading data 
data_path = '../data/cajas_sample.csv'
prices = yf.download(assets, start=start, end=end, auto_adjust= False) 
prices.to_csv(data_path)

[*********************100%***********************]  10 of 10 completed


In [6]:
prices_df = pd.read_csv(data_path) 
prices_df.head(3)

Unnamed: 0,Price,Adj Close,Adj Close.1,Adj Close.2,Adj Close.3,Adj Close.4,Adj Close.5,Adj Close.6,Adj Close.7,Adj Close.8,...,Volume,Volume.1,Volume.2,Volume.3,Volume.4,Volume.5,Volume.6,Volume.7,Volume.8,Volume.9
0,Ticker,APA,BA,BAX,BMY,CMCSA,CNP,CPB,DE,HPQ,...,APA,BA,BAX,BMY,CMCSA,CNP,CPB,DE,HPQ,JCI
1,Date,,,,,,,,,,...,,,,,,,,,,
2,2019-01-02,22.800270080566406,314.6451416015625,57.84429931640625,41.02987289428711,29.111013412475586,22.98332405090332,25.800994873046875,134.85755920410156,16.58537483215332,...,4629400,3292200,4816900,8441300,16970400,4027300,3518900,1987700,9467400,5291900


In [8]:
prices = prices.loc[:,('Adj Close', slice(None))]
prices.head(3)

Price,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close
Ticker,APA,BA,BAX,BMY,CMCSA,CNP,CPB,DE,HPQ,JCI
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2019-01-02,2280.0270%,31464.5142%,5784.4299%,4102.9873%,2911.1013%,2298.3324%,2580.0995%,13485.7559%,1658.5375%,2626.1444%
2019-01-03,2285.9146%,30210.0555%,5711.7931%,3558.7624%,2933.9705%,2311.4658%,2612.0907%,13116.9678%,1602.2612%,2583.9537%
2019-01-04,2381.7907%,31782.2571%,5890.7280%,3698.3673%,3033.0687%,2354.1492%,2626.4872%,13811.7432%,1665.7728%,2687.2774%


The returns are computed as the difference between the prices of two consecutive dates divided by the price in the first data

In [9]:
prices.columns = assets 
# Calculating returns 
returns = prices[assets].pct_change().dropna()

In [11]:
returns.head(3)

Unnamed: 0_level_0,APA,BA,BAX,BMY,CMCSA,CNP,CPB,DE,HPQ,JCI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-03,0.2582%,-3.9869%,-1.2557%,-13.2641%,0.7856%,0.5714%,1.2399%,-2.7346%,-3.3931%,-1.6066%
2019-01-04,4.1942%,5.2042%,3.1327%,3.9229%,3.3776%,1.8466%,0.5511%,5.2968%,3.9639%,3.9987%
2019-01-07,2.1540%,0.3149%,-0.4962%,3.2416%,-1.0612%,0.3835%,2.5883%,1.6350%,1.1100%,1.6341%


In [12]:
returns['APA'][0]

0.0025822517623526053

In [13]:
apa_price_0 = prices['APA'][0]
apa_price_0

22.800270080566406

In [14]:
apa_price_1 = prices['APA'][1]
apa_price_1

22.859146118164062

In [15]:
apa_price_diff = apa_price_1 - apa_price_0
apa_price_diff

0.05887603759765625

In [20]:
apa_return_1 = 100 * apa_price_diff / apa_price_0
print('APA return: {:.4f}%'.format(apa_return_1))

APA return: 0.2582%


In [7]:
mu = returns.mean()
mu

APA      0.1386%
BA       0.0355%
BAX     -0.0187%
BMY      0.0217%
CMCSA    0.0436%
CNP      0.0345%
CPB      0.0484%
DE       0.1072%
HPQ      0.0725%
JCI      0.0776%
dtype: float64

In [9]:
from scipy.stats import moment, kstat 
# Population Central Moments 
sigma_2_p = moment(returns, moment=2, axis=0) 
sigma_3_p = moment(returns, moment=3, axis=0) 
sigma_4_p = moment(returns, moment=4, axis=0) 
sigma_2_p

array([0.00202729, 0.00100807, 0.00032318, 0.00021005, 0.00030866,
       0.00041104, 0.00024973, 0.00044535, 0.00058249, 0.00034382])

In [10]:
# Sample Central Moments 
sigma_2_s = kstat(returns, n=2, axis=0) 
sigma_3_s = kstat(returns, n=3, axis=0) 
sigma_4_s = kstat(returns, n=4, axis=0)
sigma_2_s

array([0.0020289 , 0.00100887, 0.00032344, 0.00021021, 0.00030891,
       0.00041137, 0.00024993, 0.0004457 , 0.00058296, 0.00034409])

In [11]:
from scipy.stats import skew, kurtosis 
# Standarized Skewness and Kurtosis 
std_sigma_3 = skew(returns, axis=0) 
std_sigma_4 = kurtosis(returns, fisher=True, axis=0)
std_sigma_3

array([-1.05343034,  0.26818038, -1.02381667, -0.82039204,  0.03225533,
       -0.75199279,  0.14957535, -0.32526147, -0.47342103, -0.64879155])