# VaR Estimation with Unknown Parameters

<img src="img/H5P2Q1.png">

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as sts
import matplotlib.pyplot as plt

In [2]:
alpha = 0.97
beta = 0.02
M = 125000
pcpl = 1000000

In [3]:
df = pd.read_csv("AAPL_Data.csv", header=None)
df.columns = ["date", "price"]
df.set_index("date", inplace=True)

df["lag"] = df.price.shift()
df["logret"] = np.log(df.price/df.lag)
df["loss_thry"] = -pcpl*(np.exp(df.logret)-1)

In [4]:
df.head()

Unnamed: 0_level_0,price,lag,logret,loss_thry
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
11/1/2016,111.489998,,,
11/2/2016,111.589996,111.489998,0.000897,-896.923507
11/3/2016,109.830002,111.589996,-0.015898,15771.96938
11/4/2016,108.839996,109.830002,-0.009055,9013.985086
11/7/2016,110.410004,108.839996,0.014322,-14424.91784


In [5]:
n_days = df.logret.count()
logret_mean = df.logret.mean()
logret_std = np.sqrt(df.logret.var()*n_days/(n_days-1))
n_days, logret_mean, logret_std

(503, 0.0013409490610711264, 0.012974358190202607)

## (a) Known mean, unknown std
### (1) VaR estimation from empirical distribution

In [6]:
logret_quantile = df.logret.quantile(1-alpha, interpolation="higher")
logret_quantile

-0.024185948430244162

In [7]:
var_emp = -pcpl*(np.exp(logret_quantile)-1)
var_emp

23895.812159909103

### (2) VaR stimation from theoretical formula

In [8]:
var_thry = pcpl*(1-np.exp(logret_mean+logret_std*sts.norm.ppf(1-alpha)))
var_thry

22797.2651010363

### (3) Confidence interval for theoretical VaR

In [9]:
sig_minus = np.sqrt((n_days-1)/sts.chi2.ppf(1-0.5*beta, df=n_days-1))*logret_std
sig_minus

0.012084636208032537

In [10]:
sig_plus = np.sqrt((n_days-1)/sts.chi2.ppf(0.5*beta, df=n_days-1))*logret_std
sig_plus

0.013998385618034388

In [11]:
var_minus = pcpl*(1-np.exp(logret_mean+sig_minus*sts.norm.ppf(1-alpha)))
var_minus

21160.661298385676

In [12]:
var_plus = pcpl*(1-np.exp(logret_mean+sig_plus*sts.norm.ppf(1-alpha)))
var_plus

24677.530906176526

In [13]:
print("The 100(1-beta)% confidence interval for the theoretical estimate is (",var_minus,",",var_plus,")")

The 100(1-beta)% confidence interval for the theoretical estimate is ( 21160.661298385676 , 24677.530906176526 )


**Remark: The empirical estimation lies in the confidence interval, whereas the theoretical estimation does not.**

## (b) Unknown mean, unknown std

In [14]:
chi2_simu = np.random.chisquare(df=n_days-1, size=M)
chi2_simu

array([504.10060986, 458.55136928, 523.49103035, ..., 462.67881441,
       553.94424464, 476.72981079])

In [15]:
sig_simu = np.sqrt((n_days-1)/chi2_simu)*logret_std
sig_simu

array([0.0129473 , 0.01357512, 0.01270525, ..., 0.01351444, 0.01235107,
       0.01331379])

In [16]:
mean_simu = np.random.normal(loc=logret_mean, scale=sig_simu/np.sqrt(n_days))
mean_simu

array([0.00150493, 0.00073501, 0.0012893 , ..., 0.0006189 , 0.00115649,
       0.00098689])

In [17]:
var_simu = pcpl*(1-np.exp(mean_simu+sig_simu*sts.norm.ppf(1-alpha)))
var_simu

array([22587.26093082, 24492.07042419, 22353.02744554, ...,
       24493.99456838, 21831.50054299, 23766.60868939])

In [18]:
var_simu_avg = var_simu.mean()
var_simu_avg

22829.225746518252

In [19]:
A = np.quantile(var_simu, 0.5*beta, interpolation="higher")
B = np.quantile(var_simu, 1-0.5*beta, interpolation="higher")
A, B

(20709.66175892408, 25112.07891972045)

In [20]:
print("The confidence interval (A, B) for the simulated average VaR is (",A,",",B,")")

The confidence interval (A, B) for the simulated average VaR is ( 20709.66175892408 , 25112.07891972045 )


**Remark: The average VaR lies well in the middle of the confidence interval!**

**Overall remark: The point estimation of VaR in (a) is approximately the same as the one in (b), and the confidence interval in (a) is smaller than the one in (b). This means the uncertainty of mean return adds to the uncertainty of the estimation.**