# Confidence Interval

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [2]:
ms = pd.read_csv('../data/microsoft.csv')
ms.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-12-31,46.73,47.439999,46.450001,46.450001,42.848763,21552500
1,2015-01-02,46.66,47.419998,46.540001,46.759998,43.134731,27913900
2,2015-01-05,46.369999,46.73,46.25,46.330002,42.738068,39673900
3,2015-01-06,46.380001,46.75,45.540001,45.650002,42.110783,36447900
4,2015-01-07,45.98,46.459999,45.490002,46.23,42.645817,29114100


## Estimate the average stock return with 90% Confidence Interval

In [3]:
# we will use log return for average stock return of Microsoft

ms['logReturn'] = np.log(ms['Close'].shift(-1)) - np.log(ms['Close'])
ms

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,logReturn
0,2014-12-31,46.730000,47.439999,46.450001,46.450001,42.848763,21552500,0.006652
1,2015-01-02,46.660000,47.419998,46.540001,46.759998,43.134731,27913900,-0.009238
2,2015-01-05,46.369999,46.730000,46.250000,46.330002,42.738068,39673900,-0.014786
3,2015-01-06,46.380001,46.750000,45.540001,45.650002,42.110783,36447900,0.012625
4,2015-01-07,45.980000,46.459999,45.490002,46.230000,42.645817,29114100,0.028994
...,...,...,...,...,...,...,...,...
775,2018-01-30,93.300003,93.660004,92.099998,92.739998,92.306389,38635100,0.024182
776,2018-01-31,93.750000,95.400002,93.510002,95.010002,94.565781,48756300,-0.007925
777,2018-02-01,94.790001,96.070000,93.580002,94.260002,93.819290,47227900,-0.026663
778,2018-02-02,93.639999,93.970001,91.500000,91.779999,91.350883,47867800,-0.042058


In [4]:
ms['logReturn'].std(ddof=1)

0.01419189738873751

In [6]:
# Lets build 90% confidence interval for log return
sample_size = ms['logReturn'].shape[0]
sample_mean = ms['logReturn'].mean()
sample_std = ms['logReturn'].std(ddof=1) / sample_size**0.5

# left and right quantile
z_left = norm.ppf(0.05)
z_right = norm.ppf(0.95)
print((z_left, z_right))

(-1.6448536269514729, 1.6448536269514722)


In [7]:
# upper and lower bound
interval_left = sample_mean + z_left * sample_std
interval_right = sample_mean + z_right * sample_std

In [8]:
# 90% confidence interval tells you that there will be 90% chance that the average stock return lies between "interval_left"
# and "interval_right".

print('90% confidence interval is ', (interval_left, interval_right))

90% confidence interval is  (-1.5603253899378836e-05, 0.001656066226145423)


** Expected output: ** 90% confidence interval is  (-1.5603253899378836e-05, 0.001656066226145423)