## Simulation (Monte Carlo Analysis)

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
np.set_printoptions(precision=4)

### Simulating a single time series of returns

Inputs

In [2]:
# Inputs for the single-asset simulation
MN = 0.06       # expected (mean) return of each period
SD = 0.10       # standard deviation of each period's return
T = 30          # number of time periods in one simulated series
N_SIMS = 100    # number of simulations

The following cell will produce different values each time you run it.

In [3]:
# Freeze the distribution's parameters, then draw T values (no seed is given here)
norm(loc=MN, scale=SD).rvs(size=T)

array([ 0.0221, -0.0195, -0.0442,  0.0154, -0.1463, -0.0199, -0.0562,
       -0.0742,  0.001 ,  0.0651,  0.1084,  0.0459,  0.0612,  0.0895,
        0.0664, -0.0043, -0.06  ,  0.018 ,  0.1591,  0.1223, -0.0946,
        0.0434,  0.1611,  0.1306, -0.1213,  0.0829,  0.0534, -0.1345,
        0.3406,  0.3467])

Fixing the seed (random_state) will return the same values each time

- Passing the same seed on every draw is not what we want inside a simulation (every simulated series would be identical), but fixing a seed is useful for research replicability

In [4]:
# Same draw as before, but with the seed pinned via random_state
norm(loc=MN, scale=SD).rvs(size=T, random_state=10)

array([ 0.1932,  0.1315, -0.0945,  0.0592,  0.1221, -0.012 ,  0.0866,
        0.0709,  0.0604,  0.0425,  0.1033,  0.1803, -0.0365,  0.1628,
        0.0829,  0.1045, -0.0537,  0.0735,  0.2085, -0.048 , -0.1378,
       -0.1143,  0.0866,  0.2985,  0.1724,  0.2273,  0.0699,  0.1998,
        0.0329,  0.1213])

Let's simulate 1000 time series of returns (each of length `T`) and perform a calculation on each time series.
The calculation will be a simple average of the returns in the time series.

In [5]:
N_SIMS = 1000

# Store each simulation's average in a NumPy buffer, then wrap it in a DataFrame once
avg_rets = np.empty(N_SIMS)
for s in range(N_SIMS):
    rets = norm.rvs(loc=MN, scale=SD, size=T)   # one simulated series of T returns
    avg_rets[s] = np.mean(rets)

sims = pd.DataFrame({'avg_ret': avg_rets}, index=np.arange(N_SIMS))
sims.head()

Unnamed: 0,avg_ret
0,0.061895
1,0.066148
2,0.04831
3,0.065971
4,0.098055


In [6]:
# Plot the distribution of means
fig = go.Figure()
trace= go.Histogram(x=sims.avg_ret, histnorm='percent',hovertemplate="<br>%{y:.2}% of simulations <br><extra></extra>")
fig.add_trace(trace)
# some formatting
fig.update_traces(marker_line_width=1, marker_line_color='black')
fig.layout.xaxis["title"] = "Estimated Average Return"
fig.layout.yaxis["title"] = "Percent of Simulations"
fig.add_vline(x=MN, line_width=4, line_dash="dash", line_color="black")
fig.show()

We could have done the above using a function to do the calculation based on the random input.  A single random realization of the calculation is obtained by calling the function.

In [7]:
def sim_calc(mean, sd, n_time):
    """Simulate one series of normal returns and return its average.

    Parameters
    ----------
    mean : float
        Mean (``loc``) of the normal distribution the returns are drawn from.
    sd : float
        Standard deviation (``scale``) of that distribution.
    n_time : int
        Number of returns to draw (passed as ``size``).

    Returns
    -------
    The arithmetic mean of the drawn returns. No seed is set here, so the
    value depends on the current state of the random number generator.
    """
    return np.mean(norm.rvs(loc=mean, scale=sd, size=n_time))
sim_calc(mean=MN, sd=SD, n_time=T)

0.038172019871065624

We can collect many simulations in a dataframe by calling the function in a loop.

In [8]:
# One call to sim_calc per simulation; results go into a buffer that is wrapped once
sim_means = np.empty(N_SIMS)
for s in range(N_SIMS):
    sim_means[s] = sim_calc(MN, SD, T)
sims = pd.DataFrame({'avg_ret': sim_means}, index=np.arange(N_SIMS))

In [9]:
# Plot the distribution of means
fig = go.Figure()
trace= go.Histogram(x=sims.avg_ret, histnorm='percent',hovertemplate="<br>%{y:.2}% of simulations <br><extra></extra>")
fig.add_trace(trace)
# some formatting
fig.update_traces(marker_line_width=1, marker_line_color='black')
fig.layout.xaxis["title"] = "Estimated Average Return"
fig.layout.yaxis["title"] = "Percent of Simulations"
fig.add_vline(x=MN, line_width=4, line_dash="dash", line_color="black")
fig.show()

The general set-up for a simulation is to:
1. Identify one or more random inputs
2. Set up a function that generates a random draw of the inputs and does some calculations to produce output(s).
3. Run the function in step 2 many times to collect the simulated distribution of the output(s).
4. Summarize the output distribution in some way (average value, percentiles, etc.)

Let's switch the calculation function to be the standard deviation of the returns.

In [10]:
N_SIMS = 1000

# Simulate a single realization and do calculation(s)
def sim_calc(mean, sd, n_time):
    """Simulate one series of normal returns and return its sample standard deviation.

    Parameters
    ----------
    mean : float
        Mean (``loc``) of the normal distribution the returns are drawn from.
    sd : float
        Standard deviation (``scale``) of that distribution.
    n_time : int
        Number of returns to draw (passed as ``size``).

    Returns
    -------
    The standard deviation of the drawn returns computed with ``ddof=1``.
    No seed is set here, so the value changes from call to call.
    """
    rets = norm.rvs(loc=mean, scale=sd, size=n_time)
    # ddof is an integer count of degrees of freedom; 1 divides by (n_time - 1).
    # (The boolean True happened to work only because True == 1.)
    return np.std(rets, ddof=1)

# Collect N_SIMS runs of the simulation function
sd_draws = np.empty(N_SIMS)
for s in range(N_SIMS):
    sd_draws[s] = sim_calc(MN, SD, T)
sims = pd.DataFrame({'sd_ret': sd_draws}, index=np.arange(N_SIMS))

# Summarize simulated output
output = sims.sd_ret
print(f'Average of simulated output: {output.mean(): .4f}')

# Plot the distribution of output; the dashed vertical line is drawn at SD
trace = go.Histogram(
    x=sims.sd_ret,
    histnorm='percent',
    hovertemplate="<br>%{y:.2}% of simulations <br><extra></extra>",
)
fig = go.Figure(data=[trace])
fig.update_traces(marker=dict(line=dict(width=1, color='black')))
fig.update_layout(
    xaxis_title="Estimated SD(Return)",
    yaxis_title="Percent of Simulations",
)
fig.add_vline(x=SD, line_width=4, line_dash="dash", line_color="black")
fig.show()


Average of simulated output:  0.0991


### Simulating correlated returns

To simulate multiple asset returns, we need to make sure that the simulation captures the correlation structure across returns.

In [11]:
RF = 0.01                              # risk-free rate
MNS = np.array([0.05, 0.10, 0.15])     # expected return of each asset
SDS = np.array([0.15, 0.20, 0.25])     # standard deviation of each asset

# Correlation matrix: ones on the diagonal, 0.75 for every pair of assets
C = np.array([
    [1.00, 0.75, 0.75],
    [0.75, 1.00, 0.75],
    [0.75, 0.75, 1.00],
])

# Covariance matrix: scale row i and column j of C by SDS[i] and SDS[j]
COV = np.diag(SDS) @ C @ np.diag(SDS)

A single realization

In [12]:
# T joint draws of the three asset returns (one row per period, one column per asset)
mvn.rvs(mean=MNS, cov=COV, size=T)

array([[ 2.2125e-01,  2.1254e-01,  5.6298e-01],
       [-4.8661e-03, -1.8979e-01, -1.4689e-01],
       [ 2.3117e-01, -1.1743e-01,  3.2781e-01],
       [ 1.7670e-01,  4.9919e-02,  1.7757e-01],
       [-9.3892e-02,  2.0724e-01,  1.1048e-01],
       [ 1.2839e-01,  2.6051e-03,  6.3791e-01],
       [ 1.1767e-01,  2.2322e-01,  4.8146e-01],
       [-3.0245e-02,  1.0886e-01, -1.8190e-01],
       [-1.6284e-02, -4.5101e-02,  1.2753e-01],
       [-1.6069e-01,  1.1679e-05, -2.3118e-01],
       [ 2.0941e-01,  2.6007e-01,  5.1541e-02],
       [-1.2026e-01, -1.2544e-01, -3.2606e-01],
       [ 1.5048e-01,  4.5519e-01,  6.9281e-02],
       [ 1.6539e-01,  2.2596e-01,  3.5316e-01],
       [ 1.4823e-01,  1.2183e-01,  1.7014e-01],
       [ 6.3549e-03, -5.6657e-02,  7.3041e-02],
       [ 2.6080e-01,  4.1557e-01,  3.9464e-01],
       [ 3.3199e-01,  4.5821e-01,  7.2091e-01],
       [ 4.2153e-02,  1.5820e-01,  1.0286e-01],
       [-4.0138e-02,  8.8329e-02,  2.4983e-02],
       [ 7.5581e-02,  7.8853e-02,  1.967

Putting simulated data in a dataframe and doing calculations

In [13]:
# One simulated sample of T joint returns, one column per asset
rets = pd.DataFrame(
    mvn.rvs(mean=MNS, cov=COV, size=T),
    columns=['ret1', 'ret2', 'ret3'],
)

# Sample correlation matrix, then pull out the three pairwise entries
x = rets.corr()
corr12, corr13, corr23 = (
    x.at['ret1', 'ret2'],
    x.at['ret1', 'ret3'],
    x.at['ret2', 'ret3'],
)


Define a function to do the simulation

In [14]:
# Simulate a single realization and do calculation(s)
def sim_calc(means, cov, n_time):
    """Simulate one sample of jointly normal returns and return two sample correlations.

    Parameters
    ----------
    means : sequence of float
        Mean return of each asset; its length sets the number of columns.
    cov : array-like
        Covariance matrix passed to the multivariate normal sampler.
    n_time : int
        Number of joint draws (rows) to simulate.

    Returns
    -------
    tuple
        ``(corr12, corr13)``: the sample correlation of asset 1 with asset 2
        and of asset 1 with asset 3. Columns ``ret1``..``ret3`` are read, so
        at least three assets are required.
    """
    names = [f'ret{k}' for k in range(1, len(means) + 1)]
    draws = mvn.rvs(mean=means, cov=cov, size=n_time)
    corr = pd.DataFrame(draws, columns=names).corr()
    return corr.at['ret1', 'ret2'], corr.at['ret1', 'ret3']
sim_calc(means=MNS, cov=COV, n_time=T)


(0.7390495329103999, 0.7007867524073496)

Run the `N_SIMS` simulations

In [15]:
N_SIMS = 1000

# Collect N_SIMS runs of the simulation function (one row per run, one column per output)
corr_draws = np.empty((N_SIMS, 2))
for s in range(N_SIMS):
    corr_draws[s] = sim_calc(MNS, COV, T)
sims = pd.DataFrame(corr_draws, columns=['corr12', 'corr13'], index=np.arange(N_SIMS))

# Summarize simulated output
output1 = sims.corr12
output2 = sims.corr13
print(f'Average corr12: {output1.mean(): .4f}')
print(f'Average corr13: {output2.mean(): .4f}')

# Plot the distribution of corr12; the dashed vertical line is drawn at C[0, 1]
trace = go.Histogram(
    x=sims.corr12,
    histnorm='percent',
    hovertemplate="<br>%{y:.2}% of simulations <br><extra></extra>",
)
fig = go.Figure(data=[trace])
fig.update_traces(marker=dict(line=dict(width=1, color='black')))
fig.update_layout(
    xaxis_title="Estimated Correlation(R1,R2)",
    yaxis_title="Percent of Simulations",
)
fig.add_vline(x=C[0, 1], line_width=4, line_dash="dash", line_color="black")
fig.show()

Average corr12:  0.7463
Average corr13:  0.7436
