# Random Variable Generation
Repeat the following for Nsim = 1000 and Nsim = 10000

1. Generate (Nsim, days) numpy array for
    - dayrange: pd.date_range("2022-05-31", "2022-11-01")
    - with final fixing date = "2023-07-26"
    - U(0, 1)
    - output: (Nsim, 3, days)
    - save in folder "u" + Nsim

2. Generate Original normals from (1)
    - invnorm(X)
    - output: (Nsim, 3, days)
    - save in folder "mc" + Nsim

3. Generate AV random variables using (1)
    - invnorm(X) and invnorm(1 - X) (= -invnorm(X))
    - output: (2 * Nsim, 3, days)
    - save in folder "av" + Nsim

4. Generate SS random variables using (1)
    - transform into (i + X)/Nsim
    - shuffle for each day
    - output: (Nsim, 3, days)
    - save in folder "ss" + Nsim

In [13]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from tqdm import tqdm
import os


# Final fixing date; passed as the end date to np.busday_count when sizing each sample.
fixing_date = "2023-07-26"
# strftime pattern used for the .npy file names and for the dates given to np.busday_count.
time_format = "%Y-%m-%d"
# Number of assets; this is the size of the second axis of every generated array.
no_of_assets = 3



In [14]:
# Number of simulated paths; it also determines the output folder names below.
Nsim = 5000
u_path = "u" + str(Nsim)    # raw U(0, 1) draws
mc_path = "mc" + str(Nsim)  # plain Monte Carlo normals
av_path = "av" + str(Nsim)  # antithetic-variate normals
ss_path = "ss" + str(Nsim)  # stratified-sampling normals

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for a folder and creating it.
for path in [u_path, mc_path, av_path, ss_path]:
    os.makedirs(path, exist_ok=True)


In [15]:
# Sanity check that norm.ppf inverts norm.cdf: cdf(0) = 0.5 and ppf(0.5) = 0.
norm.cdf(0), norm.ppf(0.5), norm.ppf(0.6)

(0.5, 0.0, 0.2533471031357997)

## Step One: U(0, 1)
1. Generate (Nsim, days) numpy array for
    - dayrange: pd.date_range("2022-05-31", "2022-11-01")
    - with final fixing date = "2023-07-26"
    - U(0, 1)
    - save in folder "u" + Nsim

In [16]:
# Draw the U(0, 1) samples: one (Nsim, no_of_assets, no_of_days) array per calendar date,
# saved as "<date>.npy" in the "u" folder.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    date_str = cur_date.strftime(time_format)
    # np.busday_count counts the business days in [cur_date, fixing_date): the start
    # date is included when it is a business day, the fixing date itself is not.
    no_of_days = np.busday_count(date_str, fixing_date)
    rv = np.random.rand(Nsim, no_of_assets, no_of_days)
    np.save(os.path.join(u_path, date_str + ".npy"), rv)

100%|██████████| 155/155 [00:46<00:00,  3.36it/s]


In [17]:
# Means of the last uniform array generated above, along each axis; U(0, 1) draws should average about 0.5.
rv.mean(axis = 0), rv.mean(axis = 1), rv.mean(axis = 2)

(array([[0.51049827, 0.50120689, 0.50108049, 0.49559887, 0.49889086,
         0.49910239, 0.49642068, 0.50271825, 0.49555108, 0.49894282,
         0.49593351, 0.4962105 , 0.50166489, 0.50266216, 0.49137029,
         0.49708293, 0.4982027 , 0.50215002, 0.49751552, 0.49782548,
         0.50295054, 0.50020505, 0.49809437, 0.50205376, 0.50016998,
         0.49633528, 0.50379395, 0.50407991, 0.49519989, 0.49669474,
         0.49865623, 0.4967508 , 0.50290176, 0.49801921, 0.50196791,
         0.4998707 , 0.50718409, 0.49864165, 0.50091266, 0.50850769,
         0.49937153, 0.49801549, 0.496319  , 0.49664696, 0.50340615,
         0.49843444, 0.50421456, 0.49537538, 0.50541183, 0.50480147,
         0.5089974 , 0.5007671 , 0.49458671, 0.50161754, 0.4979114 ,
         0.49883384, 0.50092953, 0.50636356, 0.49755588, 0.49642698,
         0.49481631, 0.50019707, 0.4994658 , 0.49880061, 0.49997005,
         0.49795973, 0.49564652, 0.49068563, 0.49588108, 0.506726  ,
         0.50146072, 0.50256316, 0

## Step Two: invnorm(X)
2. Generate Original normals from (1)
    - invnorm(X)
    - output: (Nsim, 3, days)
    - save in folder "mc" + Nsim

In [18]:
# Turn every saved uniform sample into standard normals through the inverse CDF
# and store the result under the same file name in the "mc" folder.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    u_array = np.load(f"{u_path}/{file_name}")
    res = norm.ppf(u_array)
    np.save(f"{mc_path}/{file_name}", res)

100%|██████████| 155/155 [01:54<00:00,  1.35it/s]


In [19]:
# Means of the last normal array generated above, along each axis; they should be close to 0.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[ 3.31136805e-02,  6.47237216e-03,  4.11714898e-03,
         -1.87359287e-02, -4.48338523e-03, -4.16471733e-03,
         -7.77030997e-03,  6.67407006e-03, -1.42371463e-02,
         -3.58575740e-03, -1.19451301e-02, -1.00749523e-02,
          7.36179998e-03,  4.55853032e-03, -3.60641626e-02,
         -1.22259087e-02, -3.71399851e-03,  5.91345537e-03,
         -1.08115317e-02, -1.07874080e-02,  1.16642518e-02,
          2.92237252e-03, -7.67540921e-03,  9.11211399e-03,
         -2.27208744e-03, -1.12996166e-02,  1.39934563e-02,
          1.49479681e-02, -1.93983021e-02, -8.57318044e-03,
         -7.27365569e-03, -1.27315718e-02,  1.37393694e-02,
         -4.91618252e-03,  1.21573306e-02, -5.32389266e-03,
          2.41283925e-02, -1.71805240e-04, -2.77133626e-03,
          3.22416610e-02, -1.06144700e-03, -7.46817171e-03,
         -1.60395456e-02, -6.82077213e-03,  6.60287580e-03,
         -4.50906505e-03,  9.65240506e-03, -1.60421589e-02,
          1.52118054e-02,  1.38917531e-0

## Step Three
3. Generate AV random variables using (1)
    - invnorm(X) and invnorm(1 - X) (= -invnorm(X))
    - output: (2 * Nsim, 3, days)
    - save in folder "av" + Nsim

In [20]:
# Antithetic variates: stack the normals obtained from each uniform sample on top of
# their mirror images, giving a (2 * Nsim, no_of_assets, no_of_days) array per date.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    u_array = np.load(os.path.join(u_path, file_name))
    z = norm.ppf(u_array)
    # By symmetry of the standard normal, ppf(1 - u) == -ppf(u). Negating the values
    # already computed avoids a second inverse-CDF pass and makes every antithetic
    # pair cancel exactly instead of only up to the rounding error of 1 - u.
    res = np.concatenate((z, -z), axis=0)
    np.save(os.path.join(av_path, file_name), res)

100%|██████████| 155/155 [03:19<00:00,  1.29s/it]


In [21]:
# Means of the last antithetic array along each axis; pairing every draw with its mirror
# image makes the axis-0 means vanish up to floating-point rounding.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[-8.31890112e-17,  6.21724894e-18,  1.44551038e-17,
          1.78884685e-18, -6.43374243e-18,  4.76008122e-19,
          3.53050922e-17,  1.33892897e-17,  1.39332990e-17,
          6.81121826e-18, -2.55351296e-18, -1.60871316e-17,
         -3.57491814e-18,  3.48610030e-18, -7.21533944e-17,
          3.15414361e-17, -7.12763182e-18,  1.12132525e-17,
         -3.14082094e-17, -5.75234305e-18, -1.87183602e-17,
          2.00506278e-17,  3.49720253e-18,  1.68753900e-17,
          1.35755122e-18, -2.06279438e-17, -1.06581410e-17,
          1.29229960e-17, -4.53879989e-17,  5.25968158e-18,
          1.26343380e-17, -1.62758695e-17, -3.32123218e-17,
         -2.84217094e-17, -3.14193116e-17, -5.46229728e-18,
         -6.10400619e-17, -3.03090886e-18, -2.22377672e-17,
         -6.25055563e-17, -3.77475828e-19,  6.39488462e-18,
          5.95967720e-17, -3.16968674e-18, -2.06501483e-18,
          9.63673585e-18,  1.06692433e-17, -1.01474384e-17,
         -8.25381430e-19, -5.10758102e-1

## Step Four: Stratified Sampling

4. Generate SS random variables using (1)
    - transform into (i + X)/Nsim
    - shuffle for each day
    - output: (Nsim, 3, days)
    - save in folder "ss" + Nsim

In [22]:
def allocate_strata(a): #strata on Nsim
    """Map each entry of `a` into the stratum given by its index along axis 0.

    With ``n = a.shape[0]``, the entry in row ``i`` becomes ``(i + a) / n``, so a
    value in [0, 1) ends up in ``[i / n, (i + 1) / n)``.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at least one dimension; axis 0 indexes the strata.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as `a`; `a` itself is not modified.
    """
    n_strata = a.shape[0]
    # A column of stratum indices shaped (n, 1, ..., 1) broadcasts against `a`.
    # This avoids building a full-size index array and also handles 1-D input.
    index_shape = (n_strata,) + (1,) * (len(a.shape) - 1)
    strata_index = np.arange(n_strata).reshape(index_shape)
    return (strata_index + a) / n_strata
    
def shuffle(a):
    """Shuffle `a` in place along its first axis with NumPy's global RNG; returns None."""
    np.random.shuffle(a)

In [23]:
# Stratified sampling: place the uniforms of simulation i into stratum i of [0, 1),
# shuffle the strata independently for every (asset, day) column, then map the
# result to standard normals. Output per date: (Nsim, no_of_assets, no_of_days).
rng = np.random.default_rng()
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    u_array = np.load(os.path.join(u_path, file_name))
    res = allocate_strata(u_array)
    # Generator.permuted shuffles each slice along axis 0 independently. The former
    # np.apply_along_axis call only worked because the slices it passes happen to be
    # writable views, and it built a throwaway object array of None values.
    res = rng.permuted(res, axis=0)
    res = norm.ppf(res)
    np.save(os.path.join(ss_path, file_name), res)

100%|██████████| 155/155 [02:12<00:00,  1.17it/s]


In [24]:
# Means of the last stratified array along each axis; the axis-0 means should be very close to 0.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[ 2.72807682e-05,  3.66809187e-05,  1.76777908e-05,
         -4.44298350e-05, -8.68570881e-05,  5.98903490e-05,
          1.01378582e-04, -1.64430805e-05, -2.93102354e-05,
         -5.37337106e-06,  6.64156455e-05, -1.02602314e-04,
         -4.45003343e-05, -8.42653018e-05,  4.54889717e-05,
         -5.50485374e-05, -1.37599219e-05, -4.82961373e-05,
          3.55080200e-06, -1.03676732e-05,  1.31084489e-05,
         -4.31340792e-05, -3.22261934e-06,  1.58326731e-05,
         -4.32197985e-05, -2.33911603e-05, -8.13484066e-05,
          3.01219159e-04, -1.23322827e-05,  4.25796133e-05,
          1.69265540e-04, -1.79743029e-05,  2.93313776e-05,
         -1.59669360e-06,  6.57432238e-05,  1.23271946e-04,
         -4.33335546e-05,  4.46548634e-05, -2.41737990e-05,
          1.22997166e-05, -9.17296017e-05, -1.61972000e-05,
          2.47767804e-05,  1.14320610e-05,  8.17505490e-05,
         -9.38615103e-05,  2.02493206e-05, -2.21871103e-05,
          5.81119474e-05,  2.77621890e-0