# Random Variable Generation
Repeat the following for Nsim = 1000 and Nsim = 10000

1. Generate (Nsim, 3, days) numpy array for
    - dayrange: pd.date_range("2022-05-31", "2022-11-01")
    - with final fixing date = "2023-07-26"
    - U(0, 1)
    - output: (Nsim, 3, days)
    - save in folder "u" + Nsim

2. Generate Original normals from (1)
    - invnorm(X)
    - output: (Nsim, 3, days)
    - save in folder "mc" + Nsim

3. Generate AV random variables using (1)
    - invnorm(X) and invnorm(1 - X)
    - output: (2 * Nsim, 3, days)
    - save in folder "av" + Nsim

4. Generate SS random variables using (1)
    - transform into (i + X)/Nsim, where i is the stratum (row) index
    - shuffle for each day
    - output: (Nsim, 3, days)
    - save in folder "ss" + Nsim

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from tqdm import tqdm
import os


# Final fixing date; passed as the end date to np.busday_count when sizing the
# day axis of each generated array.
fixing_date = "2023-07-26"
# strftime pattern used to format each date for file names and np.busday_count.
time_format = "%Y-%m-%d"
# Size of the second axis of every generated random-number array.
no_of_assets = 3



In [2]:
# Number of simulated paths (first axis of every generated array).
Nsim = 1000

# One output folder per sampling scheme, each suffixed with the path count.
u_path = "u" + str(Nsim)    # raw U(0, 1) draws
mc_path = "mc" + str(Nsim)  # plain inverse-normal draws
av_path = "av" + str(Nsim)  # antithetic-variate draws
ss_path = "ss" + str(Nsim)  # stratified-sampling draws

for path in [u_path, mc_path, av_path, ss_path]:
    # exist_ok=True keeps the cell safe to re-run and avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(path, exist_ok=True)


In [3]:
# Sanity check: norm.ppf is the inverse of norm.cdf (cdf(0) = 0.5, ppf(0.5) = 0).
norm.cdf(0), norm.ppf(0.5), norm.ppf(0.6)

(0.5, 0.0, 0.2533471031357997)

## Step One: U(0, 1)
1. Generate (Nsim, 3, days) numpy array for
    - dayrange: pd.date_range("2022-05-31", "2022-11-01")
    - with final fixing date = "2023-07-26"
    - U(0, 1)
    - save in folder "u" + Nsim

In [4]:
# For every calendar date in the range, draw U(0, 1) samples of shape
# (Nsim, no_of_assets, no_of_days) and store them as "<u_path>/<date>.npy".
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    date_str = cur_date.strftime(time_format)
    # np.busday_count includes the start date and excludes the end date.
    # NOTE(review): the count starts at cur_date itself; confirm that this is
    # intended rather than starting on the following day.
    no_of_days = np.busday_count(date_str, fixing_date)
    rv = np.random.rand(Nsim, no_of_assets, no_of_days)
    np.save(u_path + "/" + date_str + ".npy", rv)

100%|██████████| 155/155 [00:03<00:00, 42.34it/s]


In [5]:
# Sanity check on the last array generated by the loop above: the mean along
# each axis should be close to 0.5 for U(0, 1) draws.
rv.mean(axis = 0), rv.mean(axis = 1), rv.mean(axis = 2)

(array([[0.51082183, 0.48848785, 0.51282844, 0.4833658 , 0.50273884,
         0.47764989, 0.50037588, 0.50139527, 0.50381809, 0.50008292,
         0.50713386, 0.49720572, 0.4926657 , 0.48824629, 0.50113494,
         0.49879122, 0.49337008, 0.49200338, 0.51017062, 0.51062934,
         0.49273556, 0.50563531, 0.49630743, 0.49563077, 0.50153717,
         0.50011788, 0.500759  , 0.51160147, 0.49879088, 0.50898044,
         0.49831762, 0.50399307, 0.49468916, 0.51102286, 0.5140459 ,
         0.50923436, 0.48062292, 0.50106082, 0.49324112, 0.49377722,
         0.49266372, 0.48270215, 0.49147915, 0.49773993, 0.49570287,
         0.51140189, 0.49715766, 0.50508881, 0.51301949, 0.49578678,
         0.50480259, 0.497035  , 0.50677925, 0.50158542, 0.49522733,
         0.48055035, 0.4757397 , 0.50540185, 0.49516562, 0.48910278,
         0.48658976, 0.49616343, 0.50697122, 0.50381461, 0.50556717,
         0.48752099, 0.49654008, 0.49313258, 0.50468246, 0.50169852,
         0.49369537, 0.49873678, 0

## Step Two: invnorm(X)
2. Generate Original normals from (1)
    - invnorm(X)
    - output: (Nsim, 3, days)
    - save in folder "mc" + Nsim

In [6]:
# Push every stored uniform array through the standard normal inverse CDF and
# write the result under the same file name in the "mc" folder.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    rv_path = f"{u_path}/{file_name}"
    u_array = np.load(rv_path)
    res = norm.ppf(u_array)
    np.save(f"{mc_path}/{file_name}", res)

100%|██████████| 155/155 [00:10<00:00, 14.28it/s]


In [7]:
# Sanity check on the last array produced by the loop above: the mean along
# each axis should be close to 0 for standard normal draws.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[ 2.68175424e-02, -3.97000002e-02,  4.03890514e-02,
         -4.98909517e-02,  1.42667931e-03, -7.62009089e-02,
          4.79732162e-03, -1.08065605e-03,  2.60102626e-02,
          4.87489027e-03,  3.35118126e-02, -1.16569133e-02,
         -2.26234033e-02, -4.00986996e-02, -1.22754909e-03,
          9.66800621e-03, -3.15407165e-02, -2.81596188e-02,
          4.52089319e-02,  2.80341315e-02, -2.32141870e-02,
          1.10159708e-02, -1.30607701e-02, -1.04146850e-02,
          9.51695413e-03, -8.68015096e-04,  4.09003953e-03,
          3.44535065e-02, -1.87923922e-04,  3.96512065e-02,
         -1.13319110e-02,  2.60114568e-02, -1.37798342e-02,
          4.35725155e-02,  4.13085384e-02,  2.84890702e-02,
         -6.77047238e-02,  5.84236225e-03, -1.92736093e-02,
         -1.58493363e-02, -2.94633934e-02, -5.78111407e-02,
         -2.84880232e-02, -8.65698102e-03,  5.57287158e-03,
          3.91276440e-02, -8.16246968e-03,  1.80872756e-02,
          4.35201461e-02, -1.20419980e-0

## Step Three
3. Generate AV random variables using (1)
    - invnorm(X) and invnorm(1 - X)
    - output: (2 * Nsim, 3, days)
    - save in folder "av" + Nsim

In [13]:
# Antithetic variates: pair each uniform draw u with its mirror 1 - u, map both
# through the normal inverse CDF, and stack the two halves along the first axis
# so the saved array has twice as many rows as the input.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    rv_path = f"{u_path}/{file_name}"
    u_array = np.load(rv_path)
    original_half = norm.ppf(u_array)
    antithetic_half = norm.ppf(1 - u_array)
    res = np.concatenate((original_half, antithetic_half), axis=0)
    np.save(f"{av_path}/{file_name}", res)

100%|██████████| 155/155 [00:15<00:00,  9.79it/s]


In [14]:
# Sanity check on the last antithetic array: because every draw is stacked with
# its mirrored counterpart, the mean over axis 0 should be zero up to
# floating-point rounding.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[ 2.79776202e-17, -3.05311332e-19, -3.85802501e-17,
          4.74481565e-17,  2.55351296e-18, -7.10542736e-18,
          2.22044605e-18, -1.66533454e-18, -3.33066907e-18,
         -6.10622664e-19, -9.54791801e-18,  9.10382880e-18,
          2.05946371e-17,  2.37587727e-17,  1.44328993e-18,
         -2.44249065e-18, -1.18793864e-17,  1.12687637e-17,
          1.92918598e-17, -8.96938773e-18, -7.82013343e-18,
          6.10622664e-18,  1.22124533e-17, -5.77315973e-18,
          4.08006962e-18,  9.43689571e-19,  1.38777878e-20,
         -1.34336986e-17,  1.36002321e-18,  2.44249065e-17,
         -1.08801856e-17, -7.38298311e-18, -1.40998324e-17,
         -2.25444663e-17,  4.73510120e-17, -2.30371278e-17,
         -8.49320614e-18,  1.19904087e-17,  1.25455202e-17,
         -2.24265051e-17, -2.10942375e-17,  2.04836148e-17,
          2.22460939e-17,  1.26287869e-17, -1.14352972e-17,
         -3.87467836e-17, -5.44009282e-18, -5.65103520e-17,
          3.33066907e-18,  1.83186799e-1

## Step Four: Stratified Sampling

4. Generate SS random variables using (1)
    - transform into (i + X)/Nsim, where i is the stratum (row) index
    - shuffle for each day
    - output: (Nsim, 3, days)
    - save in folder "ss" + Nsim

In [10]:
def allocate_strata(a): #strata on Nsim
    """Map draws into equal-width strata along the first axis.

    Row ``i`` of ``a`` is mapped to ``(i + a[i]) / n`` with ``n = a.shape[0]``,
    so for inputs in ``[0, 1)`` row ``i`` lands in ``[i / n, (i + 1) / n)``.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at least one dimension. It is not modified.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``a``.
    """
    n_strata = a.shape[0]
    # Shape (n, 1, ..., 1) broadcasts over the trailing axes, so no full-size
    # index array is materialised and 1-D input is handled as well.
    strata_index = np.arange(n_strata).reshape((n_strata,) + (1,) * (a.ndim - 1))
    return (strata_index + a) / n_strata
    
def shuffle(a):
    """Shuffle ``a`` in place along its first axis with ``np.random.shuffle``.

    The contents of ``a`` are reordered; nothing is returned.
    """
    np.random.shuffle(a)
    return None

In [11]:
# Stratified sampling: place each row of the stored uniforms in its own
# stratum, shuffle the strata independently for every combination of trailing
# indices, map through the normal inverse CDF and save in the "ss" folder.
for cur_date in tqdm(pd.date_range("2022-05-31", "2022-11-01")): # "2022-05-31", "2022-08-15", "2022-10-31"
    file_name = cur_date.strftime(time_format) + ".npy"
    rv_path = u_path + "/" + file_name
    u_array = np.load(rv_path)
    res = allocate_strata(u_array)
    # Shuffle each first-axis slice explicitly. Basic indexing returns a view,
    # so the in-place shuffle updates `res` directly, without relying on
    # np.apply_along_axis handing a writable view to a function whose return
    # value is discarded.
    for trailing in np.ndindex(res.shape[1:]):
        shuffle(res[(slice(None),) + trailing])
    res = norm.ppf(res)
    np.save(ss_path + "/" + file_name, res)

100%|██████████| 155/155 [00:10<00:00, 15.15it/s]


In [12]:
# Sanity check on the last stratified array produced by the loop above: the
# mean along each axis should be close to 0.
res.mean(axis = 0), res.mean(axis = 1), res.mean(axis = 2)

(array([[ 3.20010732e-05, -1.83610343e-04, -4.00142370e-04,
         -3.37160905e-04,  3.10751697e-04,  4.92857157e-04,
         -9.73390417e-05,  5.16265672e-04, -1.63529202e-04,
         -1.14209563e-04,  1.07751103e-03, -2.25418852e-04,
         -5.02721620e-04, -2.14350788e-04,  4.20951768e-04,
          5.50494363e-04,  4.55971651e-04,  9.81705093e-05,
         -6.91262579e-05,  9.65399934e-05, -2.48122156e-04,
         -1.48559213e-04,  5.05156233e-04, -1.51721613e-04,
          7.31867223e-06,  1.77782619e-05, -4.81033575e-04,
          6.91244080e-04,  9.78191092e-05,  6.34901741e-04,
          9.12130249e-04,  3.36418557e-04,  4.89956162e-04,
         -1.06337467e-04, -6.88360658e-04,  1.86164973e-04,
          2.71041238e-04,  5.45182172e-05, -4.29649797e-05,
          1.15038203e-04,  2.20768321e-04, -6.33072279e-04,
         -1.09511936e-04, -7.70264384e-05, -1.69140706e-04,
          4.90105236e-04,  4.88786346e-04,  7.59206341e-04,
          2.58853058e-04, -2.65622573e-0